Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion lib/propolis/src/hw/nvme/admin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -463,7 +463,6 @@ impl NvmeCtrl {
}
}

#[allow(dead_code)]
pub(super) fn acmd_doorbell_buf_cfg(
&mut self,
cmd: &cmds::DoorbellBufCfgCmd,
Expand Down
17 changes: 4 additions & 13 deletions lib/propolis/src/hw/nvme/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -844,9 +844,8 @@ impl PciNvme {
nn: 1,
// bit 0 indicates volatile write cache is present
vwc: 1,
// bit 8 indicates Doorbell Buffer support. Theoretically supported,
// but disabled for Propolis issue #1008.
oacs: (0 << 8),
// bit 8 indicates Doorbell Buffer support
oacs: (1 << 8),
..Default::default()
};

Expand Down Expand Up @@ -1340,16 +1339,8 @@ impl PciNvme {
// this can detect it and stop posting async events.
cmds::Completion::generic_err(bits::STS_INVAL_OPC).dnr()
}
AdminCmd::DoorbellBufCfg(_cmd) => {
// XXX: issue #1008 suggests that Doorbell Buffer support
// can end up with guest disks in a state that *looks like*
// we've failed to notify after writing a completion. While
// we're debugging this, we hide Doorbell Buffer support
// from OACS. Instead, treat this the same as an
// `AdminCmd::Unknown`.

// state.acmd_doorbell_buf_cfg(&cmd)
cmds::Completion::generic_err(bits::STS_INTERNAL_ERR)
AdminCmd::DoorbellBufCfg(cmd) => {
state.acmd_doorbell_buf_cfg(&cmd)
}
AdminCmd::Unknown(_) => {
cmds::Completion::generic_err(bits::STS_INTERNAL_ERR)
Expand Down
38 changes: 28 additions & 10 deletions lib/propolis/src/hw/nvme/queue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -343,18 +343,36 @@ impl QueueGuard<'_, CompQueueState> {
/// Write update to the EventIdx in Doorbell Buffer page, if possible
fn db_buf_write(&mut self, devq_id: u64, mem: &MemCtx) {
if let Some(db_buf) = self.state.db_buf {
probes::nvme_cq_dbbuf_write!(|| (devq_id, self.state.tail));
// Keep EventIdx populated with the position of the CQ tail. We are
// not especially concerned with receiving timely (doorbell) updates
// from the guest about where the head pointer sits. We keep our
// own tally of how many entries are in the CQ are available for
// completions to land.
// Update EventIdx as far as we're willing to forego doorbell
// updates about the CQ head. We are not especially concerned with
// receiving timely doorbell updates from the guest about the head.
// We keep our own tally of how many entries in the CQ are available
// for completions to land. In the typical case, we will read the
// shadow doorbell from db_buf JIT to update the available CQ space.
//
// When checking for available space before issuing a Permit, we can
// perform our own JIT read from the db_buf to stay updated on the
// true space available.
// However, simply keeping EventIdx matching the queue tail means we
// opt out of *any* notification that a completion is posted. Then,
// if an I/O was submitted, not processed immediately due to
// insufficient CQ space, but the guest submits no further I/Os on
// that queue, we would never notice that there is space in the CQ.
// The I/O would go unfulfilled forever.
//
// For now, leave EventIdx one before the actual CQ tail, so that
// making the CQ empty requires a doorbell notify. This is
// excessive; there are many cases where we don't care if the CQ is
// to be emptied.
if self.avail_occupied() <= 1 {
// The queue was empty and just became non-empty. So `tail` has
// not advanced far enough that we can actually advance
// EventIdx.
return;
}

let next_evtidx = self.idx_sub(self.state.tail, 1);

probes::nvme_cq_dbbuf_write!(|| (devq_id, next_evtidx));
fence(Ordering::Release);
mem.write(db_buf.eventidx, &self.state.tail);
mem.write(db_buf.eventidx, &next_evtidx);
}
}

Expand Down
Loading