hw/block/nvme: add the dataset management command

author Klaus Jensen <k.jensen@samsung.com>

Wed, 21 Oct 2020 12:03:19 +0000 (14:03 +0200)

committer Klaus Jensen <k.jensen@samsung.com>

Mon, 8 Feb 2021 17:55:48 +0000 (18:55 +0100)
author Klaus Jensen <k.jensen@samsung.com>
Wed, 21 Oct 2020 12:03:19 +0000 (14:03 +0200)
committer Klaus Jensen <k.jensen@samsung.com>
Mon, 8 Feb 2021 17:55:48 +0000 (18:55 +0100)
diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c

index 53ded460348e988e1b92f73b41615e846b5c566e..37f95951a6b88ddf2719b61dd67a7e8b50870c58 100644 (file)
--- a/hw/block/nvme-ns.c
+++ b/hw/block/nvme-ns.c
@@ -28,10 +28,14 @@
  #include "nvme.h"
  #include "nvme-ns.h"
  
-static void nvme_ns_init(NvmeNamespace *ns)
+#define MIN_DISCARD_GRANULARITY (4 * KiB)
+
+static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
  {
+    BlockDriverInfo bdi;
      NvmeIdNs *id_ns = &ns->id_ns;
      int lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
+    int npdg;
  
      ns->id_ns.dlfeat = 0x9;
  
@@ -43,8 +47,19 @@ static void nvme_ns_init(NvmeNamespace *ns)
      id_ns->ncap = id_ns->nsze;
      id_ns->nuse = id_ns->ncap;
  
-    /* support DULBE */
-    id_ns->nsfeat |= 0x4;
+    /* support DULBE and I/O optimization fields */
+    id_ns->nsfeat |= (0x4 | 0x10);
+
+    npdg = ns->blkconf.discard_granularity / ns->blkconf.logical_block_size;
+
+    if (bdrv_get_info(blk_bs(ns->blkconf.blk), &bdi) >= 0 &&
+        bdi.cluster_size > ns->blkconf.discard_granularity) {
+        npdg = bdi.cluster_size / ns->blkconf.logical_block_size;
+    }
+
+    id_ns->npda = id_ns->npdg = npdg - 1;
+
+    return 0;
  }
  
  static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
@@ -60,6 +75,11 @@ static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
          return -1;
      }
  
+    if (ns->blkconf.discard_granularity == -1) {
+        ns->blkconf.discard_granularity =
+            MAX(ns->blkconf.logical_block_size, MIN_DISCARD_GRANULARITY);
+    }
+
      ns->size = blk_getlength(ns->blkconf.blk);
      if (ns->size < 0) {
          error_setg_errno(errp, -ns->size, "could not get blockdev size");
@@ -93,7 +113,9 @@ int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
          return -1;
      }
  
-    nvme_ns_init(ns);
+    if (nvme_ns_init(ns, errp)) {
+        return -1;
+    }
  
      if (nvme_register_namespace(n, ns, errp)) {
          return -1;
diff --git a/hw/block/nvme.c b/hw/block/nvme.c

index 6e6bdb338ad780c10ac996ec2f81d545dd649187..f019d43788ac58253c6c3373c0d75eb2171b3ccb 100644 (file)
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -242,6 +242,7 @@ static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq)
  static void nvme_req_clear(NvmeRequest *req)
  {
      req->ns = NULL;
+    req->opaque = NULL;
      memset(&req->cqe, 0x0, sizeof(req->cqe));
      req->status = NVME_SUCCESS;
  }
@@ -978,6 +979,99 @@ static void nvme_rw_cb(void *opaque, int ret)
      nvme_enqueue_req_completion(nvme_cq(req), req);
  }
  
+static void nvme_aio_discard_cb(void *opaque, int ret)
+{
+    NvmeRequest *req = opaque;
+    uintptr_t *discards = (uintptr_t *)&req->opaque;
+
+    trace_pci_nvme_aio_discard_cb(nvme_cid(req));
+
+    if (ret) {
+        nvme_aio_err(req, ret);
+    }
+
+    (*discards)--;
+
+    if (*discards) {
+        return;
+    }
+
+    nvme_enqueue_req_completion(nvme_cq(req), req);
+}
+
+static uint16_t nvme_dsm(NvmeCtrl *n, NvmeRequest *req)
+{
+    NvmeNamespace *ns = req->ns;
+    NvmeDsmCmd *dsm = (NvmeDsmCmd *) &req->cmd;
+
+    uint32_t attr = le32_to_cpu(dsm->attributes);
+    uint32_t nr = (le32_to_cpu(dsm->nr) & 0xff) + 1;
+
+    uint16_t status = NVME_SUCCESS;
+
+    trace_pci_nvme_dsm(nvme_cid(req), nvme_nsid(ns), nr, attr);
+
+    if (attr & NVME_DSMGMT_AD) {
+        int64_t offset;
+        size_t len;
+        NvmeDsmRange range[nr];
+        uintptr_t *discards = (uintptr_t *)&req->opaque;
+
+        status = nvme_dma(n, (uint8_t *)range, sizeof(range),
+                          DMA_DIRECTION_TO_DEVICE, req);
+        if (status) {
+            return status;
+        }
+
+        /*
+         * AIO callbacks may be called immediately, so initialize discards to 1
+         * to make sure the callback does not complete the request before
+         * all discards have been issued.
+         */
+        *discards = 1;
+
+        for (int i = 0; i < nr; i++) {
+            uint64_t slba = le64_to_cpu(range[i].slba);
+            uint32_t nlb = le32_to_cpu(range[i].nlb);
+
+            if (nvme_check_bounds(ns, slba, nlb)) {
+                trace_pci_nvme_err_invalid_lba_range(slba, nlb,
+                                                     ns->id_ns.nsze);
+                continue;
+            }
+
+            trace_pci_nvme_dsm_deallocate(nvme_cid(req), nvme_nsid(ns), slba,
+                                          nlb);
+
+            offset = nvme_l2b(ns, slba);
+            len = nvme_l2b(ns, nlb);
+
+            while (len) {
+                size_t bytes = MIN(BDRV_REQUEST_MAX_BYTES, len);
+
+                (*discards)++;
+
+                blk_aio_pdiscard(ns->blkconf.blk, offset, bytes,
+                                 nvme_aio_discard_cb, req);
+
+                offset += bytes;
+                len -= bytes;
+            }
+        }
+
+        /* account for the 1-initialization */
+        (*discards)--;
+
+        if (*discards) {
+            status = NVME_NO_COMPLETE;
+        } else {
+            status = req->status;
+        }
+    }
+
+    return status;
+}
+
  static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req)
  {
      block_acct_start(blk_get_stats(req->ns->blkconf.blk), &req->acct, 0,
@@ -1107,6 +1201,8 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
      case NVME_CMD_WRITE:
      case NVME_CMD_READ:
          return nvme_rw(n, req);
+    case NVME_CMD_DSM:
+        return nvme_dsm(n, req);
      default:
          trace_pci_nvme_err_invalid_opc(req->cmd.opcode);
          return NVME_INVALID_OPCODE | NVME_DNR;
@@ -2829,7 +2925,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
      id->cqes = (0x4 << 4) | 0x4;
      id->nn = cpu_to_le32(n->num_namespaces);
      id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROES | NVME_ONCS_TIMESTAMP |
-                           NVME_ONCS_FEATURES);
+                           NVME_ONCS_FEATURES | NVME_ONCS_DSM);
  
      id->vwc = 0x1;
      id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN |
diff --git a/hw/block/nvme.h b/hw/block/nvme.h

index e080a2318a50689eb8c6c94c15f32e56dcb29ebd..574333caa3f9a848b8ac44aaf87e1218f903d7c1 100644 (file)
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -28,6 +28,7 @@ typedef struct NvmeRequest {
      struct NvmeNamespace    *ns;
      BlockAIOCB              *aiocb;
      uint16_t                status;
+    void                    *opaque;
      NvmeCqe                 cqe;
      NvmeCmd                 cmd;
      BlockAcctCookie         acct;
@@ -60,6 +61,7 @@ static inline const char *nvme_io_opc_str(uint8_t opc)
      case NVME_CMD_WRITE:            return "NVME_NVM_CMD_WRITE";
      case NVME_CMD_READ:             return "NVME_NVM_CMD_READ";
      case NVME_CMD_WRITE_ZEROES:     return "NVME_NVM_CMD_WRITE_ZEROES";
+    case NVME_CMD_DSM:              return "NVME_NVM_CMD_DSM";
      default:                        return "NVME_NVM_CMD_UNKNOWN";
      }
  }
author	Klaus Jensen <k.jensen@samsung.com>
	Wed, 21 Oct 2020 12:03:19 +0000 (14:03 +0200)
committer	Klaus Jensen <k.jensen@samsung.com>
	Mon, 8 Feb 2021 17:55:48 +0000 (18:55 +0100)
hw/block/nvme-ns.c		patch \| blob \| history
hw/block/nvme.c		patch \| blob \| history
hw/block/nvme.h		patch \| blob \| history