From e5f7aa4c8250fd517764f073637f956b78c323c2 Mon Sep 17 00:00:00 2001 From: Dmitry Savitskiy Date: Mon, 19 Aug 2024 23:23:06 +0300 Subject: [PATCH] lvs: additional support for lvs live grow and bdev resize Added helpers for support live lvs grow in client apps. Added experimental support for malloc bdev resize, which is needed for live grow tests. Signed-off-by: Dmitry Savitskiy --- include/spdk/bdev_module.h | 26 ++++++++ include/spdk/blob.h | 18 ++++++ include/spdk/lvol.h | 56 ++++++++++++++--- lib/bdev/bdev.c | 13 ++++ lib/blob/blobstore.c | 18 ++++++ lib/lvol/lvol.c | 49 +++++++++++---- lib/lvol/spdk_lvol.map | 3 +- module/bdev/lvol/vbdev_lvol.c | 12 ++-- module/bdev/lvol/vbdev_lvol.h | 6 +- module/bdev/lvol/vbdev_lvol_rpc.c | 12 +++- module/bdev/malloc/bdev_malloc.c | 101 ++++++++++++++++++++++++++++++ module/bdev/malloc/bdev_malloc.h | 2 + 12 files changed, 281 insertions(+), 35 deletions(-) diff --git a/include/spdk/bdev_module.h b/include/spdk/bdev_module.h index 1b3c6ef82eb..789cad0e23e 100644 --- a/include/spdk/bdev_module.h +++ b/include/spdk/bdev_module.h @@ -1349,6 +1349,32 @@ uint64_t spdk_bdev_io_get_submit_tsc(struct spdk_bdev_io *bdev_io); */ int spdk_bdev_notify_blockcnt_change(struct spdk_bdev *bdev, uint64_t size); +/** + * spdk_bdev_resize() callback. + * + * \param bdev Block device being resized. + * \param cb_arg Argument passed to the resize function. + * \param status 0 for successful resize, negated errno on failure. + */ +typedef void (*spdk_resize_cb)(struct spdk_bdev *bdev, void *cb_arg, int status); + +/** + * Resizes the bdev. + * + * Change number of blocks for provided block device. + * It can only be called on a registered bdev. + * + * \param bdev Block device to change. + * \param size New size of bdev, in blocks. + * \param resize_cb Called with the resize status; on success, after the new size is applied. + * \param cb_arg Argument to pass to callback function. + * \return 0 on success, negated errno on failure. 
 + */ +int spdk_bdev_resize(struct spdk_bdev *bdev, + uint64_t size, + spdk_resize_cb resize_cb, + void *cb_arg); + /** * Translates NVMe status codes to SCSI status information. * diff --git a/include/spdk/blob.h b/include/spdk/blob.h index 2b6f49ac046..3049916c09b 100644 --- a/include/spdk/blob.h +++ b/include/spdk/blob.h @@ -476,6 +476,24 @@ uint64_t spdk_bs_free_cluster_count(struct spdk_blob_store *bs); */ uint64_t spdk_bs_total_data_cluster_count(struct spdk_blob_store *bs); +/** + * Get the blobstore metadata size in pages. + * + * \param bs blobstore to query. + * + * \return the blobstore metadata size in pages. + */ +uint64_t spdk_bs_get_md_len(struct spdk_blob_store *bs); + +/** + * Get the number of used metadata pages. + * + * \param bs blobstore to query. + * + * \return the number of used metadata pages. + */ +uint64_t spdk_bs_get_used_md(struct spdk_blob_store *bs); + /** * Get the blob id. * diff --git a/include/spdk/lvol.h b/include/spdk/lvol.h index 0b927ba8f77..7faa72c6843 100644 --- a/include/spdk/lvol.h +++ b/include/spdk/lvol.h @@ -58,7 +58,7 @@ struct spdk_lvs_opts { uint32_t num_md_pages_per_cluster_ratio; /** - * The size of spdk_lvol_opts according to the caller of this library is used for ABI + * The size of spdk_lvs_opts according to the caller of this library is used for ABI * compatibility. The library uses this field to know how many fields in this * structure are valid. And the library will populate any remaining fields with default * values. After that, new added fields should be put in the end of the struct. @@ -190,6 +190,45 @@ int spdk_lvs_unload(struct spdk_lvol_store *lvol_store, int spdk_lvs_destroy(struct spdk_lvol_store *lvol_store, spdk_lvs_op_complete cb_fn, void *cb_arg); +/** + * Parameters for lvol creation. + */ +struct spdk_lvol_opts { + /** + * The size of spdk_lvol_opts according to the caller of this library is used for ABI + * compatibility. 
The library uses this field to know how many fields in this + * structure are valid. And the library will populate any remaining fields with default + * values. After that, new added fields should be put in the end of the struct. + */ + uint32_t opts_size; + + /** Name of lvol. */ + const char *name; + + /** Optional uuid of the new volume or null if it should be autogenerated. */ + const char *uuid; + + /** Size of lvol in bytes. */ + uint64_t size; + + /** Enables thin provisioning (disabled by default). */ + bool thin_provision; + + /** Use extent table (enabled by default). */ + bool use_extent_table; + + /** Changes default data clusters clear method. */ + enum lvol_clear_method clear_method; +} __attribute__((packed)); +SPDK_STATIC_ASSERT(sizeof(struct spdk_lvol_opts) == 34, "Incorrect size"); + +/** + * Initialize an spdk_lvol_opts structure to the defaults. + * + * \param opts Pointer to the spdk_lvol_opts structure to initialize. + */ +void spdk_lvol_opts_init(struct spdk_lvol_opts *opts); + /** * Create lvol on given lvolstore with specified size. * @@ -208,22 +247,19 @@ int spdk_lvol_create(struct spdk_lvol_store *lvs, const char *name, uint64_t sz, spdk_lvol_op_with_handle_complete cb_fn, void *cb_arg); /** - * Same as above but allows to specify uuid of the new lvol. + * Create lvol on given lvolstore with specified options. * * \param lvs Handle to lvolstore. - * \param name Name of lvol. - * \param sz size of lvol in bytes. - * \param thin_provisioned Enables thin provisioning. - * \param clear_method Changes default data clusters clear method - * \param uuid uuid of the new volume or null if it should be autogenerated. + * \param lvol_opts Options for lvol. * \param cb_fn Completion callback. * \param cb_arg Completion callback custom arguments. * * \return 0 on success, negative errno on failure. 
 */ -int spdk_lvol_create_with_uuid(struct spdk_lvol_store *lvs, const char *name, uint64_t sz, - bool thin_provisioned, enum lvol_clear_method clear_method, - const char *uuid, spdk_lvol_op_with_handle_complete cb_fn, void *cb_arg); +int spdk_lvol_create_with_opts(struct spdk_lvol_store *lvs, + const struct spdk_lvol_opts *lvol_opts, + spdk_lvol_op_with_handle_complete cb_fn, + void *cb_arg); /** * Create snapshot of given lvol. diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c index 979337ca8fa..9546f0cdc66 100644 --- a/lib/bdev/bdev.c +++ b/lib/bdev/bdev.c @@ -5139,6 +5139,13 @@ _resize_notify(void *ctx) int spdk_bdev_notify_blockcnt_change(struct spdk_bdev *bdev, uint64_t size) +{ + return spdk_bdev_resize(bdev, size, NULL, NULL); +} + +int +spdk_bdev_resize(struct spdk_bdev *bdev, uint64_t size, + spdk_resize_cb resize_cb, void *cb_arg) { struct spdk_bdev_desc *desc; int ret; @@ -5153,8 +5160,14 @@ spdk_bdev_notify_blockcnt_change(struct spdk_bdev *bdev, uint64_t size) if (!TAILQ_EMPTY(&bdev->internal.open_descs) && bdev->blockcnt > size) { ret = -EBUSY; + if (resize_cb != NULL) { + resize_cb(bdev, cb_arg, ret); + } } else { bdev->blockcnt = size; + if (resize_cb != NULL) { + resize_cb(bdev, cb_arg, 0); + } TAILQ_FOREACH(desc, &bdev->internal.open_descs, link) { event_notify(desc, _resize_notify); } diff --git a/lib/blob/blobstore.c b/lib/blob/blobstore.c index 8190e18c301..803fdcef988 100644 --- a/lib/blob/blobstore.c +++ b/lib/blob/blobstore.c @@ -6167,6 +6167,24 @@ spdk_bs_total_data_cluster_count(struct spdk_blob_store *bs) return bs->total_data_clusters; } +uint64_t +spdk_bs_get_md_len(struct spdk_blob_store *bs) +{ + return bs->md_len; +} + +uint64_t +spdk_bs_get_used_md(struct spdk_blob_store *bs) +{ + uint64_t res = 0; + + spdk_spin_lock(&bs->used_lock); + res = spdk_bit_array_count_set(bs->used_md_pages); + spdk_spin_unlock(&bs->used_lock); + + return res; +} + static int bs_register_md_thread(struct spdk_blob_store *bs) { diff --git a/lib/lvol/lvol.c 
b/lib/lvol/lvol.c index 1142037e9fe..4747eca84bf 100644 --- a/lib/lvol/lvol.c +++ b/lib/lvol/lvol.c @@ -1266,20 +1266,40 @@ lvs_verify_lvol_name(struct spdk_lvol_store *lvs, const char *name) return 0; } +void +spdk_lvol_opts_init(struct spdk_lvol_opts *o) +{ + memset(o, 0, sizeof(*o)); + o->name = NULL; + o->uuid = NULL; + o->size = 0; + o->thin_provision = false; + o->use_extent_table = true; + o->clear_method = LVOL_CLEAR_WITH_NONE; + o->opts_size = sizeof(*o); +} + int spdk_lvol_create(struct spdk_lvol_store *lvs, const char *name, uint64_t sz, bool thin_provision, enum lvol_clear_method clear_method, spdk_lvol_op_with_handle_complete cb_fn, void *cb_arg) { - return spdk_lvol_create_with_uuid(lvs, name, sz, thin_provision, - clear_method, NULL, cb_fn, cb_arg); + struct spdk_lvol_opts opts; + + spdk_lvol_opts_init(&opts); + opts.name = name; + opts.size = sz; + opts.thin_provision = thin_provision; + opts.clear_method = clear_method; + + return spdk_lvol_create_with_opts(lvs, &opts, cb_fn, cb_arg); } int -spdk_lvol_create_with_uuid(struct spdk_lvol_store *lvs, const char *name, uint64_t sz, - bool thin_provision, enum lvol_clear_method clear_method, - const char *uuid, spdk_lvol_op_with_handle_complete cb_fn, +spdk_lvol_create_with_opts(struct spdk_lvol_store *lvs, + const struct spdk_lvol_opts *lvol_opts, + spdk_lvol_op_with_handle_complete cb_fn, void *cb_arg) { struct spdk_lvol_with_handle_req *req; @@ -1290,12 +1310,14 @@ spdk_lvol_create_with_uuid(struct spdk_lvol_store *lvs, const char *name, uint64 char *xattr_names[] = {LVOL_NAME, LVOL_UUID}; int rc; + assert(lvol_opts); + if (lvs == NULL) { SPDK_ERRLOG("lvol store does not exist\n"); return -EINVAL; } - rc = lvs_verify_lvol_name(lvs, name); + rc = lvs_verify_lvol_name(lvs, lvol_opts->name); if (rc < 0) { return rc; } @@ -1310,15 +1332,19 @@ spdk_lvol_create_with_uuid(struct spdk_lvol_store *lvs, const char *name, uint64 req->cb_fn = cb_fn; req->cb_arg = cb_arg; - if (uuid) { - if 
(spdk_uuid_parse(&parsed_uuid, uuid) != 0) { + if (lvol_opts->uuid) { + if (spdk_uuid_parse(&parsed_uuid, lvol_opts->uuid) != 0) { free(req); SPDK_ERRLOG("Invalid lvol uuid provided\n"); return -EINVAL; } } - lvol = lvol_alloc(lvs, name, uuid ? &parsed_uuid : NULL, thin_provision, clear_method); + lvol = lvol_alloc(lvs, + lvol_opts->name, + lvol_opts->uuid ? &parsed_uuid : NULL, + lvol_opts->thin_provision, + lvol_opts->clear_method); if (!lvol) { free(req); SPDK_ERRLOG("Cannot alloc memory for lvol base pointer\n"); @@ -1327,8 +1353,9 @@ spdk_lvol_create_with_uuid(struct spdk_lvol_store *lvs, const char *name, uint64 req->lvol = lvol; spdk_blob_opts_init(&opts, sizeof(opts)); - opts.thin_provision = thin_provision; - opts.num_clusters = spdk_divide_round_up(sz, spdk_bs_get_cluster_size(bs)); + opts.thin_provision = lvol_opts->thin_provision; + opts.use_extent_table = lvol_opts->use_extent_table; + opts.num_clusters = spdk_divide_round_up(lvol_opts->size, spdk_bs_get_cluster_size(bs)); opts.clear_method = lvol->clear_method; opts.xattrs.count = SPDK_COUNTOF(xattr_names); opts.xattrs.names = xattr_names; diff --git a/lib/lvol/spdk_lvol.map b/lib/lvol/spdk_lvol.map index 4bb1f031a0d..26607c495fa 100644 --- a/lib/lvol/spdk_lvol.map +++ b/lib/lvol/spdk_lvol.map @@ -9,12 +9,13 @@ spdk_lvs_destroy; spdk_lvs_grow; spdk_lvs_grow_live; + spdk_lvol_opts_init; spdk_lvol_create; spdk_lvol_create_snapshot; spdk_lvol_create_snapshot_ext; spdk_lvol_create_clone; spdk_lvol_create_clone_ext; - spdk_lvol_create_with_uuid; + spdk_lvol_create_with_opts; spdk_lvol_create_snapshot_ext; spdk_lvol_rename; spdk_lvol_deletable; diff --git a/module/bdev/lvol/vbdev_lvol.c b/module/bdev/lvol/vbdev_lvol.c index 01f8bfd28c3..9fea367f58e 100644 --- a/module/bdev/lvol/vbdev_lvol.c +++ b/module/bdev/lvol/vbdev_lvol.c @@ -2291,11 +2291,10 @@ vbdev_lvol_set_external_parent(struct spdk_lvol *lvol, const char *esnap_name, SPDK_LOG_REGISTER_COMPONENT(vbdev_lvol) -int -vbdev_lvol_create_with_uuid(struct 
spdk_lvol_store *lvs, const char *name, uint64_t sz, - bool thin_provision, enum lvol_clear_method clear_method, - const char *uuid, spdk_lvol_op_with_handle_complete cb_fn, - void *cb_arg) +int vbdev_lvol_create_with_opts(struct spdk_lvol_store *lvs, + const struct spdk_lvol_opts *lvol_opts, + spdk_lvol_op_with_handle_complete cb_fn, + void *cb_arg) { struct spdk_lvol_with_handle_req *req; int rc; @@ -2307,8 +2306,7 @@ vbdev_lvol_create_with_uuid(struct spdk_lvol_store *lvs, const char *name, uint6 req->cb_fn = cb_fn; req->cb_arg = cb_arg; - rc = spdk_lvol_create_with_uuid(lvs, name, sz, thin_provision, clear_method, - uuid, _vbdev_lvol_create_cb, req); + rc = spdk_lvol_create_with_opts(lvs, lvol_opts, _vbdev_lvol_create_cb, req); if (rc != 0) { free(req); } diff --git a/module/bdev/lvol/vbdev_lvol.h b/module/bdev/lvol/vbdev_lvol.h index 1deed2a2e3f..9433f6ce17e 100644 --- a/module/bdev/lvol/vbdev_lvol.h +++ b/module/bdev/lvol/vbdev_lvol.h @@ -52,9 +52,9 @@ int vbdev_lvol_create(struct spdk_lvol_store *lvs, const char *name, uint64_t sz spdk_lvol_op_with_handle_complete cb_fn, void *cb_arg); -int vbdev_lvol_create_with_uuid(struct spdk_lvol_store *lvs, const char *name, uint64_t sz, - bool thin_provisioned, enum lvol_clear_method clear_method, - const char *uuid, spdk_lvol_op_with_handle_complete cb_fn, +int vbdev_lvol_create_with_opts(struct spdk_lvol_store *lvs, + const struct spdk_lvol_opts *lvol_opts, + spdk_lvol_op_with_handle_complete cb_fn, void *cb_arg); void vbdev_lvol_create_snapshot(struct spdk_lvol *lvol, const char *snapshot_name, diff --git a/module/bdev/lvol/vbdev_lvol_rpc.c b/module/bdev/lvol/vbdev_lvol_rpc.c index 2f01699a773..85d1a6b5bc5 100644 --- a/module/bdev/lvol/vbdev_lvol_rpc.c +++ b/module/bdev/lvol/vbdev_lvol_rpc.c @@ -1686,6 +1686,7 @@ spdk_rpc_construct_ms_lvol_bdev(struct spdk_jsonrpc_request *request, enum lvol_clear_method clear_method; int rc; struct spdk_lvol_store *lvs = NULL; + struct spdk_lvol_opts lvol_opts; 
SPDK_INFOLOG(lvol_rpc, "Creating blob\n"); @@ -1725,9 +1726,14 @@ spdk_rpc_construct_ms_lvol_bdev(struct spdk_jsonrpc_request *request, clear_method = LVOL_CLEAR_WITH_DEFAULT; } - rc = vbdev_lvol_create_with_uuid(lvs, req.lvol_name, req.size, - req.thin_provision, clear_method, req.uuid, - rpc_bdev_lvol_create_cb, request); + spdk_lvol_opts_init(&lvol_opts); + lvol_opts.name = req.lvol_name; + lvol_opts.size = req.size; + lvol_opts.thin_provision = req.thin_provision; + lvol_opts.clear_method = clear_method; + lvol_opts.uuid = req.uuid; + + rc = vbdev_lvol_create_with_opts(lvs, &lvol_opts, rpc_bdev_lvol_create_cb, request); if (rc < 0) { goto invalid; } diff --git a/module/bdev/malloc/bdev_malloc.c b/module/bdev/malloc/bdev_malloc.c index d4b4246b6fd..9fa51886124 100644 --- a/module/bdev/malloc/bdev_malloc.c +++ b/module/bdev/malloc/bdev_malloc.c @@ -865,6 +865,107 @@ delete_malloc_disk(const char *name, spdk_delete_malloc_complete cb_fn, void *cb } } +struct malloc_disk_resize_ctx { + void *new_malloc_buf; +}; + +static void +malloc_disk_resize_cb(struct spdk_bdev *bdev, void *cb_arg, int status) +{ + struct malloc_disk_resize_ctx *ctx = cb_arg; + struct malloc_disk *mdisk = bdev->ctxt; + void *old_malloc_buf = mdisk->malloc_buf; + + if (status != 0) { + /* The resize caller will free new_malloc_buf. */ + return; + } + + /* TODO: we've having a very bad race with I/O here. 
*/ + memcpy(ctx->new_malloc_buf, old_malloc_buf, bdev->blockcnt * bdev->blocklen); + mdisk->malloc_buf = ctx->new_malloc_buf; + spdk_free(old_malloc_buf); + ctx->new_malloc_buf = NULL; +} + +static void +dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx) +{ +} + +int +resize_malloc_disk(const char *bdev_name, uint64_t new_size_in_mb) +{ + struct spdk_bdev_desc *desc; + struct spdk_bdev *bdev; + struct malloc_disk *mdisk; + struct malloc_disk_resize_ctx *ctx; + + uint64_t current_size_in_mb; + uint64_t new_size_in_byte; + int rc = 0; + + ctx = calloc(1, sizeof(*ctx)); + if (ctx == NULL) { + return -ENOMEM; + } + + rc = spdk_bdev_open_ext(bdev_name, false, dummy_bdev_event_cb, NULL, &desc); + if (rc != 0) { + SPDK_ERRLOG("failed to open bdev; %s.\n", bdev_name); + return rc; + } + + bdev = spdk_bdev_desc_get_bdev(desc); + + if (bdev->module != &malloc_if) { + rc = -EINVAL; + goto exit; + } + + mdisk = bdev->ctxt; + + /* TODO: add support for metadata buffer. */ + if (mdisk->malloc_md_buf != NULL) { + SPDK_ERRLOG("Cannot resize malloc disk with metadata buffer.\n"); + rc = -EINVAL; + goto exit; + } + + current_size_in_mb = bdev->blocklen * bdev->blockcnt / (1024 * 1024); + if (new_size_in_mb < current_size_in_mb) { + SPDK_ERRLOG("The new bdev size must not be smaller than current bdev size.\n"); + rc = -EINVAL; + goto exit; + } + + new_size_in_byte = new_size_in_mb * 1024 * 1024; + + if (new_size_in_mb == current_size_in_mb) { + SPDK_ERRLOG("The bdev size did not change.\n"); + rc = 0; + goto exit; + } + + /* Allocate the new data buffer. Resize callback will copy the data + * and update mdisk->malloc_buf. 
+ */ + ctx->new_malloc_buf = spdk_zmalloc(new_size_in_byte, 2 * 1024 * 1024, NULL, + SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + + rc = spdk_bdev_resize(bdev, new_size_in_byte / bdev->blocklen, + malloc_disk_resize_cb, ctx); + if (rc != 0) { + SPDK_ERRLOG("failed to update or notify block cnt change.\n"); + } + +exit: + spdk_bdev_close(desc); + spdk_free(ctx->new_malloc_buf); + free(ctx); + return rc; +} + static int malloc_completion_poller(void *ctx) { diff --git a/module/bdev/malloc/bdev_malloc.h b/module/bdev/malloc/bdev_malloc.h index 04446341974..e87a53c7a87 100644 --- a/module/bdev/malloc/bdev_malloc.h +++ b/module/bdev/malloc/bdev_malloc.h @@ -30,4 +30,6 @@ int create_malloc_disk(struct spdk_bdev **bdev, const struct malloc_bdev_opts *o void delete_malloc_disk(const char *name, spdk_delete_malloc_complete cb_fn, void *cb_arg); +int resize_malloc_disk(const char *name, uint64_t new_size_in_mb); + #endif /* SPDK_BDEV_MALLOC_H */