From 04d5add8452b55b89c39625d9139511584fd4c21 Mon Sep 17 00:00:00 2001 From: Hyunwoo Park Date: Tue, 30 Apr 2024 06:58:12 +0000 Subject: [PATCH] fdp: support scheme placement id (index) selection Add a new placement id selection method called scheme. It allows users to assign a placement ID (index) depending on the offset range. The strategy of the scheme is specified in the file by user and is applicable using the option dp_scheme. Signed-off-by: Hyunwoo Park --- HOWTO.rst | 28 +++++++++++++++-- cconv.c | 2 ++ dataplacement.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++ dataplacement.h | 14 ++++++++- file.h | 1 + fio.1 | 32 +++++++++++++++++++- options.c | 52 ++++++++++++++++++++++++++++++++ server.h | 2 +- thread_options.h | 2 ++ 9 files changed, 206 insertions(+), 5 deletions(-) diff --git a/HOWTO.rst b/HOWTO.rst index 2f8ef6d42c..3b262faeae 100644 --- a/HOWTO.rst +++ b/HOWTO.rst @@ -2529,8 +2529,12 @@ with the caveat that when used on the command line, they must come after the Round robin over available placement IDs. This is the default. - The available placement ID (indices) are defined by the option - :option:`plids`. + **scheme** + Choose a placement ID (index) based on the scheme file defined by + the option :option:`dp_scheme`. + + The available placement ID (indices) are defined by the option :option:`fdp_pli` + or :option:`plids` except for the case of **scheme**. .. option:: plids=str, fdp_pli=str : [io_uring_cmd] [xnvme] @@ -2541,6 +2545,26 @@ with the caveat that when used on the command line, they must come after the identifiers only at indices 0, 2 and 5 specify ``plids=0,2,5``. For streams this should be a comma-separated list of Stream IDs. +.. option:: dp_scheme=str : [io_uring_cmd] [xnvme] + + Defines which placement ID (index) to be selected based on offset(LBA) range. + The file should contain one or more scheme entries in the following format: + + 0, 10737418240, 0 + 10737418240, 21474836480, 1 + 21474836480, 32212254720, 2 + ...
+ + Each line, a scheme entry, contains start offset, end offset, and placement ID + (index) separated by comma(,). If the write offset is within the range of a certain + scheme entry(start offset ≤ offset < end offset), the corresponding placement ID + (index) will be selected. If the write offset belongs to multiple scheme entries, + the first matched scheme entry will be applied. If the offset is not within any range + of scheme entry, dspec field will be set to 0, default RUH. (Caution: In case of + multiple devices in a job, all devices of the job will be affected by the scheme. If + this option is specified, the option :option:`plids` or :option:`fdp_pli` will be + ignored.) + .. option:: md_per_io_size=int : [io_uring_cmd] [xnvme] Size in bytes for separate metadata buffer per IO. Default: 0. diff --git a/cconv.c b/cconv.c index 16112248a6..9b344940cb 100644 --- a/cconv.c +++ b/cconv.c @@ -94,6 +94,7 @@ int convert_thread_options_to_cpu(struct thread_options *o, string_to_cpu(&o->ioscheduler, top->ioscheduler); string_to_cpu(&o->profile, top->profile); string_to_cpu(&o->cgroup, top->cgroup); + string_to_cpu(&o->dp_scheme_file, top->dp_scheme_file); o->allow_create = le32_to_cpu(top->allow_create); o->allow_mounted_write = le32_to_cpu(top->allow_mounted_write); @@ -398,6 +399,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top, string_to_net(top->ioscheduler, o->ioscheduler); string_to_net(top->profile, o->profile); string_to_net(top->cgroup, o->cgroup); + string_to_net(top->dp_scheme_file, o->dp_scheme_file); top->allow_create = cpu_to_le32(o->allow_create); top->allow_mounted_write = cpu_to_le32(o->allow_mounted_write); diff --git a/dataplacement.c b/dataplacement.c index 1d5b21edfd..8a4c8e6441 100644 --- a/dataplacement.c +++ b/dataplacement.c @@ -100,6 +100,56 @@ static int init_ruh_info(struct thread_data *td, struct fio_file *f) return ret; } +static int init_ruh_scheme(struct thread_data *td, struct fio_file *f) +{ + struct
fio_ruhs_scheme *ruh_scheme; + FILE *scheme_fp; + unsigned long long start, end; + uint16_t pli; + int ret = 0; + + if (td->o.dp_id_select != FIO_DP_SCHEME) + return 0; + + /* Get the scheme from the file */ + scheme_fp = fopen(td->o.dp_scheme_file, "r"); + + if (!scheme_fp) { + ret = -errno; /* capture before log_err() can clobber errno */ + log_err("fio: ruh scheme failed to open scheme file %s\n", + td->o.dp_scheme_file); + goto out; + } + + ruh_scheme = scalloc(1, sizeof(*ruh_scheme)); + if (!ruh_scheme) { + ret = -ENOMEM; + goto out_with_close_fp; + } + + for (int i = 0; + i < DP_MAX_SCHEME_ENTRIES && fscanf(scheme_fp, "%llu,%llu,%hu\n", &start, &end, &pli) == 3; + i++) { + + ruh_scheme->scheme_entries[i].start_offset = start; + ruh_scheme->scheme_entries[i].end_offset = end; + ruh_scheme->scheme_entries[i].pli = pli; + ruh_scheme->nr_schemes++; + } + + if (fscanf(scheme_fp, "%llu,%llu,%hu\n", &start, &end, &pli) == 3) + log_info("fio: too many scheme entries in %s. Only the first %d scheme entries are applied\n", + td->o.dp_scheme_file, + DP_MAX_SCHEME_ENTRIES); + + f->ruhs_scheme = ruh_scheme; + +out_with_close_fp: + fclose(scheme_fp); +out: + return ret; +} + int dp_init(struct thread_data *td) { struct fio_file *f; @@ -109,6 +159,10 @@ int dp_init(struct thread_data *td) ret = init_ruh_info(td, f); if (ret) break; + + ret = init_ruh_scheme(td, f); + if (ret) + break; } return ret; } @@ -119,6 +173,11 @@ void fdp_free_ruhs_info(struct fio_file *f) return; sfree(f->ruhs_info); f->ruhs_info = NULL; + + if (!f->ruhs_scheme) + return; + sfree(f->ruhs_scheme); + f->ruhs_scheme = NULL; } void dp_fill_dspec_data(struct thread_data *td, struct io_u *io_u) @@ -138,6 +197,25 @@ void dp_fill_dspec_data(struct thread_data *td, struct io_u *io_u) ruhs->pli_loc = 0; dspec = ruhs->plis[ruhs->pli_loc++]; + } else if (td->o.dp_id_select == FIO_DP_SCHEME) { + struct fio_ruhs_scheme *ruhs_scheme = f->ruhs_scheme; + unsigned long long offset = io_u->offset; + int i; + + for (i = 0; i < ruhs_scheme->nr_schemes; i++) { + if
(offset >= ruhs_scheme->scheme_entries[i].start_offset && + offset < ruhs_scheme->scheme_entries[i].end_offset) { + dspec = ruhs_scheme->scheme_entries[i].pli; + break; + } + } + + /* + * If the write offset is not affected by any scheme entry, + * 0(default RUH) will be assigned to dspec + */ + if (i == ruhs_scheme->nr_schemes) + dspec = 0; } else { ruhs->pli_loc = rand_between(&td->fdp_state, 0, ruhs->nr_ruhs - 1); dspec = ruhs->plis[ruhs->pli_loc]; diff --git a/dataplacement.h b/dataplacement.h index b5718c869e..71d19d6965 100644 --- a/dataplacement.h +++ b/dataplacement.h @@ -7,6 +7,7 @@ #define FDP_DIR_DTYPE 2 #define FDP_MAX_RUHS 128 #define FIO_MAX_DP_IDS 16 +#define DP_MAX_SCHEME_ENTRIES 32 /* * How fio chooses what placement identifier to use next. Choice of @@ -15,9 +16,9 @@ enum { FIO_DP_RANDOM = 0x1, FIO_DP_RR = 0x2, + FIO_DP_SCHEME = 0x3, }; - enum { FIO_DP_NONE = 0x0, FIO_DP_FDP = 0x1, @@ -30,6 +31,17 @@ struct fio_ruhs_info { uint16_t plis[]; }; +struct fio_ruhs_scheme_entry { + unsigned long long start_offset; /* first offset covered (inclusive) */ + unsigned long long end_offset; /* end of the range (exclusive) */ + uint16_t pli; /* value assigned to dspec for offsets in this range */ +}; + +struct fio_ruhs_scheme { + uint16_t nr_schemes; /* number of entries filled in scheme_entries[] */ + struct fio_ruhs_scheme_entry scheme_entries[DP_MAX_SCHEME_ENTRIES]; +}; + int dp_init(struct thread_data *td); void fdp_free_ruhs_info(struct fio_file *f); void dp_fill_dspec_data(struct thread_data *td, struct io_u *io_u); diff --git a/file.h b/file.h index deb36e0291..e38ed2f123 100644 --- a/file.h +++ b/file.h @@ -103,6 +103,7 @@ struct fio_file { uint64_t io_size; struct fio_ruhs_info *ruhs_info; + struct fio_ruhs_scheme *ruhs_scheme; /* * Zoned block device information. See also zonemode=zbd. diff --git a/fio.1 b/fio.1 index ee8124946a..1c8e3a5670 100644 --- a/fio.1 +++ b/fio.1 @@ -2294,9 +2294,14 @@ Choose a placement ID at random (uniform). .TP .B roundrobin Round robin over available placement IDs. This is the default. +.TP +.B scheme +Choose a placement ID (index) based on the scheme file defined by +the option \fBdp_scheme\fP.
.RE .P -The available placement ID (indices) are defined by the \fBplids\fR option. +The available placement ID (indices) are defined by \fBplids\fR or +\fBfdp_pli\fR option except for the case of \fBscheme\fP. .RE .TP .BI (io_uring_cmd,xnvme)plids=str, fdp_pli \fR=\fPstr @@ -2307,6 +2312,31 @@ jobs. If you want fio to use placement identifier only at indices 0, 2 and 5 specify, you would set `plids=0,2,5`. For streams this should be a comma-separated list of Stream IDs. .TP +.BI (io_uring_cmd,xnvme)\fR\fBdp_scheme\fP=str +Defines which placement ID (index) to be selected based on offset(LBA) range. +The file should contain one or more scheme entries in the following format: +.sp +.RS +.RS +0, 10737418240, 0 +.br +10737418240, 21474836480, 1 +.br +21474836480, 32212254720, 2 +.br +\&... +.RE +.sp +Each line, a scheme entry, contains start offset, end offset, and placement ID +(index) separated by comma(,). If the write offset is within the range of a certain +scheme entry(start offset ≤ offset < end offset), the corresponding placement ID +(index) will be selected. If the write offset belongs to multiple scheme entries, +the first matched scheme entry will be applied. If the offset is not within any range +of scheme entry, dspec field will be set to 0, default RUH. (Caution: In case of +multiple devices in a job, all devices of the job will be affected by the scheme. If +this option is specified, the option \fBplids\fP or \fBfdp_pli\fP will be ignored.) +.RE +.TP .BI (io_uring_cmd,xnvme)md_per_io_size \fR=\fPint Size in bytes for separate metadata buffer per IO. Default: 0. .TP diff --git a/options.c b/options.c index 61ea41cc4e..f5d221c776 100644 --- a/options.c +++ b/options.c @@ -287,6 +287,43 @@ static int str_fdp_pli_cb(void *data, const char *input) return 0; } +/* str_dp_scheme_cb() is a callback function for parsing the dp_scheme option + This function validates the dp_scheme filename.
*/ +static int str_dp_scheme_cb(void *data, const char *input) +{ + struct thread_data *td = cb_data_to_td(data); + struct stat sb; + char *filename, *alloc; + int ret = 0; + + if (parse_dryrun()) + return 0; + + filename = alloc = strdup(td->o.dp_scheme_file); + strip_blank_front(&filename); + strip_blank_end(filename); + + strcpy(td->o.dp_scheme_file, filename); + + if (lstat(filename, &sb) < 0) { + ret = errno; + log_err("fio: lstat() error related to %s\n", filename); + td_verror(td, ret, "lstat"); + goto out; + } + + if (!S_ISREG(sb.st_mode)) { + ret = EINVAL; /* S_ISREG() does not set errno; report an explicit error */ + log_err("fio: %s is not a file\n", filename); + td_verror(td, ret, "S_ISREG"); + goto out; + } + +out: + free(alloc); /* filename may have been advanced by strip_blank_front(); free the original allocation */ + return ret; +} + static int str_bssplit_cb(void *data, const char *input) { struct thread_data *td = cb_data_to_td(data); @@ -3760,6 +3797,10 @@ struct fio_option fio_options[FIO_MAX_OPTS] = { .oval = FIO_DP_RR, .help = "Round robin select Placement IDs", }, + { .ival = "scheme", + .oval = FIO_DP_SCHEME, + .help = "Use a scheme(based on LBA) to select Placement IDs", + }, }, }, { @@ -3774,6 +3815,17 @@ struct fio_option fio_options[FIO_MAX_OPTS] = { .category = FIO_OPT_C_IO, .group = FIO_OPT_G_INVALID, }, + { + .name = "dp_scheme", + .lname = "Data Placement Scheme", + .type = FIO_OPT_STR_STORE, + .cb = str_dp_scheme_cb, + .off1 = offsetof(struct thread_options, dp_scheme_file), + .maxlen = PATH_MAX, + .help = "scheme file that specifies offset-RUH mapping", + .category = FIO_OPT_C_IO, + .group = FIO_OPT_G_INVALID, + }, { .name = "lockmem", .lname = "Lock memory", diff --git a/server.h b/server.h index 83ce449ba0..e8659f7920 100644 --- a/server.h +++ b/server.h @@ -51,7 +51,7 @@ struct fio_net_cmd_reply { }; enum { - FIO_SERVER_VER = 104, + FIO_SERVER_VER = 105, FIO_SERVER_MAX_FRAGMENT_PDU = 1024, FIO_SERVER_MAX_CMD_MB = 2048, diff --git a/thread_options.h b/thread_options.h index a36b79094f..ccd0c064b9 100644 --- a/thread_options.h +++ b/thread_options.h @@ -396,6 +396,7 @@ struct thread_options {
unsigned int dp_id_select; unsigned int dp_ids[FIO_MAX_DP_IDS]; unsigned int dp_nr_ids; + char *dp_scheme_file; unsigned int log_entries; unsigned int log_prio; @@ -713,6 +714,7 @@ struct thread_options_pack { uint32_t dp_id_select; uint32_t dp_ids[FIO_MAX_DP_IDS]; uint32_t dp_nr_ids; + uint8_t dp_scheme_file[FIO_TOP_STR_MAX]; uint32_t num_range; /*