Skip to content

Commit

Permalink
Merge branch 'enable-dataplacement-scheme' of https://github.com/park…
Browse files Browse the repository at this point in the history
…vibes/fio

* 'enable-dataplacement-scheme' of https://github.com/parkvibes/fio:
  t/nvmept_fdp: add tests(302,303,400,401) for fdp scheme
  fdp: support scheme placement id (index) selection
  • Loading branch information
vincentkfu committed May 21, 2024
2 parents 3ed8eea + 96566b0 commit d3bcdd3
Show file tree
Hide file tree
Showing 10 changed files with 329 additions and 13 deletions.
28 changes: 26 additions & 2 deletions HOWTO.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2529,8 +2529,12 @@ with the caveat that when used on the command line, they must come after the
Round robin over available placement IDs. This is the
default.

The available placement ID (indices) are defined by the option
:option:`plids`.
**scheme**
Choose a placement ID (index) based on the scheme file defined by
the option :option:`dp_scheme`.

The available placement ID (indices) are defined by the option :option:`fdp_pli`
or :option:`plids` except for the case of **scheme**.

.. option:: plids=str, fdp_pli=str : [io_uring_cmd] [xnvme]

Expand All @@ -2541,6 +2545,26 @@ with the caveat that when used on the command line, they must come after the
identifiers only at indices 0, 2 and 5 specify ``plids=0,2,5``. For
streams this should be a comma-separated list of Stream IDs.

.. option:: dp_scheme=str : [io_uring_cmd] [xnvme]

Defines which placement ID (index) is selected based on the offset (LBA) range.
The file should contain one or more scheme entries in the following format:

0, 10737418240, 0
10737418240, 21474836480, 1
21474836480, 32212254720, 2
...

Each line, a scheme entry, contains a start offset, an end offset, and a placement ID
(index), separated by commas. If the write offset is within the range of a
scheme entry (start offset ≤ offset < end offset), the corresponding placement ID
(index) will be selected. If the write offset belongs to multiple scheme entries,
the first matching scheme entry will be applied. If the offset is not within the range
of any scheme entry, the dspec field will be set to 0, the default RUH. (Caution: In case of
multiple devices in a job, all devices of the job will be affected by the scheme. If
this option is specified, the option :option:`plids` or :option:`fdp_pli` will be
ignored.)

.. option:: md_per_io_size=int : [io_uring_cmd] [xnvme]

Size in bytes for separate metadata buffer per IO. Default: 0.
Expand Down
2 changes: 2 additions & 0 deletions cconv.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ int convert_thread_options_to_cpu(struct thread_options *o,
string_to_cpu(&o->ioscheduler, top->ioscheduler);
string_to_cpu(&o->profile, top->profile);
string_to_cpu(&o->cgroup, top->cgroup);
string_to_cpu(&o->dp_scheme_file, top->dp_scheme_file);

o->allow_create = le32_to_cpu(top->allow_create);
o->allow_mounted_write = le32_to_cpu(top->allow_mounted_write);
Expand Down Expand Up @@ -398,6 +399,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
string_to_net(top->ioscheduler, o->ioscheduler);
string_to_net(top->profile, o->profile);
string_to_net(top->cgroup, o->cgroup);
string_to_net(top->dp_scheme_file, o->dp_scheme_file);

top->allow_create = cpu_to_le32(o->allow_create);
top->allow_mounted_write = cpu_to_le32(o->allow_mounted_write);
Expand Down
78 changes: 78 additions & 0 deletions dataplacement.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,56 @@ static int init_ruh_info(struct thread_data *td, struct fio_file *f)
return ret;
}

/*
 * Load the data placement scheme for file @f from the file named by
 * td->o.dp_scheme_file.
 *
 * Does nothing and returns 0 unless td->o.dp_id_select is FIO_DP_SCHEME.
 * Each "start,end,pli" line that parses becomes one scheme entry. At most
 * DP_MAX_SCHEME_ENTRIES entries are stored; if more are present a message is
 * logged and the extra entries are not applied. Reading stops at the first
 * line that does not parse. On success the table is attached to
 * f->ruhs_scheme.
 *
 * Returns 0 on success, the negated errno from fopen() if the scheme file
 * cannot be opened, or -ENOMEM if the table cannot be allocated.
 */
static int init_ruh_scheme(struct thread_data *td, struct fio_file *f)
{
	struct fio_ruhs_scheme *ruh_scheme;
	FILE *scheme_fp;
	unsigned long long start, end;
	uint16_t pli;
	int ret = 0;

	if (td->o.dp_id_select != FIO_DP_SCHEME)
		return 0;

	/* Get the scheme from the file */
	scheme_fp = fopen(td->o.dp_scheme_file, "r");
	if (!scheme_fp) {
		/*
		 * Capture errno before logging: the logging call may itself
		 * change errno, which would make us return the wrong code
		 * (possibly 0, i.e. success).
		 */
		ret = -errno;
		log_err("fio: ruh scheme failed to open scheme file %s\n",
			 td->o.dp_scheme_file);
		goto out;
	}

	ruh_scheme = scalloc(1, sizeof(*ruh_scheme));
	if (!ruh_scheme) {
		ret = -ENOMEM;
		goto out_with_close_fp;
	}

	for (int i = 0;
	     i < DP_MAX_SCHEME_ENTRIES &&
	     fscanf(scheme_fp, "%llu,%llu,%hu\n", &start, &end, &pli) == 3;
	     i++) {
		ruh_scheme->scheme_entries[i].start_offset = start;
		ruh_scheme->scheme_entries[i].end_offset = end;
		ruh_scheme->scheme_entries[i].pli = pli;
		ruh_scheme->nr_schemes++;
	}

	/* One more parsable entry means the file exceeds the table size */
	if (fscanf(scheme_fp, "%llu,%llu,%hu\n", &start, &end, &pli) == 3)
		log_info("fio: too many scheme entries in %s. Only the first %d scheme entries are applied\n",
			 td->o.dp_scheme_file,
			 DP_MAX_SCHEME_ENTRIES);

	f->ruhs_scheme = ruh_scheme;

out_with_close_fp:
	fclose(scheme_fp);
out:
	return ret;
}

int dp_init(struct thread_data *td)
{
struct fio_file *f;
Expand All @@ -109,6 +159,10 @@ int dp_init(struct thread_data *td)
ret = init_ruh_info(td, f);
if (ret)
break;

ret = init_ruh_scheme(td, f);
if (ret)
break;
}
return ret;
}
Expand All @@ -119,6 +173,11 @@ void fdp_free_ruhs_info(struct fio_file *f)
return;
sfree(f->ruhs_info);
f->ruhs_info = NULL;

if (!f->ruhs_scheme)
return;
sfree(f->ruhs_scheme);
f->ruhs_scheme = NULL;
}

void dp_fill_dspec_data(struct thread_data *td, struct io_u *io_u)
Expand All @@ -138,6 +197,25 @@ void dp_fill_dspec_data(struct thread_data *td, struct io_u *io_u)
ruhs->pli_loc = 0;

dspec = ruhs->plis[ruhs->pli_loc++];
} else if (td->o.dp_id_select == FIO_DP_SCHEME) {
struct fio_ruhs_scheme *ruhs_scheme = f->ruhs_scheme;
unsigned long long offset = io_u->offset;
int i;

for (i = 0; i < ruhs_scheme->nr_schemes; i++) {
if (offset >= ruhs_scheme->scheme_entries[i].start_offset &&
offset < ruhs_scheme->scheme_entries[i].end_offset) {
dspec = ruhs_scheme->scheme_entries[i].pli;
break;
}
}

/*
* If the write offset is not affected by any scheme entry,
* 0(default RUH) will be assigned to dspec
*/
if (i == ruhs_scheme->nr_schemes)
dspec = 0;
} else {
ruhs->pli_loc = rand_between(&td->fdp_state, 0, ruhs->nr_ruhs - 1);
dspec = ruhs->plis[ruhs->pli_loc];
Expand Down
14 changes: 13 additions & 1 deletion dataplacement.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#define FDP_DIR_DTYPE 2
#define FDP_MAX_RUHS 128
#define FIO_MAX_DP_IDS 16
#define DP_MAX_SCHEME_ENTRIES 32

/*
* How fio chooses what placement identifier to use next. Choice of
Expand All @@ -15,9 +16,9 @@
enum {
FIO_DP_RANDOM = 0x1,
FIO_DP_RR = 0x2,
FIO_DP_SCHEME = 0x3,
};


enum {
FIO_DP_NONE = 0x0,
FIO_DP_FDP = 0x1,
Expand All @@ -30,6 +31,17 @@ struct fio_ruhs_info {
uint16_t plis[];
};

/*
 * One scheme entry: an offset range and the placement ID (index) to use for
 * I/O whose offset falls inside it.
 */
struct fio_ruhs_scheme_entry {
/* First offset covered by the entry (inclusive) */
unsigned long long start_offset;
/* End of the range (exclusive): an offset matches when it is < end_offset */
unsigned long long end_offset;
/* Placement ID (index) copied into dspec when the entry matches */
uint16_t pli;
};

/*
 * Table of scheme entries read from the scheme file. Entries are searched in
 * order and the first matching entry is used.
 */
struct fio_ruhs_scheme {
/* Number of valid entries at the start of scheme_entries[] */
uint16_t nr_schemes;
/* Fixed-size storage; at most DP_MAX_SCHEME_ENTRIES entries are kept */
struct fio_ruhs_scheme_entry scheme_entries[DP_MAX_SCHEME_ENTRIES];
};

int dp_init(struct thread_data *td);
void fdp_free_ruhs_info(struct fio_file *f);
void dp_fill_dspec_data(struct thread_data *td, struct io_u *io_u);
Expand Down
1 change: 1 addition & 0 deletions file.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ struct fio_file {
uint64_t io_size;

struct fio_ruhs_info *ruhs_info;
struct fio_ruhs_scheme *ruhs_scheme;

/*
* Zoned block device information. See also zonemode=zbd.
Expand Down
32 changes: 31 additions & 1 deletion fio.1
Original file line number Diff line number Diff line change
Expand Up @@ -2294,9 +2294,14 @@ Choose a placement ID at random (uniform).
.TP
.B roundrobin
Round robin over available placement IDs. This is the default.
.TP
.B scheme
Choose a placement ID (index) based on the scheme file defined by
the option \fBdp_scheme\fP.
.RE
.P
The available placement ID (indices) are defined by the \fBplids\fR option.
The available placement ID (indices) are defined by \fBplids\fR or
\fBfdp_pli\fR option except for the case of \fBscheme\fP.
.RE
.TP
.BI (io_uring_cmd,xnvme)plids=str, fdp_pli \fR=\fPstr
Expand All @@ -2307,6 +2312,31 @@ jobs. If you want fio to use placement identifier only at indices 0, 2 and 5
specify, you would set `plids=0,2,5`. For streams this should be a
comma-separated list of Stream IDs.
.TP
.BI (io_uring_cmd,xnvme)\fR\fBdp_scheme\fP=str
Defines which placement ID (index) is selected based on the offset (LBA) range.
The file should contain one or more scheme entries in the following format:
.sp
.RS
.RS
0, 10737418240, 0
.br
10737418240, 21474836480, 1
.br
21474836480, 32212254720, 2
.br
\&...
.RE
.sp
Each line, a scheme entry, contains a start offset, an end offset, and a placement ID
(index), separated by commas. If the write offset is within the range of a
scheme entry (start offset ≤ offset < end offset), the corresponding placement ID
(index) will be selected. If the write offset belongs to multiple scheme entries,
the first matching scheme entry will be applied. If the offset is not within the range
of any scheme entry, the dspec field will be set to 0, the default RUH. (Caution: In case of
multiple devices in a job, all devices of the job will be affected by the scheme. If
this option is specified, the option \fBplids\fP or \fBfdp_pli\fP will be ignored.)
.RE
.TP
.BI (io_uring_cmd,xnvme)md_per_io_size \fR=\fPint
Size in bytes for separate metadata buffer per IO. Default: 0.
.TP
Expand Down
52 changes: 52 additions & 0 deletions options.c
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,43 @@ static int str_fdp_pli_cb(void *data, const char *input)
return 0;
}

/*
 * str_dp_scheme_cb() is the callback for parsing the dp_scheme option.
 * It strips leading and trailing blanks from the stored scheme file name
 * (td->o.dp_scheme_file), writes the stripped name back, and checks that the
 * name refers to a regular file.
 *
 * @data:  callback data, converted to the thread data via cb_data_to_td()
 * @input: not used; the name is read from td->o.dp_scheme_file
 *
 * Returns 0 on success or during a parse dry run, ENOMEM if the name cannot
 * be duplicated, the errno from lstat() if the file cannot be examined, or
 * EINVAL if the path is not a regular file.
 */
static int str_dp_scheme_cb(void *data, const char *input)
{
	struct thread_data *td = cb_data_to_td(data);
	struct stat sb;
	char *buf, *filename;
	int ret = 0;

	if (parse_dryrun())
		return 0;

	buf = strdup(td->o.dp_scheme_file);
	if (!buf)
		return ENOMEM;

	/*
	 * strip_blank_front() receives the pointer by address and may move it
	 * past leading blanks, so work on a second pointer and keep the
	 * original allocation in buf for free().
	 */
	filename = buf;
	strip_blank_front(&filename);
	strip_blank_end(filename);

	/* The stripped name is never longer than the stored one */
	strcpy(td->o.dp_scheme_file, filename);

	if (lstat(filename, &sb) < 0) {
		ret = errno;
		log_err("fio: lstat() error related to %s\n", filename);
		td_verror(td, ret, "lstat");
		goto out;
	}

	if (!S_ISREG(sb.st_mode)) {
		/*
		 * lstat() succeeded, so errno holds no meaningful value here;
		 * report an explicit error instead of a stale (possibly 0) one.
		 */
		ret = EINVAL;
		log_err("fio: %s is not a file\n", filename);
		td_verror(td, ret, "S_ISREG");
		goto out;
	}

out:
	free(buf);
	return ret;
}

static int str_bssplit_cb(void *data, const char *input)
{
struct thread_data *td = cb_data_to_td(data);
Expand Down Expand Up @@ -3760,6 +3797,10 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
.oval = FIO_DP_RR,
.help = "Round robin select Placement IDs",
},
{ .ival = "scheme",
.oval = FIO_DP_SCHEME,
.help = "Use a scheme(based on LBA) to select Placement IDs",
},
},
},
{
Expand All @@ -3774,6 +3815,17 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
.category = FIO_OPT_C_IO,
.group = FIO_OPT_G_INVALID,
},
{
.name = "dp_scheme",
.lname = "Data Placement Scheme",
.type = FIO_OPT_STR_STORE,
.cb = str_dp_scheme_cb,
.off1 = offsetof(struct thread_options, dp_scheme_file),
.maxlen = PATH_MAX,
.help = "scheme file that specifies offset-RUH mapping",
.category = FIO_OPT_C_IO,
.group = FIO_OPT_G_INVALID,
},
{
.name = "lockmem",
.lname = "Lock memory",
Expand Down
2 changes: 1 addition & 1 deletion server.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ struct fio_net_cmd_reply {
};

enum {
FIO_SERVER_VER = 104,
FIO_SERVER_VER = 105,

FIO_SERVER_MAX_FRAGMENT_PDU = 1024,
FIO_SERVER_MAX_CMD_MB = 2048,
Expand Down
Loading

0 comments on commit d3bcdd3

Please sign in to comment.