Skip to content

Commit

Permalink
fdp: support scheme placement id selection
Browse files Browse the repository at this point in the history
Add a new placement id selection method called scheme. It allows
users to assign a placement ID depending on the offset range.
The strategy of the scheme is specified in the file by user and
is applicable using the option dp_scheme.

Signed-off-by: Hyunwoo Park <[email protected]>
Signed-off-by: Vincent Fu <[email protected]>
Signed-off-by: Ankit Kumar <[email protected]>
  • Loading branch information
parkvibes committed May 9, 2024
1 parent 3ed8eea commit bb20e0f
Show file tree
Hide file tree
Showing 9 changed files with 207 additions and 3 deletions.
24 changes: 23 additions & 1 deletion HOWTO.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2529,8 +2529,12 @@ with the caveat that when used on the command line, they must come after the
Round robin over available placement IDs. This is the
default.

**scheme**
Choose a placement ID based on the scheme file defined by the option
:option:`dp_scheme`.

The available placement ID (indices) are defined by the option
:option:`plids`.
:option:`fdp_pli` or :option:`plids`.

.. option:: plids=str, fdp_pli=str : [io_uring_cmd] [xnvme]

Expand All @@ -2541,6 +2545,24 @@ with the caveat that when used on the command line, they must come after the
identifiers only at indices 0, 2 and 5 specify ``plids=0,2,5``. For
streams this should be a comma-separated list of Stream IDs.

.. option:: dp_scheme=filename : [io_uring_cmd] [xnvme]

Defines which placement ID index is selected based on the offset (LBA) range.
The file should contain one or more scheme entries in the following format:

0, 10737418240, 0
10737418240, 21474836480, 1
21474836480, 32212254720, 2
...

Each line, a scheme entry, contains a start offset, an end offset, and a plid index
separated by commas. If the write offset is within the range of a scheme
entry (start offset ≤ offset < end offset), the corresponding plid index is selected.
If the write offset falls within multiple scheme entries, the first matching
scheme entry is selected. If the offset is not within the range of any scheme entry,
the dspec field is set to 0, the default RUH. (Caution: if a job uses multiple devices,
the scheme applies to all devices of the job.)

.. option:: md_per_io_size=int : [io_uring_cmd] [xnvme]

Size in bytes for separate metadata buffer per IO. Default: 0.
Expand Down
2 changes: 2 additions & 0 deletions cconv.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ int convert_thread_options_to_cpu(struct thread_options *o,
string_to_cpu(&o->ioscheduler, top->ioscheduler);
string_to_cpu(&o->profile, top->profile);
string_to_cpu(&o->cgroup, top->cgroup);
string_to_cpu(&o->dp_scheme_file, top->dp_scheme_file);

o->allow_create = le32_to_cpu(top->allow_create);
o->allow_mounted_write = le32_to_cpu(top->allow_mounted_write);
Expand Down Expand Up @@ -398,6 +399,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
string_to_net(top->ioscheduler, o->ioscheduler);
string_to_net(top->profile, o->profile);
string_to_net(top->cgroup, o->cgroup);
string_to_net(top->dp_scheme_file, o->dp_scheme_file);

top->allow_create = cpu_to_le32(o->allow_create);
top->allow_mounted_write = cpu_to_le32(o->allow_mounted_write);
Expand Down
85 changes: 85 additions & 0 deletions dataplacement.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,63 @@ static int init_ruh_info(struct thread_data *td, struct fio_file *f)
return ret;
}

/*
 * Load the offset-range to placement-ID-index scheme for file @f.
 *
 * Does nothing (and returns 0) unless td->o.dp_id_select is FIO_DP_SCHEME.
 * Otherwise the file named by td->o.dp_scheme_file is read as a sequence of
 * "start,end,index" entries. At most DP_MAX_SCHEME_ENTRIES entries are used;
 * if more are present an informational message is logged and the rest are
 * ignored. Every index must be smaller than f->ruhs_info->nr_ruhs.
 *
 * On success the parsed scheme is stored in f->ruhs_scheme and 0 is
 * returned. On failure a negative errno value is returned and
 * f->ruhs_scheme is left untouched.
 */
static int init_ruh_scheme(struct thread_data *td, struct fio_file *f)
{
	struct fio_ruhs_scheme *ruh_scheme;
	FILE *scheme_fp;
	unsigned long long start, end;
	uint16_t ruh_index;
	int ret = 0;

	if (td->o.dp_id_select != FIO_DP_SCHEME)
		return 0;

	/* Scheme selection without a scheme file cannot work */
	if (!td->o.dp_scheme_file) {
		log_err("fio: ruh scheme requires a scheme file (dp_scheme)\n");
		return -EINVAL;
	}

	/* Get the scheme from the file */
	scheme_fp = fopen(td->o.dp_scheme_file, "r");
	if (!scheme_fp) {
		/* Save errno before any other call gets a chance to overwrite it */
		ret = -errno;
		log_err("fio: ruh scheme failed to open scheme file %s\n",
			td->o.dp_scheme_file);
		goto out;
	}

	ruh_scheme = scalloc(1, sizeof(*ruh_scheme));
	if (!ruh_scheme) {
		ret = -ENOMEM;
		goto out_with_close_fp;
	}

	for (int i = 0;
	     i < DP_MAX_SCHEME_ENTRIES &&
	     fscanf(scheme_fp, "%llu,%llu,%hu\n", &start, &end, &ruh_index) == 3;
	     i++) {

		/* Validate scheme entry */
		if (ruh_index >= f->ruhs_info->nr_ruhs) {
			log_err("fio: invalid pli index(%d) of %d th scheme entry\n", ruh_index, i);
			ret = -EINVAL;
			/* The scheme is not handed to @f, so release it here */
			sfree(ruh_scheme);
			goto out_with_close_fp;
		}

		ruh_scheme->scheme_entries[i].start_offset = start;
		ruh_scheme->scheme_entries[i].end_offset = end;
		ruh_scheme->scheme_entries[i].pli_index = ruh_index;
		ruh_scheme->nr_schemes++;
	}

	/* One more well-formed entry means the file exceeds the supported limit */
	if (fscanf(scheme_fp, "%llu,%llu,%hu\n", &start, &end, &ruh_index) == 3)
		log_info("fio: too many scheme entries in %s. Only the first %d scheme entries are applied\n",
			 td->o.dp_scheme_file,
			 DP_MAX_SCHEME_ENTRIES);

	f->ruhs_scheme = ruh_scheme;

out_with_close_fp:
	fclose(scheme_fp);
out:
	return ret;
}

int dp_init(struct thread_data *td)
{
struct fio_file *f;
Expand All @@ -109,6 +166,10 @@ int dp_init(struct thread_data *td)
ret = init_ruh_info(td, f);
if (ret)
break;

ret = init_ruh_scheme(td, f);
if (ret)
break;
}
return ret;
}
Expand All @@ -119,6 +180,11 @@ void fdp_free_ruhs_info(struct fio_file *f)
return;
sfree(f->ruhs_info);
f->ruhs_info = NULL;

if (!f->ruhs_scheme)
return;
sfree(f->ruhs_scheme);
f->ruhs_scheme = NULL;
}

void dp_fill_dspec_data(struct thread_data *td, struct io_u *io_u)
Expand All @@ -138,6 +204,25 @@ void dp_fill_dspec_data(struct thread_data *td, struct io_u *io_u)
ruhs->pli_loc = 0;

dspec = ruhs->plis[ruhs->pli_loc++];
} else if (td->o.dp_id_select == FIO_DP_SCHEME) {
struct fio_ruhs_scheme *ruhs_scheme = f->ruhs_scheme;
unsigned long long offset = io_u->offset;
int i;

for (i = 0; i < ruhs_scheme->nr_schemes; i++){
if (offset >= ruhs_scheme->scheme_entries[i].start_offset &&
offset < ruhs_scheme->scheme_entries[i].end_offset){
dspec = ruhs->plis[ruhs_scheme->scheme_entries[i].pli_index];
break;
}
}

/*
* If the write offset is not affected by any scheme entry,
* 0(default RUH) will be assigned to dspec
*/
if (i == ruhs_scheme->nr_schemes)
dspec = 0;
} else {
ruhs->pli_loc = rand_between(&td->fdp_state, 0, ruhs->nr_ruhs - 1);
dspec = ruhs->plis[ruhs->pli_loc];
Expand Down
14 changes: 13 additions & 1 deletion dataplacement.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#define FDP_DIR_DTYPE 2
#define FDP_MAX_RUHS 128
#define FIO_MAX_DP_IDS 16
#define DP_MAX_SCHEME_ENTRIES 32

/*
* How fio chooses what placement identifier to use next. Choice of
Expand All @@ -15,9 +16,9 @@
enum {
/* Choose a placement ID at random */
FIO_DP_RANDOM = 0x1,
/* Round robin over the available placement IDs */
FIO_DP_RR = 0x2,
/* Choose a placement ID from the offset-range scheme given by dp_scheme */
FIO_DP_SCHEME = 0x3,
};


enum {
FIO_DP_NONE = 0x0,
FIO_DP_FDP = 0x1,
Expand All @@ -30,6 +31,17 @@ struct fio_ruhs_info {
uint16_t plis[];
};

/*
 * One scheme entry: I/O whose offset lies in [start_offset, end_offset)
 * is assigned the placement ID found at index pli_index.
 */
struct fio_ruhs_scheme_entry {
/* First offset covered by this entry (inclusive) */
unsigned long long start_offset;
/* End of the range covered by this entry (exclusive) */
unsigned long long end_offset;
/* Index into the placement ID list (plis) used for matching offsets */
uint16_t pli_index;
};

/*
 * Offset-based placement scheme for a file: a fixed-capacity table of
 * scheme entries, searched in order so the first matching entry wins.
 */
struct fio_ruhs_scheme {
/* Number of valid entries at the front of scheme_entries */
uint16_t nr_schemes;
/* Entry storage; holds at most DP_MAX_SCHEME_ENTRIES entries */
struct fio_ruhs_scheme_entry scheme_entries[DP_MAX_SCHEME_ENTRIES];
};

int dp_init(struct thread_data *td);
void fdp_free_ruhs_info(struct fio_file *f);
void dp_fill_dspec_data(struct thread_data *td, struct io_u *io_u);
Expand Down
1 change: 1 addition & 0 deletions file.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ struct fio_file {
uint64_t io_size;

struct fio_ruhs_info *ruhs_info;
struct fio_ruhs_scheme *ruhs_scheme;

/*
* Zoned block device information. See also zonemode=zbd.
Expand Down
27 changes: 27 additions & 0 deletions fio.1
Original file line number Diff line number Diff line change
Expand Up @@ -2294,6 +2294,9 @@ Choose a placement ID at random (uniform).
.TP
.B roundrobin
Round robin over available placement IDs. This is the default.
.TP
.B scheme
Choose a placement ID based on the scheme file defined by the option \fBdp_scheme\fP.
.RE
.P
The available placement ID (indices) are defined by the \fBplids\fR option.
Expand All @@ -2307,6 +2310,30 @@ jobs. If you want fio to use placement identifier only at indices 0, 2 and 5
specify, you would set `plids=0,2,5`. For streams this should be a
comma-separated list of Stream IDs.
.TP
.BI (io_uring_cmd,xnvme)dp_scheme \fR=\fPfilename
Defines which placement ID index is selected based on the offset (LBA) range.
The file should contain one or more scheme entries in the following format:
.sp
.RS
.RS
0, 10737418240, 0
.br
10737418240, 21474836480, 1
.br
21474836480, 32212254720, 2
.br
\&...
.RE
.sp
Each line, a scheme entry, contains a start offset, an end offset, and a plid index
separated by commas. If the write offset is within the range of a scheme
entry (start offset ≤ offset < end offset), the corresponding plid index is selected.
If the write offset falls within multiple scheme entries, the first matching
scheme entry is selected. If the offset is not within the range of any scheme entry,
the dspec field is set to 0, the default RUH. (Caution: if a job uses multiple devices,
the scheme applies to all devices of the job.)
.RE
.TP
.BI (io_uring_cmd,xnvme)md_per_io_size \fR=\fPint
Size in bytes for separate metadata buffer per IO. Default: 0.
.TP
Expand Down
53 changes: 53 additions & 0 deletions options.c
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,43 @@ static int str_fdp_pli_cb(void *data, const char *input)
return 0;
}

/*
 * str_dp_scheme_cb() is the callback for parsing the dp_scheme option.
 *
 * It strips leading and trailing blanks from the scheme filename held in
 * td->o.dp_scheme_file, writes the stripped name back, and verifies with
 * lstat() that the name refers to a regular file.
 *
 * Returns 0 on success or during a parse dry run, and a positive errno
 * value on failure (ENOMEM, the lstat() error, or EINVAL when the path is
 * not a regular file).
 */
static int str_dp_scheme_cb(void *data, const char *input)
{
	struct thread_data *td = cb_data_to_td(data);
	struct stat sb;
	char *buf, *filename;
	int ret = 0;

	if (parse_dryrun())
		return 0;

	buf = strdup(td->o.dp_scheme_file);
	if (!buf) {
		ret = ENOMEM;
		td_verror(td, ret, "strdup");
		return ret;
	}

	/*
	 * strip_blank_front() receives the pointer by address and may move it
	 * past leading blanks, so keep 'buf' as the pointer handed to free().
	 */
	filename = buf;
	strip_blank_front(&filename);
	strip_blank_end(filename);

	/* The stripped name is never longer than the stored one */
	strcpy(td->o.dp_scheme_file, filename);

	if (lstat(filename, &sb) < 0) {
		ret = errno;
		log_err("fio: lstat() error related to %s\n", filename);
		td_verror(td, ret, "lstat");
		goto out;
	}

	if (!S_ISREG(sb.st_mode)) {
		/* S_ISREG() does not set errno; report an explicit error code */
		ret = EINVAL;
		log_err("fio: %s is not a file\n", filename);
		td_verror(td, ret, "S_ISREG");
		goto out;
	}

out:
	free(buf);
	return ret;
}

static int str_bssplit_cb(void *data, const char *input)
{
struct thread_data *td = cb_data_to_td(data);
Expand Down Expand Up @@ -3760,6 +3797,11 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
.oval = FIO_DP_RR,
.help = "Round robin select Placement IDs",
},
{
.ival = "scheme",
.oval = FIO_DP_SCHEME,
.help = "Use a scheme(based on LBA) to select Placement IDs",
},
},
},
{
Expand All @@ -3774,6 +3816,17 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
.category = FIO_OPT_C_IO,
.group = FIO_OPT_G_INVALID,
},
{
.name = "dp_scheme",
.lname = "Data Placement Scheme",
.type = FIO_OPT_STR_STORE,
.cb = str_dp_scheme_cb,
.off1 = offsetof(struct thread_options, dp_scheme_file),
.maxlen = PATH_MAX,
.help = "scheme file that specifies offset-RUH mapping",
.category = FIO_OPT_C_IO,
.group = FIO_OPT_G_INVALID,
},
{
.name = "lockmem",
.lname = "Lock memory",
Expand Down
2 changes: 1 addition & 1 deletion server.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ struct fio_net_cmd_reply {
};

enum {
FIO_SERVER_VER = 104,
FIO_SERVER_VER = 105,

FIO_SERVER_MAX_FRAGMENT_PDU = 1024,
FIO_SERVER_MAX_CMD_MB = 2048,
Expand Down
2 changes: 2 additions & 0 deletions thread_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,7 @@ struct thread_options {
unsigned int dp_id_select;
unsigned int dp_ids[FIO_MAX_DP_IDS];
unsigned int dp_nr_ids;
char *dp_scheme_file;

unsigned int log_entries;
unsigned int log_prio;
Expand Down Expand Up @@ -713,6 +714,7 @@ struct thread_options_pack {
uint32_t dp_id_select;
uint32_t dp_ids[FIO_MAX_DP_IDS];
uint32_t dp_nr_ids;
uint8_t dp_scheme_file[FIO_TOP_STR_MAX];

uint32_t num_range;
/*
Expand Down

0 comments on commit bb20e0f

Please sign in to comment.