Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

b/349170185 Backport fixes #14680

Merged
merged 1 commit into from
Jul 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 64 additions & 35 deletions src/client/array/dc_array.c
Original file line number Diff line number Diff line change
Expand Up @@ -2069,18 +2069,24 @@ free_set_size_cb(tse_task_t *task, void *data)
}

static int
punch_extent(daos_handle_t oh, daos_handle_t th, daos_size_t dkey_val, daos_off_t record_i,
daos_size_t num_records, tse_task_t *task, d_list_t *task_list)
punch_dkey_or_extent(daos_handle_t oh, daos_handle_t th, daos_size_t dkey_val, daos_off_t start,
daos_size_t num_records, bool punch_dkey, tse_task_t *task,
d_list_t *task_list)
{
daos_obj_update_t *io_arg;
daos_obj_punch_t *dkey_punch_arg;
daos_iod_t *iod;
d_sg_list_t *sgl;
daos_key_t *dkey;
struct io_params *params = NULL;
tse_task_t *io_task = NULL;
int rc;
int rc;

D_DEBUG(DB_IO, "Punching (%zu, %zu) in Key %zu\n", record_i + 1, num_records, dkey_val);
if (punch_dkey)
D_DEBUG(DB_IO, "Punching dkey %zu\n", dkey_val);
else
D_DEBUG(DB_IO, "Punching (%zu, %zu) in Key %zu\n",
start, num_records, dkey_val);

D_ALLOC_PTR(params);
if (params == NULL)
Expand All @@ -2094,28 +2100,42 @@ punch_extent(daos_handle_t oh, daos_handle_t th, daos_size_t dkey_val, daos_off_
dkey = &params->dkey;
d_iov_set(dkey, &params->dkey_val, sizeof(uint64_t));

/* set descriptor for KV object */
d_iov_set(&iod->iod_name, &params->akey_val, 1);
iod->iod_nr = 1;
iod->iod_size = 0; /* 0 to punch */
iod->iod_type = DAOS_IOD_ARRAY;
D_ALLOC_PTR(iod->iod_recxs);
if (iod->iod_recxs == NULL)
D_GOTO(free, rc = -DER_NOMEM);
iod->iod_recxs[0].rx_idx = record_i + 1;
iod->iod_recxs[0].rx_nr = num_records;

rc = daos_task_create(DAOS_OPC_OBJ_UPDATE, tse_task2sched(task), 0, NULL, &io_task);
if (rc)
D_GOTO(free_reqs, rc);
if (punch_dkey) {
rc = daos_task_create(DAOS_OPC_OBJ_PUNCH_DKEYS, tse_task2sched(task), 0, NULL,
&io_task);
if (rc)
D_GOTO(free_reqs, rc);

dkey_punch_arg = daos_task_get_args(io_task);
dkey_punch_arg->oh = oh;
dkey_punch_arg->th = th;
dkey_punch_arg->dkey = dkey;
dkey_punch_arg->akeys = NULL;
dkey_punch_arg->akey_nr = 0;
} else {
/* set descriptor for KV object */
d_iov_set(&iod->iod_name, &params->akey_val, 1);
iod->iod_nr = 1;
iod->iod_size = 0; /* 0 to punch */
iod->iod_type = DAOS_IOD_ARRAY;
D_ALLOC_PTR(iod->iod_recxs);
if (iod->iod_recxs == NULL)
D_GOTO(free, rc = -DER_NOMEM);
iod->iod_recxs[0].rx_idx = start;
iod->iod_recxs[0].rx_nr = num_records;

rc = daos_task_create(DAOS_OPC_OBJ_UPDATE, tse_task2sched(task), 0, NULL, &io_task);
if (rc)
D_GOTO(free_reqs, rc);

io_arg = daos_task_get_args(io_task);
io_arg->oh = oh;
io_arg->th = th;
io_arg->dkey = dkey;
io_arg->nr = 1;
io_arg->iods = iod;
io_arg->sgls = sgl;
io_arg = daos_task_get_args(io_task);
io_arg->oh = oh;
io_arg->th = th;
io_arg->dkey = dkey;
io_arg->nr = 1;
io_arg->iods = iod;
io_arg->sgls = sgl;
}

rc = tse_task_register_comp_cb(io_task, free_io_params_cb, &params, sizeof(params));
if (rc)
Expand Down Expand Up @@ -2422,18 +2442,26 @@ adjust_array_size_cb(tse_task_t *task, void *data)
memcpy(&dkey_val, ptr, args->kds[i].kd_key_len);
ptr += args->kds[i].kd_key_len;

/*
* Either punch the entire dkey or an extent in that dkey depending on the offset
* where we are truncating to. The first dkey of the array (dkey 1) will always be
* an extent punch to maintain an epoch there.
*/
if (props->size == 0 || dkey_val > props->dkey_val) {
/** Do nothing for DKEY 0 (metadata) */
if (dkey_val == 0)
continue;
/*
* The dkey is higher than the adjusted size so we could punch it here.
* But it's better to punch the extent so that the max_write for the object
* doesn't get lost by aggregation.
*/
D_DEBUG(DB_IO, "Punch full extent in key "DF_U64"\n", dkey_val);
rc = punch_extent(args->oh, args->th, dkey_val, (daos_off_t)-1,
props->chunk_size, props->ptask, &task_list);
if (dkey_val == 1) {
D_DEBUG(DB_IO, "Punch full extent in key " DF_U64 "\n", dkey_val);
rc = punch_dkey_or_extent(args->oh, args->th, dkey_val,
0, props->chunk_size, false,
props->ptask, &task_list);
} else {
D_DEBUG(DB_IO, "Punch dkey " DF_U64 "\n", dkey_val);
rc = punch_dkey_or_extent(args->oh, args->th, dkey_val,
0, props->chunk_size, true,
props->ptask, &task_list);
}
if (rc)
goto out;
} else if (dkey_val == props->dkey_val && props->record_i) {
Expand All @@ -2444,8 +2472,9 @@ adjust_array_size_cb(tse_task_t *task, void *data)
props->chunk_size);
/** Punch all records above record_i */
D_DEBUG(DB_IO, "Punch extent in key "DF_U64"\n", dkey_val);
rc = punch_extent(args->oh, args->th, dkey_val, props->record_i,
props->num_records, props->ptask, &task_list);
rc = punch_dkey_or_extent(args->oh, args->th, dkey_val,
props->record_i + 1, props->num_records,
false, props->ptask, &task_list);
if (rc)
goto out;
}
Expand Down
7 changes: 4 additions & 3 deletions src/common/pool_map.c
Original file line number Diff line number Diff line change
Expand Up @@ -1650,11 +1650,12 @@ gen_pool_buf(struct pool_map *map, struct pool_buf **map_buf_out, int map_versio
map_comp.co_flags = PO_COMPF_NONE;
map_comp.co_nr = 1;

D_DEBUG(DB_TRACE, "adding target: type=0x%hhx, status=%hhu, idx=%d, "
D_DEBUG(DB_TRACE, "adding target: type=0x%hhx, status=%hhu, idx=%d, id=%d, "
"rank=%d, ver=%d, in_ver=%d, fseq=%u, flags=0x%x, nr=%u\n",
map_comp.co_type, map_comp.co_status, map_comp.co_index,
map_comp.co_rank, map_comp.co_ver, map_comp.co_in_ver,
map_comp.co_fseq, map_comp.co_flags, map_comp.co_nr);
map_comp.co_id, map_comp.co_rank, map_comp.co_ver,
map_comp.co_in_ver, map_comp.co_fseq, map_comp.co_flags,
map_comp.co_nr);

rc = pool_buf_attach(map_buf, &map_comp, 1);
if (rc != 0)
Expand Down
65 changes: 47 additions & 18 deletions src/container/srv_target.c
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,7 @@ cont_child_aggregate(struct ds_cont_child *cont, cont_aggregate_cb_t agg_cb,

if (unlikely(DAOS_FAIL_CHECK(DAOS_FORCE_EC_AGG) ||
DAOS_FAIL_CHECK(DAOS_FORCE_EC_AGG_FAIL) ||
DAOS_FAIL_CHECK(DAOS_OBJ_EC_AGG_LEADER_DIFF) ||
DAOS_FAIL_CHECK(DAOS_FORCE_EC_AGG_PEER_FAIL)))
interval = 0;
else
Expand Down Expand Up @@ -627,13 +628,18 @@ cont_child_alloc_ref(void *co_uuid, unsigned int ksize, void *po_uuid,
rc = ABT_cond_create(&cont->sc_scrub_cond);
if (rc != ABT_SUCCESS) {
rc = dss_abterr2der(rc);
goto out_mutex;
goto out_resync_cond;
}
rc = ABT_cond_create(&cont->sc_rebuild_cond);
if (rc != ABT_SUCCESS) {
rc = dss_abterr2der(rc);
goto out_scrub_cond;
}

cont->sc_pool = ds_pool_child_lookup(po_uuid);
if (cont->sc_pool == NULL) {
rc = -DER_NO_HDL;
goto out_cond;
goto out_rebuild_cond;
}

rc = vos_cont_open(cont->sc_pool->spc_hdl, co_uuid, &cont->sc_hdl);
Expand All @@ -659,7 +665,11 @@ cont_child_alloc_ref(void *co_uuid, unsigned int ksize, void *po_uuid,

out_pool:
ds_pool_child_put(cont->sc_pool);
out_cond:
out_rebuild_cond:
ABT_cond_free(&cont->sc_rebuild_cond);
out_scrub_cond:
ABT_cond_free(&cont->sc_scrub_cond);
out_resync_cond:
ABT_cond_free(&cont->sc_dtx_resync_cond);
out_mutex:
ABT_mutex_free(&cont->sc_mutex);
Expand All @@ -686,6 +696,7 @@ cont_child_free_ref(struct daos_llink *llink)
D_FREE(cont->sc_snapshots);
ABT_cond_free(&cont->sc_dtx_resync_cond);
ABT_cond_free(&cont->sc_scrub_cond);
ABT_cond_free(&cont->sc_rebuild_cond);
ABT_mutex_free(&cont->sc_mutex);
D_FREE(cont);
}
Expand Down Expand Up @@ -740,6 +751,12 @@ ds_cont_child_cache_destroy(struct daos_lru_cache *cache)
daos_lru_cache_destroy(cache);
}

/*
 * Hand the container child's sc_list link to daos_lru_ref_release() on the
 * given cache.
 * NOTE(review): presumably this drops the reference obtained through
 * cont_child_lookup() -- confirm against the LRU cache API.
 */
static void
cont_child_put(struct daos_lru_cache *cache, struct ds_cont_child *cont)
{
daos_lru_ref_release(cache, &cont->sc_list);
}

/*
* If create == false, then this is assumed to be a pure lookup. In this case,
* -DER_NONEXIST is returned if the ds_cont_child object does not exist.
Expand Down Expand Up @@ -774,12 +791,6 @@ cont_child_lookup(struct daos_lru_cache *cache, const uuid_t co_uuid,
return 0;
}

/*
 * Hand the container child's sc_list link to daos_lru_ref_release() on the
 * given cache.
 * NOTE(review): presumably this drops the reference obtained through
 * cont_child_lookup() -- confirm against the LRU cache API.
 */
static void
cont_child_put(struct daos_lru_cache *cache, struct ds_cont_child *cont)
{
daos_lru_ref_release(cache, &cont->sc_list);
}

static inline bool
cont_child_started(struct ds_cont_child *cont_child)
{
Expand All @@ -805,13 +816,17 @@ cont_child_stop(struct ds_cont_child *cont_child)
/* Some ds_cont_child will only be created by ds_cont_child_lookup() and
* will never be started at all.
*/
cont_child->sc_stopping = 1;

/* Stop DTX reindex by force. */
stop_dtx_reindex_ult(cont_child, true);

if (cont_child_started(cont_child)) {
D_DEBUG(DB_MD, DF_CONT"[%d]: Stopping container\n",
DP_CONT(cont_child->sc_pool->spc_uuid,
cont_child->sc_uuid),
dss_get_module_info()->dmi_tgt_id);

cont_child->sc_stopping = 1;
d_list_del_init(&cont_child->sc_link);

dtx_cont_deregister(cont_child);
Expand Down Expand Up @@ -1164,6 +1179,7 @@ cont_child_destroy_one(void *vin)
&cont);
if (rc == -DER_NONEXIST)
break;

if (rc != 0)
D_GOTO(out_pool, rc);

Expand All @@ -1187,10 +1203,6 @@ cont_child_destroy_one(void *vin)
ABT_cond_wait(cont->sc_dtx_resync_cond, cont->sc_mutex);
ABT_mutex_unlock(cont->sc_mutex);

/* Give chance to DTX reindex ULT for exit. */
if (unlikely(cont->sc_dtx_reindex))
ABT_thread_yield();

/* Make sure checksum scrubbing has stopped */
ABT_mutex_lock(cont->sc_mutex);
if (cont->sc_scrubbing) {
Expand All @@ -1199,6 +1211,12 @@ cont_child_destroy_one(void *vin)
}
ABT_mutex_unlock(cont->sc_mutex);

/* Make sure rebuild has stopped */
ABT_mutex_lock(cont->sc_mutex);
if (cont->sc_rebuilding)
ABT_cond_wait(cont->sc_rebuild_cond, cont->sc_mutex);
ABT_mutex_unlock(cont->sc_mutex);

retry_cnt++;
if (retry_cnt > 1) {
D_ERROR("container is still in-use: open %u, resync %s, reindex %s\n",
Expand Down Expand Up @@ -1300,9 +1318,20 @@ ds_cont_child_lookup(uuid_t pool_uuid, uuid_t cont_uuid,
struct ds_cont_child **ds_cont)
{
struct dsm_tls *tls = dsm_tls_get();
int rc;

rc = cont_child_lookup(tls->dt_cont_cache, cont_uuid, pool_uuid,
true /* create */, ds_cont);
if (rc != 0)
return rc;

return cont_child_lookup(tls->dt_cont_cache, cont_uuid, pool_uuid,
true /* create */, ds_cont);
if ((*ds_cont)->sc_stopping) {
cont_child_put(tls->dt_cont_cache, *ds_cont);
*ds_cont = NULL;
return -DER_SHUTDOWN;
}

return 0;
}

/**
Expand Down Expand Up @@ -1572,7 +1601,7 @@ ds_cont_local_open(uuid_t pool_uuid, uuid_t cont_hdl_uuid, uuid_t cont_uuid,
DF_UUID": %d\n", DP_UUID(cont_uuid), hdl->sch_cont->sc_open);

hdl->sch_cont->sc_open--;
dtx_cont_close(hdl->sch_cont);
dtx_cont_close(hdl->sch_cont, true);

err_cont:
if (daos_handle_is_valid(poh)) {
Expand Down Expand Up @@ -1694,7 +1723,7 @@ cont_close_hdl(uuid_t cont_hdl_uuid)
D_ASSERT(cont_child->sc_open > 0);
cont_child->sc_open--;
if (cont_child->sc_open == 0)
dtx_cont_close(cont_child);
dtx_cont_close(cont_child, false);

D_DEBUG(DB_MD, DF_CONT": closed (%d): hdl="DF_UUID"\n",
DP_CONT(cont_child->sc_pool->spc_uuid, cont_child->sc_uuid),
Expand Down
26 changes: 20 additions & 6 deletions src/dtx/dtx_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -1635,6 +1635,10 @@ start_dtx_reindex_ult(struct ds_cont_child *cont)
while (cont->sc_dtx_reindex_abort)
ABT_thread_yield();

if (cont->sc_stopping)
return -DER_SHUTDOWN;

cont->sc_dtx_delay_reset = 0;
if (cont->sc_dtx_reindex)
return 0;

Expand All @@ -1652,7 +1656,7 @@ start_dtx_reindex_ult(struct ds_cont_child *cont)
}

void
stop_dtx_reindex_ult(struct ds_cont_child *cont)
stop_dtx_reindex_ult(struct ds_cont_child *cont, bool force)
{
/* DTX reindex has been done or has not been started. */
if (!cont->sc_dtx_reindex)
Expand All @@ -1662,9 +1666,15 @@ stop_dtx_reindex_ult(struct ds_cont_child *cont)
if (dtx_cont_opened(cont))
return;

/* Do not stop DTX reindex if DTX resync is still in-progress. */
if (cont->sc_dtx_resyncing)
/*
* For non-force case, do not stop DTX re-index if DTX resync
* is in-progress. Related DTX resource will be released after
* DTX resync globally done (via rebuild scanning).
*/
if (unlikely(cont->sc_dtx_resyncing && !force)) {
cont->sc_dtx_delay_reset = 1;
return;
}

cont->sc_dtx_reindex_abort = 1;

Expand Down Expand Up @@ -1822,7 +1832,7 @@ dtx_cont_open(struct ds_cont_child *cont)
}

void
dtx_cont_close(struct ds_cont_child *cont)
dtx_cont_close(struct ds_cont_child *cont, bool force)
{
struct dss_module_info *dmi = dss_get_module_info();
struct dtx_batched_pool_args *dbpa;
Expand All @@ -1837,16 +1847,20 @@ dtx_cont_close(struct ds_cont_child *cont)

d_list_for_each_entry(dbca, &dbpa->dbpa_cont_list, dbca_pool_link) {
if (dbca->dbca_cont == cont) {
stop_dtx_reindex_ult(cont);
stop_dtx_reindex_ult(cont, force);
d_list_del(&dbca->dbca_sys_link);
d_list_add_tail(&dbca->dbca_sys_link,
&dmi->dmi_dtx_batched_cont_close_list);
dtx_flush_on_close(dmi, dbca);

/* If nobody reopens the container during dtx_flush_on_close,
* then reset DTX table in VOS to release related resources.
*
* For non-force case, do not reset DTX table if DTX resync
* is in-progress to avoid redoing DTX re-index. We will do
* that after DTX resync is done globally.
*/
if (!dtx_cont_opened(cont))
if (likely(!dtx_cont_opened(cont) && cont->sc_dtx_delay_reset == 0))
vos_dtx_cache_reset(cont->sc_hdl, false);
return;
}
Expand Down
Loading
Loading