Skip to content

Commit

Permalink
dump: Don't unfreeze tasks on dump failure with --no-resume-on-error.
Browse files Browse the repository at this point in the history
Make it possible to kill tasks, or leave them stopped, if a dump fails
after the process tree has been stopped.

Signed-off-by: Michał Mirosław <[email protected]>
  • Loading branch information
osctobe committed Aug 4, 2023
1 parent 1db922f commit a48a801
Show file tree
Hide file tree
Showing 7 changed files with 16 additions and 2 deletions.
3 changes: 3 additions & 0 deletions Documentation/criu.txt
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,9 @@ In other words, do not use it unless really needed.
*-s*, *--leave-stopped*::
Leave tasks in stopped state after checkpoint, instead of killing.

*--no-resume-on-error*::
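Do not automatically resume tasks if the checkpoint fails; instead apply the same final state as after a successful checkpoint (killed by default, left stopped with *--leave-stopped*).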

*--external* __type__**[**__id__**]:**__value__::
Dump an instance of an external resource. The generic syntax is
'type' of resource, followed by resource 'id' (enclosed in literal
Expand Down
2 changes: 2 additions & 0 deletions criu/config.c
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,7 @@ void init_opts(void)

/* Default options */
opts.final_state = TASK_DEAD;
opts.resume_on_dump_error = true;
INIT_LIST_HEAD(&opts.ext_mounts);
INIT_LIST_HEAD(&opts.inherit_fds);
INIT_LIST_HEAD(&opts.external);
Expand Down Expand Up @@ -622,6 +623,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd,
{ "tree", required_argument, 0, 't' },
{ "leave-stopped", no_argument, 0, 's' },
{ "leave-running", no_argument, 0, 'R' },
BOOL_OPT("resume-on-error", &opts.resume_on_dump_error),
BOOL_OPT("restore-detached", &opts.restore_detach),
BOOL_OPT("restore-sibling", &opts.restore_sibling),
BOOL_OPT("daemon", &opts.restore_detach),
Expand Down
6 changes: 4 additions & 2 deletions criu/cr-dump.c
Original file line number Diff line number Diff line change
Expand Up @@ -2065,7 +2065,9 @@ static int cr_dump_finish(int ret)
* consistency of the FS and other resources, we simply
* start rollback procedure and cleanup everything.
*/
if (ret || post_dump_ret || opts.final_state == TASK_ALIVE) {
if (opts.resume_on_dump_error && (ret || post_dump_ret))
opts.final_state = TASK_ALIVE;
if (opts.final_state == TASK_ALIVE) {
unsuspend_lsm();
network_unlock();
delete_link_remaps();
Expand All @@ -2077,7 +2079,7 @@ static int cr_dump_finish(int ret)

if (arch_set_thread_regs(root_item, true) < 0)
return -1;
pstree_switch_state(root_item, (ret || post_dump_ret) ? TASK_ALIVE : opts.final_state);
pstree_switch_state(root_item, opts.final_state);
timing_stop(TIME_FROZEN);
free_pstree(root_item);
seccomp_free_entries();
Expand Down
3 changes: 3 additions & 0 deletions criu/cr-service.c
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,9 @@ static int setup_opts_from_req(int sk, CriuOpts *req)
if (req->has_leave_stopped && req->leave_stopped)
opts.final_state = TASK_STOPPED;

if (req->has_resume_on_dump_error)
opts.resume_on_dump_error = req->resume_on_dump_error;

if (!req->has_pid) {
req->has_pid = true;
req->pid = ids.pid;
Expand Down
2 changes: 2 additions & 0 deletions criu/crtools.c
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,8 @@ int main(int argc, char *argv[], char *envp[])
" -d|--restore-detached detach after restore\n"
" -S|--restore-sibling restore root task as sibling\n"
" -s|--leave-stopped leave tasks in stopped state after checkpoint\n"
" --no-resume-on-error\n"
" don't resume tasks on dump failure if they were stopped\n"
" -R|--leave-running leave tasks in running state after checkpoint\n"
" -D|--images-dir DIR directory for image files\n"
" --pidfile FILE write root task, service or page-server pid to FILE\n"
Expand Down
1 change: 1 addition & 0 deletions criu/include/cr_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ struct cr_options {
bool daemon_mode;
};
int restore_sibling;
int resume_on_dump_error;
bool ext_unix_sk;
int shell_job;
int handle_file_locks;
Expand Down
1 change: 1 addition & 0 deletions images/rpc.proto
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ message criu_opts {
optional bool leave_stopped = 69;
optional bool display_stats = 70;
optional bool log_to_stderr = 71;
optional bool resume_on_dump_error = 72 [default = true];
/* optional bool check_mounts = 128; */
}

Expand Down

0 comments on commit a48a801

Please sign in to comment.