Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix race conditions when handling barriers #5510

Merged
merged 2 commits into from
Mar 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 22 additions & 29 deletions lib/OpenQA/Resource/Locks.pm
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@
# SPDX-License-Identifier: GPL-2.0-or-later

package OpenQA::Resource::Locks;

use strict;
use warnings;
use Mojo::Base -strict, -signatures;

use OpenQA::Jobs::Constants;
use OpenQA::Schema;
Expand All @@ -19,7 +17,7 @@ my %final_states = map { $_ => 1 } OpenQA::Jobs::Constants::NOT_OK_RESULTS();
# Sometimes it is useful to let the parent wait for child. The child job
# can be however killed at any time, while the parent will be still running.
# So we have to specify, which child job is supposed to create the lock
# and watch it's state.
# and watch its state.
#
sub _get_lock {
my ($name, $jobid, $where) = @_;
Expand All @@ -41,14 +39,14 @@ sub _get_lock {
else {
push @maybeowners, map { $_->id } ($job, $job->parents->all);
}
return $schema->resultset('JobLocks')->search({name => $name, owner => {-in => \@maybeowners}})->single;
return $schema->resultset('JobLocks')->search({name => $name, owner => {-in => \@maybeowners}});
}

# returns -1 on unrecoverable error, 1 on have lock, 0 on try later (lock unavailable)
sub lock {
my ($name, $jobid, $where) = @_;

my $lock = _get_lock($name, $jobid, $where);
my $lock = _get_lock($name, $jobid, $where)->single;

if (!$lock and $where =~ /^\d+$/) {
my $schema = OpenQA::Schema->singleton;
Expand All @@ -72,7 +70,7 @@ sub lock {

sub unlock {
my ($name, $jobid, $where) = @_;
my $lock = _get_lock($name, $jobid, $where // 'all');
my $lock = _get_lock($name, $jobid, $where // 'all')->single;
return 0 unless $lock;
# return if not locked
return 1 unless $lock->locked_by;
Expand All @@ -84,7 +82,7 @@ sub unlock {

sub create {
my ($name, $jobid) = @_;
my $lock = _get_lock($name, $jobid, 'all');
my $lock = _get_lock($name, $jobid, 'all')->single;
# nothing if lock already exist
return 0 if $lock;
return 0 unless defined $name && defined $jobid;
Expand All @@ -99,23 +97,21 @@ sub create {
## Barriers
# barriers are created with number of expected jobs. Then wait call waits until the expected number of jobs is waiting

sub barrier_create {
my ($name, $jobid, $expected_jobs) = @_;
sub barrier_create ($name, $jobid, $expected_jobs) {
return 0 unless $name && $jobid && $expected_jobs;
my $barrier = _get_lock($name, $jobid, 'all');
return 0 if $barrier;

my $schema = OpenQA::Schema->singleton;
$barrier = $schema->resultset('JobLocks')->create({name => $name, owner => $jobid, count => $expected_jobs});
return 0 unless $barrier;
return $barrier;
my $barriers = _get_lock($name, $jobid, 'all');
return 0 if $barriers && $barriers->single;
my $dbh = OpenQA::Schema->singleton->storage->dbh;
my $sth = $dbh->prepare('INSERT INTO job_locks (name, owner, count) VALUES (?, ?, ?) ON CONFLICT DO NOTHING');
eval { $sth->execute($name, $jobid, $expected_jobs) };
die "Unable to create barrier for job $jobid with name '$name': $@" if $@;
return $sth->rows > 0;
}

sub barrier_wait {
my ($name, $jobid, $where, $check_dead_job) = @_;

sub barrier_wait ($name, $jobid, $where, $check_dead_job) {
return -1 unless $name && $jobid;
return -1 unless my $barrier = _get_lock($name, $jobid, $where);
return -1 unless my $barriers = _get_lock($name, $jobid, $where);
return -1 unless my $barrier = $barriers->single;

my $jobschema = OpenQA::Schema->singleton->resultset('Jobs');
my @jobs = split ',', $barrier->locked_by // '';
Expand All @@ -125,7 +121,7 @@ sub barrier_wait {
my @results = map { $jobschema->find($_)->result } $jobid, @jobs, @related_ids;
for my $result (@results) {
next unless $final_states{$result};
$barrier->delete;
$barriers->delete;
return -1;
}
}
Expand All @@ -136,18 +132,15 @@ sub barrier_wait {
}

push @jobs, $jobid;
$barrier->update({locked_by => join(',', @jobs)});

return -1 unless $barriers->update({locked_by => join(',', @jobs)}) > 0;
return 1 if @jobs == $barrier->count;
return 0;
}

sub barrier_destroy {
my ($name, $jobid, $where) = @_;
sub barrier_destroy ($name, $jobid, $where) {
return 0 unless $name && $jobid;
my $barrier = _get_lock($name, $jobid, $where);
return 0 unless $barrier;
return $barrier->delete;
return 0 unless my $barriers = _get_lock($name, $jobid, $where);
return $barriers->delete > 0;
}

1;
8 changes: 4 additions & 4 deletions lib/OpenQA/WebAPI/Controller/API/V1/Locks.pm
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ sub mutex_create {
=item barrier_wait()

Blocks execution of the calling job until the method is called by all tasks
using the barrier referenced by "name". Returns a 200 code on success, 410
on error on 409 when the referenced barrier does not exist.
using the barrier referenced by "name". Returns a 200 code on success, 409
on error and 410 when the referenced barrier does not exist.

=back

Expand Down Expand Up @@ -119,8 +119,8 @@ sub barrier_wait {

=item barrier_create()

Creates a new barrier resource for a group of tasks referenced by the argument "task."
Returns a code of 200 on success or of 409 on error.
Creates a new barrier resource for a group of tasks referenced by the argument "tasks".
Returns 200 on success and 409 on error.

=back

Expand Down
Loading