Skip to content

Commit

Permalink
Merge pull request #349 from dwnusbaum/infinite-loop-memory-leak-2
Browse files Browse the repository at this point in the history
Avoid infinite loops due to corrupted flow graphs in some cases and improve resumption error handling
  • Loading branch information
dwnusbaum authored Aug 13, 2024
2 parents c211223 + c406ccc commit ee415d9
Show file tree
Hide file tree
Showing 21 changed files with 322 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -368,14 +368,19 @@ private static final class ParallelResumer {
LOGGER.log(Level.WARNING, "Could not look up FlowNode for " + se + " so it will not be resumed", x);
}
}
for (FlowNode n : nodes.keySet()) {
LinearBlockHoppingScanner scanner = new LinearBlockHoppingScanner();
scanner.setup(n);
for (FlowNode parent : scanner) {
if (parent != n && nodes.containsKey(parent)) {
enclosing.put(n, parent);
break;
for (Map.Entry<FlowNode, StepExecution> entry : nodes.entrySet()) {
FlowNode n = entry.getKey();
try {
LinearBlockHoppingScanner scanner = new LinearBlockHoppingScanner();
scanner.setup(n);
for (FlowNode parent : scanner) {
if (parent != n && nodes.containsKey(parent)) {
enclosing.put(n, parent);
break;
}
}
} catch (Exception x) {
LOGGER.log(Level.WARNING, x, () -> "Unable to compute enclosing blocks for " + n + ", so " + entry.getValue() + " might not resume successfully");
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

/**
Expand Down Expand Up @@ -119,8 +121,11 @@ BlockEndNode bruteForceScanForEnd(@NonNull BlockStartNode start) {
/** Do a brute-force scan for the enclosing blocks **/
BlockStartNode bruteForceScanForEnclosingBlock(@NonNull final FlowNode node) {
FlowNode current = node;

Set<String> visited = new HashSet<>();
while (!(current instanceof FlowStartNode)) { // Hunt back for enclosing blocks, a potentially expensive operation
if (!visited.add(current.getId())) {

Check warning on line 126 in src/main/java/org/jenkinsci/plugins/workflow/graph/StandardGraphLookupView.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Partially covered line

Line 126 is only partially covered, one branch is missing
throw new IllegalStateException("Cycle in flow graph for " + node.getExecution() + " involving " + current);

Check warning on line 127 in src/main/java/org/jenkinsci/plugins/workflow/graph/StandardGraphLookupView.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Not covered line

Line 127 is not covered by tests
}
if (current instanceof BlockEndNode) {
// Hop over the block to the start
BlockStartNode start = ((BlockEndNode) current).getStartNode();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,10 @@
import edu.umd.cs.findbugs.annotations.NonNull;
import net.jcip.annotations.NotThreadSafe;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

Expand Down Expand Up @@ -85,9 +87,12 @@ protected void setHeads(@NonNull Collection<FlowNode> heads) {
@CheckForNull
protected FlowNode jumpBlockScan(@CheckForNull FlowNode node, @NonNull Collection<FlowNode> blacklistNodes) {
FlowNode candidate = node;

Set<String> visited = new HashSet<>();
// Find the first candidate node preceding a block... and filtering by blacklist
while (candidate instanceof BlockEndNode) {
if (!visited.add(candidate.getId())) {
throw new IllegalStateException("Cycle in flow graph for " + candidate.getExecution() + " involving " + candidate);
}
candidate = ((BlockEndNode) candidate).getStartNode();
if (blacklistNodes.contains(candidate)) {
return null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,14 @@
import java.io.Serializable;
import java.lang.ref.WeakReference;
import java.util.Collections;
import java.util.Objects;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;
import java.util.logging.LogRecord;
import java.util.stream.Collectors;
import jenkins.model.Jenkins;
import org.jenkinsci.plugins.workflow.cps.CpsFlowDefinition;
import org.jenkinsci.plugins.workflow.job.WorkflowJob;
Expand All @@ -65,6 +68,7 @@
import org.jvnet.hudson.test.JenkinsSessionRule;
import org.jvnet.hudson.test.MemoryAssert;
import org.jvnet.hudson.test.TestExtension;
import org.jvnet.hudson.test.recipes.LocalData;
import org.kohsuke.stapler.DataBoundConstructor;

public class FlowExecutionListTest {
Expand Down Expand Up @@ -164,6 +168,35 @@ public class FlowExecutionListTest {
});
}

@LocalData
@Test public void resumeStepExecutionsWithCorruptFlowGraphWithCycle() throws Throwable {
// LocalData created using the following snippet while the build was waiting in the _second_ sleep, except
// for build.xml, which was captured during the sleep step. The StepEndNode for the stage was then adjusted to
// have its startId point to the timeout step's StepStartNode, creating a loop.
/*
sessions.then(r -> {
var stuck = r.createProject(WorkflowJob.class);
stuck.setDefinition(new CpsFlowDefinition("stage('stage') { sleep 30 }; timeout(time: 10) { sleep 30 }", true));
var b = stuck.scheduleBuild2(0).waitForStart();
System.out.println(b.getRootDir());
r.waitForCompletion(b);
});
*/
logging.capture(50);
sessions.then(r -> {
var p = r.jenkins.getItemByFullName("test0", WorkflowJob.class);
var b = p.getBuildByNumber(1);
r.waitForCompletion(b);
assertThat(logging.getMessages(), hasItem(containsString("Unable to compute enclosing blocks")));
var loggedExceptions = logging.getRecords().stream()
.map(LogRecord::getThrown)
.filter(Objects::nonNull)
.map(Throwable::toString)
.collect(Collectors.toList());
assertThat(loggedExceptions, hasItem(containsString("Cycle in flow graph")));
});
}

@Test public void stepExecutionIteratorDoesNotLeakBuildsWhenOneIsStuck() throws Throwable {
sessions.then(r -> {
var notStuck = r.createProject(WorkflowJob.class, "not-stuck");
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
<?xml version='1.1' encoding='UTF-8'?>
<flow-build plugin="[email protected]_ec82f">
<queueId>1</queueId>
<timestamp>1722979974600</timestamp>
<startTime>1722979974615</startTime>
<duration>0</duration>
<charset>UTF-8</charset>
<keepLog>false</keepLog>
<execution class="org.jenkinsci.plugins.workflow.cps.CpsFlowExecution">
<result>SUCCESS</result>
<script>stage(&apos;stage&apos;) { sleep 30 }; timeout(time: 10) { sleep 30 }</script>
<loadedScripts class="linked-hash-map"/>
<durabilityHint>MAX_SURVIVABILITY</durabilityHint>
<timings class="map">
<entry>
<string>flowNode</string>
<long>101709541</long>
</entry>
<entry>
<string>classLoad</string>
<long>124446251</long>
</entry>
<entry>
<string>run</string>
<long>200459289</long>
</entry>
<entry>
<string>parse</string>
<long>166818958</long>
</entry>
<entry>
<string>saveProgram</string>
<long>57936125</long>
</entry>
</timings>
<internalCalls class="sorted-set"/>
<sandbox>true</sandbox>
<iota>5</iota>
<head>1:5</head>
<start>2</start>
<done>false</done>
<resumeBlocked>false</resumeBlocked>
</execution>
<completed>false</completed>
<checkouts class="hudson.util.PersistedList"/>
</flow-build>
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Started
ha:////4LSn6sjy3SQJvLr4M0Jcw5zYiu9jzzxAhCQhf5ciKGcGAAAAoh+LCAAAAAAAAP9tjTEOwjAQBM8BClpKHuFItIiK1krDC0x8GCfWnbEdkooX8TX+gCESFVvtrLSa5wtWKcKBo5UdUu8otU4GP9jS5Mixv3geZcdn2TIl9igbHBs2eJyx4YwwR1SwULBGaj0nRzbDRnX6rmuvydanHMu2V1A5c4MHCFXMWcf8hSnC9jqYxPTz/BXAFEIGsfuclm8zQVqFvQAAAA==[Pipeline] Start of Pipeline
ha:////4FLPcBhXXN+T+Uy5Bq+9NjiuG45smS/CK+MQ8sUKcTBsAAAApR+LCAAAAAAAAP9tjTEOwjAUQ3+KOrAycohUghExsUZZOEFIQkgb/d8mKe3EibgadyBQiQlLlmxL1nu+oE4RjhQdby12HpP2vA+jK4lPFLtroIm3dOGaMFGwXNpJkrGnpUrKFhaxClYC1hZ1oOTRZdiIVt1VExS65pxj2Q4CKm8GeAAThZxVzN8yR9jeRpMIf5y/AJj7DGxXvP/86jduZBmjwAAAAA==[Pipeline] stage
ha:////4MPOLP4Go7JuW3NkAKhksIfyE+NroTrcISNM8xfRFLQ8AAAApR+LCAAAAAAAAP9tjTEOwjAUQ3+KOrAycoh0gA0xsUZZOEFIQkgb/d8mKe3EibgadyBQiQlLlmxL1nu+oE4RjhQdby12HpP2vA+jK4lPFLtroIm3dOGaMFGwXNpJkrGnpUrKFhaxClYC1hZ1oOTRZdiIVt1VExS65pxj2Q4CKm8GeAAThZxVzN8yR9jeRpMIf5y/AJj7DGxXvP/86jfoP95RwAAAAA==[Pipeline] { (stage)
ha:////4E53KhegWm+s/q0TJkIC5MI9kTq62Eqnzz2Qdi1URRTJAAAAoh+LCAAAAAAAAP9tjTEOAiEURD9rLGwtPQTbaGWsbAmNJ0AWEZb8zwLrbuWJvJp3kLiJlZNMMm+a93rDOic4UbLcG+wdZu14DKOti0+U+lugiXu6ck2YKRguzSSpM+cFJRUDS1gDKwEbgzpQdmgLbIVXD9UGhba9lFS/o4DGdQM8gYlqLiqVL8wJdvexy4Q/z18BzLEA29ce4gdpL1fxvAAAAA==[Pipeline] sleep
Sleeping for 30 sec
ha:////4G8hLCAAqKEvMe92YhTNPJa4MSOZpWK2lhgTDgEHbCXUAAAAoh+LCAAAAAAAAP9tjTEOAiEURD9rLGwtPQTbGBtjZUtoPAGyiLDkfxZYdytP5NW8g8RNrJxkknnTvNcb1jnBiZLl3mDvMGvHYxhtXXyi1N8CTdzTlWvCTMFwaSZJnTkvKKkYWMIaWAnYGNSBskNbYCu8eqg2KLTtpaT6HQU0rhvgCUxUc1GpfGFOsLuPXSb8ef4KYI6xADvU7j8OXFZ7vAAAAA==[Pipeline] }
ha:////4AnScT3OQumBbV+luAyxvhEcCl/8MozDCcq/aC6iNLpjAAAAox+LCAAAAAAAAP9tjTEOAiEURD9rLGwtPQRbWFgYK1tC4wmQRYQl/7PAult5Iq/mHSRuYuUkk8yb5r3esM4JTpQs9wZ7h1k7HsNo6+ITpf4WaOKerlwTZgqGSzNJ6sx5QUnFwBLWwErAxqAOlB3aAlvh1UO1QaFtLyXV7yigcd0AT2CimotK5Qtzgt197DLhz/NXAHOMBdihdv8BHeBS2LwAAAA=[Pipeline] // stage
ha:////4PTj6M4JscP6Gdk49EfTAaLMCwLZYd9IOq+brFvOiJPAAAAAph+LCAAAAAAAAP9tjUEKwjAURH8qXbh16SFScCWIK7chG08QkxjThv/bJLVdeSKv5h2MFlw5MDAzMLznC+oU4UjR8dZi5zFpz/swupL4RLG7Bpp4SxeuCRMFy6WdJBl7WqqkbGERq2AlYG1RB0oeXYaNaNVdNUGha845lu0goPJmgAcwUchZxfwtc4TtbTSJ8Mf5C4C5z8B2xfvPr34DrZTeycAAAAA=[Pipeline] timeout
Timeout set to expire in 10 min
ha:////4M9FYx/jFzgoF1Ji4m6uzCtxEvJBQzBzYoKBBTbKepUTAAAApR+LCAAAAAAAAP9tjTEOwjAUQ3+KOrAycoh0BSEm1igLJwhJCGmj/9skpZ04EVfjDgQqMWHJkm3Jes8X1CnCkaLjrcXOY9Ke92F0JfGJYncNNPGWLlwTJgqWSztJMva0VEnZwiJWwUrA2qIOlDy6DBvRqrtqgkLXnHMs20FA5c0AD2CikLOK+VvmCNvbaBLhj/MXAHOfge2K959f/QbB16AVwAAAAA==[Pipeline] {
ha:////4O/MG/IybiYM4oG30m877qNjUwTyRLwWY87qTVAOsZwoAAAAoh+LCAAAAAAAAP9tjTEOwjAQBDdBFLSUPMKBEiEqWisNLzCJMU6su2BfSCpexNf4AxGRqNhqZ5p5vbFMEUeOTjWWWk+p8qoLvZueGji218CDaviiKqbEwarSDiXX9jRjyWIxL8ux0FhZqgInT06w1o15mCIYcsVZ4uQOGrmv73gi01NZTJQvjBGbW18npl/nbwBjJ8j2gny37T6VOYoyvQAAAA==[Pipeline] sleep
Sleeping for 30 sec
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
1232 5
1252
2157 8
2189
2789 10
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<?xml version='1.1' encoding='UTF-8'?>
<Tag plugin="[email protected]_ed8a_573">
<node class="cps.n.StepAtomNode" plugin="[email protected]_2c931e7935">
<parentIds>
<string>9</string>
</parentIds>
<id>10</id>
<descriptorId>org.jenkinsci.plugins.workflow.steps.SleepStep</descriptorId>
</node>
<actions>
<cps.a.ArgumentsActionImpl plugin="[email protected]_2c931e7935">
<arguments>
<entry>
<string>time</string>
<long>30</long>
</entry>
</arguments>
<sensitiveVariables/>
<isUnmodifiedBySanitization>true</isUnmodifiedBySanitization>
</cps.a.ArgumentsActionImpl>
<wf.a.TimingAction>
<startTime>1722980005296</startTime>
</wf.a.TimingAction>
<s.a.LogStorageAction/>
</actions>
</Tag>
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<?xml version='1.1' encoding='UTF-8'?>
<Tag plugin="[email protected]_ed8a_573">
<node class="org.jenkinsci.plugins.workflow.graph.FlowStartNode">
<parentIds/>
<id>2</id>
</node>
<actions>
<wf.a.TimingAction>
<startTime>1722979974868</startTime>
</wf.a.TimingAction>
</actions>
</Tag>
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<?xml version='1.1' encoding='UTF-8'?>
<Tag plugin="[email protected]_ed8a_573">
<node class="cps.n.StepStartNode" plugin="[email protected]_2c931e7935">
<parentIds>
<string>2</string>
</parentIds>
<id>3</id>
<descriptorId>org.jenkinsci.plugins.workflow.support.steps.StageStep</descriptorId>
</node>
<actions>
<s.a.LogStorageAction/>
<cps.a.ArgumentsActionImpl plugin="[email protected]_2c931e7935">
<arguments>
<entry>
<string>name</string>
<string>stage</string>
</entry>
</arguments>
<sensitiveVariables/>
<isUnmodifiedBySanitization>true</isUnmodifiedBySanitization>
</cps.a.ArgumentsActionImpl>
<wf.a.TimingAction>
<startTime>1722979974972</startTime>
</wf.a.TimingAction>
</actions>
</Tag>
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<?xml version='1.1' encoding='UTF-8'?>
<Tag plugin="[email protected]_ed8a_573">
<node class="cps.n.StepStartNode" plugin="[email protected]_2c931e7935">
<parentIds>
<string>3</string>
</parentIds>
<id>4</id>
<descriptorId>org.jenkinsci.plugins.workflow.support.steps.StageStep</descriptorId>
</node>
<actions>
<wf.a.BodyInvocationAction/>
<wf.a.LabelAction>
<displayName>stage</displayName>
</wf.a.LabelAction>
<wf.a.TimingAction>
<startTime>1722979974988</startTime>
</wf.a.TimingAction>
</actions>
</Tag>
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<?xml version='1.1' encoding='UTF-8'?>
<Tag plugin="[email protected]_ed8a_573">
<node class="cps.n.StepAtomNode" plugin="[email protected]_2c931e7935">
<parentIds>
<string>4</string>
</parentIds>
<id>5</id>
<descriptorId>org.jenkinsci.plugins.workflow.steps.SleepStep</descriptorId>
</node>
<actions>
<cps.a.ArgumentsActionImpl plugin="[email protected]_2c931e7935">
<arguments>
<entry>
<string>time</string>
<long>30</long>
</entry>
</arguments>
<sensitiveVariables/>
<isUnmodifiedBySanitization>true</isUnmodifiedBySanitization>
</cps.a.ArgumentsActionImpl>
<wf.a.TimingAction>
<startTime>1722979975077</startTime>
</wf.a.TimingAction>
<s.a.LogStorageAction/>
</actions>
</Tag>
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<?xml version='1.1' encoding='UTF-8'?>
<Tag plugin="[email protected]_ed8a_573">
<node class="cps.n.StepEndNode" plugin="[email protected]_2c931e7935">
<parentIds>
<string>5</string>
</parentIds>
<id>6</id>
<startId>4</startId>
</node>
<actions>
<wf.a.BodyInvocationAction/>
<wf.a.TimingAction>
<startTime>1722980005117</startTime>
</wf.a.TimingAction>
</actions>
</Tag>
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<?xml version='1.1' encoding='UTF-8'?>
<Tag plugin="[email protected]_ed8a_573">
<node class="cps.n.StepEndNode" plugin="[email protected]_2c931e7935">
<parentIds>
<string>6</string>
</parentIds>
<id>7</id>
<startId>8</startId> <!-- Manually edited to simulate corruption observed in the real world. -->
</node>
<actions>
<wf.a.TimingAction>
<startTime>1722980005177</startTime>
</wf.a.TimingAction>
</actions>
</Tag>
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<?xml version='1.1' encoding='UTF-8'?>
<Tag plugin="[email protected]_ed8a_573">
<node class="cps.n.StepStartNode" plugin="[email protected]_2c931e7935">
<parentIds>
<string>7</string>
</parentIds>
<id>8</id>
<descriptorId>org.jenkinsci.plugins.workflow.steps.TimeoutStep</descriptorId>
</node>
<actions>
<s.a.LogStorageAction/>
<cps.a.ArgumentsActionImpl plugin="[email protected]_2c931e7935">
<arguments>
<entry>
<string>time</string>
<int>10</int>
</entry>
</arguments>
<sensitiveVariables/>
<isUnmodifiedBySanitization>true</isUnmodifiedBySanitization>
</cps.a.ArgumentsActionImpl>
<wf.a.TimingAction>
<startTime>1722980005227</startTime>
</wf.a.TimingAction>
</actions>
</Tag>
Loading

0 comments on commit ee415d9

Please sign in to comment.