Skip to content

Commit ee415d9

Browse files
authored
Merge pull request #349 from dwnusbaum/infinite-loop-memory-leak-2
Avoid infinite loops due to corrupted flow graphs in some cases and improve resumption error handling
2 parents c211223 + c406ccc commit ee415d9

File tree

21 files changed

+322
-9
lines changed

21 files changed

+322
-9
lines changed

src/main/java/org/jenkinsci/plugins/workflow/flow/FlowExecutionList.java

+12-7
Original file line numberDiff line numberDiff line change
@@ -368,14 +368,19 @@ private static final class ParallelResumer {
368368
LOGGER.log(Level.WARNING, "Could not look up FlowNode for " + se + " so it will not be resumed", x);
369369
}
370370
}
371-
for (FlowNode n : nodes.keySet()) {
372-
LinearBlockHoppingScanner scanner = new LinearBlockHoppingScanner();
373-
scanner.setup(n);
374-
for (FlowNode parent : scanner) {
375-
if (parent != n && nodes.containsKey(parent)) {
376-
enclosing.put(n, parent);
377-
break;
371+
for (Map.Entry<FlowNode, StepExecution> entry : nodes.entrySet()) {
372+
FlowNode n = entry.getKey();
373+
try {
374+
LinearBlockHoppingScanner scanner = new LinearBlockHoppingScanner();
375+
scanner.setup(n);
376+
for (FlowNode parent : scanner) {
377+
if (parent != n && nodes.containsKey(parent)) {
378+
enclosing.put(n, parent);
379+
break;
380+
}
378381
}
382+
} catch (Exception x) {
383+
LOGGER.log(Level.WARNING, x, () -> "Unable to compute enclosing blocks for " + n + ", so " + entry.getValue() + " might not resume successfully");
379384
}
380385
}
381386
}

src/main/java/org/jenkinsci/plugins/workflow/graph/StandardGraphLookupView.java

+6-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@
1111
import java.io.IOException;
1212
import java.util.ArrayList;
1313
import java.util.Collections;
14+
import java.util.HashSet;
1415
import java.util.List;
16+
import java.util.Set;
1517
import java.util.concurrent.ConcurrentHashMap;
1618

1719
/**
@@ -119,8 +121,11 @@ BlockEndNode bruteForceScanForEnd(@NonNull BlockStartNode start) {
119121
/** Do a brute-force scan for the enclosing blocks **/
120122
BlockStartNode bruteForceScanForEnclosingBlock(@NonNull final FlowNode node) {
121123
FlowNode current = node;
122-
124+
Set<String> visited = new HashSet<>();
123125
while (!(current instanceof FlowStartNode)) { // Hunt back for enclosing blocks, a potentially expensive operation
126+
if (!visited.add(current.getId())) {
127+
throw new IllegalStateException("Cycle in flow graph for " + node.getExecution() + " involving " + current);
128+
}
124129
if (current instanceof BlockEndNode) {
125130
// Hop over the block to the start
126131
BlockStartNode start = ((BlockEndNode) current).getStartNode();

src/main/java/org/jenkinsci/plugins/workflow/graphanalysis/LinearBlockHoppingScanner.java

+6-1
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,10 @@
3232
import edu.umd.cs.findbugs.annotations.NonNull;
3333
import net.jcip.annotations.NotThreadSafe;
3434
import java.util.Collection;
35+
import java.util.HashSet;
3536
import java.util.Iterator;
3637
import java.util.List;
38+
import java.util.Set;
3739
import java.util.logging.Level;
3840
import java.util.logging.Logger;
3941

@@ -85,9 +87,12 @@ protected void setHeads(@NonNull Collection<FlowNode> heads) {
8587
@CheckForNull
8688
protected FlowNode jumpBlockScan(@CheckForNull FlowNode node, @NonNull Collection<FlowNode> blacklistNodes) {
8789
FlowNode candidate = node;
88-
90+
Set<String> visited = new HashSet<>();
8991
// Find the first candidate node preceding a block... and filtering by blacklist
9092
while (candidate instanceof BlockEndNode) {
93+
if (!visited.add(candidate.getId())) {
94+
throw new IllegalStateException("Cycle in flow graph for " + candidate.getExecution() + " involving " + candidate);
95+
}
9196
candidate = ((BlockEndNode) candidate).getStartNode();
9297
if (blacklistNodes.contains(candidate)) {
9398
return null;

src/test/java/org/jenkinsci/plugins/workflow/flow/FlowExecutionListTest.java

+33
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,14 @@
4141
import java.io.Serializable;
4242
import java.lang.ref.WeakReference;
4343
import java.util.Collections;
44+
import java.util.Objects;
4445
import java.util.HashMap;
4546
import java.util.Map;
4647
import java.util.Set;
4748
import java.util.concurrent.TimeUnit;
4849
import java.util.logging.Level;
50+
import java.util.logging.LogRecord;
51+
import java.util.stream.Collectors;
4952
import jenkins.model.Jenkins;
5053
import org.jenkinsci.plugins.workflow.cps.CpsFlowDefinition;
5154
import org.jenkinsci.plugins.workflow.job.WorkflowJob;
@@ -65,6 +68,7 @@
6568
import org.jvnet.hudson.test.JenkinsSessionRule;
6669
import org.jvnet.hudson.test.MemoryAssert;
6770
import org.jvnet.hudson.test.TestExtension;
71+
import org.jvnet.hudson.test.recipes.LocalData;
6872
import org.kohsuke.stapler.DataBoundConstructor;
6973

7074
public class FlowExecutionListTest {
@@ -164,6 +168,35 @@ public class FlowExecutionListTest {
164168
});
165169
}
166170

171+
@LocalData
172+
@Test public void resumeStepExecutionsWithCorruptFlowGraphWithCycle() throws Throwable {
173+
// LocalData created using the following snippet while the build was waiting in the _second_ sleep, except
174+
// for build.xml, which was captured during the sleep step. The StepEndNode for the stage was then adjusted to
175+
// have its startId point to the timeout step's StepStartNode, creating a loop.
176+
/*
177+
sessions.then(r -> {
178+
var stuck = r.createProject(WorkflowJob.class);
179+
stuck.setDefinition(new CpsFlowDefinition("stage('stage') { sleep 30 }; timeout(time: 10) { sleep 30 }", true));
180+
var b = stuck.scheduleBuild2(0).waitForStart();
181+
System.out.println(b.getRootDir());
182+
r.waitForCompletion(b);
183+
});
184+
*/
185+
logging.capture(50);
186+
sessions.then(r -> {
187+
var p = r.jenkins.getItemByFullName("test0", WorkflowJob.class);
188+
var b = p.getBuildByNumber(1);
189+
r.waitForCompletion(b);
190+
assertThat(logging.getMessages(), hasItem(containsString("Unable to compute enclosing blocks")));
191+
var loggedExceptions = logging.getRecords().stream()
192+
.map(LogRecord::getThrown)
193+
.filter(Objects::nonNull)
194+
.map(Throwable::toString)
195+
.collect(Collectors.toList());
196+
assertThat(loggedExceptions, hasItem(containsString("Cycle in flow graph")));
197+
});
198+
}
199+
167200
@Test public void stepExecutionIteratorDoesNotLeakBuildsWhenOneIsStuck() throws Throwable {
168201
sessions.then(r -> {
169202
var notStuck = r.createProject(WorkflowJob.class, "not-stuck");
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
<?xml version='1.1' encoding='UTF-8'?>
2+
<flow-build plugin="[email protected]_ec82f">
3+
<queueId>1</queueId>
4+
<timestamp>1722979974600</timestamp>
5+
<startTime>1722979974615</startTime>
6+
<duration>0</duration>
7+
<charset>UTF-8</charset>
8+
<keepLog>false</keepLog>
9+
<execution class="org.jenkinsci.plugins.workflow.cps.CpsFlowExecution">
10+
<result>SUCCESS</result>
11+
<script>stage(&apos;stage&apos;) { sleep 30 }; timeout(time: 10) { sleep 30 }</script>
12+
<loadedScripts class="linked-hash-map"/>
13+
<durabilityHint>MAX_SURVIVABILITY</durabilityHint>
14+
<timings class="map">
15+
<entry>
16+
<string>flowNode</string>
17+
<long>101709541</long>
18+
</entry>
19+
<entry>
20+
<string>classLoad</string>
21+
<long>124446251</long>
22+
</entry>
23+
<entry>
24+
<string>run</string>
25+
<long>200459289</long>
26+
</entry>
27+
<entry>
28+
<string>parse</string>
29+
<long>166818958</long>
30+
</entry>
31+
<entry>
32+
<string>saveProgram</string>
33+
<long>57936125</long>
34+
</entry>
35+
</timings>
36+
<internalCalls class="sorted-set"/>
37+
<sandbox>true</sandbox>
38+
<iota>5</iota>
39+
<head>1:5</head>
40+
<start>2</start>
41+
<done>false</done>
42+
<resumeBlocked>false</resumeBlocked>
43+
</execution>
44+
<completed>false</completed>
45+
<checkouts class="hudson.util.PersistedList"/>
46+
</flow-build>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
Started
2+
ha:////4LSn6sjy3SQJvLr4M0Jcw5zYiu9jzzxAhCQhf5ciKGcGAAAAoh+LCAAAAAAAAP9tjTEOwjAQBM8BClpKHuFItIiK1krDC0x8GCfWnbEdkooX8TX+gCESFVvtrLSa5wtWKcKBo5UdUu8otU4GP9jS5Mixv3geZcdn2TIl9igbHBs2eJyx4YwwR1SwULBGaj0nRzbDRnX6rmuvydanHMu2V1A5c4MHCFXMWcf8hSnC9jqYxPTz/BXAFEIGsfuclm8zQVqFvQAAAA==[Pipeline] Start of Pipeline
3+
ha:////4FLPcBhXXN+T+Uy5Bq+9NjiuG45smS/CK+MQ8sUKcTBsAAAApR+LCAAAAAAAAP9tjTEOwjAUQ3+KOrAycohUghExsUZZOEFIQkgb/d8mKe3EibgadyBQiQlLlmxL1nu+oE4RjhQdby12HpP2vA+jK4lPFLtroIm3dOGaMFGwXNpJkrGnpUrKFhaxClYC1hZ1oOTRZdiIVt1VExS65pxj2Q4CKm8GeAAThZxVzN8yR9jeRpMIf5y/AJj7DGxXvP/86jduZBmjwAAAAA==[Pipeline] stage
4+
ha:////4MPOLP4Go7JuW3NkAKhksIfyE+NroTrcISNM8xfRFLQ8AAAApR+LCAAAAAAAAP9tjTEOwjAUQ3+KOrAycoh0gA0xsUZZOEFIQkgb/d8mKe3EibgadyBQiQlLlmxL1nu+oE4RjhQdby12HpP2vA+jK4lPFLtroIm3dOGaMFGwXNpJkrGnpUrKFhaxClYC1hZ1oOTRZdiIVt1VExS65pxj2Q4CKm8GeAAThZxVzN8yR9jeRpMIf5y/AJj7DGxXvP/86jfoP95RwAAAAA==[Pipeline] { (stage)
5+
ha:////4E53KhegWm+s/q0TJkIC5MI9kTq62Eqnzz2Qdi1URRTJAAAAoh+LCAAAAAAAAP9tjTEOAiEURD9rLGwtPQTbaGWsbAmNJ0AWEZb8zwLrbuWJvJp3kLiJlZNMMm+a93rDOic4UbLcG+wdZu14DKOti0+U+lugiXu6ck2YKRguzSSpM+cFJRUDS1gDKwEbgzpQdmgLbIVXD9UGhba9lFS/o4DGdQM8gYlqLiqVL8wJdvexy4Q/z18BzLEA29ce4gdpL1fxvAAAAA==[Pipeline] sleep
6+
Sleeping for 30 sec
7+
ha:////4G8hLCAAqKEvMe92YhTNPJa4MSOZpWK2lhgTDgEHbCXUAAAAoh+LCAAAAAAAAP9tjTEOAiEURD9rLGwtPQTbGBtjZUtoPAGyiLDkfxZYdytP5NW8g8RNrJxkknnTvNcb1jnBiZLl3mDvMGvHYxhtXXyi1N8CTdzTlWvCTMFwaSZJnTkvKKkYWMIaWAnYGNSBskNbYCu8eqg2KLTtpaT6HQU0rhvgCUxUc1GpfGFOsLuPXSb8ef4KYI6xADvU7j8OXFZ7vAAAAA==[Pipeline] }
8+
ha:////4AnScT3OQumBbV+luAyxvhEcCl/8MozDCcq/aC6iNLpjAAAAox+LCAAAAAAAAP9tjTEOAiEURD9rLGwtPQRbWFgYK1tC4wmQRYQl/7PAult5Iq/mHSRuYuUkk8yb5r3esM4JTpQs9wZ7h1k7HsNo6+ITpf4WaOKerlwTZgqGSzNJ6sx5QUnFwBLWwErAxqAOlB3aAlvh1UO1QaFtLyXV7yigcd0AT2CimotK5Qtzgt197DLhz/NXAHOMBdihdv8BHeBS2LwAAAA=[Pipeline] // stage
9+
ha:////4PTj6M4JscP6Gdk49EfTAaLMCwLZYd9IOq+brFvOiJPAAAAAph+LCAAAAAAAAP9tjUEKwjAURH8qXbh16SFScCWIK7chG08QkxjThv/bJLVdeSKv5h2MFlw5MDAzMLznC+oU4UjR8dZi5zFpz/swupL4RLG7Bpp4SxeuCRMFy6WdJBl7WqqkbGERq2AlYG1RB0oeXYaNaNVdNUGha845lu0goPJmgAcwUchZxfwtc4TtbTSJ8Mf5C4C5z8B2xfvPr34DrZTeycAAAAA=[Pipeline] timeout
10+
Timeout set to expire in 10 min
11+
ha:////4M9FYx/jFzgoF1Ji4m6uzCtxEvJBQzBzYoKBBTbKepUTAAAApR+LCAAAAAAAAP9tjTEOwjAUQ3+KOrAycoh0BSEm1igLJwhJCGmj/9skpZ04EVfjDgQqMWHJkm3Jes8X1CnCkaLjrcXOY9Ke92F0JfGJYncNNPGWLlwTJgqWSztJMva0VEnZwiJWwUrA2qIOlDy6DBvRqrtqgkLXnHMs20FA5c0AD2CikLOK+VvmCNvbaBLhj/MXAHOfge2K959f/QbB16AVwAAAAA==[Pipeline] {
12+
ha:////4O/MG/IybiYM4oG30m877qNjUwTyRLwWY87qTVAOsZwoAAAAoh+LCAAAAAAAAP9tjTEOwjAQBDdBFLSUPMKBEiEqWisNLzCJMU6su2BfSCpexNf4AxGRqNhqZ5p5vbFMEUeOTjWWWk+p8qoLvZueGji218CDaviiKqbEwarSDiXX9jRjyWIxL8ux0FhZqgInT06w1o15mCIYcsVZ4uQOGrmv73gi01NZTJQvjBGbW18npl/nbwBjJ8j2gny37T6VOYoyvQAAAA==[Pipeline] sleep
13+
Sleeping for 30 sec
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
1232 5
2+
1252
3+
2157 8
4+
2189
5+
2789 10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
<?xml version='1.1' encoding='UTF-8'?>
2+
<Tag plugin="[email protected]_ed8a_573">
3+
<node class="cps.n.StepAtomNode" plugin="[email protected]_2c931e7935">
4+
<parentIds>
5+
<string>9</string>
6+
</parentIds>
7+
<id>10</id>
8+
<descriptorId>org.jenkinsci.plugins.workflow.steps.SleepStep</descriptorId>
9+
</node>
10+
<actions>
11+
<cps.a.ArgumentsActionImpl plugin="[email protected]_2c931e7935">
12+
<arguments>
13+
<entry>
14+
<string>time</string>
15+
<long>30</long>
16+
</entry>
17+
</arguments>
18+
<sensitiveVariables/>
19+
<isUnmodifiedBySanitization>true</isUnmodifiedBySanitization>
20+
</cps.a.ArgumentsActionImpl>
21+
<wf.a.TimingAction>
22+
<startTime>1722980005296</startTime>
23+
</wf.a.TimingAction>
24+
<s.a.LogStorageAction/>
25+
</actions>
26+
</Tag>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
<?xml version='1.1' encoding='UTF-8'?>
2+
<Tag plugin="[email protected]_ed8a_573">
3+
<node class="org.jenkinsci.plugins.workflow.graph.FlowStartNode">
4+
<parentIds/>
5+
<id>2</id>
6+
</node>
7+
<actions>
8+
<wf.a.TimingAction>
9+
<startTime>1722979974868</startTime>
10+
</wf.a.TimingAction>
11+
</actions>
12+
</Tag>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
<?xml version='1.1' encoding='UTF-8'?>
2+
<Tag plugin="[email protected]_ed8a_573">
3+
<node class="cps.n.StepStartNode" plugin="[email protected]_2c931e7935">
4+
<parentIds>
5+
<string>2</string>
6+
</parentIds>
7+
<id>3</id>
8+
<descriptorId>org.jenkinsci.plugins.workflow.support.steps.StageStep</descriptorId>
9+
</node>
10+
<actions>
11+
<s.a.LogStorageAction/>
12+
<cps.a.ArgumentsActionImpl plugin="[email protected]_2c931e7935">
13+
<arguments>
14+
<entry>
15+
<string>name</string>
16+
<string>stage</string>
17+
</entry>
18+
</arguments>
19+
<sensitiveVariables/>
20+
<isUnmodifiedBySanitization>true</isUnmodifiedBySanitization>
21+
</cps.a.ArgumentsActionImpl>
22+
<wf.a.TimingAction>
23+
<startTime>1722979974972</startTime>
24+
</wf.a.TimingAction>
25+
</actions>
26+
</Tag>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
<?xml version='1.1' encoding='UTF-8'?>
2+
<Tag plugin="[email protected]_ed8a_573">
3+
<node class="cps.n.StepStartNode" plugin="[email protected]_2c931e7935">
4+
<parentIds>
5+
<string>3</string>
6+
</parentIds>
7+
<id>4</id>
8+
<descriptorId>org.jenkinsci.plugins.workflow.support.steps.StageStep</descriptorId>
9+
</node>
10+
<actions>
11+
<wf.a.BodyInvocationAction/>
12+
<wf.a.LabelAction>
13+
<displayName>stage</displayName>
14+
</wf.a.LabelAction>
15+
<wf.a.TimingAction>
16+
<startTime>1722979974988</startTime>
17+
</wf.a.TimingAction>
18+
</actions>
19+
</Tag>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
<?xml version='1.1' encoding='UTF-8'?>
2+
<Tag plugin="[email protected]_ed8a_573">
3+
<node class="cps.n.StepAtomNode" plugin="[email protected]_2c931e7935">
4+
<parentIds>
5+
<string>4</string>
6+
</parentIds>
7+
<id>5</id>
8+
<descriptorId>org.jenkinsci.plugins.workflow.steps.SleepStep</descriptorId>
9+
</node>
10+
<actions>
11+
<cps.a.ArgumentsActionImpl plugin="[email protected]_2c931e7935">
12+
<arguments>
13+
<entry>
14+
<string>time</string>
15+
<long>30</long>
16+
</entry>
17+
</arguments>
18+
<sensitiveVariables/>
19+
<isUnmodifiedBySanitization>true</isUnmodifiedBySanitization>
20+
</cps.a.ArgumentsActionImpl>
21+
<wf.a.TimingAction>
22+
<startTime>1722979975077</startTime>
23+
</wf.a.TimingAction>
24+
<s.a.LogStorageAction/>
25+
</actions>
26+
</Tag>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
<?xml version='1.1' encoding='UTF-8'?>
2+
<Tag plugin="[email protected]_ed8a_573">
3+
<node class="cps.n.StepEndNode" plugin="[email protected]_2c931e7935">
4+
<parentIds>
5+
<string>5</string>
6+
</parentIds>
7+
<id>6</id>
8+
<startId>4</startId>
9+
</node>
10+
<actions>
11+
<wf.a.BodyInvocationAction/>
12+
<wf.a.TimingAction>
13+
<startTime>1722980005117</startTime>
14+
</wf.a.TimingAction>
15+
</actions>
16+
</Tag>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
<?xml version='1.1' encoding='UTF-8'?>
2+
<Tag plugin="[email protected]_ed8a_573">
3+
<node class="cps.n.StepEndNode" plugin="[email protected]_2c931e7935">
4+
<parentIds>
5+
<string>6</string>
6+
</parentIds>
7+
<id>7</id>
8+
<startId>8</startId> <!-- Manually edited to simulate corruption observed in the real world. -->
9+
</node>
10+
<actions>
11+
<wf.a.TimingAction>
12+
<startTime>1722980005177</startTime>
13+
</wf.a.TimingAction>
14+
</actions>
15+
</Tag>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
<?xml version='1.1' encoding='UTF-8'?>
2+
<Tag plugin="[email protected]_ed8a_573">
3+
<node class="cps.n.StepStartNode" plugin="[email protected]_2c931e7935">
4+
<parentIds>
5+
<string>7</string>
6+
</parentIds>
7+
<id>8</id>
8+
<descriptorId>org.jenkinsci.plugins.workflow.steps.TimeoutStep</descriptorId>
9+
</node>
10+
<actions>
11+
<s.a.LogStorageAction/>
12+
<cps.a.ArgumentsActionImpl plugin="[email protected]_2c931e7935">
13+
<arguments>
14+
<entry>
15+
<string>time</string>
16+
<int>10</int>
17+
</entry>
18+
</arguments>
19+
<sensitiveVariables/>
20+
<isUnmodifiedBySanitization>true</isUnmodifiedBySanitization>
21+
</cps.a.ArgumentsActionImpl>
22+
<wf.a.TimingAction>
23+
<startTime>1722980005227</startTime>
24+
</wf.a.TimingAction>
25+
</actions>
26+
</Tag>

0 commit comments

Comments
 (0)