diff --git a/pom.xml b/pom.xml index b7b531db8..cc75b5b77 100644 --- a/pom.xml +++ b/pom.xml @@ -69,7 +69,7 @@ 2.17 2.0.8 1.24 - 2.33 + 2.37 @@ -141,7 +141,7 @@ org.jenkins-ci.plugins.workflow workflow-job - 2.17 + 2.18 test diff --git a/src/main/java/org/jenkinsci/plugins/workflow/cps/CpsFlowExecution.java b/src/main/java/org/jenkinsci/plugins/workflow/cps/CpsFlowExecution.java index eeb511658..b34366d14 100644 --- a/src/main/java/org/jenkinsci/plugins/workflow/cps/CpsFlowExecution.java +++ b/src/main/java/org/jenkinsci/plugins/workflow/cps/CpsFlowExecution.java @@ -134,6 +134,8 @@ import java.util.Set; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.TimeUnit; +import java.util.stream.Collector; +import java.util.stream.Collectors; import javax.annotation.CheckForNull; import javax.annotation.Nonnull; import javax.annotation.concurrent.GuardedBy; @@ -282,7 +284,8 @@ public class CpsFlowExecution extends FlowExecution implements BlockableResume { private transient List startNodesSerial; // used only between unmarshal and onLoad @GuardedBy("this") - private /* almost final*/ NavigableMap heads = new TreeMap(); + /* almost final*/ NavigableMap heads = new TreeMap(); // Non-private for unit tests + @SuppressFBWarnings({"IS_FIELD_NOT_GUARDED", "IS2_INCONSISTENT_SYNC"}) // irrelevant here private transient Map headsSerial; // used only between unmarshal and onLoad @@ -313,7 +316,7 @@ public class CpsFlowExecution extends FlowExecution implements BlockableResume { * {@link FlowExecution} gets loaded into memory for the build records that have been completed, * and for those we don't want to load the program state, so that check should be efficient. */ - private boolean done; + boolean done; // Only non-private for unit test use. /** * Groovy compiler with CPS+sandbox transformation correctly setup. @@ -598,14 +601,35 @@ int approximateNodeCount() { return iota.get(); } + /** For diagnostic purposes only, this logs current heads to assist with troubleshooting. */ + private synchronized String getHeadsAsString() { + NavigableMap myHeads = this.heads; + if (myHeads == null) { + return "null-heads"; + } else if (myHeads.size() == 0) { + return "empty-heads"; + } else { + return myHeads.entrySet().stream().map(h->h.getKey()+"::"+h.getValue()).collect(Collectors.joining(",")); + } + + } + /** Handle failures where we can't load heads. */ private void rebuildEmptyGraph() { synchronized (this) { // something went catastrophically wrong and there's no live head. fake one + LOGGER.log(Level.WARNING, "Failed to load pipeline heads, so faking some up for execution " + this.toString()); if (this.startNodes == null) { this.startNodes = new Stack(); } - this.heads.clear(); + + if (this.heads != null && this.heads.size() > 0) { + if (LOGGER.isLoggable(Level.INFO)) { + LOGGER.log(Level.INFO, "Resetting heads to rebuild the Pipeline structure, tossing existing heads: "+getHeadsAsString()); + } + this.heads.clear(); + } + this.startNodes.clear(); FlowHead head = new FlowHead(this); heads.put(head.getId(), head); @@ -637,6 +661,7 @@ protected synchronized void initializeStorage() throws IOException { h.setForDeserialize(storage.getNode(entry.getValue())); heads.put(h.getId(), h); } else { + LOGGER.log(Level.WARNING, "Tried to load head FlowNodes for execution "+this.owner+" but FlowNode was not found in storage for head id:FlowNodeId "+entry.getKey()+":"+entry.getValue()); storageErrors = true; break; } @@ -654,6 +679,7 @@ protected synchronized void initializeStorage() throws IOException { startNodes.add((BlockStartNode) storage.getNode(id)); } else { // TODO if possible, consider trying to close out unterminated blocks using heads, to keep existing graph history + LOGGER.log(Level.WARNING, "Tried to load startNode FlowNodes for execution "+this.owner+" but FlowNode was not found in storage for FlowNode Id "+id); storageErrors = true; break; } @@ -687,6 +713,7 @@ public boolean canResume() { } @Override + @SuppressFBWarnings(value = "RC_REF_COMPARISON_BAD_PRACTICE_BOOLEAN", justification = "We want to explicitly check for boolean not-null and true") public void onLoad(FlowExecutionOwner owner) throws IOException { this.owner = owner; try { @@ -696,13 +723,15 @@ public void onLoad(FlowExecutionOwner owner) throws IOException { if (canResume()) { loadProgramAsync(getProgramDataFile()); } else { - // TODO if possible, consider tyring to close out unterminated blocks to keep existing graph history + // TODO if possible, consider trying to close out unterminated blocks to keep existing graph history // That way we can visualize the graph in some error cases. LOGGER.log(Level.WARNING, "Pipeline state not properly persisted, cannot resume "+owner.getUrl()); throw new IOException("Cannot resume build -- was not cleanly saved when Jenkins shut down."); } + } else if (done && !super.isComplete()) { + LOGGER.log(Level.WARNING, "Completed flow without FlowEndNode: "+this+" heads:"+getHeadsAsString()); } - } catch (IOException e) { + } catch (Exception e) { // Multicatch ensures that failure to load does not nuke the master SettableFuture p = SettableFuture.create(); programPromise = p; loadProgramFailed(e, p); @@ -829,6 +858,11 @@ void croak(Throwable t) { setResult(Result.FAILURE); onProgramEnd(new Outcome(null, t)); cleanUpHeap(); + try { + saveOwner(); + } catch (Exception ex) { + LOGGER.log(Level.WARNING, "Failed to persist WorkflowRun after noting a serious failure for run: " + owner, ex); + } } /** @@ -1155,8 +1189,9 @@ public static void maybeAutoPersistNode(@Nonnull FlowNode node) { } @Override + @SuppressFBWarnings(value = "RC_REF_COMPARISON_BAD_PRACTICE_BOOLEAN", justification = "We want to explicitly check for boolean not-null and true") public boolean isComplete() { - return done || super.isComplete(); + return done || super.isComplete(); // Compare to Boolean.TRUE so null == false. } /** @@ -1170,12 +1205,24 @@ synchronized void onProgramEnd(Outcome outcome) { } // shrink everything into a single new head - done = true; - if (heads != null) { - FlowHead first = getFirstHead(); - first.setNewHead(head); - heads.clear(); - heads.put(first.getId(),first); + try { + if (heads != null) { + FlowHead first = getFirstHead(); + first.setNewHead(head); + done = Boolean.TRUE; // After setting the final head + heads.clear(); + heads.put(first.getId(), first); + + String tempIotaStr = Integer.toString(this.iota.get()); + FlowHead lastHead = heads.get(first.getId()); + if (lastHead == null || lastHead.get() == null || !(lastHead.get().getId().equals(tempIotaStr))) { + // Warning of problems with the final call to FlowHead.setNewHead + LOGGER.log(Level.WARNING, "Invalid final head for execution "+this.owner+" with head: "+lastHead); + } + } + } catch (Exception ex) { + done = Boolean.TRUE; + throw ex; } try { @@ -1183,7 +1230,6 @@ synchronized void onProgramEnd(Outcome outcome) { } catch (IOException ioe) { LOGGER.log(Level.WARNING, "Error flushing FlowNodeStorage to disk at end of run", ioe); } - } void cleanUpHeap() { @@ -1431,6 +1477,7 @@ public void pause(final boolean v) throws IOException { if (executable instanceof AccessControlled) { ((AccessControlled) executable).checkPermission(Item.CANCEL); } + done = Boolean.FALSE; Futures.addCallback(programPromise, new FutureCallback() { @Override public void onSuccess(CpsThreadGroup g) { if (v) { @@ -1523,6 +1570,7 @@ public void marshal(Object source, HierarchicalStreamWriter w, MarshallingContex for (BlockStartNode st : e.startNodes) { writeChild(w, context, "start", st.getId(), String.class); } + writeChild(w, context, "done", e.done, Boolean.class); } writeChild(w, context, "resumeBlocked", e.resumeBlocked, Boolean.class); @@ -1587,6 +1635,9 @@ public Object unmarshal(HierarchicalStreamReader reader, final UnmarshallingCont } else if (nodeName.equals("iota")) { Integer iota = readChild(reader, context, Integer.class, result); setField(result, "iota", new AtomicInteger(iota)); + } else if (nodeName.equals("done")) { + Boolean isDone = readChild(reader, context, Boolean.class, result); + setField(result, "done", isDone); } else if (nodeName.equals("start")) { String id = readChild(reader, context, String.class, result); result.startNodesSerial.add(id); @@ -1750,7 +1801,7 @@ public void autopersist(@Nonnull FlowNode n) throws IOException { /** Save the owner that holds this execution. */ void saveOwner() { try { - if (this.owner.getExecutable() instanceof Saveable) { + if (this.owner != null && this.owner.getExecutable() instanceof Saveable) { // Null-check covers some anomalous cases we've seen Saveable saveable = (Saveable)(this.owner.getExecutable()); saveable.save(); } @@ -1769,6 +1820,13 @@ private void checkpoint() { boolean persistOk = true; FlowNodeStorage storage = getStorage(); if (storage != null) { + try { // Node storage must be flushed first so program can be restored + storage.flush(); + } catch (IOException ioe) { + persistOk=false; + LOGGER.log(Level.WARNING, "Error persisting FlowNode storage before shutdown", ioe); + } + // Try to ensure we've saved the appropriate things -- the program is the last stumbling block. try { final SettableFuture myOutcome = SettableFuture.create(); @@ -1800,13 +1858,6 @@ public void onFailure(Throwable t) { persistOk = false; LOGGER.log(Level.FINE, "Error saving program, that should be handled elsewhere.", ex); } - - try { - storage.flush(); - } catch (IOException ioe) { - persistOk=false; - LOGGER.log(Level.WARNING, "Error persisting FlowNode storage before shutdown", ioe); - } persistedClean = persistOk; saveOwner(); } diff --git a/src/main/java/org/jenkinsci/plugins/workflow/cps/CpsStepContext.java b/src/main/java/org/jenkinsci/plugins/workflow/cps/CpsStepContext.java index d6e83fdc4..d1195c358 100644 --- a/src/main/java/org/jenkinsci/plugins/workflow/cps/CpsStepContext.java +++ b/src/main/java/org/jenkinsci/plugins/workflow/cps/CpsStepContext.java @@ -534,9 +534,12 @@ synchronized boolean switchToAsyncMode() { @Override public void onSuccess(CpsThreadGroup result) { try { // TODO keep track of whether the program was saved anyway after saveState was called but before now, and do not bother resaving it in that case - result.saveProgram(); + if (result.getExecution().getDurabilityHint().isPersistWithEveryStep()) { + result.getExecution().getStorage().flush(); + result.saveProgram(); + } f.set(null); - } catch (IOException x) { + } catch (Exception x) { f.setException(x); } } diff --git a/src/main/java/org/jenkinsci/plugins/workflow/cps/CpsThreadGroup.java b/src/main/java/org/jenkinsci/plugins/workflow/cps/CpsThreadGroup.java index 70277a071..fd0b8ae75 100644 --- a/src/main/java/org/jenkinsci/plugins/workflow/cps/CpsThreadGroup.java +++ b/src/main/java/org/jenkinsci/plugins/workflow/cps/CpsThreadGroup.java @@ -68,6 +68,7 @@ import static org.jenkinsci.plugins.workflow.cps.CpsFlowExecution.*; import static org.jenkinsci.plugins.workflow.cps.persistence.PersistenceContext.*; import org.jenkinsci.plugins.workflow.pickles.PickleFactory; +import org.jenkinsci.plugins.workflow.support.storage.FlowNodeStorage; /** * List of {@link CpsThread}s that form a single {@link CpsFlowExecution}. @@ -357,7 +358,6 @@ private boolean run() { } else { stillRunnable |= t.isRunnable(); } - changed = true; } } @@ -426,6 +426,16 @@ public CpsThreadDump getThreadDump() { void saveProgramIfPossible(boolean enteringQuietState) { if (this.getExecution() != null && (this.getExecution().getDurabilityHint().isPersistWithEveryStep() || enteringQuietState)) { + + try { // Program may depend on flownodes being saved, so save nodes + FlowNodeStorage storage = this.execution.getStorage(); + if (storage != null) { + storage.flush(); + } + } catch (IOException ioe) { + LOGGER.log(Level.WARNING, "Error persisting FlowNode storage before saving program", ioe); + } + try { saveProgram(); } catch (IOException x) { diff --git a/src/main/java/org/jenkinsci/plugins/workflow/cps/FlowHead.java b/src/main/java/org/jenkinsci/plugins/workflow/cps/FlowHead.java index bdf825770..b9f7f7e57 100644 --- a/src/main/java/org/jenkinsci/plugins/workflow/cps/FlowHead.java +++ b/src/main/java/org/jenkinsci/plugins/workflow/cps/FlowHead.java @@ -45,6 +45,8 @@ import org.jenkinsci.plugins.workflow.graph.FlowNode; import org.jenkinsci.plugins.workflow.graph.FlowStartNode; +import javax.annotation.Nonnull; + /** * Growing tip of the node graph. * @@ -114,29 +116,33 @@ void newStartNode(FlowStartNode n) throws IOException { execution.storage.storeNode(head, false); } - void setNewHead(FlowNode v) { + void setNewHead(@Nonnull FlowNode v) { + if (v == null) { + // Because Findbugs isn't 100% at catching cases where this can happen and we really need to fail hard-and-fast + throw new IllegalArgumentException("FlowHead.setNewHead called on FlowHead id="+this.id+" with a null FlowNode, execution="+this.execution); + } try { if (this.head != null) { CpsFlowExecution.maybeAutoPersistNode(head); } execution.storage.storeNode(v, true); v.addAction(new TimingAction()); - this.head = v; - CpsThreadGroup c = CpsThreadGroup.current(); - if (c !=null) { - // if the manipulation is from within the program executing thread, then - // defer the notification till we get to a safe point. - c.notifyNewHead(v); - } else { - // in recovering from error and such situation, we sometimes need to grow the graph - // without running the program. - // TODO can CpsThreadGroup.notifyNewHead be used instead to notify both kinds of listeners? - execution.notifyListeners(Collections.singletonList(v), true); - execution.notifyListeners(Collections.singletonList(v), false); - } } catch (IOException e) { LOGGER.log(Level.FINE, "Failed to record new head: " + v, e); } + this.head = v; + CpsThreadGroup c = CpsThreadGroup.current(); + if (c !=null) { + // if the manipulation is from within the program executing thread, then + // defer the notification till we get to a safe point. + c.notifyNewHead(v); + } else { + // in recovering from error and such situation, we sometimes need to grow the graph + // without running the program. + // TODO can CpsThreadGroup.notifyNewHead be used instead to notify both kinds of listeners? + execution.notifyListeners(Collections.singletonList(v), true); + execution.notifyListeners(Collections.singletonList(v), false); + } } FlowNode get() { @@ -181,4 +187,9 @@ private Object readResolve() { private static final Logger LOGGER = Logger.getLogger(FlowHead.class.getName()); private static final long serialVersionUID = 1L; + + @Override + public String toString() { + return id+":"+head; + } } diff --git a/src/main/java/org/jenkinsci/plugins/workflow/cps/SandboxContinuable.java b/src/main/java/org/jenkinsci/plugins/workflow/cps/SandboxContinuable.java index a6e42bba0..179b9d79e 100644 --- a/src/main/java/org/jenkinsci/plugins/workflow/cps/SandboxContinuable.java +++ b/src/main/java/org/jenkinsci/plugins/workflow/cps/SandboxContinuable.java @@ -2,6 +2,7 @@ import com.cloudbees.groovy.cps.Continuable; import com.cloudbees.groovy.cps.Outcome; +import groovy.lang.GroovyShell; import java.io.IOException; import java.util.List; @@ -43,9 +44,18 @@ class SandboxContinuable extends Continuable { public Outcome run0(final Outcome cn, final List categories) { try { CpsFlowExecution e = thread.group.getExecution(); - return GroovySandbox.runInSandbox(new Callable() { - @Override - public Outcome call() { + if (e == null) { + throw new IllegalStateException("JENKINS-50407: no loaded execution"); + } + GroovyShell shell = e.getShell(); + if (shell == null) { + throw new IllegalStateException("JENKINS-50407: no loaded shell in " + e); + } + GroovyShell trustedShell = e.getTrustedShell(); + if (trustedShell == null) { + throw new IllegalStateException("JENKINS-50407: no loaded trustedShell in " + e); + } + return GroovySandbox.runInSandbox(() -> { Outcome outcome = SandboxContinuable.super.run0(cn, categories); RejectedAccessException x = findRejectedAccessException(outcome.getAbnormal()); if (x != null) { @@ -58,10 +68,9 @@ public Outcome call() { } } return outcome; - } }, new GroovyClassLoaderWhitelist(CpsWhitelist.get(), - e.getTrustedShell().getClassLoader(), - e.getShell().getClassLoader())); + trustedShell.getClassLoader(), + shell.getClassLoader())); } catch (RuntimeException e) { throw e; } catch (Exception e) { diff --git a/src/test/java/org/jenkinsci/plugins/workflow/cps/FlowDurabilityTest.java b/src/test/java/org/jenkinsci/plugins/workflow/cps/FlowDurabilityTest.java index 6e6e59c05..dd5e95cd2 100644 --- a/src/test/java/org/jenkinsci/plugins/workflow/cps/FlowDurabilityTest.java +++ b/src/test/java/org/jenkinsci/plugins/workflow/cps/FlowDurabilityTest.java @@ -28,6 +28,7 @@ import org.jenkinsci.plugins.workflow.graphanalysis.NodeStepNamePredicate; import org.jenkinsci.plugins.workflow.job.WorkflowJob; import org.jenkinsci.plugins.workflow.job.WorkflowRun; +import org.jenkinsci.plugins.workflow.job.properties.DurabilityHintJobProperty; import org.jenkinsci.plugins.workflow.steps.StepDescriptor; import org.jenkinsci.plugins.workflow.support.storage.FlowNodeStorage; import org.jenkinsci.plugins.workflow.support.storage.BulkFlowNodeStorage; @@ -56,6 +57,7 @@ import java.nio.channels.FileChannel; import java.util.ArrayList; import java.util.List; +import java.util.Map; import java.util.Random; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; @@ -163,6 +165,8 @@ static WorkflowRun createAndRunBasicJob(Jenkins jenkins, String jobName, FlowDur WorkflowRun run = job.scheduleBuild2(0).getStartCondition().get(); SemaphoreStep.waitForStart("halt/"+semaphoreIndex, run); Assert.assertEquals(durabilityHint, run.getExecution().getDurabilityHint()); + Assert.assertFalse(run.getExecution().isComplete()); + Assert.assertFalse(((CpsFlowExecution)(run.getExecution())).done); if (durabilityHint.isPersistWithEveryStep()) { assertBaseStorageType(run.getExecution(), SimpleXStreamFlowNodeStorage.class); } else { @@ -188,6 +192,8 @@ static WorkflowRun createAndRunSleeperJob(Jenkins jenkins, String jobName, FlowD job.setDefinition(def); WorkflowRun run = job.scheduleBuild2(0).getStartCondition().get(); Thread.sleep(4000L); // Hacky but we just need to ensure this can start up + Assert.assertFalse(run.getExecution().isComplete()); + Assert.assertFalse(((CpsFlowExecution)(run.getExecution())).done); Assert.assertEquals(durabilityHint, run.getExecution().getDurabilityHint()); Assert.assertEquals("sleep", run.getExecution().getCurrentHeads().get(0).getDisplayFunctionName()); return run; @@ -333,6 +339,12 @@ static void verifyCompletedCleanly(Jenkins j, WorkflowRun run) throws Exception Assert.assertFalse("Should always be able to retrieve script", StringUtils.isEmpty(cpsFlow.getScript())); Assert.assertNull("We should have no Groovy shell left or that's a memory leak", cpsFlow.getShell()); Assert.assertNull("We should have no Groovy shell left or that's a memory leak", cpsFlow.getTrustedShell()); + Assert.assertTrue(cpsFlow.done); + assert cpsFlow.isComplete(); + assert cpsFlow.heads.size() == 1; + Map.Entry finalHead = cpsFlow.heads.entrySet().iterator().next(); + assert finalHead.getValue().get() instanceof FlowEndNode; + Assert.assertEquals(cpsFlow.storage.getNode(finalHead.getValue().get().getId()), finalHead.getValue().get()); } verifyExecutionRemoved(run); @@ -778,67 +790,90 @@ public void evaluate() throws Throwable { }); } - /** Test interrupting build by randomly dying at unpredictable times. */ + /** Launches the job used for fuzzing in {@link #fuzzTimingDurable()} and {@link #fuzzTimingNonDurable()} -- including the timeout. */ + private WorkflowRun runFuzzerJob(JenkinsRule jrule, String jobName, FlowDurabilityHint hint) throws Exception { + Jenkins jenkins = jrule.jenkins; + WorkflowJob job = jenkins.getItemByFullName(jobName, WorkflowJob.class); + if (job == null) { // Job may already have been created + job = jenkins.createProject(WorkflowJob.class, jobName); + job.addProperty(new DurabilityHintJobProperty(hint)); + job.setDefinition(new CpsFlowDefinition( + "echo 'first'\n" + + "def steps = [:]\n" + + "steps['1'] = {\n" + + " echo 'do 1 stuff'\n" + + "}\n" + + "steps['2'] = {\n" + + " echo '2a'\n" + + " echo '2b'\n" + + " def nested = [:]\n" + + " nested['2-1'] = {\n" + + " echo 'do 2-1'\n" + + " } \n" + + " nested['2-2'] = {\n" + + " sleep 1\n" + + " echo '2 section 2'\n" + + " }\n" + + " parallel nested\n" + + "}\n" + + "parallel steps\n" + + "echo 'final'" + )); + } + + // First we need to build the job to get an appropriate estimate for how long we need to wait before hard-restarting Jenkins in order to catch it in the middle + story.j.buildAndAssertSuccess(job); + long millisDuration = job.getLastBuild().getDuration(); + System.out.println("Test fuzzer job in completed in "+millisDuration+" ms"); + + // Now we run the job again and wait an appropriate amount of time -- but we return the job so tests can grab info before restarting. + int time = new Random().nextInt((int) millisDuration); + System.out.println("Starting fuzzer job and waiting "+time+" ms before restarting."); + WorkflowRun run = job.scheduleBuild2(0).getStartCondition().get(); + run.getExecutionPromise().get(); // Ensures run has begun so that it *can* complete cleanly. + Thread.sleep(time); + return run; + } + + /** Test interrupting build by randomly dying at unpredictable times. + * May fail rarely due to files being copied in a different order than they are modified as part of simulating a dirty restart. + * See {@link RestartableJenkinsRule#simulateAbruptShutdown()} for why that copying happens. */ @Test @Ignore //Too long to run as part of main suite - @TimedRepeatRule.RepeatForTime(repeatMillis = 170_000) + @TimedRepeatRule.RepeatForTime(repeatMillis = 150_000) public void fuzzTimingDurable() throws Exception { final String jobName = "NestedParallelDurableJob"; final String[] logStart = new String[1]; final List nodesOut = new ArrayList(); + final int[] buildNumber = new int [1]; // Create thread that eventually interrupts Jenkins with a hard shutdown at a random time interval story.addStepWithDirtyShutdown(new Statement() { @Override public void evaluate() throws Throwable { - Jenkins jenkins = story.j.jenkins; - WorkflowJob job = jenkins.getItemByFullName(jobName, WorkflowJob.class); - if (job == null) { // Job may already have been created - job = jenkins.createProject(WorkflowJob.class, jobName); - FlowDurabilityHint hint = FlowDurabilityHint.MAX_SURVIVABILITY; - TestDurabilityHintProvider provider = Jenkins.getInstance().getExtensionList(TestDurabilityHintProvider.class).get(0); - provider.registerHint(job.getFullName(), hint); - job.setDefinition(new CpsFlowDefinition( - "echo 'first'\n" + - "def steps = [:]\n" + - "steps['1'] = {\n" + - " echo 'do 1 stuff'\n" + - "}\n" + - "steps['2'] = {\n" + - " echo '2a'\n" + - " echo '2b'\n" + - " def nested = [:]\n" + - " nested['2-1'] = {\n" + - " echo 'do 2-1'\n" + - " } \n" + - " nested['2-2'] = {\n" + - " sleep 1\n" + - " echo '2 section 2'\n" + - " }\n" + - " parallel nested\n" + - "}\n" + - "parallel steps\n" + - "echo 'final'" - )); - } - story.j.buildAndAssertSuccess(job); - long millisDuration = job.getLastBuild().getDuration(); - - int time = new Random().nextInt((int) millisDuration); - WorkflowRun run = job.scheduleBuild2(0).getStartCondition().get(); - Thread.sleep(time); + WorkflowRun run = runFuzzerJob(story.j, jobName, FlowDurabilityHint.MAX_SURVIVABILITY); logStart[0] = JenkinsRule.getLog(run); nodesOut.clear(); nodesOut.addAll(new DepthFirstScanner().allNodes(run.getExecution())); nodesOut.sort(FlowScanningUtils.ID_ORDER_COMPARATOR); + if (run.getExecution() != null) { + Assert.assertEquals(FlowDurabilityHint.MAX_SURVIVABILITY, run.getExecution().getDurabilityHint()); + } } }); story.addStep(new Statement() { @Override public void evaluate() throws Throwable { WorkflowRun run = story.j.jenkins.getItemByFullName(jobName, WorkflowJob.class).getLastBuild(); + if (run.getExecution() != null) { + Assert.assertEquals(FlowDurabilityHint.MAX_SURVIVABILITY, run.getExecution().getDurabilityHint()); + } if (run.isBuilding()) { - story.j.waitForCompletion(run); + try { + story.j.waitUntilNoActivityUpTo(30_000); + } catch (AssertionError ase) { + throw new AssertionError("Build hung: "+run, ase); + } Assert.assertEquals(Result.SUCCESS, run.getResult()); } else { verifyCompletedCleanly(story.j.jenkins, run); @@ -849,4 +884,46 @@ public void evaluate() throws Throwable { }); } + + /** Test interrupting build by randomly dying at unpredictable times. + * May fail rarely due to files being copied in a different order than they are modified as part of simulating a dirty restart. + * See {@link RestartableJenkinsRule#simulateAbruptShutdown()} for why that copying happens. */ + @Test + @Ignore //Too long to run as part of main suite + @TimedRepeatRule.RepeatForTime(repeatMillis = 150_000) + public void fuzzTimingNonDurable() throws Exception { + final String jobName = "NestedParallelDurableJob"; + final String[] logStart = new String[1]; + + // Create thread that eventually interrupts Jenkins with a hard shutdown at a random time interval + story.addStepWithDirtyShutdown(new Statement() { + @Override + public void evaluate() throws Throwable { + WorkflowRun run = runFuzzerJob(story.j, jobName, FlowDurabilityHint.PERFORMANCE_OPTIMIZED); + logStart[0] = JenkinsRule.getLog(run); + if (run.getExecution() != null) { + Assert.assertEquals(FlowDurabilityHint.PERFORMANCE_OPTIMIZED, run.getExecution().getDurabilityHint()); + } + } + }); + story.addStep(new Statement() { + @Override + public void evaluate() throws Throwable { + WorkflowRun run = story.j.jenkins.getItemByFullName(jobName, WorkflowJob.class).getLastBuild(); + if (run.getExecution() != null) { + Assert.assertEquals(FlowDurabilityHint.PERFORMANCE_OPTIMIZED, run.getExecution().getDurabilityHint()); + } + if (run.isBuilding()) { + try { + story.j.waitUntilNoActivityUpTo(30_000); + } catch (AssertionError ase) { + throw new AssertionError("Build hung: "+run, ase); + } + } + verifyCompletedCleanly(story.j.jenkins, run); + story.j.assertLogContains(logStart[0], run); + } + }); + + } }