-
Notifications
You must be signed in to change notification settings - Fork 200
Fix a whole wad of Resume issues and add a bunch of diagnostic logging for FlowHead problems etc #216
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix a whole wad of Resume issues and add a bunch of diagnostic logging for FlowHead problems etc #216
Changes from 11 commits
a7a926e
7b2ba86
c028682
046e27a
d33c90c
1ad3eb4
cf829bb
a017be9
5d08569
4c545d6
1f47c90
b34e1f1
ab88c6d
f4aad61
16dcb35
1a279ef
5aa15b5
b91d775
e69a24c
9d8f0b1
95d7ee2
a9c52d6
a4110d2
773855b
91212d2
5ecfd2c
1360644
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -69,7 +69,7 @@ | |
| <workflow-support-plugin.version>2.17</workflow-support-plugin.version> | ||
| <scm-api-plugin.version>2.0.8</scm-api-plugin.version> | ||
| <groovy-cps.version>1.24</groovy-cps.version> | ||
| <jenkins-test-harness.version>2.33</jenkins-test-harness.version> | ||
| <jenkins-test-harness.version>2.37</jenkins-test-harness.version> | ||
| </properties> | ||
| <dependencies> | ||
| <dependency> | ||
|
|
@@ -141,7 +141,7 @@ | |
| <dependency> | ||
| <groupId>org.jenkins-ci.plugins.workflow</groupId> | ||
| <artifactId>workflow-job</artifactId> | ||
| <version>2.18-20180329.215807-4</version> | ||
| <version>2.18-20180406.172304-8</version> | ||
|
||
| <scope>test</scope> | ||
| </dependency> | ||
| <dependency> | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -316,7 +316,7 @@ public class CpsFlowExecution extends FlowExecution implements BlockableResume { | |
| * {@link FlowExecution} gets loaded into memory for the build records that have been completed, | ||
| * and for those we don't want to load the program state, so that check should be efficient. | ||
| */ | ||
| Boolean done; // Only non-private for unit test use. | ||
| boolean done; // Only non-private for unit test use. | ||
|
|
||
| /** | ||
| * Groovy compiler with CPS+sandbox transformation correctly setup. | ||
|
|
@@ -728,7 +728,7 @@ public void onLoad(FlowExecutionOwner owner) throws IOException { | |
| LOGGER.log(Level.WARNING, "Pipeline state not properly persisted, cannot resume "+owner.getUrl()); | ||
| throw new IOException("Cannot resume build -- was not cleanly saved when Jenkins shut down."); | ||
| } | ||
| } else if (done == Boolean.TRUE && !super.isComplete()) { | ||
| } else if (done && !super.isComplete()) { | ||
| LOGGER.log(Level.WARNING, "Completed flow without FlowEndNode: "+this+" heads:"+getHeadsAsString()); | ||
| } | ||
| } catch (Exception e) { // Multicatch ensures that failure to load does not nuke the master | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ahem jenkinsci/workflow-api-plugin#54 :-) |
||
|
|
@@ -820,7 +820,6 @@ private void loadProgramFailed(final Throwable problem, SettableFuture<CpsThread | |
| } else { | ||
| head = getFirstHead(); | ||
| } | ||
| done = Boolean.TRUE; | ||
| } | ||
|
|
||
| if (head==null) { | ||
|
|
@@ -857,13 +856,12 @@ private void loadProgramFailed(final Throwable problem, SettableFuture<CpsThread | |
| /** Report a fatal error in the VM. */ | ||
| void croak(Throwable t) { | ||
| setResult(Result.FAILURE); | ||
| done = Boolean.TRUE; | ||
| onProgramEnd(new Outcome(null, t)); | ||
| cleanUpHeap(); | ||
| try { | ||
| saveOwner(); | ||
| } catch (Exception ex) { | ||
| LOGGER.log(Level.WARNING, "Failed to persist WorkflowRun after noting a serious failure for run:", owner); | ||
| LOGGER.log(Level.WARNING, "Failed to persist WorkflowRun after noting a serious failure for run: " + owner, ex); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -1193,7 +1191,7 @@ public static void maybeAutoPersistNode(@Nonnull FlowNode node) { | |
| @Override | ||
| @SuppressFBWarnings(value = "RC_REF_COMPARISON_BAD_PRACTICE_BOOLEAN", justification = "We want to explicitly check for boolean not-null and true") | ||
| public boolean isComplete() { | ||
| return done == Boolean.TRUE || super.isComplete(); // Compare to Boolean.TRUE so null == false. | ||
| return done || super.isComplete(); // Compare to Boolean.TRUE so null == false. | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -1207,18 +1205,24 @@ synchronized void onProgramEnd(Outcome outcome) { | |
| } | ||
|
|
||
| // shrink everything into a single new head | ||
| done = Boolean.TRUE; | ||
| if (heads != null) { | ||
| FlowHead first = getFirstHead(); | ||
| first.setNewHead(head); | ||
| heads.clear(); | ||
| heads.put(first.getId(), first); | ||
|
|
||
| String tempIotaStr = Integer.toString(this.iota.get()); | ||
| FlowHead lastHead = heads.get(first.getId()); | ||
| if (lastHead == null || lastHead.get() == null || !(lastHead.get().getId().equals(tempIotaStr))) { | ||
| LOGGER.log(Level.WARNING, "Invalid final head for execution "+this.owner+" with head: "+lastHead); | ||
| try { | ||
| if (heads != null) { | ||
| FlowHead first = getFirstHead(); | ||
| first.setNewHead(head); | ||
| done = Boolean.TRUE; // After setting the final head | ||
| heads.clear(); | ||
| heads.put(first.getId(), first); | ||
|
|
||
| String tempIotaStr = Integer.toString(this.iota.get()); | ||
| FlowHead lastHead = heads.get(first.getId()); | ||
| if (lastHead == null || lastHead.get() == null || !(lastHead.get().getId().equals(tempIotaStr))) { | ||
| // Warning of problems with the final call to FlowHead.setNewHead | ||
| LOGGER.log(Level.WARNING, "Invalid final head for execution "+this.owner+" with head: "+lastHead); | ||
| } | ||
| } | ||
| } catch (Exception ex) { | ||
| done = Boolean.TRUE; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
| throw ex; | ||
| } | ||
|
|
||
| try { | ||
|
|
@@ -1566,9 +1570,7 @@ public void marshal(Object source, HierarchicalStreamWriter w, MarshallingContex | |
| for (BlockStartNode st : e.startNodes) { | ||
| writeChild(w, context, "start", st.getId(), String.class); | ||
| } | ||
| if (e.done != null) { | ||
| writeChild(w, context, "done", e.done, Boolean.class); | ||
| } | ||
| writeChild(w, context, "done", e.done, Boolean.class); | ||
| } | ||
| writeChild(w, context, "resumeBlocked", e.resumeBlocked, Boolean.class); | ||
|
|
||
|
|
@@ -1818,6 +1820,13 @@ private void checkpoint() { | |
| boolean persistOk = true; | ||
| FlowNodeStorage storage = getStorage(); | ||
| if (storage != null) { | ||
| try { // Node storage must be flushed first so program can be restored | ||
| storage.flush(); | ||
| } catch (IOException ioe) { | ||
| persistOk=false; | ||
| LOGGER.log(Level.WARNING, "Error persisting FlowNode storage before shutdown", ioe); | ||
| } | ||
|
|
||
| // Try to ensure we've saved the appropriate things -- the program is the last stumbling block. | ||
| try { | ||
| final SettableFuture<Void> myOutcome = SettableFuture.create(); | ||
|
|
@@ -1849,13 +1858,6 @@ public void onFailure(Throwable t) { | |
| persistOk = false; | ||
| LOGGER.log(Level.FINE, "Error saving program, that should be handled elsewhere.", ex); | ||
| } | ||
|
|
||
| try { | ||
| storage.flush(); | ||
| } catch (IOException ioe) { | ||
| persistOk=false; | ||
| LOGGER.log(Level.WARNING, "Error persisting FlowNode storage before shutdown", ioe); | ||
| } | ||
| persistedClean = persistOk; | ||
| saveOwner(); | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -534,9 +534,12 @@ synchronized boolean switchToAsyncMode() { | |
| @Override public void onSuccess(CpsThreadGroup result) { | ||
| try { | ||
| // TODO keep track of whether the program was saved anyway after saveState was called but before now, and do not bother resaving it in that case | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. BTW how does durability interact with
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @jglick Otherwise (for Of course with a clean shutdown the pipeline will persist via the |
||
| result.saveProgram(); | ||
| if (result.getExecution().getDurabilityHint().isPersistWithEveryStep()) { | ||
| result.getExecution().getStorage().flush(); | ||
| result.saveProgram(); | ||
| } | ||
| f.set(null); | ||
| } catch (IOException x) { | ||
| } catch (Exception x) { | ||
| f.setException(x); | ||
| } | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
File a request to update the parent POM.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@jglick jenkinsci/plugin-pom#101 - Done