-
-
Notifications
You must be signed in to change notification settings - Fork 105
[JENKINS-52165] Calling Controller.watch API #63
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
5c20d5e
b4ff3a5
cb05308
13885df
af31da4
bd27dc6
5eccfbb
756a87d
6c424e0
0dcfd8e
12667dd
e2536cc
458d5ee
8440d04
335d00f
9a094f0
38e5a73
86d8074
705b254
6722cdb
62aeb45
ed4551a
c41f3ec
c859378
3673025
43b6d27
24ee12b
3f8ced4
d54b065
efb481c
77b81c3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -29,13 +29,16 @@ | |
| import hudson.AbortException; | ||
| import hudson.EnvVars; | ||
| import hudson.FilePath; | ||
| import hudson.Functions; | ||
| import hudson.Launcher; | ||
| import hudson.Main; | ||
| import hudson.model.TaskListener; | ||
| import hudson.util.DaemonThreadFactory; | ||
| import hudson.util.FormValidation; | ||
| import hudson.util.LogTaskListener; | ||
| import hudson.util.NamingThreadFactory; | ||
| import java.io.IOException; | ||
| import java.io.InputStream; | ||
| import java.nio.charset.Charset; | ||
| import java.util.Set; | ||
| import java.util.concurrent.ScheduledFuture; | ||
|
|
@@ -46,8 +49,10 @@ | |
| import javax.annotation.CheckForNull; | ||
| import javax.annotation.Nonnull; | ||
| import jenkins.util.Timer; | ||
| import org.apache.commons.io.IOUtils; | ||
| import org.jenkinsci.plugins.durabletask.Controller; | ||
| import org.jenkinsci.plugins.durabletask.DurableTask; | ||
| import org.jenkinsci.plugins.durabletask.Handler; | ||
| import org.jenkinsci.plugins.workflow.FilePathUtils; | ||
| import org.jenkinsci.plugins.workflow.steps.AbstractStepExecutionImpl; | ||
| import org.jenkinsci.plugins.workflow.steps.Step; | ||
|
|
@@ -129,15 +134,20 @@ public FormValidation doCheckReturnStatus(@QueryParameter boolean returnStdout, | |
|
|
||
| } | ||
|
|
||
| interface ExecutionRemotable { | ||
| void exited(int code, byte[] output) throws Exception; | ||
| } | ||
|
|
||
| /** | ||
| * Represents one task that is believed to still be running. | ||
| */ | ||
| @SuppressFBWarnings(value="SE_TRANSIENT_FIELD_NOT_RESTORED", justification="recurrencePeriod is set in onResume, not deserialization") | ||
| static final class Execution extends AbstractStepExecutionImpl implements Runnable { | ||
| static final class Execution extends AbstractStepExecutionImpl implements Runnable, ExecutionRemotable { | ||
|
|
||
| private static final long MIN_RECURRENCE_PERIOD = 250; // ¼s | ||
| private static final long MAX_RECURRENCE_PERIOD = 15000; // 15s | ||
| private static final float RECURRENCE_PERIOD_BACKOFF = 1.2f; | ||
| private static final long WATCHING_RECURRENCE_PERIOD = Main.isUnitTest ? /* 5s */5_000: /* 5m */300_000; | ||
|
||
|
|
||
| private static final ScheduledThreadPoolExecutor THREAD_POOL = new ScheduledThreadPoolExecutor(25, new NamingThreadFactory(new DaemonThreadFactory(), DurableTaskStep.class.getName())); | ||
| static { | ||
|
|
@@ -156,6 +166,7 @@ static final class Execution extends AbstractStepExecutionImpl implements Runnab | |
| private boolean returnStdout; // serialized default is false | ||
| private String encoding; // serialized default is irrelevant | ||
| private boolean returnStatus; // serialized default is false | ||
| private boolean watching; | ||
|
|
||
| Execution(StepContext context, DurableTaskStep step) { | ||
| super(context); | ||
|
|
@@ -173,9 +184,16 @@ static final class Execution extends AbstractStepExecutionImpl implements Runnab | |
| if (returnStdout) { | ||
| durableTask.captureOutput(); | ||
| } | ||
| controller = durableTask.launch(context.get(EnvVars.class), ws, context.get(Launcher.class), context.get(TaskListener.class)); | ||
| TaskListener listener = context.get(TaskListener.class); | ||
| controller = durableTask.launch(context.get(EnvVars.class), ws, context.get(Launcher.class), listener); | ||
| this.remote = ws.getRemote(); | ||
| setupTimer(); | ||
| try { | ||
|
||
| controller.watch(ws, new HandlerImpl(this, ws, listener), listener); | ||
| watching = true; | ||
| } catch (UnsupportedOperationException x) { | ||
| LOGGER.log(Level.WARNING, null, x); | ||
| } | ||
| setupTimer(watching ? WATCHING_RECURRENCE_PERIOD : MIN_RECURRENCE_PERIOD); | ||
| return false; | ||
| } | ||
|
|
||
|
|
@@ -186,26 +204,43 @@ static final class Execution extends AbstractStepExecutionImpl implements Runnab | |
| LOGGER.log(Level.FINE, "Jenkins is not running, no such node {0}, or it is offline", node); | ||
| return null; | ||
| } | ||
| if (watching) { | ||
| try { | ||
| controller.watch(ws, new HandlerImpl(this, ws, listener()), listener()); | ||
| recurrencePeriod = WATCHING_RECURRENCE_PERIOD; | ||
| } catch (UnsupportedOperationException x) { | ||
| getContext().onFailure(x); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it be useful to set
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes it should never happen; can make that clearer in a comment. |
||
| } catch (Exception x) { | ||
| getWorkspaceProblem(x); | ||
| return null; | ||
| } | ||
| } | ||
| } | ||
| boolean directory; | ||
| try (Timeout timeout = Timeout.limit(10, TimeUnit.SECONDS)) { | ||
| directory = ws.isDirectory(); | ||
| } catch (Exception x) { | ||
| // RequestAbortedException, ChannelClosedException, EOFException, wrappers thereof; InterruptedException if it just takes too long. | ||
| LOGGER.log(Level.FINE, node + " is evidently offline now", x); | ||
| ws = null; | ||
| if (!printedCannotContactMessage) { | ||
| listener().getLogger().println("Cannot contact " + node + ": " + x); | ||
| printedCannotContactMessage = true; | ||
| } | ||
| getWorkspaceProblem(x); | ||
| return null; | ||
| } | ||
| if (!directory) { | ||
| throw new AbortException("missing workspace " + remote + " on " + node); | ||
| } | ||
| LOGGER.log(Level.FINER, "{0} seems to be online so using {1}", new Object[] {node, remote}); | ||
| return ws; | ||
| } | ||
|
|
||
| private void getWorkspaceProblem(Exception x) { | ||
| // RequestAbortedException, ChannelClosedException, EOFException, wrappers thereof; InterruptedException if it just takes too long. | ||
| LOGGER.log(Level.FINE, node + " is evidently offline now", x); | ||
| ws = null; | ||
| recurrencePeriod = MIN_RECURRENCE_PERIOD; | ||
| if (!printedCannotContactMessage) { | ||
| listener().getLogger().println("Cannot contact " + node + ": " + x); | ||
| printedCannotContactMessage = true; | ||
| } | ||
| } | ||
|
|
||
| private @Nonnull TaskListener listener() { | ||
| TaskListener l; | ||
| StepContext context = getContext(); | ||
|
|
@@ -246,6 +281,14 @@ static final class Execution extends AbstractStepExecutionImpl implements Runnab | |
| recurrencePeriod = 0; | ||
| listener().getLogger().println("After 10s process did not stop"); | ||
| getContext().onFailure(cause); | ||
| try { | ||
| FilePath workspace = getWorkspace(); | ||
| if (workspace != null) { | ||
| controller.cleanup(workspace); | ||
| } | ||
| } catch (IOException | InterruptedException x) { | ||
| Functions.printStackTrace(x, listener().getLogger()); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🐜 Might be worth mentioning that we failed to do workspace cleanup (so it's clear this is not super serious).
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could do that.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Cancel that—at this point we have already printed an unusual message above, and any exception here probably came from |
||
| } | ||
| } | ||
| } | ||
| }, 10, TimeUnit.SECONDS); | ||
|
|
@@ -309,10 +352,20 @@ private void check() { | |
| return; | ||
| } | ||
| if (workspace == null) { | ||
| recurrencePeriod = Math.min((long) (recurrencePeriod * RECURRENCE_PERIOD_BACKOFF), MAX_RECURRENCE_PERIOD); | ||
| return; // slave not yet ready, wait for another day | ||
| } | ||
| TaskListener listener = listener(); | ||
| try (Timeout timeout = Timeout.limit(10, TimeUnit.SECONDS)) { | ||
| if (watching) { | ||
| Integer exitCode = controller.exitStatus(workspace, launcher()); | ||
| if (exitCode == null) { | ||
| LOGGER.log(Level.FINE, "still running in {0} on {1}", new Object[] {remote, node}); | ||
| } else { | ||
| LOGGER.log(Level.FINE, "exited with {0} in {1} on {2}; expect asynchronous exit soon", new Object[] {exitCode, remote, node}); | ||
| // TODO if we get here again and exited has still not been called, assume we lost the notification somehow and end the step | ||
|
||
| } | ||
| } else { // legacy mode | ||
| if (controller.writeLog(workspace, listener.getLogger())) { | ||
| getContext().saveState(); | ||
| recurrencePeriod = MIN_RECURRENCE_PERIOD; // got output, maybe we will get more soon | ||
|
|
@@ -337,6 +390,7 @@ private void check() { | |
| recurrencePeriod = 0; | ||
| controller.cleanup(workspace); | ||
| } | ||
| } | ||
| } catch (Exception x) { | ||
| LOGGER.log(Level.FINE, "could not check " + workspace, x); | ||
| ws = null; | ||
|
|
@@ -347,17 +401,67 @@ private void check() { | |
| } | ||
| } | ||
|
|
||
| // called remotely from HandlerImpl | ||
| @Override public void exited(int exitCode, byte[] output) throws Exception { | ||
| try { | ||
| getContext().get(TaskListener.class); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are we checking for TaskListener in the context in order to identify this condition? I'll take my answer in the form of a code comment, please.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ack
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (done) |
||
| } catch (IOException | InterruptedException x) { | ||
| LOGGER.log(Level.FINE, "asynchronous exit notification with code " + exitCode + " in " + remote + " on " + node + " ignored since step already seems dead", x); | ||
| return; | ||
| } | ||
| LOGGER.log(Level.FINE, "asynchronous exit notification with code {0} in {1} on {2}", new Object[] {exitCode, remote, node}); | ||
| if (returnStdout && output == null) { | ||
| getContext().onFailure(new IllegalStateException("expected output but got none")); | ||
| return; | ||
| } else if (!returnStdout && output != null) { | ||
| getContext().onFailure(new IllegalStateException("did not expect output but got some")); | ||
| return; | ||
| } | ||
| recurrencePeriod = 0; | ||
| if (returnStatus || exitCode == 0) { | ||
| getContext().onSuccess(returnStatus ? exitCode : returnStdout ? new String(output, encoding) : null); | ||
| } else { | ||
| if (returnStdout) { | ||
| listener().getLogger().write(output); // diagnostic | ||
| } | ||
| getContext().onFailure(new AbortException("script returned exit code " + exitCode)); | ||
| } | ||
| } | ||
|
|
||
| @Override public void onResume() { | ||
| setupTimer(); | ||
| ws = null; // find it from scratch please, rewatching as needed | ||
| setupTimer(MIN_RECURRENCE_PERIOD); | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does not look right when we were jenkinsci/durable-task-plugin#60 (comment) is something different: when we were not
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Never mind, I forgot that
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jglick If you're the author and couldn't remember that, it's probably worth a code comment so future maintainers (cough) don't make the same assumption and waste time.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I actually did have a comment, but apparently need to expand it. |
||
| } | ||
|
|
||
| private void setupTimer() { | ||
| recurrencePeriod = MIN_RECURRENCE_PERIOD; | ||
| private void setupTimer(long initialRecurrencePeriod) { | ||
| recurrencePeriod = initialRecurrencePeriod; | ||
| task = THREAD_POOL.schedule(this, recurrencePeriod, TimeUnit.MILLISECONDS); | ||
| } | ||
|
|
||
| private static final long serialVersionUID = 1L; | ||
|
|
||
| } | ||
|
|
||
| private static class HandlerImpl extends Handler { | ||
|
|
||
| private static final long serialVersionUID = 1L; | ||
|
|
||
| private final ExecutionRemotable execution; | ||
| private final TaskListener listener; | ||
|
|
||
| HandlerImpl(Execution execution, FilePath workspace, TaskListener listener) { | ||
| this.execution = workspace.getChannel().export(ExecutionRemotable.class, execution); | ||
| this.listener = listener; | ||
| } | ||
|
|
||
| @Override public void output(InputStream stream) throws Exception { | ||
| IOUtils.copy(stream, listener.getLogger()); | ||
|
||
| } | ||
|
|
||
| @Override public void exited(int code, byte[] output) throws Exception { | ||
| execution.exited(code, output); | ||
| } | ||
|
|
||
| } | ||
|
|
||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I will flesh this out with an explanation of the expected state changes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(done)