Merged

31 commits (diff below shows changes from 4 commits)
5c20d5e
[JENKINS-38381] Calling Controller.watch API.
jglick Feb 9, 2018
b4ff3a5
Merge branch 'UTF-8-JENKINS-31096' into watch-plus-UTF-8-JENKINS-31096
jglick Feb 13, 2018
cb05308
Merge branch 'master' into watch-JENKINS-38381
jglick Jun 8, 2018
13885df
Matching https://github.com/jenkinsci/durable-task-plugin/pull/60/com…
jglick Jun 8, 2018
af31da4
Merge branch 'UTF-8-JENKINS-31096' into watch-plus-UTF-8-JENKINS-31096
jglick Jun 9, 2018
bd27dc6
Merge branch 'watch-JENKINS-38381' into watch-plus-UTF-8-JENKINS-31096
jglick Jun 9, 2018
5eccfbb
Review comments from @oleg-nenashev.
jglick Jun 9, 2018
756a87d
Merge branch 'UTF-8-JENKINS-31096' into watch-plus-UTF-8-JENKINS-31096
jglick Jun 9, 2018
6c424e0
Merge branch 'watch-JENKINS-38381' into watch-plus-UTF-8-JENKINS-31096
jglick Jun 9, 2018
0dcfd8e
Merge branch 'master' into watch-plus-UTF-8-JENKINS-31096
jglick Aug 7, 2018
12667dd
https://github.com/jenkinsci/workflow-job-plugin/pull/89 has already …
jglick Aug 7, 2018
e2536cc
Using a production value for WATCHING_RECURRENCE_PERIOD by default in…
jglick Aug 7, 2018
458d5ee
Added a feature flag for watching mode in case it needs to be disable…
jglick Aug 7, 2018
8440d04
FindBugs
jglick Aug 8, 2018
335d00f
Introduced awaitingAsynchExit flag to handle a possible corner case.
jglick Aug 8, 2018
9a094f0
Process.destroyForcibly should make for more colorful tests!
jglick Aug 8, 2018
38e5a73
Merge branch 'master' into watch-JENKINS-38381
jglick Aug 8, 2018
86d8074
Added ExecutorStepTest.shellOutputAcrossRestart to see what happens t…
jglick Aug 10, 2018
705b254
Merge branch 'jnlpProc' into watch-JENKINS-38381
jglick Aug 10, 2018
6722cdb
Noting a way to reproduce a loss of a small amount of output during r…
jglick Aug 10, 2018
62aeb45
Work around exception suppression from PrintStream.
jglick Aug 10, 2018
ed4551a
Verifying https://github.com/jenkinsci/durable-task-plugin/pull/60/co…
jglick Aug 10, 2018
c41f3ec
Comments.
jglick Aug 10, 2018
c859378
After discussion with @dwnusbaum, attempting to proceed after errors …
jglick Aug 10, 2018
3673025
Richer Javadoc.
jglick Aug 13, 2018
43b6d27
When writing directly to PrintStream.out, synchronize on it like Prin…
jglick Aug 14, 2018
24ee12b
Synchronization should be on the PrintStream, not its underlying Outp…
jglick Aug 14, 2018
3f8ced4
Fail with a more informative exception in case the PrintStream is clo…
jglick Aug 14, 2018
d54b065
Merge branch 'master' into watch-JENKINS-38381
jglick Aug 22, 2018
efb481c
Merge branch 'master' into watch-JENKINS-38381
jglick Sep 4, 2018
77b81c3
Bump.
jglick Sep 4, 2018
pom.xml (2 changes: 1 addition & 1 deletion)
@@ -77,7 +77,7 @@
<dependency>
<groupId>org.jenkins-ci.plugins</groupId>
<artifactId>durable-task</artifactId>
<version>1.18</version>
<version>1.23-rc122.6974ae23243a</version> <!-- TODO https://github.com/jenkinsci/durable-task-plugin/pull/60 -->
</dependency>
<dependency>
<groupId>org.jenkins-ci.plugins.workflow</groupId>
DurableTaskStep.java
@@ -29,13 +29,16 @@
import hudson.AbortException;
import hudson.EnvVars;
import hudson.FilePath;
import hudson.Functions;
import hudson.Launcher;
import hudson.Main;
import hudson.model.TaskListener;
import hudson.util.DaemonThreadFactory;
import hudson.util.FormValidation;
import hudson.util.LogTaskListener;
import hudson.util.NamingThreadFactory;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.Set;
import java.util.concurrent.ScheduledFuture;
@@ -46,8 +49,10 @@
import javax.annotation.CheckForNull;
import javax.annotation.Nonnull;
import jenkins.util.Timer;
import org.apache.commons.io.IOUtils;
import org.jenkinsci.plugins.durabletask.Controller;
import org.jenkinsci.plugins.durabletask.DurableTask;
import org.jenkinsci.plugins.durabletask.Handler;
import org.jenkinsci.plugins.workflow.FilePathUtils;
import org.jenkinsci.plugins.workflow.steps.AbstractStepExecutionImpl;
import org.jenkinsci.plugins.workflow.steps.Step;
@@ -129,15 +134,20 @@ public FormValidation doCheckReturnStatus(@QueryParameter boolean returnStdout,

}

interface ExecutionRemotable {
void exited(int code, byte[] output) throws Exception;
}

/**
* Represents one task that is believed to still be running.
Member Author: I will flesh this out with an explanation of the expected state changes.

Member Author: (done)

*/
@SuppressFBWarnings(value="SE_TRANSIENT_FIELD_NOT_RESTORED", justification="recurrencePeriod is set in onResume, not deserialization")
static final class Execution extends AbstractStepExecutionImpl implements Runnable {
static final class Execution extends AbstractStepExecutionImpl implements Runnable, ExecutionRemotable {

private static final long MIN_RECURRENCE_PERIOD = 250; // ¼s
private static final long MAX_RECURRENCE_PERIOD = 15000; // 15s
private static final float RECURRENCE_PERIOD_BACKOFF = 1.2f;
private static final long WATCHING_RECURRENCE_PERIOD = Main.isUnitTest ? /* 5s */5_000: /* 5m */300_000;
Member Author: Should probably be switched to nonfinal constant initialized via system property and overridden in the relevant test only.
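A minimal sketch of that suggestion, assuming a plain Long.getLong lookup (the property name is illustrative and does not exist today):

```java
// Hypothetical: nonfinal so a test can override it; defaults to the production value of 5 minutes.
static long WATCHING_RECURRENCE_PERIOD =
        Long.getLong(DurableTaskStep.class.getName() + ".watchingRecurrencePeriod", /* 5m */ 300_000L);
```

The test that cares about timing could then set the field (or pass the system property) instead of branching on Main.isUnitTest.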


private static final ScheduledThreadPoolExecutor THREAD_POOL = new ScheduledThreadPoolExecutor(25, new NamingThreadFactory(new DaemonThreadFactory(), DurableTaskStep.class.getName()));
static {
@@ -156,6 +166,7 @@ static final class Execution extends AbstractStepExecutionImpl implements Runnab
private boolean returnStdout; // serialized default is false
private String encoding; // serialized default is irrelevant
private boolean returnStatus; // serialized default is false
private boolean watching;

Execution(StepContext context, DurableTaskStep step) {
super(context);
@@ -173,9 +184,16 @@ static final class Execution extends AbstractStepExecutionImpl implements Runnab
if (returnStdout) {
durableTask.captureOutput();
}
controller = durableTask.launch(context.get(EnvVars.class), ws, context.get(Launcher.class), context.get(TaskListener.class));
TaskListener listener = context.get(TaskListener.class);
controller = durableTask.launch(context.get(EnvVars.class), ws, context.get(Launcher.class), listener);
this.remote = ws.getRemote();
setupTimer();
try {
Member Author: Should introduce a system property to act as a kill switch and fall back to polling.
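A hedged sketch of what such a kill switch might look like (the property name, field, and default are illustrative, not part of this change):

```java
// Hypothetical escape hatch: set -D<DurableTaskStep class name>.USE_WATCHING=false to force the legacy polling path.
private static final boolean USE_WATCHING =
        Boolean.parseBoolean(System.getProperty(DurableTaskStep.class.getName() + ".USE_WATCHING", "true"));

// ...then in start(), only attempt to register the watcher when the switch is on:
if (USE_WATCHING) {
    try {
        controller.watch(ws, new HandlerImpl(this, ws, listener), listener);
        watching = true;
    } catch (UnsupportedOperationException x) {
        LOGGER.log(Level.WARNING, null, x); // watching stays false, so the step falls back to polling
    }
}
```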

controller.watch(ws, new HandlerImpl(this, ws, listener), listener);
watching = true;
} catch (UnsupportedOperationException x) {
LOGGER.log(Level.WARNING, null, x);
}
setupTimer(watching ? WATCHING_RECURRENCE_PERIOD : MIN_RECURRENCE_PERIOD);
return false;
}

@@ -186,26 +204,43 @@ static final class Execution extends AbstractStepExecutionImpl implements Runnab
LOGGER.log(Level.FINE, "Jenkins is not running, no such node {0}, or it is offline", node);
return null;
}
if (watching) {
try {
controller.watch(ws, new HandlerImpl(this, ws, listener()), listener());
recurrencePeriod = WATCHING_RECURRENCE_PERIOD;
} catch (UnsupportedOperationException x) {
getContext().onFailure(x);
Member: Would it be useful to set watching = false and fall back to polling in this case? Is there even any way to get here without modifying the serialized state? I guess if the controller implementation supported watch, and then we changed the implementation and removed the override, then any pipelines that resume after upgrading this plugin would break, but that feels like a pretty contrived case. Maybe worth a comment that this should never happen IIUC.

Member Author: Yes it should never happen; can make that clearer in a comment.
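For reference, a sketch of the fallback the reviewer describes, assuming silent degradation were preferred over failing the step (not what the change actually does):

```java
try {
    controller.watch(ws, new HandlerImpl(this, ws, listener()), listener());
    recurrencePeriod = WATCHING_RECURRENCE_PERIOD;
} catch (UnsupportedOperationException x) {
    // Controller stopped supporting watch across a plugin upgrade (should never happen):
    // log it and drop back to the legacy polling loop instead of killing the step.
    LOGGER.log(Level.WARNING, "falling back to polling mode", x);
    watching = false;
    recurrencePeriod = MIN_RECURRENCE_PERIOD;
}
```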

} catch (Exception x) {
getWorkspaceProblem(x);
return null;
}
}
}
boolean directory;
try (Timeout timeout = Timeout.limit(10, TimeUnit.SECONDS)) {
directory = ws.isDirectory();
} catch (Exception x) {
// RequestAbortedException, ChannelClosedException, EOFException, wrappers thereof; InterruptedException if it just takes too long.
LOGGER.log(Level.FINE, node + " is evidently offline now", x);
ws = null;
if (!printedCannotContactMessage) {
listener().getLogger().println("Cannot contact " + node + ": " + x);
printedCannotContactMessage = true;
}
getWorkspaceProblem(x);
return null;
}
if (!directory) {
throw new AbortException("missing workspace " + remote + " on " + node);
}
LOGGER.log(Level.FINER, "{0} seems to be online so using {1}", new Object[] {node, remote});
return ws;
}

private void getWorkspaceProblem(Exception x) {
// RequestAbortedException, ChannelClosedException, EOFException, wrappers thereof; InterruptedException if it just takes too long.
LOGGER.log(Level.FINE, node + " is evidently offline now", x);
ws = null;
recurrencePeriod = MIN_RECURRENCE_PERIOD;
if (!printedCannotContactMessage) {
listener().getLogger().println("Cannot contact " + node + ": " + x);
printedCannotContactMessage = true;
}
}

private @Nonnull TaskListener listener() {
TaskListener l;
StepContext context = getContext();
@@ -246,6 +281,14 @@ static final class Execution extends AbstractStepExecutionImpl implements Runnab
recurrencePeriod = 0;
listener().getLogger().println("After 10s process did not stop");
getContext().onFailure(cause);
try {
FilePath workspace = getWorkspace();
if (workspace != null) {
controller.cleanup(workspace);
}
} catch (IOException | InterruptedException x) {
Functions.printStackTrace(x, listener().getLogger());
Member: 🐜 Might be worth mentioning that we failed to do workspace cleanup (so it's clear this is not super serious).

Member Author: Could do that.

Member Author: Cancel that—at this point we have already printed an unusual message above, and any exception here probably came from getWorkspace.

}
}
}
}, 10, TimeUnit.SECONDS);
@@ -309,10 +352,20 @@ private void check() {
return;
}
if (workspace == null) {
recurrencePeriod = Math.min((long) (recurrencePeriod * RECURRENCE_PERIOD_BACKOFF), MAX_RECURRENCE_PERIOD);
return; // slave not yet ready, wait for another day
}
TaskListener listener = listener();
try (Timeout timeout = Timeout.limit(10, TimeUnit.SECONDS)) {
if (watching) {
Integer exitCode = controller.exitStatus(workspace, launcher());
if (exitCode == null) {
LOGGER.log(Level.FINE, "still running in {0} on {1}", new Object[] {remote, node});
} else {
LOGGER.log(Level.FINE, "exited with {0} in {1} on {2}; expect asynchronous exit soon", new Object[] {exitCode, remote, node});
// TODO if we get here again and exited has still not been called, assume we lost the notification somehow and end the step
Member Author: Reminding myself to check whether this concern is still valid.

}
} else { // legacy mode
if (controller.writeLog(workspace, listener.getLogger())) {
getContext().saveState();
recurrencePeriod = MIN_RECURRENCE_PERIOD; // got output, maybe we will get more soon
Expand All @@ -337,6 +390,7 @@ private void check() {
recurrencePeriod = 0;
controller.cleanup(workspace);
}
}
} catch (Exception x) {
LOGGER.log(Level.FINE, "could not check " + workspace, x);
ws = null;
@@ -347,17 +401,67 @@
}
}

// called remotely from HandlerImpl
@Override public void exited(int exitCode, byte[] output) throws Exception {
try {
getContext().get(TaskListener.class);
Member: Why are we checking for TaskListener in the context in order to identify this condition?

I'll take my answer in the form of a code comment, please.

Member Author: ack

Member Author: (done)
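Judging from the log message in the catch block just below (“ignored since step already seems dead”), the requested code comment would presumably read something like this sketch:

```java
// If the step has already completed, or its context is otherwise unusable,
// this lookup throws, and we quietly drop the late exit notification rather
// than trying to complete a step that is already dead.
getContext().get(TaskListener.class);
```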

} catch (IOException | InterruptedException x) {
LOGGER.log(Level.FINE, "asynchronous exit notification with code " + exitCode + " in " + remote + " on " + node + " ignored since step already seems dead", x);
return;
}
LOGGER.log(Level.FINE, "asynchronous exit notification with code {0} in {1} on {2}", new Object[] {exitCode, remote, node});
if (returnStdout && output == null) {
getContext().onFailure(new IllegalStateException("expected output but got none"));
return;
} else if (!returnStdout && output != null) {
getContext().onFailure(new IllegalStateException("did not expect output but got some"));
return;
}
recurrencePeriod = 0;
if (returnStatus || exitCode == 0) {
getContext().onSuccess(returnStatus ? exitCode : returnStdout ? new String(output, encoding) : null);
} else {
if (returnStdout) {
listener().getLogger().write(output); // diagnostic
}
getContext().onFailure(new AbortException("script returned exit code " + exitCode));
}
}

@Override public void onResume() {
setupTimer();
ws = null; // find it from scratch please, rewatching as needed
setupTimer(MIN_RECURRENCE_PERIOD);
Member Author: Does not look right when we were watching in the previous session; probably needs to again call Controller.watch (since the agent connection can be assumed to have been broken) and use WATCHING_RECURRENCE_PERIOD. The latter is not readily testable (just timing); the former did not trigger a failure in ExecutorStepTest.buildShellScriptAcrossRestart or buildShellScriptAcrossDisconnect, perhaps because these do not focus much on log output.

jenkinsci/durable-task-plugin#60 (comment) is something different: when we were not watching in the previous session, because the plugin was older. In that case the watching field should deserialize to false and we finish out the step with the old code path. ExecutorStepTest.serialForm exercises this scenario already.

Member Author: Never mind, I forgot that getWorkspace does both of these things when resumed in watching mode.

Member: @jglick If you're the author and couldn't remember that, it's probably worth a code comment so future maintainers (cough) don't make the same assumption and waste time.

Member Author: I actually did have a comment, but apparently need to expand it.

}

private void setupTimer() {
recurrencePeriod = MIN_RECURRENCE_PERIOD;
private void setupTimer(long initialRecurrencePeriod) {
recurrencePeriod = initialRecurrencePeriod;
task = THREAD_POOL.schedule(this, recurrencePeriod, TimeUnit.MILLISECONDS);
}

private static final long serialVersionUID = 1L;

}

private static class HandlerImpl extends Handler {

private static final long serialVersionUID = 1L;

private final ExecutionRemotable execution;
private final TaskListener listener;

HandlerImpl(Execution execution, FilePath workspace, TaskListener listener) {
this.execution = workspace.getChannel().export(ExecutionRemotable.class, execution);
this.listener = listener;
}

@Override public void output(InputStream stream) throws Exception {
IOUtils.copy(stream, listener.getLogger());
Member Author (@jglick, Aug 7, 2018): @svanoort asked about handling of “interrupts”, which is a somewhat broad topic.

  • build interruption inside a durable task: covered by ShellStepTest.abort
  • interruption of the agent → master Remoting call (Chunk): coverage unknown
  • other network errors: coverage unknown
  • error in getWorkspace: covered by ExecutorStepTest.buildShellScriptAcrossDisconnect

Member:

> other network errors: coverage unknown

😒

Member: This implementation means it's absolutely a requirement that the invoker know to re-send log content if it fails as the result of transitory network conditions.

Member Author: I do not think so. Network conditions are unlikely to be “transitory”. If there are some lost packets or whatever, TCP will recover and the application layer is not going to know any different.

If the socket is broken—or there is otherwise a serious error in the Remoting transport, say Kafka (under development)—we hope the Channel will get closed somehow, and we may subsequently reconnect and resume log transmission. In such cases, we are making a best effort to resume the log where we left off, but we cannot know exactly where that was—the source channel position is an estimate. At worst we lose, or duplicate, a bit of the log file, which is hardly a serious matter when a build is a low-value object (and there is a good chance this build would wind up failing anyway).

At any rate such considerations really only apply to on-master file storage; cloud storage will have completely different characteristics.

Member:

> At any rate such considerations really only apply to on-master file storage; cloud storage will have completely different characteristics.

Actually, it's even worse in Cloud environments.

I'll let Google make my argument here about how to design resilient distributed systems and exactly how little you can trust the network: http://www.hpcs.cs.tsukuba.ac.jp/~tatebe/lecture/h23/dsys/dsd-tutorial.html#Design

> Both clients and servers must be able to deal with unresponsive senders/receivers.

> When you design distributed systems, you have to say, "Failure happens all the time." So when you design, you design for failure. It is your number one concern. What does designing for failure mean? One classic problem is partial failure. If I send a message to you and then a network failure occurs, there are two possible outcomes. One is that the message got to you, and then the network broke, and I just didn't get the response. The other is the message never got to you because the network broke before it arrived.

I'll out of band cite a couple issues for you too.

Member: Hmm, this is actually worse than it seems, because apparently the PrintStream returned by getLogger doesn't even throw an exception on failed write operations performed by IOUtils.copy(stream, listener.getLogger()); it just sets a 'trouble' flag you have to fetch via 'checkError' :-/

Member Author: Unfortunately yes, TaskListener specifies PrintStream which swallows exceptions @since JDK1.0. Of course the same is true for any content that gets printed from the agent side. At “best” we can checkError both before and after each block of output and throw a blank IOException if it changes from false to true (under the guess that the transition was due to this call and not some other concurrent one).

It is not obvious to me at least whether that would actually improve behavior under any real failure mode. A cloudy implementation would preferably perform its own error handling. For the built-in sink, the most plausible IOException would be a failure in Channel.send(Chunk) from ProxyOutputStream, which would likely be associated with a termination of the channel based on the transport being broken (including the case that it already was closed and we are getting a ChannelClosedException), in which case we switch to the rewatch scenario. If the channel stays open and recovers later (if that is even possible), at worst we lose some log text. If it stays open but persists in throwing IOException on every new call, after 1–2 × WATCHING_RECURRENCE_PERIOD we give up and abort the step. TBD how the new Kafka implementation behaves in various edge cases.

In no event do I think it is wise to “flush” or otherwise block on a response from the sink about the precise status (using some different API than TaskListener), as this would just add network overhead in the common case that packets are delivered, and possibly introduce its own more serious failure modes. We already see basically these same problems in “pull” mode.
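To make the checkError idea concrete, a sketch of how HandlerImpl.output could surface the trouble flag (best effort only, assuming nothing richer than the PrintStream API is available):

```java
@Override public void output(InputStream stream) throws Exception {
    PrintStream logger = listener.getLogger();
    boolean troubledBefore = logger.checkError(); // flag may already have been set by an earlier, unrelated write
    IOUtils.copy(stream, logger);
    if (!troubledBefore && logger.checkError()) {
        // PrintStream swallowed the original exception, so all we can report is that something went wrong here.
        throw new IOException("failed to deliver log output to the sink");
    }
}
```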

Member: Thinking about this a bit deeper, I really think this is broken (a hard 🐛) but primarily because of the PrintStream having all-but-nonexistent error handling. There must be some way to signal and propagate errors from this process so implementations can signal "hey this thing is broken" and it can get bubbled up.

I would have deep, deep reservations about treating the log as without value -- IMO half the value of Cloud storage for logs is that they can be audited, and if we don't make some effort to deal with failures, we've just lost all ability to audit.

Member Author:

> the value of Cloud storage for logs is that they can be audited, and if we don't make some effort to deal with failures

My only concern right now is tolerable behavior for the built-in Remoting (to master file) log sink in the face of corner cases. It is OK to lose a bit of text when the Channel is dying or dead: we likely already were in the polling implementation, and the build is often going to fail anyway with an error of no interest to developers. It is not OK to have the build hang indefinitely, a master Thread consumed, etc.—things which happened in earlier iterations of the polling implementation and are now I think defended against in most cases.

Cloudy storage of logs is a separate case. Quality of service levels are up to that implementation, not this generic code. I would envision that a production implementation would make some effort to buffer and resend messages in the background, giving up at some point. If it has some evidence that an outage is somehow tied to the state of the agent JVM and that an agent restart would actually solve something, then it could override PrintStream.write to throw RuntimeException, though I doubt this would be productive.

More of interest is a routine ChannelClosedException during master restart, for which we would prefer to have exact log content if at all possible. Not sure if that is effectively testable but can try it. One trick which may help is to extract the FilterOutputStream.out.
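The “extract the FilterOutputStream.out” trick mentioned here would presumably involve reflection, since the field is protected; a purely illustrative sketch:

```java
// PrintStream extends FilterOutputStream, whose protected 'out' field holds the underlying stream.
java.lang.reflect.Field outField = java.io.FilterOutputStream.class.getDeclaredField("out");
outField.setAccessible(true);
java.io.OutputStream underlying = (java.io.OutputStream) outField.get(listener.getLogger());
// Writing to 'underlying' directly would bypass PrintStream's exception swallowing.
```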

Member Author:

> More of interest

(resolved)

}

@Override public void exited(int code, byte[] output) throws Exception {
execution.exited(code, output);
}

}

}
ExecutorStepTest.java
@@ -63,17 +63,15 @@
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.logging.ConsoleHandler;
import java.util.logging.Handler;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import jenkins.model.Jenkins;
import jenkins.security.QueueItemAuthenticator;
import jenkins.security.QueueItemAuthenticatorConfiguration;
import org.acegisecurity.Authentication;
import org.apache.commons.io.FileUtils;
import org.apache.tools.ant.util.JavaEnvUtils;
import org.jenkinsci.plugins.durabletask.FileMonitoringTask;
import org.hamcrest.Matchers;
import org.jboss.marshalling.ObjectResolver;
import org.jenkinsci.plugins.workflow.actions.QueueItemAction;
@@ -99,6 +97,7 @@
import org.junit.runners.model.Statement;
import org.jvnet.hudson.test.BuildWatcher;
import org.jvnet.hudson.test.Issue;
import org.jvnet.hudson.test.LoggerRule;
import org.jvnet.hudson.test.JenkinsRule;
import org.jvnet.hudson.test.MockAuthorizationStrategy;
import org.jvnet.hudson.test.RestartableJenkinsRule;
@@ -112,8 +111,9 @@ public class ExecutorStepTest {
@ClassRule public static BuildWatcher buildWatcher = new BuildWatcher();
@Rule public RestartableJenkinsRule story = new RestartableJenkinsRule();
@Rule public TemporaryFolder tmp = new TemporaryFolder();
@Rule public LoggerRule logging = new LoggerRule();
/* Currently too noisy due to unrelated warnings; might clear up if test dependencies updated:
@Rule public LoggerRule logging = new LoggerRule().record(ExecutorStepExecution.class, Level.FINE);
.record(ExecutorStepExecution.class, Level.FINE);
*/

/**
@@ -227,11 +227,7 @@ private void startJnlpProc() throws Exception {
story.addStep(new Statement() {
@SuppressWarnings("SleepWhileInLoop")
@Override public void evaluate() throws Throwable {
Logger LOGGER = Logger.getLogger(DurableTaskStep.class.getName());
LOGGER.setLevel(Level.FINE);
Handler handler = new ConsoleHandler();
handler.setLevel(Level.ALL);
LOGGER.addHandler(handler);
logging.record(DurableTaskStep.class, Level.FINE).record(FileMonitoringTask.class, Level.FINE);
// Cannot use regular JenkinsRule.createSlave due to JENKINS-26398.
// Nor can we can use JenkinsRule.createComputerLauncher, since spawned commands are killed by CommandLauncher somehow (it is not clear how; apparently before its onClosed kills them off).
DumbSlave s = new DumbSlave("dumbo", "dummy", tmp.getRoot().getAbsolutePath(), "1", Node.Mode.NORMAL, "", new JNLPLauncher(), RetentionStrategy.NOOP, Collections.<NodeProperty<?>>emptyList());
@@ -280,11 +276,7 @@ private void startJnlpProc() throws Exception {
story.addStep(new Statement() {
@SuppressWarnings("SleepWhileInLoop")
@Override public void evaluate() throws Throwable {
Logger LOGGER = Logger.getLogger(DurableTaskStep.class.getName());
LOGGER.setLevel(Level.FINE);
Handler handler = new ConsoleHandler();
handler.setLevel(Level.ALL);
LOGGER.addHandler(handler);
logging.record(DurableTaskStep.class, Level.FINE).record(FileMonitoringTask.class, Level.FINE);
DumbSlave s = new DumbSlave("dumbo", "dummy", tmp.getRoot().getAbsolutePath(), "1", Node.Mode.NORMAL, "", new JNLPLauncher(), RetentionStrategy.NOOP, Collections.<NodeProperty<?>>emptyList());
story.j.jenkins.addNode(s);
startJnlpProc();