-
Notifications
You must be signed in to change notification settings - Fork 96
Better fix to [JENKINS-25519] -- avoid errors when trying to read exitStatus from durable task before finished writing #66
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
fce0a64
3e5c724
031732c
72ab895
b90d99e
664a6ae
989f290
8660ea5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -138,9 +138,12 @@ public String getScript() { | |
| String cmd; | ||
| FilePath logFile = c.getLogFile(ws); | ||
| FilePath resultFile = c.getResultFile(ws); | ||
| if (resultFile.exists()) { | ||
| resultFile.delete(); // Maybe overly cautious, but better safe than sorry, similarly we should make sure no duplicate logfile? | ||
| } | ||
| FilePath controlDir = c.controlDir(ws); | ||
| if (capturingOutput) { | ||
| cmd = String.format("{ while [ -d '%s' -a \\! -f '%s' ]; do touch '%s'; sleep 3; done } & jsc=%s; %s=$jsc '%s' > '%s' 2> '%s'; echo $? > '%s'; wait", | ||
| cmd = String.format("{ while [ -d '%s' -a \\! -f '%s' ]; do touch '%s'; sleep 3; done } & jsc=%s; %s=$jsc '%s' > '%s' 2> '%s'; echo $? > '%s.tmp'; mv '%s.tmp' '%s'; wait", | ||
| controlDir, | ||
| resultFile, | ||
| logFile, | ||
|
|
@@ -149,17 +152,17 @@ public String getScript() { | |
| scriptPath, | ||
| c.getOutputFile(ws), | ||
| logFile, | ||
| resultFile); | ||
| resultFile, resultFile, resultFile); | ||
| } else { | ||
| cmd = String.format("{ while [ -d '%s' -a \\! -f '%s' ]; do touch '%s'; sleep 3; done } & jsc=%s; %s=$jsc '%s' > '%s' 2>&1; echo $? > '%s'; wait", | ||
| cmd = String.format("{ while [ -d '%s' -a \\! -f '%s' ]; do touch '%s'; sleep 3; done } & jsc=%s; %s=$jsc '%s' > '%s' 2>&1; echo $? > '%s.tmp'; mv '%s.tmp' '%s'; wait", | ||
| controlDir, | ||
| resultFile, | ||
| logFile, | ||
| cookieValue, | ||
| cookieVariable, | ||
| scriptPath, | ||
| logFile, | ||
| resultFile); | ||
| resultFile, resultFile, resultFile); | ||
| } | ||
| cmd = cmd.replace("$", "$$"); // escape against EnvVars jobEnv in LocalLauncher.launch | ||
|
|
||
|
|
@@ -207,6 +210,7 @@ private FilePath pidFile(FilePath ws) throws IOException, InterruptedException { | |
| return controlDir(ws).child("pid"); | ||
| } | ||
|
|
||
| // TODO run as one big MasterToSlaveCallable<Integer> to avoid extra network roundtrips | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Irrelevant after #60.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| @Override public Integer exitStatus(FilePath workspace, Launcher launcher, TaskListener listener) throws IOException, InterruptedException { | ||
| Integer status = super.exitStatus(workspace, launcher, listener); | ||
| if (status != null) { | ||
|
|
@@ -231,7 +235,7 @@ private FilePath pidFile(FilePath ws) throws IOException, InterruptedException { | |
| if (pidFile.exists()) { | ||
| listener.getLogger().println("still have " + pidFile + " so heartbeat checks unreliable; process may or may not be alive"); | ||
| } else { | ||
| listener.getLogger().println("wrapper script does not seem to be touching the log file in " + controlDir); | ||
| listener.getLogger().println("s " + controlDir); | ||
|
||
| listener.getLogger().println("(JENKINS-48300: if on a laggy filesystem, consider -Dorg.jenkinsci.plugins.durabletask.BourneShellScript.HEARTBEAT_CHECK_INTERVAL=300)"); | ||
| return recordExitStatus(workspace, -1); | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,6 +24,7 @@ | |
|
|
||
| package org.jenkinsci.plugins.durabletask; | ||
|
|
||
| import com.google.common.io.Files; | ||
| import hudson.EnvVars; | ||
| import hudson.FilePath; | ||
| import hudson.Launcher; | ||
|
|
@@ -40,6 +41,7 @@ | |
| import java.io.OutputStream; | ||
| import java.io.RandomAccessFile; | ||
| import java.io.StringWriter; | ||
| import java.nio.charset.Charset; | ||
| import java.util.Collections; | ||
| import java.util.Map; | ||
| import java.util.TreeMap; | ||
|
|
@@ -48,6 +50,9 @@ | |
| import java.util.logging.Logger; | ||
| import jenkins.MasterToSlaveFileCallable; | ||
| import org.apache.commons.io.IOUtils; | ||
| import org.jenkinsci.remoting.RoleChecker; | ||
|
|
||
| import javax.annotation.CheckForNull; | ||
|
|
||
| /** | ||
| * A task which forks some external command and then waits for log and status files to be updated/created. | ||
|
|
@@ -160,20 +165,40 @@ private static class WriteLog extends MasterToSlaveFileCallable<Long> { | |
| } | ||
| } | ||
|
|
||
| // TODO would be more efficient to allow API to consolidate writeLog with exitStatus (save an RPC call) | ||
| @Override public Integer exitStatus(FilePath workspace, Launcher launcher, TaskListener listener) throws IOException, InterruptedException { | ||
| FilePath status = getResultFile(workspace); | ||
| if (status.exists()) { | ||
| try { | ||
| return Integer.parseInt(status.readToString().trim()); | ||
| } catch (NumberFormatException x) { | ||
| throw new IOException("corrupted content in " + status + ": " + x, x); | ||
| /** Avoids excess round-tripping when reading status file. */ | ||
| static class StatusCheck extends MasterToSlaveFileCallable<Integer> { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is just gratuitous merge conflict creation against #60. Better to keep the patch short and to the point.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We have one particular user who has longer latencies and this change likely helps keep them under the timeouts - thus it is hardly "gratuitous." |
||
| @Override | ||
| @CheckForNull | ||
| public Integer invoke(File f, VirtualChannel channel) throws IOException, InterruptedException { | ||
| if (f.exists() && f.length() > 0) { | ||
| try { | ||
| String fileString = Files.readFirstLine(f, Charset.defaultCharset()); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Anyway you could just get rid of the
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If it's all agent-local then it doesn't really matter, does it? Also: as a rule of thumb in Java one should not be throwing Exceptions to signal routine and expected conditions. They come with a higher than normal level of baggage. As opposed to Python where that's the idiomatic way to signal things like an Iterator being done.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well, packaging all of this up in a Remoting call is a pretty high level of baggage, which would swamp any tiny overhead of
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Minor note: use of
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A really critical point - it looks like this is already broken, per #28 -- I think we'd need to address it in more comprehensive work to get it to play nicely in any case. |
||
| if (fileString == null || fileString.isEmpty()) { | ||
| return null; | ||
| } else { | ||
| fileString = fileString.trim(); | ||
| if (fileString.isEmpty()) { | ||
| return null; | ||
| } else { | ||
| return Integer.parseInt(fileString); | ||
| } | ||
| } | ||
| } catch (NumberFormatException x) { | ||
| throw new IOException("corrupted content in " + f + ": " + x, x); | ||
| } | ||
| } | ||
| } else { | ||
| return null; | ||
| } | ||
| } | ||
|
|
||
| static final StatusCheck STATUS_CHECK_INSTANCE = new StatusCheck(); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You know |
||
|
|
||
| // TODO would be more efficient to allow API to consolidate writeLog with exitStatus (save an RPC call) | ||
| @Override public Integer exitStatus(FilePath workspace, Launcher launcher, TaskListener listener) throws IOException, InterruptedException { | ||
| FilePath status = getResultFile(workspace); | ||
| return status.act(STATUS_CHECK_INSTANCE); | ||
| } | ||
|
|
||
| @Override public byte[] getOutput(FilePath workspace, Launcher launcher) throws IOException, InterruptedException { | ||
| // TODO could perhaps be more efficient for large files to send a MasterToSlaveFileCallable<byte[]> | ||
| try (InputStream is = getOutputFile(workspace).read()) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -60,17 +60,18 @@ public String getScript() { | |
| BatchController c = new BatchController(ws); | ||
|
|
||
| String cmd; | ||
| String quotedResultFile = quote(c.getResultFile(ws)); | ||
| if (capturingOutput) { | ||
| cmd = String.format("@echo off \r\ncmd /c \"\"%s\"\" > \"%s\" 2> \"%s\"\r\necho %%ERRORLEVEL%% > \"%s\"\r\n", | ||
| cmd = String.format("@echo off \r\ncmd /c \"\"%s\"\" > \"%s\" 2> \"%s\"\r\necho %%ERRORLEVEL%% > \"%s.tmp\"\r\nmove \"%s.tmp\" \"%s\"\r\n", | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. And PowerShell?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sensible. |
||
| quote(c.getBatchFile2(ws)), | ||
| quote(c.getOutputFile(ws)), | ||
| quote(c.getLogFile(ws)), | ||
| quote(c.getResultFile(ws))); | ||
| quotedResultFile, quotedResultFile, quotedResultFile); | ||
| } else { | ||
| cmd = String.format("@echo off \r\ncmd /c \"\"%s\"\" > \"%s\" 2>&1\r\necho %%ERRORLEVEL%% > \"%s\"\r\n", | ||
| cmd = String.format("@echo off \r\ncmd /c \"\"%s\"\" > \"%s\" 2>&1\r\necho %%ERRORLEVEL%% > \"%s.tmp\"\r\nmove \"%s.tmp\" \"%s\"\n", | ||
| quote(c.getBatchFile2(ws)), | ||
| quote(c.getLogFile(ws)), | ||
| quote(c.getResultFile(ws))); | ||
| quotedResultFile, quotedResultFile, quotedResultFile); | ||
| } | ||
| c.getBatchFile1(ws).write(cmd, "UTF-8"); | ||
| c.getBatchFile2(ws).write(script, "UTF-8"); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Well, the whole control directory should not exist before we start, or should be empty.