From fce0a644b8ecf7837f69e1761f46d00ceaf138e0 Mon Sep 17 00:00:00 2001 From: Sam Van Oort Date: Sat, 3 Mar 2018 18:31:54 -0500 Subject: [PATCH 1/8] Basic linux fix to do atomic write of exitStatus + 0-length check --- .../plugins/durabletask/BourneShellScript.java | 11 ++++++++--- .../plugins/durabletask/FileMonitoringTask.java | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/jenkinsci/plugins/durabletask/BourneShellScript.java b/src/main/java/org/jenkinsci/plugins/durabletask/BourneShellScript.java index e93b7be3..251aa848 100644 --- a/src/main/java/org/jenkinsci/plugins/durabletask/BourneShellScript.java +++ b/src/main/java/org/jenkinsci/plugins/durabletask/BourneShellScript.java @@ -138,9 +138,12 @@ public String getScript() { String cmd; FilePath logFile = c.getLogFile(ws); FilePath resultFile = c.getResultFile(ws); + if (resultFile.exists()) { + resultFile.delete(); // Maybe overly cautious, but better safe than sorry, similarly we should make sure no duplicate logfile? + } FilePath controlDir = c.controlDir(ws); if (capturingOutput) { - cmd = String.format("{ while [ -d '%s' -a \\! -f '%s' ]; do touch '%s'; sleep 3; done } & jsc=%s; %s=$jsc '%s' > '%s' 2> '%s'; echo $? > '%s'; wait", + cmd = String.format("{ while [ -d '%s' -a \\! -f '%s' ]; do touch '%s'; sleep 3; done } & jsc=%s; %s=$jsc '%s' > '%s' 2> '%s'; echo $? > '%s.tmp'; mv '%s.tmp' '%s'; wait", controlDir, resultFile, logFile, @@ -149,9 +152,10 @@ public String getScript() { scriptPath, c.getOutputFile(ws), logFile, + resultFile, resultFile); } else { - cmd = String.format("{ while [ -d '%s' -a \\! -f '%s' ]; do touch '%s'; sleep 3; done } & jsc=%s; %s=$jsc '%s' > '%s' 2>&1; echo $? > '%s'; wait", + cmd = String.format("{ while [ -d '%s' -a \\! -f '%s' ]; do touch '%s'; sleep 3; done } & jsc=%s; %s=$jsc '%s' > '%s' 2>&1; echo $? > '%s.tmp'; mv '%s.tmp' '%s'; wait", controlDir, resultFile, logFile, @@ -159,6 +163,7 @@ public String getScript() { cookieVariable, scriptPath, logFile, + resultFile, resultFile); } cmd = cmd.replace("$", "$$"); // escape against EnvVars jobEnv in LocalLauncher.launch @@ -231,7 +236,7 @@ private FilePath pidFile(FilePath ws) throws IOException, InterruptedException { if (pidFile.exists()) { listener.getLogger().println("still have " + pidFile + " so heartbeat checks unreliable; process may or may not be alive"); } else { - listener.getLogger().println("wrapper script does not seem to be touching the log file in " + controlDir); + listener.getLogger().println("s " + controlDir); listener.getLogger().println("(JENKINS-48300: if on a laggy filesystem, consider -Dorg.jenkinsci.plugins.durabletask.BourneShellScript.HEARTBEAT_CHECK_INTERVAL=300)"); return recordExitStatus(workspace, -1); } diff --git a/src/main/java/org/jenkinsci/plugins/durabletask/FileMonitoringTask.java b/src/main/java/org/jenkinsci/plugins/durabletask/FileMonitoringTask.java index 7ef7d3d7..6b14694a 100644 --- a/src/main/java/org/jenkinsci/plugins/durabletask/FileMonitoringTask.java +++ b/src/main/java/org/jenkinsci/plugins/durabletask/FileMonitoringTask.java @@ -163,7 +163,7 @@ private static class WriteLog extends MasterToSlaveFileCallable { // TODO would be more efficient to allow API to consolidate writeLog with exitStatus (save an RPC call) @Override public Integer exitStatus(FilePath workspace, Launcher launcher, TaskListener listener) throws IOException, InterruptedException { FilePath status = getResultFile(workspace); - if (status.exists()) { + if (status.exists() && status.length() > 0) { // Length 0 can happen if we're sloppy and do not write status atomically. try { return Integer.parseInt(status.readToString().trim()); } catch (NumberFormatException x) { From 3e5c724578249cce52773b524e43bd18a27ec10b Mon Sep 17 00:00:00 2001 From: Sam Van Oort Date: Sat, 3 Mar 2018 18:40:45 -0500 Subject: [PATCH 2/8] Attempt atomic write for Batch version --- .../plugins/durabletask/WindowsBatchScript.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/jenkinsci/plugins/durabletask/WindowsBatchScript.java b/src/main/java/org/jenkinsci/plugins/durabletask/WindowsBatchScript.java index 7fac3075..4d4ec94d 100644 --- a/src/main/java/org/jenkinsci/plugins/durabletask/WindowsBatchScript.java +++ b/src/main/java/org/jenkinsci/plugins/durabletask/WindowsBatchScript.java @@ -60,17 +60,18 @@ public String getScript() { BatchController c = new BatchController(ws); String cmd; + String quotedResultFile = quote(c.getResultFile(ws)); if (capturingOutput) { - cmd = String.format("@echo off \r\ncmd /c \"\"%s\"\" > \"%s\" 2> \"%s\"\r\necho %%ERRORLEVEL%% > \"%s\"\r\n", + cmd = String.format("@echo off \r\ncmd /c \"\"%s\"\" > \"%s\" 2> \"%s\"\r\necho %%ERRORLEVEL%% > \"%s.tmp\"\r\nmove \"%s.tmp\" \"%s\"\r\n", quote(c.getBatchFile2(ws)), quote(c.getOutputFile(ws)), quote(c.getLogFile(ws)), - quote(c.getResultFile(ws))); + quotedResultFile, quotedResultFile, quotedResultFile); } else { - cmd = String.format("@echo off \r\ncmd /c \"\"%s\"\" > \"%s\" 2>&1\r\necho %%ERRORLEVEL%% > \"%s\"\r\n", + cmd = String.format("@echo off \r\ncmd /c \"\"%s\"\" > \"%s\" 2>&1\r\necho %%ERRORLEVEL%% > \"%s.tmp\"\r\nmove \"%s.tmp\" \"%s\"\n", quote(c.getBatchFile2(ws)), quote(c.getLogFile(ws)), - quote(c.getResultFile(ws))); + quotedResultFile, quotedResultFile, quotedResultFile); } c.getBatchFile1(ws).write(cmd, "UTF-8"); c.getBatchFile2(ws).write(script, "UTF-8"); From 031732c05a4580592ece5c5c4c3554ad16f7581b Mon Sep 17 00:00:00 2001 From: Sam Van Oort Date: Sat, 3 Mar 2018 18:56:45 -0500 Subject: [PATCH 3/8] Use a FileCallable for exit status check to avoid roundtrips --- .../durabletask/FileMonitoringTask.java | 33 ++++++++++++++----- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/jenkinsci/plugins/durabletask/FileMonitoringTask.java b/src/main/java/org/jenkinsci/plugins/durabletask/FileMonitoringTask.java index 6b14694a..cf3a8d4b 100644 --- a/src/main/java/org/jenkinsci/plugins/durabletask/FileMonitoringTask.java +++ b/src/main/java/org/jenkinsci/plugins/durabletask/FileMonitoringTask.java @@ -24,6 +24,7 @@ package org.jenkinsci.plugins.durabletask; +import com.google.common.io.Files; import hudson.EnvVars; import hudson.FilePath; import hudson.Launcher; @@ -40,6 +41,7 @@ import java.io.OutputStream; import java.io.RandomAccessFile; import java.io.StringWriter; +import java.nio.charset.Charset; import java.util.Collections; import java.util.Map; import java.util.TreeMap; @@ -48,6 +50,9 @@ import java.util.logging.Logger; import jenkins.MasterToSlaveFileCallable; import org.apache.commons.io.IOUtils; +import org.jenkinsci.remoting.RoleChecker; + +import javax.annotation.CheckForNull; /** * A task which forks some external command and then waits for log and status files to be updated/created. @@ -160,20 +165,30 @@ private static class WriteLog extends MasterToSlaveFileCallable { } } - // TODO would be more efficient to allow API to consolidate writeLog with exitStatus (save an RPC call) - @Override public Integer exitStatus(FilePath workspace, Launcher launcher, TaskListener listener) throws IOException, InterruptedException { - FilePath status = getResultFile(workspace); - if (status.exists() && status.length() > 0) { // Length 0 can happen if we're sloppy and do not write status atomically. - try { - return Integer.parseInt(status.readToString().trim()); - } catch (NumberFormatException x) { - throw new IOException("corrupted content in " + status + ": " + x, x); + /** Avoids excess round-tripping when reading status file. */ + static class StatusCheck extends MasterToSlaveFileCallable { + @Override + @CheckForNull + public Integer invoke(File f, VirtualChannel channel) throws IOException, InterruptedException { + if (f.exists() && f.length() > 0) { + try { + return Integer.parseInt(Files.readFirstLine(f, Charset.defaultCharset())); + } catch (NumberFormatException x) { + throw new IOException("corrupted content in " + f + ": " + x, x); + } } - } else { return null; } } + static final StatusCheck STATUS_CHECK_INSTANCE = new StatusCheck(); + + // TODO would be more efficient to allow API to consolidate writeLog with exitStatus (save an RPC call) + @Override public Integer exitStatus(FilePath workspace, Launcher launcher, TaskListener listener) throws IOException, InterruptedException { + FilePath status = getResultFile(workspace); + return status.act(STATUS_CHECK_INSTANCE); + } + @Override public byte[] getOutput(FilePath workspace, Launcher launcher) throws IOException, InterruptedException { // TODO could perhaps be more efficient for large files to send a MasterToSlaveFileCallable try (InputStream is = getOutputFile(workspace).read()) { From 72ab895ecdc23ce33d8a1756ec3dd6629b2fccfd Mon Sep 17 00:00:00 2001 From: Sam Van Oort Date: Sun, 4 Mar 2018 08:59:47 -0500 Subject: [PATCH 4/8] Fix small oopsy --- .../jenkinsci/plugins/durabletask/BourneShellScript.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/jenkinsci/plugins/durabletask/BourneShellScript.java b/src/main/java/org/jenkinsci/plugins/durabletask/BourneShellScript.java index 251aa848..5e3851ea 100644 --- a/src/main/java/org/jenkinsci/plugins/durabletask/BourneShellScript.java +++ b/src/main/java/org/jenkinsci/plugins/durabletask/BourneShellScript.java @@ -152,8 +152,7 @@ public String getScript() { scriptPath, c.getOutputFile(ws), logFile, - resultFile, - resultFile); + resultFile, resultFile, resultFile); } else { cmd = String.format("{ while [ -d '%s' -a \\! -f '%s' ]; do touch '%s'; sleep 3; done } & jsc=%s; %s=$jsc '%s' > '%s' 2>&1; echo $? > '%s.tmp'; mv '%s.tmp' '%s'; wait", controlDir, @@ -163,8 +162,7 @@ public String getScript() { cookieVariable, scriptPath, logFile, - resultFile, - resultFile); + resultFile, resultFile, resultFile); } cmd = cmd.replace("$", "$$"); // escape against EnvVars jobEnv in LocalLauncher.launch @@ -212,6 +210,7 @@ private FilePath pidFile(FilePath ws) throws IOException, InterruptedException { return controlDir(ws).child("pid"); } + // TODO run as one big MasterToSlaveCallable to avoid extra network roundtrips @Override public Integer exitStatus(FilePath workspace, Launcher launcher, TaskListener listener) throws IOException, InterruptedException { Integer status = super.exitStatus(workspace, launcher, listener); if (status != null) { From b90d99e9d36a417cd701482072359bd07be8e065 Mon Sep 17 00:00:00 2001 From: Sam Van Oort Date: Mon, 5 Mar 2018 10:35:17 -0500 Subject: [PATCH 5/8] Fix trimming of StatusCheck --- .../plugins/durabletask/FileMonitoringTask.java | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/jenkinsci/plugins/durabletask/FileMonitoringTask.java b/src/main/java/org/jenkinsci/plugins/durabletask/FileMonitoringTask.java index cf3a8d4b..5bbc6207 100644 --- a/src/main/java/org/jenkinsci/plugins/durabletask/FileMonitoringTask.java +++ b/src/main/java/org/jenkinsci/plugins/durabletask/FileMonitoringTask.java @@ -172,7 +172,17 @@ static class StatusCheck extends MasterToSlaveFileCallable { public Integer invoke(File f, VirtualChannel channel) throws IOException, InterruptedException { if (f.exists() && f.length() > 0) { try { - return Integer.parseInt(Files.readFirstLine(f, Charset.defaultCharset())); + String fileString = Files.readFirstLine(f, Charset.defaultCharset()); + if (fileString == null || fileString.isEmpty()) { + return null; + } else { + fileString = fileString.trim(); + if (fileString.isEmpty()) { + return null; + } else { + return Integer.parseInt(fileString); + } + } } catch (NumberFormatException x) { throw new IOException("corrupted content in " + f + ": " + x, x); } From 664a6aeb2c918884e3194f8e46b36de96d840f4f Mon Sep 17 00:00:00 2001 From: Sam Van Oort Date: Mon, 5 Mar 2018 11:00:40 -0500 Subject: [PATCH 6/8] Remove needless TODO --- .../org/jenkinsci/plugins/durabletask/FileMonitoringTask.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/org/jenkinsci/plugins/durabletask/FileMonitoringTask.java b/src/main/java/org/jenkinsci/plugins/durabletask/FileMonitoringTask.java index 5bbc6207..cbae4446 100644 --- a/src/main/java/org/jenkinsci/plugins/durabletask/FileMonitoringTask.java +++ b/src/main/java/org/jenkinsci/plugins/durabletask/FileMonitoringTask.java @@ -193,7 +193,6 @@ public Integer invoke(File f, VirtualChannel channel) throws IOException, Interr static final StatusCheck STATUS_CHECK_INSTANCE = new StatusCheck(); - // TODO would be more efficient to allow API to consolidate writeLog with exitStatus (save an RPC call) @Override public Integer exitStatus(FilePath workspace, Launcher launcher, TaskListener listener) throws IOException, InterruptedException { FilePath status = getResultFile(workspace); return status.act(STATUS_CHECK_INSTANCE); From 989f290a5c1fb2776c821869c9b76c1d0279e02a Mon Sep 17 00:00:00 2001 From: Sam Van Oort Date: Mon, 5 Mar 2018 18:29:08 -0500 Subject: [PATCH 7/8] Fix one oopsy --- .../org/jenkinsci/plugins/durabletask/BourneShellScript.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/jenkinsci/plugins/durabletask/BourneShellScript.java b/src/main/java/org/jenkinsci/plugins/durabletask/BourneShellScript.java index 5e3851ea..36a92c9e 100644 --- a/src/main/java/org/jenkinsci/plugins/durabletask/BourneShellScript.java +++ b/src/main/java/org/jenkinsci/plugins/durabletask/BourneShellScript.java @@ -235,7 +235,7 @@ private FilePath pidFile(FilePath ws) throws IOException, InterruptedException { if (pidFile.exists()) { listener.getLogger().println("still have " + pidFile + " so heartbeat checks unreliable; process may or may not be alive"); } else { - listener.getLogger().println("s " + controlDir); + listener.getLogger().println("wrapper script does not seem to be touching the log file in " + controlDir); listener.getLogger().println("(JENKINS-48300: if on a laggy filesystem, consider -Dorg.jenkinsci.plugins.durabletask.BourneShellScript.HEARTBEAT_CHECK_INTERVAL=300)"); return recordExitStatus(workspace, -1); } From 8660ea5d4b0aa8d7769818ce1afad8d3e4115491 Mon Sep 17 00:00:00 2001 From: Sam Van Oort Date: Mon, 5 Mar 2018 18:30:41 -0500 Subject: [PATCH 8/8] Maybe do not be as cautious --- .../org/jenkinsci/plugins/durabletask/BourneShellScript.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/main/java/org/jenkinsci/plugins/durabletask/BourneShellScript.java b/src/main/java/org/jenkinsci/plugins/durabletask/BourneShellScript.java index 36a92c9e..3c15d3c3 100644 --- a/src/main/java/org/jenkinsci/plugins/durabletask/BourneShellScript.java +++ b/src/main/java/org/jenkinsci/plugins/durabletask/BourneShellScript.java @@ -138,9 +138,6 @@ public String getScript() { String cmd; FilePath logFile = c.getLogFile(ws); FilePath resultFile = c.getResultFile(ws); - if (resultFile.exists()) { - resultFile.delete(); // Maybe overly cautious, but better safe than sorry, similarly we should make sure no duplicate logfile? - } FilePath controlDir = c.controlDir(ws); if (capturingOutput) { cmd = String.format("{ while [ -d '%s' -a \\! -f '%s' ]; do touch '%s'; sleep 3; done } & jsc=%s; %s=$jsc '%s' > '%s' 2> '%s'; echo $? > '%s.tmp'; mv '%s.tmp' '%s'; wait",