Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ public abstract class Controller implements Serializable {
* @param workspace the workspace in use
* @param sink where to send new log output
* @return true if something was written and the controller should be resaved, false if everything is idle
* @see DurableTask#charset
* @see DurableTask#defaultCharset
*/
public abstract boolean writeLog(FilePath workspace, OutputStream sink) throws IOException, InterruptedException;

Expand Down Expand Up @@ -88,6 +90,8 @@ public abstract class Controller implements Serializable {
* @param workspace the workspace in use
* @param launcher a way to start processes
* @return the output of the process as raw bytes (may be empty but not null)
* @see DurableTask#charset
* @see DurableTask#defaultCharset
*/
public @Nonnull byte[] getOutput(@Nonnull FilePath workspace, @Nonnull Launcher launcher) throws IOException, InterruptedException {
throw new IOException("Did not implement getOutput in " + getClass().getName());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
import hudson.model.AbstractDescribableImpl;
import hudson.model.TaskListener;
import java.io.IOException;
import java.nio.charset.Charset;
import javax.annotation.Nonnull;

/**
* A task which may be run asynchronously on a build node and withstand disconnection of the slave agent.
Expand Down Expand Up @@ -63,4 +65,23 @@ public void captureOutput() throws UnsupportedOperationException {
throw new UnsupportedOperationException("Capturing of output is not implemented in " + getClass().getName());
}

/**
* Requests that a specified charset be used to transcode process output.
* The encoding of {@link Controller#writeLog} and {@link Controller#getOutput} is then presumed to be UTF-8.
* If not called, no translation is performed.
* @param cs the character set in which process output is expected to be
*/
public void charset(@Nonnull Charset cs) {
// by default, ignore
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add some FINE/DEBUG logging to indicate that the method is not implemented?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could do that. In practice there are only three implementations anyway, all in this plugin.

}

/**
* Requests that the node’s system charset be used to transcode process output.
* The encoding of {@link Controller#writeLog} and {@link Controller#getOutput} is then presumed to be UTF-8.
* If not called, no translation is performed.
*/
public void defaultCharset() {
// by default, ignore
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same

}

}
Original file line number Diff line number Diff line change
Expand Up @@ -37,22 +37,25 @@
import hudson.util.StreamTaskListener;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.RandomAccessFile;
import java.io.StringWriter;
import java.nio.ByteBuffer;
import java.nio.channels.Channels;
import java.nio.charset.Charset;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.Map;
import java.util.TreeMap;
import java.util.UUID;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.annotation.CheckForNull;
import javax.annotation.Nonnull;
import jenkins.MasterToSlaveFileCallable;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.jenkinsci.remoting.RoleChecker;

import javax.annotation.CheckForNull;

/**
* A task which forks some external command and then waits for log and status files to be updated/created.
Expand All @@ -63,19 +66,37 @@ public abstract class FileMonitoringTask extends DurableTask {

private static final String COOKIE = "JENKINS_SERVER_COOKIE";

/** Value of {@link #charset} used to mean the node’s system default. */
private static final String SYSTEM_DEFAULT_CHARSET = "SYSTEM_DEFAULT";

/**
* Charset name to use for transcoding, or {@link #SYSTEM_DEFAULT_CHARSET}, or null for no transcoding.
*/
private @CheckForNull String charset;

private static String cookieFor(FilePath workspace) {
return "durable-" + Util.getDigestOf(workspace.getRemote());
}

@Override public final Controller launch(EnvVars env, FilePath workspace, Launcher launcher, TaskListener listener) throws IOException, InterruptedException {
return launchWithCookie(workspace, launcher, listener, env, COOKIE, cookieFor(workspace));
FileMonitoringController controller = launchWithCookie(workspace, launcher, listener, env, COOKIE, cookieFor(workspace));
controller.charset = charset;
return controller;
}

protected FileMonitoringController launchWithCookie(FilePath workspace, Launcher launcher, TaskListener listener, EnvVars envVars, String cookieVariable, String cookieValue) throws IOException, InterruptedException {
envVars.put(cookieVariable, cookieValue); // ensure getCharacteristicEnvVars does not match, so Launcher.killAll will leave it alone
return doLaunch(workspace, launcher, listener, envVars);
}

@Override public final void charset(Charset cs) {
charset = cs.name();
}

@Override public final void defaultCharset() {
charset = SYSTEM_DEFAULT_CHARSET;
}

/**
* Should start a process which sends output to {@linkplain FileMonitoringController#getLogFile(FilePath) log file}
* in the workspace and finally writes its exit code to {@linkplain FileMonitoringController#getResultFile(FilePath) result file}.
Expand Down Expand Up @@ -115,6 +136,9 @@ protected static class FileMonitoringController extends Controller {
*/
private long lastLocation;

/** @see FileMonitoringTask#charset */
private @CheckForNull String charset;

protected FileMonitoringController(FilePath ws) throws IOException, InterruptedException {
// can't keep ws reference because Controller is expected to be serializable
ws.mkdirs();
Expand All @@ -125,7 +149,7 @@ protected FileMonitoringController(FilePath ws) throws IOException, InterruptedE

@Override public final boolean writeLog(FilePath workspace, OutputStream sink) throws IOException, InterruptedException {
FilePath log = getLogFile(workspace);
Long newLocation = log.act(new WriteLog(lastLocation, new RemoteOutputStream(sink)));
Long newLocation = log.act(new WriteLog(lastLocation, new RemoteOutputStream(sink), charset));
if (newLocation != null) {
LOGGER.log(Level.FINE, "copied {0} bytes from {1}", new Object[] {newLocation - lastLocation, log});
lastLocation = newLocation;
Expand All @@ -137,9 +161,11 @@ protected FileMonitoringController(FilePath ws) throws IOException, InterruptedE
private static class WriteLog extends MasterToSlaveFileCallable<Long> {
private final long lastLocation;
private final OutputStream sink;
WriteLog(long lastLocation, OutputStream sink) {
private final @CheckForNull String charset;
WriteLog(long lastLocation, OutputStream sink, String charset) {
this.lastLocation = lastLocation;
this.sink = sink;
this.charset = charset;
}
@Override public Long invoke(File f, VirtualChannel channel) throws IOException, InterruptedException {
long len = f.length();
Expand All @@ -154,7 +180,12 @@ private static class WriteLog extends MasterToSlaveFileCallable<Long> {
// TODO is this efficient for large amounts of output? Would it be better to stream data, or return a byte[] from the callable?
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with this comment... really really this is the place to stream it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See the other set of PRs which deprecate this code path anyway.

byte[] buf = new byte[(int) toRead];
raf.readFully(buf);
sink.write(buf);
ByteBuffer transcoded = maybeTranscode(buf, charset);
if (transcoded == null) {
sink.write(buf);
} else {
Channels.newChannel(sink).write(transcoded);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This may be writing a number of bytes to CountingOutputStream which is different than toRead (when the full write succeeds). Probably the transcoding needs to be done on the master side, which will be a little trickier since any defaultCharset call needs to happen on the agent side.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that the bug, if there is one, is rendered obsolete by #62 which renders this dead code.

}
} finally {
raf.close();
}
Expand Down Expand Up @@ -198,13 +229,42 @@ public Integer invoke(File f, VirtualChannel channel) throws IOException, Interr
return status.act(STATUS_CHECK_INSTANCE);
}

@Override public byte[] getOutput(FilePath workspace, Launcher launcher) throws IOException, InterruptedException {
// TODO could perhaps be more efficient for large files to send a MasterToSlaveFileCallable<byte[]>
try (InputStream is = getOutputFile(workspace).read()) {
return IOUtils.toByteArray(is);
/**
* Transcode process output to UTF-8 if necessary.
* @param data output presumed to be in local encoding
* @param charset a particular encoding name, or the empty string for the system default encoding, or null to skip transcoding
* @return a buffer of UTF-8 encoded data ({@link CodingErrorAction#REPLACE} is used),
* or null if not performing transcoding because it was not requested or the data was already thought to be in UTF-8
*/
private static @CheckForNull ByteBuffer maybeTranscode(@Nonnull byte[] data, @CheckForNull String charset) {
if (charset == null) { // no transcoding requested, do raw copy and YMMV
return null;
} else {
Charset cs = charset.equals(SYSTEM_DEFAULT_CHARSET) ? Charset.defaultCharset() : Charset.forName(charset);
if (cs.equals(StandardCharsets.UTF_8)) { // transcoding unnecessary as output was already UTF-8
return null;
} else { // decode output in specified charset and reëncode in UTF-8
return StandardCharsets.UTF_8.encode(cs.decode(ByteBuffer.wrap(data)));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🐛 We should do everything we can to allocate and reuse a buffer here -- i.e. create a transient byte[] / Buffer that is used for storing encoded output, rather than StandardCharsets.UTF_8.encode -- otherwise we're going to generate massive amounts of memory garbage.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, hardly “massive” as this is typically dwarfed by unrelated overhead related to launching and joining processes, but I can check if there is some premature optimization possible here if you care.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Separate paths for optimization - process launch/join has some fixed overheads and some scaling issues that are addressed by changes in the algorithm.

If we're injecting new APIs though we should ensure the API is friendly to efficient implementations -- coupling directly to newly-allocated-and-not-reused byte[] will bite us.

}
}
}

@Override public byte[] getOutput(FilePath workspace, Launcher launcher) throws IOException, InterruptedException {
return getOutputFile(workspace).act(new MasterToSlaveFileCallable<byte[]>() {
@Override public byte[] invoke(File f, VirtualChannel channel) throws IOException, InterruptedException {
byte[] buf = FileUtils.readFileToByteArray(f);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🐛 Allocating and reading the whole file rather than doing streaming handling (or doing a chunk at a time with the buffer). This is a problem because it will generate tons of memory garbage and may create demand excessive memory spikes that impact stability.

Also, streaming APIs are just a more natural fit with our normal logging model if possible.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is only for returnStdout: true, where we are anyway returning a String to the program. There is no reason to even attempt to stream anything.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 to @jglick. It would be reasonable if the operation supported any parameters (e.g. tail or so), but currently the change does not make the code worse

ByteBuffer transcoded = maybeTranscode(buf, charset);
if (transcoded == null) {
return buf;
} else {
byte[] buf2 = new byte[transcoded.remaining()];
transcoded.get(buf2);
return buf2;
}
}
});
}

@Override public final void stop(FilePath workspace, Launcher launcher) throws IOException, InterruptedException {
launcher.kill(Collections.singletonMap(COOKIE, cookieFor(workspace)));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,11 @@
import hudson.util.VersionNumber;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.logging.Level;
import jenkins.security.MasterToSlaveCallable;
import org.apache.commons.io.output.TeeOutputStream;
import static org.hamcrest.Matchers.*;
import org.jenkinsci.test.acceptance.docker.Docker;
Expand Down Expand Up @@ -236,4 +239,141 @@ private void runOnDocker(DumbSlave s) throws Exception {
runOnDocker(new DumbSlave("docker", "/home/jenkins/agent", new SimpleCommandLauncher("docker run -i --rm --name agent --init jenkinsci/slave:3.7-1 java -jar /usr/share/jenkins/slave.jar")));
}

@Issue("JENKINS-31096")
@Test public void encoding() throws Exception {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test is unreadable IMHO. Would it be possible to split it to several tests or at least explicitly indicate test stages? Assert logic could be also refactored to a separate method

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it could be refactored to use a helper assertion method with parameters.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(resolved)

JavaContainer container = dockerUbuntu.get();
DumbSlave s = new DumbSlave("docker", "/home/test", new SSHLauncher(container.ipBound(22), container.port(22), "test", "test", "", "-Dfile.encoding=ISO-8859-1"));
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that this test can run without jenkinsci/docker-fixtures#19 since it is only concerned with file contents, not name. (Passing -Dsun.jnu.encoding=… to the SSH launcher does not seem to work for that.)

j.jenkins.addNode(s);
j.waitOnline(s);
assertEquals("ISO-8859-1", s.getChannel().call(new DetectCharset()));
FilePath dockerWS = s.getWorkspaceRoot();
dockerWS.child("latin").write("¡Ole!", "ISO-8859-1");
dockerWS.child("eastern").write("Čau!", "ISO-8859-2");
dockerWS.child("mixed").write("¡Čau → there!", "UTF-8");
Launcher dockerLauncher = s.createLauncher(listener);
// control: no transcoding
Controller c = new BourneShellScript("cat latin").launch(new EnvVars(), dockerWS, dockerLauncher, listener);
while (c.exitStatus(dockerWS, dockerLauncher, listener) == null) {
Thread.sleep(100);
}
ByteArrayOutputStream baos = new ByteArrayOutputStream();
c.writeLog(dockerWS, baos);
assertEquals(0, c.exitStatus(dockerWS, dockerLauncher, listener).intValue());
assertThat(baos.toString("ISO-8859-1"), containsString("¡Ole!"));
c.cleanup(dockerWS);
// and with output capture:
BourneShellScript dt = new BourneShellScript("cat latin");
dt.captureOutput();
c = dt.launch(new EnvVars(), dockerWS, dockerLauncher, listener);
while (c.exitStatus(dockerWS, dockerLauncher, listener) == null) {
Thread.sleep(100);
}
c.writeLog(dockerWS, System.err);
assertEquals(0, c.exitStatus(dockerWS, dockerLauncher, listener).intValue());
assertEquals("¡Ole!", new String(c.getOutput(dockerWS, launcher), "ISO-8859-1"));
c.cleanup(dockerWS);
// test: specify particular charset (UTF-8)
dt = new BourneShellScript("cat mixed");
dt.charset(StandardCharsets.UTF_8);
c = dt.launch(new EnvVars(), dockerWS, dockerLauncher, listener);
while (c.exitStatus(dockerWS, dockerLauncher, listener) == null) {
Thread.sleep(100);
}
baos = new ByteArrayOutputStream();
c.writeLog(dockerWS, baos);
assertEquals(0, c.exitStatus(dockerWS, dockerLauncher, listener).intValue());
assertThat(baos.toString("UTF-8"), containsString("¡Čau → there!"));
c.cleanup(dockerWS);
// and with output capture:
dt = new BourneShellScript("cat mixed");
dt.charset(StandardCharsets.UTF_8);
dt.captureOutput();
c = dt.launch(new EnvVars(), dockerWS, dockerLauncher, listener);
while (c.exitStatus(dockerWS, dockerLauncher, listener) == null) {
Thread.sleep(100);
}
c.writeLog(dockerWS, System.err);
assertEquals(0, c.exitStatus(dockerWS, dockerLauncher, listener).intValue());
assertEquals("¡Čau → there!", new String(c.getOutput(dockerWS, launcher), "UTF-8"));
c.cleanup(dockerWS);
// test: specify particular charset (unrelated)
dt = new BourneShellScript("cat eastern");
dt.charset(Charset.forName("ISO-8859-2"));
c = dt.launch(new EnvVars(), dockerWS, dockerLauncher, listener);
while (c.exitStatus(dockerWS, dockerLauncher, listener) == null) {
Thread.sleep(100);
}
baos = new ByteArrayOutputStream();
c.writeLog(dockerWS, baos);
assertEquals(0, c.exitStatus(dockerWS, dockerLauncher, listener).intValue());
assertThat(baos.toString("UTF-8"), containsString("Čau!"));
c.cleanup(dockerWS);
// and with output capture:
dt = new BourneShellScript("cat eastern");
dt.charset(Charset.forName("ISO-8859-2"));
dt.captureOutput();
c = dt.launch(new EnvVars(), dockerWS, dockerLauncher, listener);
while (c.exitStatus(dockerWS, dockerLauncher, listener) == null) {
Thread.sleep(100);
}
c.writeLog(dockerWS, System.err);
assertEquals(0, c.exitStatus(dockerWS, dockerLauncher, listener).intValue());
assertEquals("Čau!", new String(c.getOutput(dockerWS, launcher), "UTF-8"));
c.cleanup(dockerWS);
// test: specify agent default charset
dt = new BourneShellScript("cat latin");
dt.defaultCharset();
c = dt.launch(new EnvVars(), dockerWS, dockerLauncher, listener);
while (c.exitStatus(dockerWS, dockerLauncher, listener) == null) {
Thread.sleep(100);
}
baos = new ByteArrayOutputStream();
c.writeLog(dockerWS, baos);
assertEquals(0, c.exitStatus(dockerWS, dockerLauncher, listener).intValue());
assertThat(baos.toString("UTF-8"), containsString("¡Ole!"));
c.cleanup(dockerWS);
// and with output capture:
dt = new BourneShellScript("cat latin");
dt.defaultCharset();
dt.captureOutput();
c = dt.launch(new EnvVars(), dockerWS, dockerLauncher, listener);
while (c.exitStatus(dockerWS, dockerLauncher, listener) == null) {
Thread.sleep(100);
}
c.writeLog(dockerWS, System.err);
assertEquals(0, c.exitStatus(dockerWS, dockerLauncher, listener).intValue());
assertEquals("¡Ole!", new String(c.getOutput(dockerWS, launcher), "UTF-8"));
c.cleanup(dockerWS);
// test: inappropriate charset, some replacement characters
dt = new BourneShellScript("cat mixed");
dt.charset(StandardCharsets.US_ASCII);
c = dt.launch(new EnvVars(), dockerWS, dockerLauncher, listener);
while (c.exitStatus(dockerWS, dockerLauncher, listener) == null) {
Thread.sleep(100);
}
baos = new ByteArrayOutputStream();
c.writeLog(dockerWS, baos);
assertEquals(0, c.exitStatus(dockerWS, dockerLauncher, listener).intValue());
assertThat(baos.toString("UTF-8"), containsString("����au ��� there!"));
c.cleanup(dockerWS);
// and with output capture:
dt = new BourneShellScript("cat mixed");
dt.charset(StandardCharsets.US_ASCII);
dt.captureOutput();
c = dt.launch(new EnvVars(), dockerWS, dockerLauncher, listener);
while (c.exitStatus(dockerWS, dockerLauncher, listener) == null) {
Thread.sleep(100);
}
c.writeLog(dockerWS, System.err);
assertEquals(0, c.exitStatus(dockerWS, dockerLauncher, listener).intValue());
assertEquals("����au ��� there!", new String(c.getOutput(dockerWS, launcher), "UTF-8"));
c.cleanup(dockerWS);
s.toComputer().disconnect(new OfflineCause.UserCause(null, null));
}
private static class DetectCharset extends MasterToSlaveCallable<String, RuntimeException> {
@Override public String call() throws RuntimeException {
return Charset.defaultCharset().name();
}
}

}