From fdc0b699693b0a2c1dc7d1ab373189808742fec6 Mon Sep 17 00:00:00 2001
From: Ed Smith
Date: Sun, 13 Nov 2022 20:54:07 +0000
Subject: [PATCH 1/9] Support masking directly in the gitlab-runner-rs crate

This brings in a dependency on the masker crate. The code to support
masking artifacts will come later; this commit only masks the logs
passed to Gitlab.
---
 gitlab-runner/Cargo.toml |  1 +
 gitlab-runner/src/run.rs | 27 ++++++++++++++++++++++++++-
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/gitlab-runner/Cargo.toml b/gitlab-runner/Cargo.toml
index 9eca5dd..6340415 100644
--- a/gitlab-runner/Cargo.toml
+++ b/gitlab-runner/Cargo.toml
@@ -29,6 +29,7 @@ tracing-subscriber = "0.3.8"
 tracing = "0.1.30"
 doc-comment = "0.3.3"
 tokio-util = { version = "0.7", features = [ "io" ] }
+masker = "0.0.1"
 
 [dev-dependencies]
 tokio = { version = "1.5.0", features = [ "full", "test-util" ] }

diff --git a/gitlab-runner/src/run.rs b/gitlab-runner/src/run.rs
index 3c6d240..645048a 100644
--- a/gitlab-runner/src/run.rs
+++ b/gitlab-runner/src/run.rs
@@ -1,4 +1,5 @@
 use bytes::Bytes;
+use masker::Masker;
 use std::future::Future;
 use std::path::PathBuf;
 use std::sync::Arc;
@@ -15,6 +16,8 @@ use crate::uploader::Uploader;
 use crate::CancellableJobHandler;
 use crate::{JobResult, Phase};
 
+const GITLAB_MASK: &str = "[MASKED]";
+
 async fn run(
     job: Job,
     client: Client,
@@ -140,7 +143,13 @@ impl Run {
         buf: Bytes,
         cancel_token: &CancellationToken,
     ) -> Option<Duration> {
-        assert!(!buf.is_empty());
+        if buf.is_empty() {
+            // It's convenient to permit this because if we are
+            // masking, the masker may not have produced any output,
+            // so we'd just end up doing the same test in every
+            // caller, rather than once here.
+            return None;
+        }
 
         let len = buf.len();
         match self
@@ -198,6 +207,17 @@ impl Run {
         );
         tokio::pin!(join);
 
+        let masked_variables = self
+            .response
+            .variables
+            .iter()
+            .filter(|(_, v)| v.masked)
+            .map(|(_, v)| v.value.as_str())
+            .collect::<Vec<_>>();
+
+        let masker = Masker::new(&masked_variables, GITLAB_MASK);
+        let mut cm = masker.mask_chunks();
+
         let result = loop {
             tokio::select! {
                 _ = self.interval.tick() => {
@@ -206,6 +226,7 @@
                     let now = Instant::now();
                     if let Some(buf) = self.joblog.split_trace() {
                         // TODO be resiliant against send errors
+                        let buf = cm.mask_chunk(buf).into();
                        if let Some(interval) = self.send_trace(buf, &cancel_token).await {
                            if interval != self.interval.period() {
                                self.interval = Self::create_interval(now, interval);
@@ -224,8 +245,12 @@
 
        // Send the remaining trace buffer back to gitlab.
        if let Some(buf) = self.joblog.split_trace() {
+           let buf = cm.mask_chunk(buf).into();
            self.send_trace(buf, &cancel_token).await;
        }
+       // Flush anything the masker was holding back
+       let buf = cm.finish().into();
+       self.send_trace(buf, &cancel_token).await;
 
        // Don't bother updating the status if cancelled, since it will just fail.
        if !cancel_token.is_cancelled() {

From 6cd41e5b256f90ce76be5388faac98f5ba54b95c Mon Sep 17 00:00:00 2001
From: Ed Smith
Date: Sun, 13 Nov 2022 23:34:16 +0000
Subject: [PATCH 2/9] Add a test for log masking

This is only a basic test that the masking is in fact being applied to
variables marked as masked. This is not a test of the functionality of
the masking crate, only of its integration into this crate.
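The chunk-boundary behaviour the test relies on can be sketched roughly
as follows (illustrative only, and assuming mask_chunk() and finish()
accept byte slices and return Vec<u8>, as the run.rs code in the
previous commit suggests):

    use masker::Masker;

    fn sketch() {
        let secrets = vec!["$ecret"];
        let masker = Masker::new(&secrets, "[MASKED]");
        let mut cm = masker.mask_chunks();

        // A secret split across two trace chunks is still caught...
        let mut out = cm.mask_chunk(b"bb$ec");
        out.extend(cm.mask_chunk(b"retxyz\n"));
        // ...and finish() flushes anything still held back at the end.
        out.extend(cm.finish());
        assert_eq!(b"bb[MASKED]xyz\n", out.as_slice());
    }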
---
 gitlab-runner/tests/integration.rs | 47 ++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/gitlab-runner/tests/integration.rs b/gitlab-runner/tests/integration.rs
index 5ffd1a6..657e9d4 100644
--- a/gitlab-runner/tests/integration.rs
+++ b/gitlab-runner/tests/integration.rs
@@ -412,6 +412,53 @@ async fn job_log() {
     .await;
 }
 
+#[tokio::test]
+async fn job_mask_log() {
+    let mock = GitlabRunnerMock::start().await;
+    let job = {
+        let mut b = mock.job_builder("log".to_string());
+        b.add_variable("SECRET".to_string(), "$ecret".to_string(), false, true);
+        b.add_step(
+            MockJobStepName::Script,
+            vec!["dummy".to_string()],
+            3600,
+            MockJobStepWhen::OnSuccess,
+            false,
+        );
+        b.build()
+    };
+    mock.enqueue_job(job.clone());
+
+    let dir = tempfile::tempdir().unwrap();
+    let (mut runner, layer) = Runner::new_with_layer(
+        mock.uri(),
+        mock.runner_token().to_string(),
+        dir.path().to_path_buf(),
+    );
+
+    let subscriber = Registry::default().with(layer);
+    async {
+        tracing::info!("TEST");
+        let got_job = runner
+            .request_job(|job| async move {
+                tracing::info!("TEST1234");
+                outputln!("aa-$ecret");
+                job.trace("bb$ec");
+                outputln!("retxyz");
+                SimpleRun::dummy(Ok(())).await
+            })
+            .with_current_subscriber()
+            .await
+            .unwrap();
+        assert!(got_job);
+        runner.wait_for_space(1).await;
+        assert_eq!(MockJobState::Success, job.state());
+        assert_eq!(b"aa-[MASKED]\nbb[MASKED]xyz\n", job.log().as_slice());
+    }
+    .with_subscriber(subscriber)
+    .await;
+}
+
 #[tokio::test]
 async fn job_steps() {
     let mock = GitlabRunnerMock::start().await;

From e4d8dce506716d7e79ff5d1f48a554644a2c61e4 Mon Sep 17 00:00:00 2001
From: Ed Smith
Date: Sun, 13 Nov 2022 22:11:13 +0000
Subject: [PATCH 3/9] Allow client crates to upload masked files

Although we already automatically mask the trace sent to Gitlab, some
runners will produce additional log files or metadata, which must also
be masked to prevent secrets from being revealed in the artifact data.
This permits client crates to identify a file they wish to upload as
additionally requiring masking.

Note that the version bump in the masker crate is because we now need
to be able to clone the masker for each Uploader to use.
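As a rough sketch of the client-side usage this enables (the uploaded
file names below are only examples, error handling is reduced to
expect() as in the tests, and this would typically live in a
JobHandler::upload_artifacts() implementation):

    use futures::AsyncWriteExt;
    use gitlab_runner::uploader::Uploader;
    use gitlab_runner::JobResult;

    async fn upload(uploader: &mut Uploader) -> JobResult {
        // Everything written here passes through the masker before it
        // reaches the artifact zip.
        let mut masked = uploader.masked_file("debug.log".to_string()).await;
        masked
            .write_all(b"CI_JOB_TOKEN=job-token\n")
            .await
            .expect("Couldn't write masked data");
        // Closing the file flushes anything the masker was holding back.
        masked.close().await.expect("Couldn't close masked file");

        // The existing, unmasked path is unchanged.
        let mut plain = uploader.file("report.txt".to_string()).await;
        plain
            .write_all(b"job completed\n")
            .await
            .expect("Couldn't write report");
        Ok(())
    }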
---
 gitlab-runner/Cargo.toml      |  2 +-
 gitlab-runner/src/run.rs      | 22 ++++++------
 gitlab-runner/src/uploader.rs | 63 +++++++++++++++++++++++++++++++----
 3 files changed, 69 insertions(+), 18 deletions(-)

diff --git a/gitlab-runner/Cargo.toml b/gitlab-runner/Cargo.toml
index 6340415..5f68651 100644
--- a/gitlab-runner/Cargo.toml
+++ b/gitlab-runner/Cargo.toml
@@ -29,7 +29,7 @@ tracing-subscriber = "0.3.8"
 tracing = "0.1.30"
 doc-comment = "0.3.3"
 tokio-util = { version = "0.7", features = [ "io" ] }
-masker = "0.0.1"
+masker = "0.0.2"
 
 [dev-dependencies]
 tokio = { version = "1.5.0", features = [ "full", "test-util" ] }

diff --git a/gitlab-runner/src/run.rs b/gitlab-runner/src/run.rs
index 645048a..3da559d 100644
--- a/gitlab-runner/src/run.rs
+++ b/gitlab-runner/src/run.rs
@@ -22,6 +22,7 @@ async fn run(
     job: Job,
     client: Client,
     response: Arc<JobResponse>,
+    masker: Masker,
     process: F,
     build_dir: PathBuf,
     cancel_token: CancellationToken,
@@ -65,7 +66,7 @@ where
     });
 
     let r = if upload {
-        if let Ok(mut uploader) = Uploader::new(client, &build_dir, response) {
+        if let Ok(mut uploader) = Uploader::new(client, &build_dir, response, masker) {
             let r = handler.upload_artifacts(&mut uploader).await;
             if r.is_ok() {
                 uploader.upload().await.and(script_result)
@@ -187,6 +188,15 @@ impl Run {
     {
         let cancel_token = CancellationToken::new();
 
+        let masked_variables = self
+            .response
+            .variables
+            .iter()
+            .filter(|(_, v)| v.masked)
+            .map(|(_, v)| v.value.as_str())
+            .collect::<Vec<_>>();
+        let masker = Masker::new(&masked_variables, GITLAB_MASK);
+
         let job = Job::new(
             self.client.clone(),
             self.response.clone(),
@@ -198,6 +208,7 @@ impl Run {
             job,
             self.client.clone(),
             self.response.clone(),
+            masker.clone(),
             process,
             build_dir,
             cancel_token.clone(),
@@ -207,15 +218,6 @@ impl Run {
         );
         tokio::pin!(join);
 
-        let masked_variables = self
-            .response
-            .variables
-            .iter()
-            .filter(|(_, v)| v.masked)
-            .map(|(_, v)| v.value.as_str())
-            .collect::<Vec<_>>();
-
-        let masker = Masker::new(&masked_variables, GITLAB_MASK);
         let mut cm = masker.mask_chunks();
 
         let result = loop {

diff --git a/gitlab-runner/src/uploader.rs b/gitlab-runner/src/uploader.rs
index f17aaf7..e5a2364 100644
--- a/gitlab-runner/src/uploader.rs
+++ b/gitlab-runner/src/uploader.rs
@@ -8,6 +8,7 @@ use std::thread;
 use std::{sync::Arc, task::Poll};
 
 use futures::{future::BoxFuture, AsyncWrite, FutureExt};
+use masker::{ChunkMasker, Masker};
 use reqwest::Body;
 use tokio::fs::File as AsyncFile;
 use tokio::sync::mpsc::{self, error::SendError};
@@ -71,6 +72,7 @@ fn zip_thread(mut temp: File, mut rx: mpsc::Receiver<UploadRequest>) {
 pub struct UploadFile<'a> {
     tx: &'a mpsc::Sender<UploadRequest>,
     state: UploadFileState<'a>,
+    masker: Option<ChunkMasker<'a>>,
 }
 
 impl<'a> AsyncWrite for UploadFile<'a> {
@@ -84,10 +86,12 @@ impl<'a> AsyncWrite for UploadFile<'a> {
         match this.state {
             UploadFileState::Idle => {
                 let (tx, rx) = oneshot::channel();
-                let send = this
-                    .tx
-                    .send(UploadRequest::WriteData(Vec::from(buf), tx))
-                    .boxed();
+                let buf = if let Some(masker) = &mut this.masker {
+                    masker.mask_chunk(buf)
+                } else {
+                    Vec::from(buf)
+                };
+                let send = this.tx.send(UploadRequest::WriteData(buf, tx)).boxed();
                 this.state = UploadFileState::Writing(Some(send), rx)
             }
             UploadFileState::Writing(ref mut send, ref mut rx) => {
@@ -114,9 +118,33 @@ impl<'a> AsyncWrite for UploadFile<'a> {
 
     fn poll_close(
         self: std::pin::Pin<&mut Self>,
-        _cx: &mut std::task::Context<'_>,
+        cx: &mut std::task::Context<'_>,
     ) -> std::task::Poll<std::io::Result<()>> {
-        Poll::Ready(Ok(()))
+        let this = self.get_mut();
+        if let Some(masker) = this.masker.take() {
+            let (tx, mut rx) = oneshot::channel();
+            let buf = masker.finish();
+            let mut send = Some(this.tx.send(UploadRequest::WriteData(buf, tx)).boxed());
+
+            loop {
+                if let Some(mut f) = send {
+                    // Phase 1: Waiting for the send to the writer
+                    // thread to complete.
+
+                    // TODO error handling
+                    let _r = futures::ready!(f.as_mut().poll(cx));
+                    send = None;
+                } else {
+                    // Phase 2: Waiting for the writer thread to
+                    // signal write completion.
+
+                    let _r = futures::ready!(Pin::new(&mut rx).poll(cx));
+                    return Poll::Ready(Ok(()));
+                }
+            }
+        } else {
+            Poll::Ready(Ok(()))
+        }
     }
 }
 
@@ -125,6 +153,7 @@ pub struct Uploader {
     client: Client,
     data: Arc<JobResponse>,
     tx: mpsc::Sender<UploadRequest>,
+    masker: Masker,
 }
 
 impl Uploader {
@@ -132,13 +161,19 @@ impl Uploader {
         client: Client,
         build_dir: &Path,
         data: Arc<JobResponse>,
+        masker: Masker,
     ) -> Result<Self, ()> {
         let temp = tempfile::tempfile_in(build_dir)
             .map_err(|e| warn!("Failed to create artifacts temp file: {:?}", e))?;
         let (tx, rx) = mpsc::channel(2);
         thread::spawn(move || zip_thread(temp, rx));
 
-        Ok(Self { client, data, tx })
+        Ok(Self {
+            client,
+            data,
+            tx,
+            masker,
+        })
     }
 
     /// Create a new file to be uploaded
@@ -149,6 +184,20 @@ impl Uploader {
             .expect("Failed to create file");
         UploadFile {
             tx: &self.tx,
+            masker: None,
             state: UploadFileState::Idle,
         }
     }
+
+    /// Create a new file to be uploaded, which will be masked
+    pub async fn masked_file(&mut self, name: String) -> UploadFile<'_> {
+        self.tx
+            .send(UploadRequest::NewFile(name))
+            .await
+            .expect("Failed to create file");
+        UploadFile {
+            tx: &self.tx,
+            masker: Some(self.masker.mask_chunks()),
+            state: UploadFileState::Idle,
+        }
+    }
 }

From 1ea6febdf1684e5d3fc73902c628ecd2b96e0b4e Mon Sep 17 00:00:00 2001
From: Ed Smith
Date: Sun, 13 Nov 2022 20:49:26 +0000
Subject: [PATCH 4/9] Parse GitlabFeatures from the JobResponse

This is the other half of the capability signalling that Gitlab does.
Not only does a runner signal what features it is capable of supporting
when making a request, but the server also provides some information
back to the runner about what capabilities it provides.

There are currently three capabilities Gitlab can indicate it has (this
commit only adds parsing, not support):

- Separating the trace into sections for easier viewing.
- Prefix masking (it's not totally clear why this is a Gitlab feature,
  but this is the mechanism by which the prefix masks are communicated
  to a runner willing to mask them).
- Failure reasons.

The end goal here is support for prefix masking, so that things like
PAT tokens are all automatically masked system-wide in logs.
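For reference, the features member of a job response has roughly the
shape the mock starts returning in a later commit (sketched here with
the serde_json json! macro; the failure_reasons list is abbreviated and
varies between Gitlab versions):

    use serde_json::json;

    fn features_example() -> serde_json::Value {
        json!({
            "trace_sections": true,
            "token_mask_prefixes": ["glpat-"],
            "failure_reasons": ["unknown_failure", "script_failure", "api_failure"]
        })
    }

Absent or null members deserialize to their defaults (false or an empty
list).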
---
 gitlab-runner/src/client.rs | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/gitlab-runner/src/client.rs b/gitlab-runner/src/client.rs
index 6e8c5ca..a70cf70 100644
--- a/gitlab-runner/src/client.rs
+++ b/gitlab-runner/src/client.rs
@@ -168,6 +168,16 @@ pub(crate) struct JobDependency {
     pub artifacts_file: Option,
 }
 
+#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
+pub(crate) struct GitlabFeatures {
+    #[serde(default)]
+    pub trace_sections: bool,
+    #[serde(default, deserialize_with = "deserialize_null_default")]
+    pub token_mask_prefixes: Vec<String>,
+    #[serde(default, deserialize_with = "deserialize_null_default")]
+    pub failure_reasons: Vec<String>,
+}
+
 #[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
 pub(crate) struct JobResponse {
     pub id: u64,
@@ -180,6 +190,8 @@ pub(crate) struct JobResponse {
     pub dependencies: Vec<JobDependency>,
     #[serde(deserialize_with = "deserialize_null_default")]
     pub artifacts: Vec,
+    #[serde(default)]
+    pub features: Option<GitlabFeatures>,
     #[serde(flatten)]
     unparsed: JsonValue,
 }

From f8865ba3ca7813724463c7f5f555042ef7ef0eb6 Mon Sep 17 00:00:00 2001
From: Ed Smith
Date: Thu, 24 Nov 2022 22:31:49 +0000
Subject: [PATCH 5/9] Enable token prefix masking

If the features passed by the Gitlab server indicate that there are
token prefixes that should be masked, this adds those prefixes to the
Masker. This is fully transparent to users of this crate; the correct
thing will happen provided they correctly identify which artifacts they
believe should be masked. The trace will always be masked correctly.
---
 gitlab-runner/Cargo.toml |  2 +-
 gitlab-runner/src/run.rs | 21 +++++++++++++++++++--
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/gitlab-runner/Cargo.toml b/gitlab-runner/Cargo.toml
index 5f68651..261cc0b 100644
--- a/gitlab-runner/Cargo.toml
+++ b/gitlab-runner/Cargo.toml
@@ -29,7 +29,7 @@ tracing-subscriber = "0.3.8"
 tracing = "0.1.30"
 doc-comment = "0.3.3"
 tokio-util = { version = "0.7", features = [ "io" ] }
-masker = "0.0.2"
+masker = "0.0.3"
 
 [dev-dependencies]
 tokio = { version = "1.5.0", features = [ "full", "test-util" ] }

diff --git a/gitlab-runner/src/run.rs b/gitlab-runner/src/run.rs
index 3da559d..63da440 100644
--- a/gitlab-runner/src/run.rs
+++ b/gitlab-runner/src/run.rs
@@ -1,5 +1,5 @@
 use bytes::Bytes;
-use masker::Masker;
+use masker::{Masker, MatchData};
 use std::future::Future;
 use std::path::PathBuf;
 use std::sync::Arc;
@@ -17,6 +17,8 @@ use crate::CancellableJobHandler;
 use crate::{JobResult, Phase};
 
 const GITLAB_MASK: &str = "[MASKED]";
+const GITLAB_TOKEN_SUFFIX_CHARS: &str =
+    "-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz=";
 
 async fn run(
     job: Job,
@@ -187,7 +189,22 @@ impl Run {
             .filter(|(_, v)| v.masked)
             .map(|(_, v)| v.value.as_str())
             .collect::<Vec<_>>();
-        let masker = Masker::new(&masked_variables, GITLAB_MASK);
+        let prefixes = self
+            .response
+            .features
+            .iter()
+            .flat_map(|x| x.token_mask_prefixes.iter())
+            // This matches the behaviour of the gitlab runner, which
+            // explicitly supports a maximum of 10 prefixes.
+            .take(10)
+            .map(|p| MatchData {
+                prefix: p.trim().as_bytes(),
+                suffix: GITLAB_TOKEN_SUFFIX_CHARS.as_bytes(),
+                mask_prefix: false,
+            })
+            .collect::<Vec<_>>();
+
+        let masker = Masker::new_with_match_data(&masked_variables, &prefixes, GITLAB_MASK);
 
         let job = Job::new(
             self.client.clone(),
             self.response.clone(),

From e7288ea3de28fce5204463c929149ce139315a9b Mon Sep 17 00:00:00 2001
From: Ed Smith
Date: Thu, 24 Nov 2022 22:34:19 +0000
Subject: [PATCH 6/9] Add support for token prefix masking in the mock

Allow token prefixes to be registered on the mock job. If they are
present, add them to the Gitlab features provided by the mock with that
job, as Gitlab itself does.
---
 gitlab-runner-mock/src/api/request.rs | 69 ++++++++++++++++-----------
 gitlab-runner-mock/src/job.rs         | 12 +++++
 2 files changed, 53 insertions(+), 28 deletions(-)

diff --git a/gitlab-runner-mock/src/api/request.rs b/gitlab-runner-mock/src/api/request.rs
index 9ddaaa6..ed8a4b5 100644
--- a/gitlab-runner-mock/src/api/request.rs
+++ b/gitlab-runner-mock/src/api/request.rs
@@ -107,6 +107,46 @@ impl Respond for JobRequestResponder {
                 value
             })
             .collect();
+
+        let failure_reasons = json!([
+            "unknown_failure",
+            "script_failure",
+            "api_failure",
+            "stuck_or_timeout_failure",
+            "runner_system_failure",
+            "missing_dependency_failure",
+            "runner_unsupported",
+            "stale_schedule",
+            "job_execution_timeout",
+            "archived_failure",
+            "unmet_prerequisites",
+            "scheduler_failure",
+            "data_integrity_failure",
+            "forward_deployment_failure",
+            "user_blocked",
+            "project_deleted",
+            "insufficient_bridge_permissions",
+            "downstream_bridge_project_not_found",
+            "invalid_bridge_trigger",
+            "bridge_pipeline_is_child_pipeline",
+            "downstream_pipeline_creation_failed",
+            "secrets_provider_not_found",
+            "reached_max_descendant_pipelines_depth"
+        ]);
+
+        let features = if !job.token_prefixes().is_empty() {
+            json!({
+                "trace_sections": true,
+                "token_mask_prefixes": job.token_prefixes(),
+                "failure_reasons": failure_reasons
+            })
+        } else {
+            json!({
+                "trace_sections": true,
+                "failure_reasons": failure_reasons
+            })
+        };
+
         ResponseTemplate::new(201).set_body_json(json!({
             "id": job.id(),
             "token": job.token(),
@@ -155,34 +195,7 @@ impl Respond for JobRequestResponder {
                 }
             ],
             "dependencies": dependencies,
-            "features": {
-                "trace_sections": true,
-                "failure_reasons": [
-                    "unknown_failure",
-                    "script_failure",
-                    "api_failure",
-                    "stuck_or_timeout_failure",
-                    "runner_system_failure",
-                    "missing_dependency_failure",
-                    "runner_unsupported",
-                    "stale_schedule",
-                    "job_execution_timeout",
-                    "archived_failure",
-                    "unmet_prerequisites",
-                    "scheduler_failure",
-                    "data_integrity_failure",
-                    "forward_deployment_failure",
-                    "user_blocked",
-                    "project_deleted",
-                    "insufficient_bridge_permissions",
-                    "downstream_bridge_project_not_found",
-                    "invalid_bridge_trigger",
-                    "bridge_pipeline_is_child_pipeline",
-                    "downstream_pipeline_creation_failed",
-                    "secrets_provider_not_found",
-                    "reached_max_descendant_pipelines_depth"
-                ]
-            }
+            "features": features
         }))
     } else {
         ResponseTemplate::new(StatusCode::NoContent)

diff --git a/gitlab-runner-mock/src/job.rs b/gitlab-runner-mock/src/job.rs
index 9b27e12..a0c4d6d 100644
--- a/gitlab-runner-mock/src/job.rs
+++ b/gitlab-runner-mock/src/job.rs
@@ -117,6 +117,7 @@ pub struct MockJob {
    steps: Vec<MockJobStep>,
    dependencies: Vec,
    artifacts: Vec,
+   token_prefixes: Vec<String>,
    inner: Arc<Mutex<MockJobInner>>,
 }
 
@@ -143,6 +144,7 @@ impl MockJob {
            steps: Vec::new(),
            dependencies: Vec::new(),
            artifacts: Vec::new(),
+           token_prefixes: Vec::new(),
            inner: Arc::new(Mutex::new(MockJobInner {
                state: MockJobState::Success,
                state_updates: 2,
@@ -177,6 +179,10 @@ impl MockJob {
         &self.variables
     }
 
+    pub fn token_prefixes(&self) -> &[String] {
+        &self.token_prefixes
+    }
+
     pub fn steps(&self) -> &[MockJobStep] {
         &self.steps
     }
@@ -265,6 +271,7 @@ pub struct MockJobBuilder {
     variables: HashMap<String, MockJobVariable>,
     steps: Vec<MockJobStep>,
     dependencies: Vec,
+    token_prefixes: Vec<String>,
     artifacts: Vec,
 }
 
@@ -293,6 +300,10 @@ impl MockJobBuilder {
         );
     }
 
+    pub fn add_token_prefix(&mut self, prefix: String) {
+        self.token_prefixes.push(prefix);
+    }
+
     pub fn add_step(
         &mut self,
         name: MockJobStepName,
@@ -373,6 +384,7 @@ impl MockJobBuilder {
             variables: self.variables.into_values().collect(),
             dependencies: self.dependencies,
             artifacts: self.artifacts,
+            token_prefixes: self.token_prefixes,
             inner,
         }
     }

From 6c796023480a854c6bdf41a7ee3000f53d5bddfa Mon Sep 17 00:00:00 2001
From: Ed Smith
Date: Thu, 24 Nov 2022 22:34:49 +0000
Subject: [PATCH 7/9] Allow job artifacts to be retrieved from the mock

This is useful when testing whether the artifacts uploaded are as
expected. In particular, we need this now to test whether masked
artifacts are actually being masked as we expect them to be.

Note that we are returning the raw binary of the zipfile, and we'll
need to use the zip crate to actually interpret this data. That seems
acceptable for now.
---
 gitlab-runner-mock/src/lib.rs | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/gitlab-runner-mock/src/lib.rs b/gitlab-runner-mock/src/lib.rs
index e75b00f..076eefa 100644
--- a/gitlab-runner-mock/src/lib.rs
+++ b/gitlab-runner-mock/src/lib.rs
@@ -134,6 +134,15 @@ impl GitlabRunnerMock {
         jobs.jobs.push(job);
     }
 
+    pub fn get_job_artifact(&self, id: u64) -> Option<Vec<u8>> {
+        let jobs = self.inner.jobs.lock().unwrap();
+
+        jobs.jobs
+            .iter()
+            .find(|j| j.id() == id)
+            .map(|j| j.artifact().as_slice().to_vec())
+    }
+
     fn grab_pending_job(&self) -> Option<MockJob> {
         let jobs = self.inner.jobs.lock().unwrap();
         for job in jobs.jobs.iter() {

From fda0bc25e9c0d207abf6410134c0be1fbb6c4272 Mon Sep 17 00:00:00 2001
From: Ed Smith
Date: Thu, 24 Nov 2022 22:36:26 +0000
Subject: [PATCH 8/9] Use less common token values for masked variables

Using very common short values like 'token' for tokens that are also
masked can have odd effects on other tests, because we may
unexpectedly mask those words out of ordinary log data. In particular,
such values conflict with Gitlab's own tests of prefix matching. It's
safer to use slightly longer stand-ins.
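To illustrate the clash (a sketch only, with str::replace standing in
for the literal-value masker):

    fn sketch() {
        // One of the upstream prefix-masking inputs added in the next
        // commit starts with "token-imperdiet". With CI_REGISTRY_PASSWORD
        // still set to the literal "token", that value is masked first, so
        // the line is rewritten before the "token-" prefix rule is
        // exercised.
        let upstream_input = "token-imperdiet Lorem ipsum dolor sit amet";
        let already_masked = upstream_input.replace("token", "[MASKED]");
        assert_eq!(already_masked, "[MASKED]-imperdiet Lorem ipsum dolor sit amet");
    }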
---
 gitlab-runner-mock/src/variables.rs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gitlab-runner-mock/src/variables.rs b/gitlab-runner-mock/src/variables.rs
index 659cdcc..1236f5e 100644
--- a/gitlab-runner-mock/src/variables.rs
+++ b/gitlab-runner-mock/src/variables.rs
@@ -28,7 +28,7 @@ pub fn default_job_variables(job_id: u64) -> Vec<MockJobVariable> {
         },
         MockJobVariable {
             key: "CI_JOB_TOKEN".to_owned(),
-            value: "tokn".to_owned(),
+            value: "job-token".to_owned(),
             public: false,
             masked: true,
         },
@@ -46,7 +46,7 @@ pub fn default_job_variables(job_id: u64) -> Vec<MockJobVariable> {
         },
         MockJobVariable {
             key: "CI_BUILD_TOKEN".to_owned(),
-            value: "tokn".to_owned(),
+            value: "build-token".to_owned(),
             public: false,
             masked: true,
         },
@@ -58,7 +58,7 @@ pub fn default_job_variables(job_id: u64) -> Vec<MockJobVariable> {
         },
         MockJobVariable {
             key: "CI_REGISTRY_PASSWORD".to_owned(),
-            value: "token".to_owned(),
+            value: "registry-password".to_owned(),
             public: false,
             masked: true,
         },
@@ -77,7 +77,7 @@ pub fn default_job_variables(job_id: u64) -> Vec<MockJobVariable> {
         },
         MockJobVariable {
             key: "CI_DEPENDENCY_PROXY_PASSWORD".to_owned(),
-            value: "token".to_owned(),
+            value: "proxy-password".to_owned(),
             public: false,
             masked: true,
         },

From 42e38e06ea74d7e5df71f328aed053ee1151a15f Mon Sep 17 00:00:00 2001
From: Ed Smith
Date: Thu, 24 Nov 2022 22:38:49 +0000
Subject: [PATCH 9/9] Add the gitlab prefix masking tests directly

This uses the actual source code of those tests, which is MIT licensed
(as is this crate), to be certain that we match the behaviour of the
upstream runner.
---
 gitlab-runner/tests/masking.rs | 242 +++++++++++++++++++++++++++++++++
 1 file changed, 242 insertions(+)
 create mode 100644 gitlab-runner/tests/masking.rs

diff --git a/gitlab-runner/tests/masking.rs b/gitlab-runner/tests/masking.rs
new file mode 100644
index 0000000..f471f2f
--- /dev/null
+++ b/gitlab-runner/tests/masking.rs
@@ -0,0 +1,242 @@
+use futures::AsyncWriteExt;
+use gitlab_runner::uploader::Uploader;
+use gitlab_runner::{outputln, JobHandler, JobResult, Phase, Runner};
+use gitlab_runner_mock::{GitlabRunnerMock, MockJobState, MockJobStepName, MockJobStepWhen};
+use std::io::{Cursor, Read};
+use tracing::instrument::WithSubscriber;
+use tracing_subscriber::prelude::__tracing_subscriber_SubscriberExt;
+use tracing_subscriber::Registry;
+use zip::ZipArchive;
+
+struct MaskTest {
+    log_text: String,
+}
+
+#[async_trait::async_trait]
+impl JobHandler for MaskTest {
+    async fn step(&mut self, _script: &[String], _phase: Phase) -> JobResult {
+        outputln!("{}", self.log_text.split('|').collect::<Vec<_>>().join(""));
+        Ok(())
+    }
+
+    async fn upload_artifacts(&mut self, uploader: &mut Uploader) -> JobResult {
+        let mut f = uploader.masked_file("masked".to_string()).await;
+        f.write_all(
+            format!(
+                "{}\n",
+                self.log_text.split('|').collect::<Vec<_>>().join("")
+            )
+            .as_bytes(),
+        )
+        .await
+        .expect("Couldn't write test data");
+        drop(f);
+        let mut f = uploader.file("baseline".to_string()).await;
+        f.write_all(self.log_text.as_bytes())
+            .await
+            .expect("Couldn't write test data");
+
+        Ok(())
+    }
+}
+
+#[derive(Debug, Clone, Default)]
+struct TestCase {
+    prefixes: Vec<String>,
+    input: String,
+    expected: String,
+    name: String,
+}
+
+macro_rules! emit_prefixes {
+    ( $( $pfx:expr ),* ) => {
+        vec![
+            $(
+                $pfx.to_string()
+            ),*
+        ]
+    };
+}
+
+macro_rules! parse_test_case {
+    ( $data:expr, ) => {};
+
+    ( $data:expr, $name:literal : { input: $input:expr, expected: $expected:expr, } ) => {
+        $data.push( TestCase { prefixes : Vec::new(), input: $input.to_string(), expected: format!("{}\n", $expected), name: $name.to_string() } );
+    };
+
+    ( $data:expr, $name:literal : { input: $input:expr, expected: $expected:expr, }, $($tail:tt)+ ) => {
+        $data.push( TestCase { prefixes : Vec::new(), input: $input.to_string(), expected: format!("{}\n", $expected), name: $name.to_string() } );
+        parse_test_case![$data, $($tail)*]
+    };
+
+    ( $data:expr, $name:literal : { prefixes: []string{ $($pfx:expr),* }, input: $input:expr, expected: $expected:expr, } ) => {
+        $data.push( TestCase { prefixes : emit_prefixes![$($pfx),*], input: $input.to_string(), expected: format!("{}\n", $expected), name: $name.to_string() } );
+    };
+
+    ( $data:expr, $name:literal : { prefixes: []string{ $($pfx:expr),* }, input: $input:expr, expected: $expected:expr, }, $($tail:tt)* ) => {
+        $data.push( TestCase { prefixes : emit_prefixes![$($pfx),*], input: $input.to_string(), expected: format!("{}\n", $expected), name: $name.to_string() } );
+        parse_test_case![$data, $($tail)*]
+    };
+}
+
+macro_rules! test_cases {
+    () => {};
+
+    ( $($tail:tt)* ) => {
+        {
+            let mut data = Vec::new();
+            parse_test_case!(data, $($tail)+);
+            data
+        }
+    };
+}
+
+#[tokio::test]
+async fn mask_test() {
+    // These test data are taken directly from the official
+    // gitlab-runner source, and are still formatted for Go. We use a
+    // simple pair of macros to convert them into an appropriate Rust
+    // data structure.
+    let test_cases = test_cases![
+        "simple prefix masking": {
+            input: "Lorem ipsum dolor sit amet, ex ea commodo glpat-imperdiet in voluptate velit esse",
+            expected: "Lorem ipsum dolor sit amet, ex ea commodo glpat-[MASKED] in voluptate velit esse",
+        },
+        "prefix at the end of the line": {
+            input: "Lorem ipsum dolor sit amet, ex ea commodo in voluptate velit esseglpat-imperdiet",
+            expected: "Lorem ipsum dolor sit amet, ex ea commodo in voluptate velit esseglpat-[MASKED]",
+        },
+        "prefix at the beginning of the line": {
+            input: "glpat-imperdiet Lorem ipsum dolor sit amet, ex ea commodo in voluptate velit esse",
+            expected: "glpat-[MASKED] Lorem ipsum dolor sit amet, ex ea commodo in voluptate velit esse",
+        },
+        "prefix inside of the line": {
+            input: "esseglpat-imperdiet=_-. end Lorem ipsum dolor sit amet, ex ea commodo in voluptate velit",
+            expected: "esseglpat-[MASKED] end Lorem ipsum dolor sit amet, ex ea commodo in voluptate velit",
+        },
+        "two prefix concatenate": {
+            input: "glpat-impglpat-erdiet Lorem ipsum dolor sit amet, ex ea commodo in voluptate velit esse",
+            expected: "glpat-[MASKED] Lorem ipsum dolor sit amet, ex ea commodo in voluptate velit esse",
+        },
+        "multiple packets pat masking": {
+            input: "glpat|-imperdiet Lorem ipsum dolor sit amet, ex ea commodo gl|pat-imperdiet in voluptate velit esse",
+            expected: "glpat-[MASKED] Lorem ipsum dolor sit amet, ex ea commodo glpat-[MASKED] in voluptate velit esse",
+        },
+        "second multiple packets pat masking": {
+            input: "glpat| -imperdiet Lorem ipsum dolor sit amet",
+            expected: "glpat -imperdiet Lorem ipsum dolor sit amet",
+        },
+        "long input": {
+            input: "Lorglpat-ipsu dolor sit amglpat-t, consglpat-ctglpat-tur adipiscing glpat-lit, sglpat-d do glpat-iusmod tglpat-mpor incididunt ut laborglpat-=_ glpat-t dolorglpat-=_ magna aliqua.",
+            expected: "Lorglpat-[MASKED] dolor sit amglpat-[MASKED], consglpat-[MASKED] adipiscing glpat-[MASKED], sglpat-[MASKED] do glpat-[MASKED] tglpat-[MASKED] incididunt ut laborglpat-[MASKED] glpat-[MASKED] dolorglpat-[MASKED] magna aliqua.",
+        },
+        "multiple packets long input": {
+            input: "Lorglpat-ipsu dolor sit amglp|at-t, consglpat-ctg|lpat-tur adipiscing glpat-lit, sglpat-|d do glpat-iusmod t|glpat-mpor incididunt ut |laborglpat-=_ glpat-t dolorglpat-=_ magna aliqua.",
+            expected: "Lorglpat-[MASKED] dolor sit amglpat-[MASKED], consglpat-[MASKED] adipiscing glpat-[MASKED], sglpat-[MASKED] do glpat-[MASKED] tglpat-[MASKED] incididunt ut laborglpat-[MASKED] glpat-[MASKED] dolorglpat-[MASKED] magna aliqua.",
+        },
+        "second long input": {
+            input: "Lorglpat- ipsu dolor sit amglpat-t, consglpat-ctglpat-tur adipiscing glpat-lit, sglpat-d do glpat-iusmod tglpat-mpor incididunt ut laborglpat-=_ glpat-t dolorglpat-=_ magna aliqua.",
+            expected: "Lorglpat- ipsu dolor sit amglpat-[MASKED], consglpat-[MASKED] adipiscing glpat-[MASKED], sglpat-[MASKED] do glpat-[MASKED] tglpat-[MASKED] incididunt ut laborglpat-[MASKED] glpat-[MASKED] dolorglpat-[MASKED] magna aliqua.",
+        },
+        "custom prefix with default one at the beginning of the line": {
+            prefixes: []string{"token-"},
+            input: "token-imperdiet Lorem ipsum dolor sit amet, ex ea commodo in voluptate velit esse",
+            expected: "token-[MASKED] Lorem ipsum dolor sit amet, ex ea commodo in voluptate velit esse",
+        },
+        "custom prefix with default one multiple packets long input": {
+            prefixes: []string{"tok-"},
+            input: "Lortok-ipsu dolor sit amt|ok-t, cons-ctg|lpat-tur adipiscing tok-lit, stok-|d do tok-iusmod t|tok-mpor incididunt ut |labortok-=_ tok-t dolortok-=_ magna aliqua. Tglpat-llus orci ac auctor auguglpat-eee mauris auguglpat-wEr_ lorem",
+            expected: "Lortok-[MASKED] dolor sit amtok-[MASKED], cons-ctglpat-[MASKED] adipiscing tok-[MASKED], stok-[MASKED] do tok-[MASKED] ttok-[MASKED] incididunt ut labortok-[MASKED] tok-[MASKED] dolortok-[MASKED] magna aliqua. Tglpat-[MASKED] orci ac auctor auguglpat-[MASKED] mauris auguglpat-[MASKED] lorem",
+        },
+        "ignored eleventh prefix and more": {
+            prefixes: []string{"mask1-", "mask2-", "mask3-", "mask4-", "mask5-", "mask6-", "mask7-", "mask8-", "mask9-", "mask10-", "mask11-"},
+            input: "Lormask1-ipsu dolor sit amm|ask2-t, cons-ctg|lpat-tur adipiscing mask5-lit, smask11-|d do mask7-iusmod t|glpat-mpor incididunt ut |labormask10-=_ mask9-t",
+            expected: "Lormask1-[MASKED] dolor sit ammask2-[MASKED], cons-ctglpat-[MASKED] adipiscing mask5-[MASKED], smask11-d do mask7-[MASKED] tglpat-[MASKED] incididunt ut labormask10-=_ mask9-[MASKED]",
+        },
+        "whitespaced prefixes": {
+            prefixes: []string{" mask1- ", " mask2-", "mask3- ", "mask4-", "mask5-", "mask6-", "mask7-", "mask8-", "mask9-"},
+            input: "Lormask1-ipsu dolor sit amm|ask2-t, cons-ctg|lpat-tur adipiscing mask5-lit, smask11-|d do mask7-iusmod t|glpat-mpor incididunt ut |labormask10-=_ mask9-t",
+            expected: "Lormask1-[MASKED] dolor sit ammask2-[MASKED], cons-ctglpat-[MASKED] adipiscing mask5-[MASKED], smask11-d do mask7-[MASKED] tglpat-[MASKED] incididunt ut labormask10-=_ mask9-[MASKED]",
+        },
+    ];
+
+    let mock = GitlabRunnerMock::start().await;
+
+    for t in test_cases {
+        println!("{}", t.name);
+        let mut log_job = mock.job_builder(t.name.to_string());
+        log_job.add_step(
+            MockJobStepName::Script,
+            vec!["dummy".to_string()],
+            3600,
+            MockJobStepWhen::OnSuccess,
+            false,
+        );
+
+        log_job.add_token_prefix("glpat-".to_string());
+        for pfx in t.prefixes.iter().take(9) {
+            log_job.add_token_prefix(pfx.to_string());
+        }
+
+        log_job.add_artifact_paths(vec!["*".to_string()]);
+        let log_job = log_job.build();
+        mock.enqueue_job(log_job.clone());
+
+        let dir = tempfile::tempdir().unwrap();
+
+        let (mut runner, layer) = Runner::new_with_layer(
+            mock.uri(),
+            mock.runner_token().to_string(),
+            dir.path().to_path_buf(),
+        );
+
+        let subscriber = Registry::default().with(layer);
+        async {
+            // Upload job comes first
+            let inp = t.input.clone();
+            let got_job = runner
+                .request_job(|_job| async move { Ok(MaskTest { log_text: inp }) })
+                .await
+                .unwrap();
+
+            assert!(got_job);
+            runner.wait_for_space(1).await;
+            assert_eq!(MockJobState::Success, log_job.state());
+
+            let data = log_job.log();
+            println!(
+                "IN: {}\n LEFT: {} RIGHT: {}",
+                t.input,
+                String::from_utf8_lossy(data.as_slice()),
+                t.expected
+            );
+            assert_eq!(data.as_slice(), t.expected.as_bytes());
+        }
+        .with_subscriber(subscriber)
+        .await;
+
+        let data = {
+            let raw = mock
+                .get_job_artifact(log_job.id())
+                .expect("failed to get job artifact");
+            let mut z =
+                ZipArchive::new(Cursor::new(raw)).expect("failed to open job artifact as zip");
+            let mut f = z
+                .by_name("masked")
+                .expect("failed to obtain masked log file");
+            let mut data = Vec::new();
+            f.read_to_end(&mut data)
+                .expect("failed to read data from masked log file");
+            data
+        };
+
+        println!(
+            "IN: {}\n LEFT: {} RIGHT: {}",
+            t.input,
+            String::from_utf8_lossy(data.as_slice()),
+            t.expected
+        );
+        assert_eq!(data.as_slice(), t.expected.as_bytes());
+    }
+}