diff --git a/changelog.d/2181.fixed.md b/changelog.d/2181.fixed.md
new file mode 100644
index 00000000000..704b09df4ca
--- /dev/null
+++ b/changelog.d/2181.fixed.md
@@ -0,0 +1 @@
+Add support for shebang containing spaces like asdf's node does
\ No newline at end of file
diff --git a/mirrord/sip/src/lib.rs b/mirrord/sip/src/lib.rs
index c0ca70b91ca..4bf6a61d1fd 100644
--- a/mirrord/sip/src/lib.rs
+++ b/mirrord/sip/src/lib.rs
@@ -5,7 +5,6 @@
 mod codesign;
 mod error;
 mod rpath;
-mod whitespace;
 
 mod main {
     use std::{
@@ -334,21 +333,20 @@ mod main {
     /// Extract shebang from file contents.
     /// "#!/usr/bin/env bash\n..." -> Some("#!/usr/bin/env")
+    /// The result is always a prefix of `file_contents` (any blanks after `#!` are kept).
+    /// Only the first line is inspected; `None` if it holds no interpreter path.
     fn get_shebang_from_string(file_contents: &str) -> Option<String> {
-        const BOM: &str = "\u{feff}";
-        let content = file_contents.strip_prefix(BOM).unwrap_or(file_contents);
-        let rest = content.strip_prefix("#!")?;
+        let rest = file_contents.strip_prefix("#!")?;
 
-        if whitespace::skip(rest).starts_with('[') {
-            None
-        } else {
-            content
-                .split_once('\n')
-                .map(|(line, _)| line)
-                .unwrap_or(content)
-                .split_whitespace()
-                .next()
-                .map(ToString::to_string)
-        }
+        // A shebang cannot span lines, so never pick up a "path" from the second line.
+        let first_line = rest.split_once('\n').map_or(rest, |(line, _)| line);
+        // Spaces and tabs may separate `#!` from the interpreter path.
+        let path = first_line.trim_start_matches(|c: char| matches!(c, ' ' | '\t'));
+        let path_len = path.find(char::is_whitespace).unwrap_or(path.len());
+        if path_len == 0 {
+            return None;
+        }
+        let shebang_len = "#!".len() + (first_line.len() - path.len()) + path_len;
+        file_contents.get(..shebang_len).map(ToString::to_string)
     }
 
     /// Including '#!', just until whitespace, no arguments.
@@ -689,19 +687,38 @@ mod main {
             assert_eq!(cpu_type, macho::CPU_TYPE_X86_64);
         }
 
-        #[test]
-        fn patch_script_with_shebang() {
+        fn test_patch_script_with_shebang(
+            file_contents: &str,
+            patched_binary_path: &str,
+            new_file_contents: &str,
+        ) {
             let mut original_file = tempfile::NamedTempFile::new().unwrap();
             let patched_path =
                 env::temp_dir().join(original_file.path().strip_prefix("/").unwrap());
-            original_file
-                .write_all("#!/usr/bin/env bash\n".as_ref())
-                .unwrap();
+            original_file.write_all(file_contents.as_ref()).unwrap();
             original_file.flush().unwrap();
             std::fs::create_dir_all(patched_path.parent().unwrap()).unwrap();
-            patch_script(original_file.path(), &patched_path, "/test/shebang").unwrap();
+            patch_script(original_file.path(), &patched_path, patched_binary_path).unwrap();
             let new_contents = std::fs::read(&patched_path).unwrap();
-            assert_eq!(new_contents, "#!/test/shebang bash\n".as_bytes())
+            assert_eq!(new_contents, new_file_contents.as_bytes())
+        }
+
+        #[test]
+        fn patch_script_with_shebang() {
+            test_patch_script_with_shebang(
+                "#!/usr/bin/env bash\n",
+                "/test/shebang",
+                "#!/test/shebang bash\n",
+            );
+        }
+
+        #[test]
+        fn patch_script_with_shebang_with_space() {
+            test_patch_script_with_shebang(
+                "#! /usr/bin/env bash\n",
+                "/test/shebang",
+                "#!/test/shebang bash\n",
+            );
         }
 
         #[test]
@@ -713,6 +730,20 @@ mod main {
             )
         }
 
+        #[test]
+        fn shebang_from_string_with_space() {
+            let contents = "#! /usr/bin/env bash\n".to_string();
+            assert_eq!(
+                get_shebang_from_string(&contents).unwrap(),
+                "#! /usr/bin/env"
+            );
+            let contents = "#!   /usr/bin/env bash\n".to_string();
+            assert_eq!(
+                get_shebang_from_string(&contents).unwrap(),
+                "#!   /usr/bin/env"
+            )
+        }
+
         /// Run `sip_patch` on a script with a shebang that points to `env`, verify that a path to
         /// a new script is returned, in which the shebang points to a patched version of `env`
         /// that is not SIPed.
diff --git a/mirrord/sip/src/whitespace.rs b/mirrord/sip/src/whitespace.rs deleted file mode 100644 index 4ded151d2d8..00000000000 --- a/mirrord/sip/src/whitespace.rs +++ /dev/null @@ -1,95 +0,0 @@ -/* -Permission is hereby granted, free of charge, to any -person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the -Software without restriction, including without -limitation the rights to use, copy, modify, merge, -publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software -is furnished to do so, subject to the following -conditions: - -The above copyright notice and this permission notice -shall be included in all copies or substantial portions -of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF -ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT -SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
-*/ - -#![allow(clippy::indexing_slicing)] - -// source: https://github.com/dtolnay/syn/blob/master/src/whitespace.rs - -pub fn skip(mut s: &str) -> &str { - 'skip: while !s.is_empty() { - let byte = s.as_bytes()[0]; - if byte == b'/' { - if s.starts_with("//") - && (!s.starts_with("///") || s.starts_with("////")) - && !s.starts_with("//!") - { - if let Some(i) = s.find('\n') { - s = &s[i + 1..]; - continue; - } else { - return ""; - } - } else if s.starts_with("/**/") { - s = &s[4..]; - continue; - } else if s.starts_with("/*") - && (!s.starts_with("/**") || s.starts_with("/***")) - && !s.starts_with("/*!") - { - let mut depth = 0; - let bytes = s.as_bytes(); - let mut i = 0; - let upper = bytes.len() - 1; - while i < upper { - if bytes[i] == b'/' && bytes[i + 1] == b'*' { - depth += 1; - i += 1; // eat '*' - } else if bytes[i] == b'*' && bytes[i + 1] == b'/' { - depth -= 1; - if depth == 0 { - s = &s[i + 2..]; - continue 'skip; - } - i += 1; // eat '/' - } - i += 1; - } - return s; - } - } - match byte { - b' ' | 0x09..=0x0d => { - s = &s[1..]; - continue; - } - b if b <= 0x7f => {} - _ => { - let ch = s.chars().next().unwrap(); - if is_whitespace(ch) { - s = &s[ch.len_utf8()..]; - continue; - } - } - } - return s; - } - s -} - -fn is_whitespace(ch: char) -> bool { - // Rust treats left-to-right mark and right-to-left mark as whitespace - ch.is_whitespace() || ch == '\u{200e}' || ch == '\u{200f}' -}