diff --git a/CARGO_README.md b/CARGO_README.md
index f1205f2cf2..ea4e871465 100644
--- a/CARGO_README.md
+++ b/CARGO_README.md
@@ -57,8 +57,10 @@ sub-crates:
 - [`tectonic_bridge_graphite2`](https://crates.io/crates/tectonic_bridge_graphite2)
 - [`tectonic_bridge_harfbuzz`](https://crates.io/crates/tectonic_bridge_harfbuzz)
 - [`tectonic_bridge_icu`](https://crates.io/crates/tectonic_bridge_icu)
+- [`tectonic_bundles`](https://crates.io/crates/tectonic_bundles)
 - [`tectonic_cfg_support`](https://crates.io/crates/tectonic_cfg_support)
 - [`tectonic_dep_support`](https://crates.io/crates/tectonic_dep_support)
+- [`tectonic_docmodel`](https://crates.io/crates/tectonic_docmodel)
 - [`tectonic_engine_bibtex`](https://crates.io/crates/tectonic_engine_bibtex)
 - [`tectonic_engine_xdvipdfmx`](https://crates.io/crates/tectonic_engine_xdvipdfmx)
 - [`tectonic_engine_xetex`](https://crates.io/crates/tectonic_engine_xetex)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index bd035c37ff..6a9543021f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,60 @@
+# tectonic 0.6.0 (2021-06-15)
+
+This release adds some helpful new utilities and internal cleanups, which
+involve breaking API changes (see below).
+
+- New V2 command `tectonic -X show user-cache-dir` to print out the
+  location of the per-user cache directory. FAQ, answered! (@pkgw, #786)
+- New V2 command `tectonic -X bundle search` to print out listings of files
+  contained in the "bundle" of TeX support files. If run in a workspace
+  containing a `Tectonic.toml` file, the workspace’s bundle is queried;
+  otherwise, the default bundle is queried. (@pkgw, #786)
+- New V2 command `tectonic -X bundle cat` to print out one of the support files,
+  with the same general behavior as the `search` command. You could also use
+  this to ensure that a particular file has been loaded into the local cache.
+  (@pkgw, #786).
+- Improved security model regarding the "shell-escape" feature, which has the
+  potential to be abused by untrusted inputs. A new `--untrusted` argument to
+  the V1 CLI and `tectonic -X build` disables the use of shell-escape, and any
+  other known-insecure features, regardless of the presence of `-Z shell-escape`
+  or any other options. Therefore, if you're writing a script that processes
+  untrusted input and you make sure to run `tectonic --untrusted ...`, you can
+  be confident that further command-line arguments can't undo your sandboxing.
+  Furthermore, if the environment variable `$TECTONIC_UNTRUSTED_MODE` is set to
+  a non-empty value, the effect is as if `--untrusted` had been provided.
+  (@pkgw, #787)
+- You know what ... get rid of the "beta" message in the V1 CLI.
+- Fix SyncTeX output, we hope (e.g., #720, #744; @hulloanson, @pkgw, #762).
+  Tectonic's SyncTeX files should now include correct, absolute filesystem paths
+  when appropriate.
+- Fix some broken low-level XeTeX built-ins, reported by @burrbull (@pkgw, #714,
+  #783)
+
+A few more words on the security model: the terminology is a bit slippery
+here since we of course never intend to deliver a product that has security
+flaws. But features like shell-escape, while providing useful functionality, can
+certainly be abused to do malicious things given a hostile input. The default UX
+aims to be conservative about these features, but if a user wants to enable
+them, we'll allow them -- in the same way that Rust/Cargo will compile and run
+`build.rs` scripts that in principle could do just about anything on your
+machine.
+Our main security distinction is therefore whether the input is trusted by the
+user running Tectonic. The decision of whether to "trust" an input or not is
+something that fundamentally has to be made at a level above Tectonic itself.
+The goal of Tectonic in this area is therefore to provide the user with
+straightforward and effective tools to express that decision.
+
+For developers, this release adds two new Cargo crates to the Tectonic
+ecosystem: `tectonic_docmodel`, allowing manipulation of `Tectonic.toml` files
+and their related data structures; and `tectonic_bundles`, allowing manipulation
+of the Tectonic support file bundles. In both cases, third-party tools might
+wish to use these formats without having to pull in all of the heavyweight
+dependencies of the main `tectonic` crate. And in both cases, the separation has
+led to many API improvements and cleanups that greatly improve the overall code
+structure. These changes break the API of the `tectonic` crate by removing some
+old modules and changing the particular traits and types used to implement these
+systems. (@pkgw, #785, #786)
+
+
 # tectonic 0.5.2 (2021-06-08)
 
 - Update dependencies, including [`watchexec`]. We believe that this should fix
diff --git a/Cargo.lock b/Cargo.lock
index d9eeeef8e6..066825e326 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2102,9 +2102,8 @@ dependencies = [
 [[package]]
 name = "tectonic"
-version = "0.5.2"
+version = "0.6.0"
 dependencies = [
- "app_dirs2",
  "atty",
  "byte-unit",
  "cfg-if 1.0.0",
@@ -2123,6 +2122,8 @@ dependencies = [
  "sha2",
  "structopt",
  "tectonic_bridge_core",
+ "tectonic_bundles",
+ "tectonic_docmodel",
  "tectonic_engine_bibtex",
  "tectonic_engine_xdvipdfmx",
  "tectonic_engine_xetex",
@@ -2143,7 +2144,7 @@ dependencies = [
 [[package]]
 name = "tectonic_bridge_core"
-version = "0.1.0"
+version = "0.2.0"
 dependencies = [
  "cbindgen",
  "cc",
@@ -2196,6 +2197,19 @@ dependencies = [
  "tectonic_dep_support",
 ]
 
+[[package]]
+name = "tectonic_bundles"
+version = "0.1.0"
+dependencies = [
+ "flate2",
+ "fs2",
+ "tectonic_errors",
+ "tectonic_geturl",
+ "tectonic_io_base",
+ "tectonic_status_base",
+ "zip",
+]
+
 [[package]]
 name = "tectonic_cfg_support"
 version = "0.1.3"
@@ -2211,6 +2225,15 @@ dependencies = [
  "vcpkg",
 ]
 
+[[package]]
+name = "tectonic_docmodel"
+version = "0.1.0"
+dependencies = [
+ "serde",
+ "tectonic_errors",
+ "toml",
+]
+
 [[package]]
 name = "tectonic_engine_bibtex"
 version = "0.1.1"
@@ -2236,7 +2259,7 @@ dependencies = [
 [[package]]
 name = "tectonic_engine_xetex"
-version = "0.1.0"
+version = "0.1.1"
 dependencies = [
  "cbindgen",
  "cc",
@@ -2260,7 +2283,7 @@ dependencies = [
 [[package]]
 name = "tectonic_geturl"
-version = "0.2.0"
+version = "0.2.1"
 dependencies = [
  "cfg-if 1.0.0",
  "curl",
@@ -2271,8 +2294,9 @@ dependencies = [
 [[package]]
 name = "tectonic_io_base"
-version = "0.2.0"
+version = "0.3.0"
 dependencies = [
+ "app_dirs2",
  "flate2",
  "libc",
  "sha2",
@@ -2294,7 +2318,7 @@ dependencies = [
 [[package]]
 name = "tectonic_status_base"
-version = "0.1.0"
+version = "0.2.0"
 dependencies = [
  "tectonic_errors",
 ]
diff --git a/Cargo.toml b/Cargo.toml
index e3bd9305db..875e5f6ded 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -4,7 +4,7 @@
 
 [package]
 name = "tectonic"
-version = "0.5.2"
+version = "0.6.0"
 authors = ["Peter Williams <peter@newton.cx>"]
 description = """
 A modernized, complete, embeddable TeX/LaTeX engine.
Tectonic is forked from the XeTeX @@ -32,8 +32,10 @@ members = [ "crates/bridge_graphite2", "crates/bridge_harfbuzz", "crates/bridge_icu", + "crates/bundles", "crates/cfg_support", "crates/dep_support", + "crates/docmodel", "crates/engine_bibtex", "crates/engine_xdvipdfmx", "crates/engine_xetex", @@ -51,7 +53,6 @@ name = "tectonic" crate-type = ["rlib"] [dependencies] -app_dirs = { version = "2", package = "app_dirs2" } atty = "0.2" byte-unit = "^4.0" cfg-if = "1.0" @@ -65,13 +66,15 @@ open = "1.4.0" serde = { version = "^1.0", features = ["derive"], optional = true } sha2 = "^0.9" structopt = "0.3" -tectonic_bridge_core = { path = "crates/bridge_core", version =">=0.1.0,<1"} +tectonic_bridge_core = { path = "crates/bridge_core", version =">=0.2.0,<1"} +tectonic_bundles = { path = "crates/bundles", version =">=0.1.0,<1", default-features = false } +tectonic_docmodel = { path = "crates/docmodel", version =">=0.1.0,<1", optional = true } tectonic_engine_bibtex = { path = "crates/engine_bibtex", version =">=0.1.0,<1"} tectonic_engine_xdvipdfmx = { path = "crates/engine_xdvipdfmx", version =">=0.1.0,<1"} -tectonic_engine_xetex = { path = "crates/engine_xetex", version =">=0.1.0,<1"} +tectonic_engine_xetex = { path = "crates/engine_xetex", version =">=0.1.1,<1"} tectonic_errors = { path = "crates/errors", version =">=0.1.0,<1"} tectonic_geturl = { path = "crates/geturl", version =">=0.2.0,<1", default-features = false } -tectonic_io_base = { path = "crates/io_base", version =">=0.2.0,<1"} +tectonic_io_base = { path = "crates/io_base", version =">=0.3.0,<1"} tectonic_status_base = { path = "crates/status_base", version =">=0.1.0,<1"} tectonic_xdv = { path = "crates/xdv", version =">=0.1.9,<1"} tectonic_xetex_layout = { path = "crates/xetex_layout", version =">=0.1.0,<1"} @@ -85,17 +88,20 @@ zip = { version = "^0.5", default-features = false, features = ["deflate"] } [features] default = ["geturl-reqwest", "serialization"] -# Note: we used to have this to couple "serde" and "serde-derive", but we've -# adopted the newer scheme to avoid having to depend on both -- should maybe -# just get rid of this feature: -serialization = ["serde", "toml"] +# The main motivation for this feature was to be able to compile without +# proc-macros (via serde-derive), for statically-linked targets which can't use +# them. In the CI, we now build for statically-linked targets using a +# cross-compilation model that allows us to have proc-macros anyway. So maybe +# this feature should go away? It's kind of annoying to support, and at this +# point proc-macros may have snuck into the dependency tree elsewhere, anyway. +serialization = ["serde", "tectonic_docmodel", "toml"] external-harfbuzz = ["tectonic_engine_xetex/external-harfbuzz"] -geturl-curl = ["tectonic_geturl/curl"] -geturl-reqwest = ["tectonic_geturl/reqwest"] +geturl-curl = ["tectonic_bundles/geturl-curl", "tectonic_geturl/curl"] +geturl-reqwest = ["tectonic_bundles/geturl-reqwest", "tectonic_geturl/reqwest"] -native-tls-vendored = ["tectonic_geturl/native-tls-vendored"] +native-tls-vendored = ["tectonic_bundles/native-tls-vendored", "tectonic_geturl/native-tls-vendored"] # developer feature to compile with the necessary flags for profiling tectonic. 
profile = []
 
@@ -119,19 +125,21 @@ x86_64-unknown-linux-gnu = { install = ["fontconfig","freetype","harfbuzz[icu,gr
 x86_64-pc-windows-msvc = { triplet = "x64-windows-static", install = ["fontconfig","freetype","harfbuzz[icu,graphite2]"] }
 
 [package.metadata.internal_dep_versions]
-tectonic_bridge_core = "thiscommit:2021-06-02:Oiyoowe2"
+tectonic_bridge_core = "thiscommit:2021-06-14:3sp2O1O"
 tectonic_bridge_flate = "thiscommit:2021-01-01:eer4ahL4"
 tectonic_bridge_graphite2 = "2c1ffcd702a662c003bd3d7d0ca4d169784cb6ad"
 tectonic_bridge_harfbuzz = "2c1ffcd702a662c003bd3d7d0ca4d169784cb6ad"
 tectonic_bridge_icu = "2c1ffcd702a662c003bd3d7d0ca4d169784cb6ad"
+tectonic_bundles = "thiscommit:2021-06-13:Q0esYor"
 tectonic_cfg_support = "thiscommit:aeRoo7oa"
 tectonic_dep_support = "5faf4205bdd3d31101b749fc32857dd746f9e5bc"
+tectonic_docmodel = "cd77b60d48b1ae3ef80d708e6858ea91cd9fa812"
 tectonic_engine_bibtex = "thiscommit:2021-01-17:KuhaeG1e"
 tectonic_engine_xdvipdfmx = "7dcbc52e58f9774b3d592919a9105377faeac509"
-tectonic_engine_xetex = "thiscommit:2021-06-02:nahbie2O"
+tectonic_engine_xetex = "b7a4085fa67c831d4532da6661bddafd1f9c24ff"
 tectonic_errors = "317ae79ceaa2593fb56090e37bf1f5cc24213dd9"
 tectonic_geturl = "thiscommit:2021-01-16:Aikoob9c"
-tectonic_io_base = "thiscommit:2021-01-16:SaeK7eex"
+tectonic_io_base = "thiscommit:2021-06-13:XFjtSsZ"
 tectonic_status_base = "317ae79ceaa2593fb56090e37bf1f5cc24213dd9"
 tectonic_xdv = "c91f2ef37858d1a0a724a5c3ddc2f7ea46373c77"
 tectonic_xetex_layout = "2c1ffcd702a662c003bd3d7d0ca4d169784cb6ad"
diff --git a/crates/bridge_core/CHANGELOG.md b/crates/bridge_core/CHANGELOG.md
index c3837948cc..f0bf042f21 100644
--- a/crates/bridge_core/CHANGELOG.md
+++ b/crates/bridge_core/CHANGELOG.md
@@ -1,3 +1,15 @@
+# tectonic_bridge_core 0.2.0 (2021-06-15)
+
+- Add a security infrastructure that gives a systematic way to control whether
+  features that can be abused by untrusted inputs, like shell-escape, are
+  enabled. The default is to disable all such features. Callers can request to
+  allow their use, but we use a centralized approach that ensures that such
+  requests will always be denied if the environment variable
+  `$TECTONIC_UNTRUSTED_MODE` is set to a nonempty value (@pkgw, #787). A sketch
+  of the new calling convention appears below.
+- Add a C API allowing us to expose the filesystem paths for just-opened
+  inputs. This is needed for correct SyncTeX support (@hulloanson, @pkgw, #762).
+
+
 # tectonic_bridge_core 0.1.0 (2021-06-03)
 
 This is the first release of the "core" bridge crate. It provides a baseline of
diff --git a/crates/bridge_core/Cargo.toml b/crates/bridge_core/Cargo.toml
index cbb813d09a..ac96b516f3 100644
--- a/crates/bridge_core/Cargo.toml
+++ b/crates/bridge_core/Cargo.toml
@@ -5,7 +5,7 @@
 
 [package]
 name = "tectonic_bridge_core"
-version = "0.1.0"
+version = "0.2.0"
 authors = ["Peter Williams <peter@newton.cx>"]
 description = """
 Exposing core backend APIs to the Tectonic C/C++ code.
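Returning to the security infrastructure noted in the changelog above: the pieces introduced later in this diff (`SecurityStance`, `SecuritySettings`, and `CoreBridgeLauncher::new_with_security`) compose as in the following minimal sketch. The `make_launcher` helper is hypothetical, and working `DriverHooks` and `StatusBackend` implementations are assumed to exist.

```rust
use tectonic_bridge_core::{CoreBridgeLauncher, DriverHooks, SecuritySettings, SecurityStance};
use tectonic_status_base::StatusBackend;

/// Request insecure features only when the user asked for them; the request
/// is still vetoed centrally if $TECTONIC_UNTRUSTED_MODE is set.
fn make_launcher<'a>(
    hooks: &'a mut dyn DriverHooks,
    status: &'a mut dyn StatusBackend,
    allow_insecure: bool,
) -> CoreBridgeLauncher<'a> {
    let stance = if allow_insecure {
        SecurityStance::MaybeAllowInsecures
    } else {
        SecurityStance::DisableInsecures
    };

    CoreBridgeLauncher::new_with_security(hooks, status, SecuritySettings::new(stance))
}
```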
@@ -24,7 +24,7 @@ lazy_static = "^1.4"
 libc = "^0.2"
 md-5 = "^0.9"
 tectonic_errors = { path = "../errors", version =">=0.2.0,<1"}
-tectonic_io_base = { path = "../io_base", version =">=0.2.0,<1"}
+tectonic_io_base = { path = "../io_base", version =">=0.3.0,<1"}
 tectonic_status_base = { path = "../status_base", version =">=0.1.0,<1"}
 
 [build-dependencies]
 cc = "^1.0.66"
 
@@ -33,5 +33,5 @@
 [package.metadata.internal_dep_versions]
 tectonic_errors = "5c9ba661edf5ef669f24f9904f99cca369d999e7"
-tectonic_io_base = "thiscommit:2021-01-16:go5rieNg"
+tectonic_io_base = "f7eeff461778f7082db7ed5097d93aa63119eb12"
 tectonic_status_base = "317ae79ceaa2593fb56090e37bf1f5cc24213dd9"
diff --git a/crates/bridge_core/README.md b/crates/bridge_core/README.md
index 84ae7a8a77..06dbc3f3c1 100644
--- a/crates/bridge_core/README.md
+++ b/crates/bridge_core/README.md
@@ -28,6 +28,6 @@ use tectonic_bridge_core;
 
 ## Cargo features
 
-This crate does not currently provides any [Cargo features][features].
+This crate does not currently provide any [Cargo features][features].
 
 [features]: https://doc.rust-lang.org/cargo/reference/features.html
diff --git a/crates/bridge_core/src/lib.rs b/crates/bridge_core/src/lib.rs
index a5e20af801..79679c49f7 100644
--- a/crates/bridge_core/src/lib.rs
+++ b/crates/bridge_core/src/lib.rs
@@ -35,9 +35,11 @@ use flate2::{read::GzDecoder, Compression, GzBuilder};
 use md5::{Digest, Md5};
 use std::{
+    convert::TryInto,
     ffi::CStr,
     fmt::{Display, Error as FmtError, Formatter},
     io::{self, Read, SeekFrom, Write},
+    path::PathBuf,
     ptr,
     result::Result as StdResult,
     slice,
@@ -214,14 +216,32 @@ impl std::error::Error for EngineAbortedError {}
 pub struct CoreBridgeLauncher<'a> {
     hooks: &'a mut dyn DriverHooks,
     status: &'a mut dyn StatusBackend,
+    security: SecuritySettings,
 }
 
 impl<'a> CoreBridgeLauncher<'a> {
     /// Set up a new context for launching bridged FFI code.
+    ///
+    /// This function uses the default security stance, which disallows all
+    /// known-insecure engine features. Use [`Self::new_with_security`] to
+    /// provide your own security settings that can attempt to allow the use of
+    /// such features.
     pub fn new(hooks: &'a mut dyn DriverHooks, status: &'a mut dyn StatusBackend) -> Self {
-        CoreBridgeLauncher { hooks, status }
+        Self::new_with_security(hooks, status, SecuritySettings::default())
     }
 
+    /// Set up a new context for launching bridged FFI code.
+    pub fn new_with_security(
+        hooks: &'a mut dyn DriverHooks,
+        status: &'a mut dyn StatusBackend,
+        security: SecuritySettings,
+    ) -> Self {
+        CoreBridgeLauncher {
+            hooks,
+            status,
+            security,
+        }
+    }
 
     /// Invoke a function to launch a bridged FFI engine with a global mutex
     /// held.
     ///
@@ -240,7 +260,7 @@
         F: FnOnce(&mut CoreBridgeState<'_>) -> Result<T>,
     {
         let _guard = ENGINE_LOCK.lock().unwrap();
-        let mut state = CoreBridgeState::new(self.hooks, self.status);
+        let mut state = CoreBridgeState::new(self.security.clone(), self.hooks, self.status);
         let result = callback(&mut state);
 
         if let Err(ref e) = result {
@@ -260,6 +280,9 @@
 /// these state structures into the C/C++ layer. It is essential that lifetimes
 /// be properly managed across the Rust/C boundary.
 pub struct CoreBridgeState<'a> {
+    /// The security settings for this invocation
+    security: SecuritySettings,
+
     /// The driver hooks associated with this engine invocation.
    hooks: &'a mut dyn DriverHooks,
@@ -271,18 +294,30 @@ pub struct CoreBridgeState<'a> {
     #[allow(clippy::vec_box)]
     output_handles: Vec<Box<OutputHandle>>,
+
+    /// A semi-hack to allow us to feed input file path information to SyncTeX.
+    /// This field is updated every time a new input file is opened. The XeTeX
+    /// engine queries it when opening new source input files to get the
+    /// absolute filesystem path info that SyncTeX wants. This field might be
+    /// None because we're still reading the primary input, or because the most
+    /// recent input didn't have a filesystem path (it came from a bundle or
+    /// memory or something else).
+    latest_input_path: Option<PathBuf>,
 }
 
 impl<'a> CoreBridgeState<'a> {
     fn new(
+        security: SecuritySettings,
         hooks: &'a mut dyn DriverHooks,
         status: &'a mut dyn StatusBackend,
     ) -> CoreBridgeState<'a> {
         CoreBridgeState {
+            security,
             hooks,
             status,
             output_handles: Vec::new(),
             input_handles: Vec::new(),
+            latest_input_path: None,
         }
     }
 
@@ -290,18 +325,20 @@
         &mut self,
         name: &str,
         format: FileFormat,
-    ) -> OpenResult<InputHandle> {
+    ) -> OpenResult<(InputHandle, Option<PathBuf>)> {
         let io = self.hooks.io();
 
-        let r = if let FileFormat::Format = format {
-            io.input_open_format(name, self.status)
+        if let FileFormat::Format = format {
+            match io.input_open_format(name, self.status) {
+                OpenResult::NotAvailable => {}
+                OpenResult::Err(e) => return OpenResult::Err(e),
+                OpenResult::Ok(h) => return OpenResult::Ok((h, None)),
+            }
         } else {
-            io.input_open_name(name, self.status)
-        };
-
-        match r {
-            OpenResult::NotAvailable => {}
-            r => return r,
+            match io.input_open_name_with_abspath(name, self.status) {
+                OpenResult::NotAvailable => {}
+                r => return r,
+            }
         }
 
         // It wasn't available under the immediately-given name. Try adding
@@ -313,13 +350,19 @@
             let ext = format!("{}.{}", name, e);
 
             if let FileFormat::Format = format {
-                if let r @ OpenResult::Ok(_) = io.input_open_format(&ext, self.status) {
-                    return r;
+                match io.input_open_format(&ext, self.status) {
+                    OpenResult::NotAvailable => {}
+                    OpenResult::Err(e) => return OpenResult::Err(e),
+                    OpenResult::Ok(h) => return OpenResult::Ok((h, None)),
+                }
+            } else {
+                match io.input_open_name_with_abspath(&ext, self.status) {
+                    OpenResult::NotAvailable => {}
+                    r => return r,
                 }
-            } else if let r @ OpenResult::Ok(_) = io.input_open_name(&ext, self.status) {
-                return r;
             }
         }
+
         OpenResult::NotAvailable
     }
 
@@ -328,7 +371,7 @@
         name: &str,
         format: FileFormat,
         is_gz: bool,
-    ) -> OpenResult<InputHandle> {
+    ) -> OpenResult<(InputHandle, Option<PathBuf>)> {
         let base = self.input_open_name_format(name, format);
 
         if !is_gz {
@@ -336,11 +379,11 @@
         match base {
-            OpenResult::Ok(ih) => {
+            OpenResult::Ok((ih, path)) => {
                 let origin = ih.origin();
                 let dr = GzDecoder::new(ih.into_inner());
 
-                OpenResult::Ok(InputHandle::new(name, dr, origin))
+                OpenResult::Ok((InputHandle::new(name, dr, origin), path))
             }
             _ => base,
         }
@@ -356,7 +399,7 @@
         // idea to just go and read the file.
let mut ih = match self.input_open_name_format(&name, FileFormat::Tex) { - OpenResult::Ok(ih) => ih, + OpenResult::Ok((ih, _path)) => ih, OpenResult::NotAvailable => { // We could issue a warning here, but the standard LaTeX // "rerun check" implementations trigger it very often, which @@ -498,8 +541,8 @@ impl<'a> CoreBridgeState<'a> { fn input_open(&mut self, name: &str, format: FileFormat, is_gz: bool) -> *mut InputHandle { let name = normalize_tex_path(name); - let ih = match self.input_open_name_format_gz(&name, format, is_gz) { - OpenResult::Ok(ih) => ih, + let (ih, path) = match self.input_open_name_format_gz(&name, format, is_gz) { + OpenResult::Ok(tup) => tup, OpenResult::NotAvailable => { return ptr::null_mut(); } @@ -510,14 +553,15 @@ impl<'a> CoreBridgeState<'a> { }; self.input_handles.push(Box::new(ih)); + self.latest_input_path = path; &mut **self.input_handles.last_mut().unwrap() } fn input_open_primary(&mut self) -> *mut InputHandle { let io = self.hooks.io(); - let ih = match io.input_open_primary(self.status) { - OpenResult::Ok(ih) => ih, + let (ih, path) = match io.input_open_primary_with_abspath(self.status) { + OpenResult::Ok(tup) => tup, OpenResult::NotAvailable => { tt_error!(self.status, "primary input not available (?!)"); return ptr::null_mut(); @@ -529,6 +573,7 @@ impl<'a> CoreBridgeState<'a> { }; self.input_handles.push(Box::new(ih)); + self.latest_input_path = path; &mut **self.input_handles.last_mut().unwrap() } @@ -614,22 +659,119 @@ impl<'a> CoreBridgeState<'a> { } fn shell_escape(&mut self, command: &str) -> bool { - match self.hooks.sysrq_shell_escape(command, self.status) { - Ok(_) => false, + if self.security.allow_shell_escape() { + match self.hooks.sysrq_shell_escape(command, self.status) { + Ok(_) => false, - Err(e) => { - tt_error!( - self.status, - "failed to execute the shell-escape command \"{}\": {}", - command, - e - ); - true + Err(e) => { + tt_error!( + self.status, + "failed to execute the shell-escape command \"{}\": {}", + command, + e + ); + true + } } + } else { + tt_error!( + self.status, + "forbidden to execute shell-escape command \"{}\"", + command + ); + true } } } +/// A type for storing settings about potentially insecure engine features. +/// +/// This type encapsulates configuration about which potentially insecure engine +/// features are enabled. Methods that configure or instantiate engines require +/// values of this type, and values of this type can only be created through +/// centralized methods that respect standard environment variables, ensuring +/// that there is some level of uniform control over the activation of any +/// known-insecure features. +/// +/// The purpose of this framework is to manage the use of engine features that +/// are known to create security risks with *untrusted* input, but that trusted +/// users may wish to use due to the extra functionalities they bring. (This is +/// why these are settings and not simply security flaws!) The primary example +/// of this is the TeX engine’s shell-escape feature. +/// +/// Of course, this framework is only as good as our understanding of Tectonic’s +/// security profile. Future versions might disable or restrict different pieces +/// of functionality as new risks are discovered. +#[derive(Clone, Debug)] +pub struct SecuritySettings { + /// While we might eventually gain finer-grained enable/disable settings, + /// there should always be a hard "disable everything known to be risky" + /// option that supersedes everything else. 
+    disable_insecures: bool,
+}
+
+/// Different high-level security stances that can be adopted when creating
+/// [`SecuritySettings`].
+#[derive(Clone, Debug)]
+pub enum SecurityStance {
+    /// Ensure that all known-insecure features are disabled.
+    ///
+    /// Use this stance if you are processing untrusted input.
+    DisableInsecures,
+
+    /// Request to allow the use of known-insecure features.
+    ///
+    /// Use this stance if you are processing trusted input *and* there is some
+    /// user-level request to use such features. The request to allow insecure
+    /// features might be overridden if the environment variable
+    /// `TECTONIC_UNTRUSTED_MODE` is set.
+    MaybeAllowInsecures,
+}
+
+impl Default for SecurityStance {
+    fn default() -> Self {
+        // Obvi, the default is secure!!!
+        SecurityStance::DisableInsecures
+    }
+}
+
+impl SecuritySettings {
+    /// Create a new security configuration.
+    ///
+    /// The *stance* argument specifies the high-level security stance. If your
+    /// program will be run by a trusted user, they should be able to control
+    /// the setting through a command-line argument or something comparable.
+    /// Even if there is a request to enable known-insecure features, however,
+    /// such a request might be overridden by other mechanisms. In particular,
+    /// if the environment variable `TECTONIC_UNTRUSTED_MODE` is set to any
+    /// value, insecure features will always be disabled regardless of the
+    /// user-level setting. Other mechanisms for disabling known-insecure
+    /// features may be added in the future.
+    pub fn new(stance: SecurityStance) -> Self {
+        let disable_insecures = if std::env::var_os("TECTONIC_UNTRUSTED_MODE").is_some() {
+            true
+        } else {
+            match stance {
+                SecurityStance::DisableInsecures => true,
+                SecurityStance::MaybeAllowInsecures => false,
+            }
+        };
+
+        SecuritySettings { disable_insecures }
+    }
+
+    /// Query whether the shell-escape TeX engine feature is allowed to be used.
+    pub fn allow_shell_escape(&self) -> bool {
+        !self.disable_insecures
+    }
+}
+
+impl Default for SecuritySettings {
+    fn default() -> Self {
+        SecuritySettings::new(SecurityStance::default())
+    }
+}
+
 // The entry points.
 
 /// Issue a warning.
@@ -815,6 +957,53 @@ pub extern "C" fn ttbc_input_open_primary(es: &mut CoreBridgeState) -> *mut Inpu
     es.input_open_primary()
 }
 
+/// Get the filesystem path of the most-recently-opened input file.
+///
+/// This function is needed by SyncTeX, because its output file should contain
+/// absolute filesystem paths to the input source files. In principle this
+/// functionality could be implemented in a few different ways, but the approach
+/// used here is the most backward-compatible. This function will fill in the
+/// caller's buffer with the filesystem path associated with the most
+/// recently-opened input file, including a terminating NUL, if possible.
+///
+/// It returns 0 if no such path is known, -1 if the path cannot be expressed in
+/// UTF-8, -2 if the destination buffer is not big enough, or the number of
+/// bytes written into the buffer (including a terminating NUL) otherwise.
+///
+/// # Safety
+///
+/// This function is unsafe because it dereferences raw C pointers.
+#[no_mangle]
+pub unsafe extern "C" fn ttbc_get_last_input_abspath(
+    es: &mut CoreBridgeState,
+    buffer: *mut u8,
+    len: libc::size_t,
+) -> libc::ssize_t {
+    match es.latest_input_path {
+        None => 0,
+
+        Some(ref p) => {
+            // In principle we could try to handle the full fun of
+            // cross-platform PathBuf/Unicode conversions, but synctex and
+            // friends will be treating our data as a traditional C string in
+            // the end. So play it safe and stick to UTF-8.
+            let p = match p.to_str() {
+                Some(s) => s.as_bytes(),
+                None => return -1,
+            };
+
+            let n = p.len();
+            if n + 1 > len {
+                return -2;
+            }
+
+            std::ptr::copy(p.as_ptr(), buffer, n);
+            *buffer.offset(n.try_into().unwrap()) = b'\0';
+            (n + 1).try_into().unwrap()
+        }
+    }
+}
+
 /// Get the size of a Tectonic input file.
 #[no_mangle]
 pub extern "C" fn ttbc_input_get_size(
diff --git a/crates/bridge_core/support/support.c b/crates/bridge_core/support/support.c
index 1a6c9e516e..1d49263817 100644
--- a/crates/bridge_core/support/support.c
+++ b/crates/bridge_core/support/support.c
@@ -238,6 +238,12 @@ ttstub_input_open_primary(void)
 }
 
+ssize_t
+ttstub_get_last_input_abspath(char *buffer, size_t len)
+{
+    return ttbc_get_last_input_abspath(tectonic_global_bridge_core, (uint8_t *) buffer, len);
+}
+
 size_t
 ttstub_input_get_size(rust_input_handle_t handle)
 {
diff --git a/crates/bridge_core/support/tectonic_bridge_core.h b/crates/bridge_core/support/tectonic_bridge_core.h
index ed167ec34d..0ae5495c72 100644
--- a/crates/bridge_core/support/tectonic_bridge_core.h
+++ b/crates/bridge_core/support/tectonic_bridge_core.h
@@ -238,6 +238,7 @@ int ttstub_output_close(rust_output_handle_t handle);
 rust_input_handle_t ttstub_input_open(char const *path, ttbc_file_format format, int is_gz);
 rust_input_handle_t ttstub_input_open_primary(void);
+ssize_t ttstub_get_last_input_abspath(char *buffer, size_t len);
 size_t ttstub_input_get_size(rust_input_handle_t handle);
 time_t ttstub_input_get_mtime(rust_input_handle_t handle);
 size_t ttstub_input_seek(rust_input_handle_t handle, ssize_t offset, int whence);
diff --git a/crates/bundles/CHANGELOG.md b/crates/bundles/CHANGELOG.md
new file mode 100644
index 0000000000..b582f3794c
--- /dev/null
+++ b/crates/bundles/CHANGELOG.md
@@ -0,0 +1,10 @@
+# tectonic_bundles 0.1.0 (2021-06-15)
+
+Add the `tectonic_bundles` crate! This separates out the implementation of the
+various Tectonic file “bundles” into a standalone crate, so that you can use
+them without having to link to harfbuzz and everything else pulled in by the
+main crate.
+
+As usual, separating out this crate led to some good API clarifications and
+improvements. The API offered here includes some nontrivial breakage compared to
+the old APIs in `tectonic::io::*`, but it's much more rationalized.
diff --git a/crates/bundles/Cargo.toml b/crates/bundles/Cargo.toml
new file mode 100644
index 0000000000..2775cd2403
--- /dev/null
+++ b/crates/bundles/Cargo.toml
@@ -0,0 +1,39 @@
+# Copyright 2020-2021 the Tectonic Project
+# Licensed under the MIT License.
+
+# See README.md for discussion of features (or lack thereof) in this crate.
+
+[package]
+name = "tectonic_bundles"
+version = "0.1.0"
+authors = ["Peter Williams <peter@newton.cx>"]
+description = """
+Tectonic "bundle" (support file collection) implementations.
+""" +homepage = "https://tectonic-typesetting.github.io/" +documentation = "https://docs.rs/tectonic_bundles" +repository = "https://github.com/tectonic-typesetting/tectonic/" +readme = "README.md" +license = "MIT" +edition = "2018" + +[dependencies] +flate2 = { version = "^1.0.19", default-features = false, features = ["zlib"] } +fs2 = "^0.4" +tectonic_errors = { path = "../errors", version =">=0.2.0,<1"} +tectonic_geturl = { path = "../geturl", version =">=0.2.0,<1", default-features = false } +tectonic_io_base = { path = "../io_base", version =">=0.3.0,<1"} +tectonic_status_base = { path = "../status_base", version =">=0.1.0,<1"} +zip = { version = "^0.5", default-features = false, features = ["deflate"] } + +[features] +default = ["geturl-reqwest"] +geturl-curl = ["tectonic_geturl/curl"] +geturl-reqwest = ["tectonic_geturl/reqwest"] +native-tls-vendored = ["tectonic_geturl/native-tls-vendored"] + +[package.metadata.internal_dep_versions] +tectonic_errors = "5c9ba661edf5ef669f24f9904f99cca369d999e7" +tectonic_geturl = "c828bee7361ebd30e28392507a1406d27dc8fdbb" +tectonic_io_base = "thiscommit:2021-06-13:s9130zU" +tectonic_status_base = "317ae79ceaa2593fb56090e37bf1f5cc24213dd9" diff --git a/crates/bundles/README.md b/crates/bundles/README.md new file mode 100644 index 0000000000..67f582df6d --- /dev/null +++ b/crates/bundles/README.md @@ -0,0 +1,29 @@ +# The `tectonic_bundles` crate + +[![](http://meritbadge.herokuapp.com/tectonic_bundles)](https://crates.io/crates/tectonic_bundles) + +This crate is part of [the Tectonic +project](https://tectonic-typesetting.github.io/en-US/). It implements various +Tectonic “bundles” that provide access to collections of TeX support files. + +- [API documentation](https://docs.rs/tectonic_bundles/). +- [Main Git repository](https://github.com/tectonic-typesetting/tectonic/). + + +## Cargo features + +This crate provides the following [Cargo features][features]: + +[features]: https://doc.rust-lang.org/cargo/reference/features.html + +- `geturl-curl`: use the [curl] crate to implement HTTP requests. In order for + this to take effect, you must use `--no-default-features` because + `geturl-reqwest` is a default feature and it takes precedence +- `geturl-reqwest`: use the [reqwest] crate to implement HTTP requests (enabled + by default) +- `native-tls-vendored`: if using [reqwest], activate the `vendored` option in + the [native-tls] crate, causing OpenSSL to be vendored + +[curl]: https://docs.rs/curl/ +[reqwest]: https://docs.rs/reqwest/ +[native-tls]: https://github.com/sfackler/rust-native-tls diff --git a/crates/bundles/src/cache.rs b/crates/bundles/src/cache.rs new file mode 100644 index 0000000000..6674557c52 --- /dev/null +++ b/crates/bundles/src/cache.rs @@ -0,0 +1,752 @@ +// Copyright 2017-2021 the Tectonic Project +// Licensed under the MIT License. + +//! Local caching of bundle data. +//! +//! This module implements Tectonic’s local filesystem caching mechanism for TeX +//! support files. To enable efficient caching with proper invalidation +//! semantics, the caching layer does *not* merely wrap [`IoProvider`] +//! implementations. Instead, a cacheable bundle must implement the +//! [`CacheBackend`] trait defined in this module. An example of such a bundle +//! is the [`crate::itar::IndexedTarBackend`] for bundles served over HTTP. +//! +//! In order to access a cacheable bundle, you need a handle to a local +//! [`Cache`], probably obtained with [`Cache::get_user_default()`], and a URL, +//! which you’ll pass to [`Cache::open()`]. 
When using this function, you must
+//! explicitly specify the concrete [`CacheBackend`] type that will service
+//! backend requests.
+
+use fs2::FileExt;
+use std::{
+    collections::HashMap,
+    fs::{self, File},
+    io::{BufRead, BufReader, Error as IoError, ErrorKind as IoErrorKind, Read, Write},
+    path::{Path, PathBuf},
+    str::FromStr,
+};
+use tectonic_errors::prelude::*;
+use tectonic_io_base::{
+    app_dirs,
+    digest::{self, Digest, DigestData},
+    try_open_file, InputHandle, InputOrigin, IoProvider, OpenResult,
+};
+use tectonic_status_base::{tt_warning, StatusBackend};
+
+use crate::Bundle;
+
+/// A cache of data from one or more bundles using the local filesystem.
+#[derive(Debug)]
+pub struct Cache {
+    root: PathBuf,
+}
+
+impl Cache {
+    /// Get a handle to a bundle cache, using default per-user settings.
+    ///
+    /// This method may perform I/O to create the user cache directory, so it is
+    /// fallible. (Due to its `app_dirs2` implementation, it would have to be
+    /// fallible even if it didn't perform I/O.)
+    pub fn get_user_default() -> Result<Self> {
+        Ok(Cache {
+            root: app_dirs::ensure_user_cache_dir("")?,
+        })
+    }
+
+    /// Get a handle to a bundle cache, using a custom cache directory.
+    pub fn get_for_custom_directory<P: Into<PathBuf>>(root: P) -> Self {
+        Cache { root: root.into() }
+    }
+
+    /// Get the root directory of this cache.
+    pub fn root(&self) -> &Path {
+        &self.root
+    }
+
+    /// Open a bundle through the cache layer.
+    ///
+    /// The URL specifies where the backend data live; it must be understood by,
+    /// and contain data appropriate for, the [`CacheBackend`] type associated
+    /// with the bundle that you’re creating. If *only_cached* is true, this
+    /// instance will never actually connect to the backend; if any uncached
+    /// files are requested, they will be represented as “not found”.
+    pub fn open<CB: CacheBackend>(
+        &mut self,
+        url: &str,
+        only_cached: bool,
+        status: &mut dyn StatusBackend,
+    ) -> Result<CachingBundle<CB>> {
+        CachingBundle::new(url, only_cached, status, &self.root)
+    }
+}
+
+/// Information describing a cache backend.
+///
+/// This type is returned by a [`CacheBackend`] on a "pull", a first-time
+/// connection to the backend. It contains the detailed information that needs
+/// to be saved in the cache to provide for efficient operation in subsequent
+/// uses.
+#[derive(Clone, Debug)]
+pub struct BackendPullData {
+    /// The final, "resolved" URL pointing to the backing content, in the case
+    /// that the starting URL redirects.
+    pub resolved_url: String,
+
+    /// The digest of the overall bundle content.
+    pub digest: DigestData,
+
+    /// The bundle indexing data, allowing efficient retrieval of files from the
+    /// backend.
+    ///
+    /// This is a multi-line string, where each line is an entry for a file.
+    /// These lines will be parsed by [`CacheBackend::parse_index_line`]. This
+    /// string will potentially contain several megabytes of data.
+    pub index: String,
+}
+
+/// A source of files that can supply a cache-based bundle.
+///
+/// This trait is combined with [`CachingBundle`] to implement a caching bundle
+/// interface.
+pub trait CacheBackend: Sized {
+    /// Information about a file stored in the backend.
+    ///
+    /// This information should be serializable to a single line of text. It is
+    /// parsed out of the contents of [`BackendPullData::index`] by
+    /// [`Self::parse_index_line`], and later passed to [`Self::get_file`] to
+    /// enable the backend to efficiently retrieve the file in question. For
+    /// instance, it might contain a byte offset that tells the backend where
+    /// the file’s data live.
+    type FileInfo: Clone;
+
+    /// Connect to the backend and download its key information.
+    ///
+    /// This method is used the first time that the cache connects to a backend.
+    /// The return value includes a package of information ([`BackendPullData`])
+    /// that the cache will store to enable efficient operation on subsequent
+    /// requests.
+    fn open_with_pull(
+        start_url: &str,
+        status: &mut dyn StatusBackend,
+    ) -> Result<(Self, BackendPullData)>;
+
+    /// Connect to the backend and fetch validation information.
+    ///
+    /// This method is used when this backend has already been accessed by the
+    /// cache during a previous execution. If we need to download more data from
+    /// the backend, we first need to verify that the cached data still look
+    /// valid. This method asks the backend to pull its “digest file” (currently
+    /// named `SHA256SUM`) and return its contents for validation. The method
+    /// should return `Err` on actual errors, and `Ok(None)` if there are any
+    /// indications that the cached indexing data should be thrown out and
+    /// re-fetched.
+    fn open_with_quick_check(
+        resolved_url: &str,
+        digest_file_info: &Self::FileInfo,
+        status: &mut dyn StatusBackend,
+    ) -> Result<Option<(Self, DigestData)>>;
+
+    /// Parse a line of the indexing data.
+    ///
+    /// The returned tuple should give the file name and an opaque
+    /// [`Self::FileInfo`] that may help the backend retrieve the file in the
+    /// future. The indexing data are originally obtained from
+    /// [`BackendPullData::index`], but are stored in a file locally. This
+    /// method should return an error if this particular line of index data
+    /// seems to be malformatted. Such lines will probably just be silently
+    /// ignored.
+    fn parse_index_line(line: &str) -> Result<(String, Self::FileInfo)>;
+
+    /// Obtain a file from the backend.
+    ///
+    /// Backend-specific retrieval information can be passed in the
+    /// [`Self::FileInfo`] item, which is constructed from the backend’s index
+    /// information. The file should be returned as one large byte vector.
+    fn get_file(
+        &mut self,
+        name: &str,
+        info: &Self::FileInfo,
+        status: &mut dyn StatusBackend,
+    ) -> Result<Vec<u8>>;
+}
+
+/// Information about a cached file.
+#[derive(Clone, Copy, Debug)]
+struct CachedFileInfo {
+    /// The length of the file in bytes.
+    ///
+    /// This field isn't currently used, but seems handy to keep around.
+    _length: u64,
+
+    /// The digest of the file contents.
+    ///
+    /// This digest is used to locate the cached data on disk.
+    digest: DigestData,
+}
+
+/// A caching bundle that obtains files from some backend.
+///
+/// This bundle implementation is the key to Tectonic’s ability to download TeX
+/// support files on the fly. The cache backend is generally expected to be some
+/// kind of network-based resource, and the caching scheme is designed so that a
+/// document build can avoid touching the network altogether if no new files
+/// need to be downloaded.
+#[derive(Debug)]
+pub struct CachingBundle<CB: CacheBackend> {
+    /// The URL specifying where to start looking for the bundle data.
+    ///
+    /// The caching layer maintains two URLs: the "start" URL and the "resolved"
+    /// URL. The goal here is to be able to store a single URL for fetching
+    /// data, but maintain the capability to update the bundle data behind that
+    /// URL. Requests to the start URL may get redirected (one or more times)
+    /// until eventually we arrive at the "resolved" URL. While the redirection
+    /// of the start URL might change, the contents of a resolved URL should
+    /// never change once published.
+    start_url: String,
+
+    /// The "resolved" URL for the backing data.
+    ///
+    /// The bundle data located at this URL should never change.
+    resolved_url: String,
+
+    /// The cached value of the backend’s content digest.
+    ///
+    /// This is stored in a file at [`Self::digest_path`]. This value may be
+    /// inaccurate, if the backing bundle has been updated (or if the cache is
+    /// corrupt, etc.) and we haven't yet synchronized with the backend and
+    /// discovered that fact.
+    cached_digest: DigestData,
+
+    /// Information about all of the files that have been cached locally.
+    ///
+    /// This maps filenames to summary information that can then be used to
+    /// retrieve file data from [`Self::data_base`]. The contents are loaded
+    /// from the manifest file if the cache is non-empty.
+    contents: HashMap<String, CachedFileInfo>,
+
+    /// Information about all of the files known to the backend.
+    ///
+    /// This maps filenames to [`CacheBackend::FileInfo`] data that can be used
+    /// to retrieve a file from the backend if needed.
+    index: HashMap<String, CB::FileInfo>,
+
+    /// If true, only use cached files -- never connect to the backend.
+    ///
+    /// This option can be useful if we are operating disconnected from the
+    /// network (e.g., on an airplane). If you add a new figure to your
+    /// document, the engine will inquire about several related files that it
+    /// thinks might exist. Without this option, such an inquiry might require
+    /// Tectonic to hit the network, when the user knows for sure that the
+    /// bundle is not going to contain these files.
+    only_cached: bool,
+
+    /// The connection to the cache backend, maybe.
+    ///
+    /// This field will be `None` if there are locally cached data present and
+    /// there has not yet been a need to connect to the backend. If it becomes
+    /// necessary to "pull" and/or download a new file from the backend, this
+    /// value will become `Some` — it represents something like an open network
+    /// connection.
+    backend: Option<CB>,
+
+    /// The path to a file containing a cached copy of the backend's content
+    /// digest.
+    ///
+    /// This file path is based on [`Self::start_url`].
+    digest_path: PathBuf,
+
+    /// A directory where we will save [`Self::resolved_url`].
+    ///
+    /// We need to cache `resolved_url` to enable the "quick check" backend
+    /// reconnection path. The actual cache file path is based on the backend’s
+    /// content digest.
+    resolved_base: PathBuf,
+
+    /// A directory where we will save the cache manifest.
+    ///
+    /// The manifest file contains information about the files that have
+    /// actually been fetched from the backend and saved locally. The actual
+    /// manifest file path is based on the backend’s content digest.
+    manifest_path: PathBuf,
+
+    /// A directory where we will save cached file data.
+    ///
+    /// This directory contains the actual cached file contents, in a directory
+    /// structured based on the digest of each file’s content.
+    data_base: PathBuf,
+}
+
+/// A locally-cached analogue of [`BackendPullData`].
+///
+/// This data structure is what we try to recover from the cache to see if we
+/// can avoid connecting to the backend.
+#[derive(Clone, Debug)]
+struct CachedPullData<F> {
+    /// The saved backend content digest.
+    pub digest: DigestData,
+
+    /// The saved "resolved URL" for the backend.
+    pub resolved_url: String,
+
+    /// The saved indexing information for the backend.
+    pub index: HashMap<String, F>,
+}
+
+impl<CB: CacheBackend> CachingBundle<CB> {
+    fn new(
+        start_url: &str,
+        only_cached: bool,
+        status: &mut dyn StatusBackend,
+        cache_root: &Path,
+    ) -> Result<Self> {
+        // Set up our paths.
+        let digest_path =
+            ensure_cache_dir(cache_root, "urls")?.join(app_dirs::app_dirs2::sanitized(start_url));
+        let resolved_base = ensure_cache_dir(cache_root, "redirects")?;
+        let index_base = ensure_cache_dir(cache_root, "indexes")?;
+        let manifest_base = ensure_cache_dir(cache_root, "manifests")?;
+        let data_base = ensure_cache_dir(cache_root, "files")?;
+
+        // The whole point of this cache is to avoid connecting to the backend
+        // if at all possible. So we first see if we have cached the "pull data"
+        // that describe the overall backend contents.
+
+        let mut backend = None;
+
+        let cached_pull_data =
+            match load_cached_pull_data::<CB>(&digest_path, &resolved_base, &index_base)? {
+                Some(c) => c,
+                None => {
+                    // Some portion of the required cached data is missing. We need to
+                    // do a complete pull and then cache the results.
+
+                    let (new_backend, pull_data) = CB::open_with_pull(start_url, status)?;
+                    backend = Some(new_backend);
+
+                    let digest_text = pull_data.digest.to_string();
+                    file_create_write(&digest_path, |f| writeln!(f, "{}", &digest_text))?;
+                    file_create_write(make_txt_path(&resolved_base, &digest_text), |f| {
+                        f.write_all(pull_data.resolved_url.as_bytes())
+                    })?;
+                    file_create_write(make_txt_path(&index_base, &digest_text), |f| {
+                        f.write_all(pull_data.index.as_bytes())
+                    })?;
+
+                    // Now that we've done that, load_cached_pull_data() really ought to succeed ...
+                    atry!(
+                        load_cached_pull_data::<CB>(&digest_path, &resolved_base, &index_base)?;
+                        ["cache files missing even after they were created"]
+                    )
+                }
+            };
+
+        // We call this `cached_digest`, but if `backend` is Some, it is a
+        // validated, fresh digest.
+
+        let cached_digest = cached_pull_data.digest;
+
+        // Now that we have the backend content digest, we know which manifest
+        // to use. Read it in, if it exists.
+
+        let manifest_path = make_txt_path(&manifest_base, &cached_digest.to_string());
+        let mut contents = HashMap::new();
+
+        match try_open_file(&manifest_path) {
+            OpenResult::NotAvailable => {}
+            OpenResult::Err(e) => {
+                return Err(e);
+            }
+            OpenResult::Ok(mfile) => {
+                // Note that the lock is released when the file is closed,
+                // which is good since BufReader::new() and BufReader::lines()
+                // consume their objects.
+                if let Err(e) = mfile.lock_shared() {
+                    tt_warning!(status, "failed to lock manifest file \"{}\" for reading; this might be fine",
+                                manifest_path.display(); e.into());
+                }
+
+                let f = BufReader::new(mfile);
+
+                for res in f.lines() {
+                    let line = res?;
+                    let mut bits = line.rsplitn(3, ' ');
+
+                    let (original_name, length, digest) =
+                        match (bits.next(), bits.next(), bits.next(), bits.next()) {
+                            (Some(s), Some(t), Some(r), None) => (r, t, s),
+                            _ => continue,
+                        };
+
+                    let name = original_name.to_owned();
+
+                    let length = match length.parse::<u64>() {
+                        Ok(l) => l,
+                        Err(_) => continue,
+                    };
+
+                    let digest = if digest == "-" {
+                        continue;
+                    } else {
+                        match DigestData::from_str(&digest) {
+                            Ok(d) => d,
+                            Err(e) => {
+                                tt_warning!(status, "ignoring bad digest data \"{}\" for \"{}\" in \"{}\"",
+                                            &digest, original_name, manifest_path.display() ; e);
+                                continue;
+                            }
+                        }
+                    };
+
+                    contents.insert(
+                        name,
+                        CachedFileInfo {
+                            _length: length,
+                            digest,
+                        },
+                    );
+                }
+            }
+        }
+
+        // All set.
+
+        Ok(CachingBundle {
+            start_url: start_url.to_owned(),
+            resolved_url: cached_pull_data.resolved_url,
+            digest_path,
+            cached_digest,
+            manifest_path,
+            data_base,
+            resolved_base,
+            contents,
+            only_cached,
+            backend,
+            index: cached_pull_data.index,
+        })
+    }
+
+    /// Save data about a file to our local cache manifest.
+    fn save_to_manifest(&mut self, name: &str, length: u64, digest: DigestData) -> Result<()> {
+        let digest_text = digest.to_string();
+
+        // Due to a quirk about permissions for file locking on Windows, we
+        // need to add `.read(true)` to be able to lock a file opened in
+        // append mode.
+        let mut man = fs::OpenOptions::new()
+            .append(true)
+            .create(true)
+            .read(true)
+            .open(&self.manifest_path)?;
+
+        // Lock will be released when file is closed at the end of this function.
+        atry!(
+            man.lock_exclusive();
+            ["failed to lock manifest file \"{}\" for writing", self.manifest_path.display()]
+        );
+
+        // If a filename contains newline characters, it will mess up our
+        // line-based manifest format. Be paranoid and refuse to record such
+        // filenames.
+        if !name.contains(|c| c == '\n' || c == '\r') {
+            writeln!(man, "{} {} {}", name, length, digest_text)?;
+        }
+
+        self.contents.insert(
+            name.to_owned(),
+            CachedFileInfo {
+                _length: length,
+                digest,
+            },
+        );
+
+        Ok(())
+    }
+
+    /// Ensure that the backend is connected and valid.
+    ///
+    /// Here we do a "quick check" to see if the backend's digest is what we
+    /// expect. If not, we do a lame thing where we error out but set things up
+    /// so that things should succeed if the program is re-run. Exactly the lame
+    /// TeX user experience that I've been trying to avoid!
+    ///
+    /// After this function has been called, you can assume that `self.backend`
+    /// is Some.
+    fn ensure_backend_validity(&mut self, status: &mut dyn StatusBackend) -> Result<()> {
+        // If backend is Some, we already have a validated connection to it.
+        if self.backend.is_some() {
+            return Ok(());
+        }
+
+        // Do the quick check. If anything goes wrong, eat the error and try a
+        // fresh pull.
+        if let Some(info) = self.index.get(digest::DIGEST_NAME) {
+            if let Ok(Some((backend, digest))) =
+                CB::open_with_quick_check(&self.resolved_url, info, status)
+            {
+                if self.cached_digest == digest {
+                    // We managed to pull some data that match the digest. We
+                    // can be quite confident that the bundle is what we expect
+                    // it to be.
+                    self.backend = Some(backend);
+                    return Ok(());
+                }
+            }
+        }
+
+        // The quick check failed. Try to pull all data to make sure that it
+        // wasn't a network error or that the resolved URL hasn't been updated.
+        let (new_backend, pull_data) = CB::open_with_pull(&self.start_url, status)?;
+
+        if self.cached_digest != pull_data.digest {
+            // Crap! The backend isn't what we thought it was. We may have been
+            // giving incorrect results if we pulled files out of the cache
+            // before this invocation. Rewrite the digest file so that next time
+            // we'll start afresh, then bail.
+            file_create_write(&self.digest_path, |f| {
+                writeln!(f, "{}", pull_data.digest.to_string())
+            })?;
+            bail!("backend digest changed; rerun tectonic to use updated information");
+        }
+
+        if self.resolved_url != pull_data.resolved_url {
+            // The resolved URL has changed, but the digest is the same. So
+            // let's just update the URL and keep going.
+ let resolved_path = make_txt_path(&self.resolved_base, &pull_data.digest.to_string()); + file_create_write(&resolved_path, |f| { + f.write_all(pull_data.resolved_url.as_bytes()) + })?; + + self.resolved_url = pull_data.resolved_url; + } + + // OK, it seems that everything is in order. + self.backend = Some(new_backend); + Ok(()) + } + + /// Make sure that a file is available, and return its filesystem path. + /// + /// If the file is already cached, just pull it out. Otherwise, fetch it + /// from the backend. + fn ensure_file_availability( + &mut self, + name: &str, + status: &mut dyn StatusBackend, + ) -> OpenResult { + // Already in the cache? + if let Some(info) = self.contents.get(name) { + return match info.digest.create_two_part_path(&self.data_base) { + Ok(p) => OpenResult::Ok(p), + Err(e) => OpenResult::Err(e), + }; + } + + // No, it's not. Are we in cache-only mode? + if self.only_cached { + return OpenResult::NotAvailable; + } + + // Is the file in the backend at all? + let info = match self.index.get(name).cloned() { + Some(info) => info, + None => return OpenResult::NotAvailable, + }; + + // Yes, it is. Time to fetch it! In order to do that, we need to ensure + // that we have a valid backend connection. + if let Err(e) = self.ensure_backend_validity(status) { + return OpenResult::Err(e); + } + + // Cool, we're connected to the backend now. Get the file. Note that we + // don't need to check for updates to the index after the + // ensure-validity, because we require that the contents of the bundle + // are unchanged (as expressed in the content digest): if they did + // change, ensure_backend_validity() would have bailed, because we might + // have returned incorrect data for previous requests that hit the + // cache. + + let content = match self.backend.as_mut().unwrap().get_file(name, &info, status) { + Ok(c) => c, + Err(e) => return OpenResult::Err(e), + }; + + let length = content.len(); + + let mut digest_builder = digest::create(); + digest_builder.update(&content); + let digest = DigestData::from(digest_builder); + + let final_path = match digest.create_two_part_path(&self.data_base) { + Ok(p) => p, + Err(e) => return OpenResult::Err(e), + }; + + // Perform a racy check for the destination existing, because this + // matters on Windows: if the destination is already there, we'll get + // an error because the destination is marked read-only. Assuming + // non-pathological filesystem manipulation, though, we'll only be + // subject to the race once. + + if !final_path.exists() { + if let Err(e) = file_create_write(&final_path, |f| f.write_all(&content)) { + return OpenResult::Err(e); + } + + // Now we can make the file readonly. It would be nice to set the + // permissions using the already-open file handle owned by the + // tempfile, but mkstemp doesn't give us access. + let mut perms = match fs::metadata(&final_path) { + Ok(p) => p, + Err(e) => { + return OpenResult::Err(e.into()); + } + } + .permissions(); + perms.set_readonly(true); + + if let Err(e) = fs::set_permissions(&final_path, perms) { + return OpenResult::Err(e.into()); + } + } + + // And finally add a record of this file to our manifest. Note that + // we're opening and closing the manifest every time we cache a new + // file; not so efficient, but whatever. 
+
+        if let Err(e) = self.save_to_manifest(name, length as u64, digest) {
+            return OpenResult::Err(e);
+        }
+
+        OpenResult::Ok(final_path)
+    }
+}
+
+impl<CB: CacheBackend> IoProvider for CachingBundle<CB> {
+    fn input_open_name(
+        &mut self,
+        name: &str,
+        status: &mut dyn StatusBackend,
+    ) -> OpenResult<InputHandle> {
+        let path = match self.ensure_file_availability(name, status) {
+            OpenResult::Ok(p) => p,
+            OpenResult::NotAvailable => return OpenResult::NotAvailable,
+            OpenResult::Err(e) => return OpenResult::Err(e),
+        };
+
+        let f = match File::open(&path) {
+            Ok(f) => f,
+            Err(e) => return OpenResult::Err(e.into()),
+        };
+
+        OpenResult::Ok(InputHandle::new_read_only(
+            name,
+            BufReader::new(f),
+            InputOrigin::Other,
+        ))
+    }
+}
+
+impl<CB: CacheBackend> Bundle for CachingBundle<CB> {
+    fn get_digest(&mut self, _status: &mut dyn StatusBackend) -> Result<DigestData> {
+        Ok(self.cached_digest)
+    }
+
+    fn all_files(&mut self, status: &mut dyn StatusBackend) -> Result<Vec<String>> {
+        if !self.only_cached {
+            self.ensure_backend_validity(status)?;
+        }
+        Ok(self.index.keys().cloned().collect())
+    }
+}
+
+/// Load the cached "pull" data for a backend.
+///
+/// If any of the files are not found or otherwise have issues, return None.
+fn load_cached_pull_data<CB: CacheBackend>(
+    digest_path: &Path,
+    resolved_base: &Path,
+    index_base: &Path,
+) -> Result<Option<CachedPullData<CB::FileInfo>>> {
+    // Convert file-not-found errors into None.
+    return match inner::<CB>(digest_path, resolved_base, index_base) {
+        Ok(r) => Ok(Some(r)),
+        Err(e) => {
+            if let Some(ioe) = e.downcast_ref::<IoError>() {
+                if ioe.kind() == IoErrorKind::NotFound {
+                    return Ok(None);
+                }
+            }
+
+            Err(e)
+        }
+    };
+
+    fn inner<CB: CacheBackend>(
+        digest_path: &Path,
+        resolved_base: &Path,
+        index_base: &Path,
+    ) -> Result<CachedPullData<CB::FileInfo>> {
+        let digest_text = {
+            let f = File::open(digest_path)?;
+            let mut digest_text = String::with_capacity(digest::DIGEST_LEN);
+            f.take(digest::DIGEST_LEN as u64)
+                .read_to_string(&mut digest_text)?;
+            digest_text
+        };
+
+        let resolved_path = make_txt_path(resolved_base, &digest_text);
+        let resolved_url = fs::read_to_string(resolved_path)?;
+
+        let index_path = make_txt_path(index_base, &digest_text);
+        let index = {
+            let f = File::open(index_path)?;
+            let mut index = HashMap::new();
+            for line in BufReader::new(f).lines() {
+                if let Ok((name, info)) = CB::parse_index_line(&line?) {
+                    index.insert(name, info);
+                }
+            }
+            index
+        };
+
+        Ok(CachedPullData {
+            digest: DigestData::from_str(&digest_text)?,
+            resolved_url,
+            index,
+        })
+    }
+}
+
+/// A convenience method to provide a better error message when writing to a created file.
+fn file_create_write<P, F, E>(path: P, write_fn: F) -> Result<()>
+where
+    P: AsRef<Path>,
+    F: FnOnce(&mut File) -> std::result::Result<(), E>,
+    E: std::error::Error + 'static + Sync + Send,
+{
+    let path = path.as_ref();
+    let mut f = atry!(
+        File::create(path);
+        ["couldn't open {} for writing", path.display()]
+    );
+    atry!(
+        write_fn(&mut f);
+        ["couldn't write to {}", path.display()]
+    );
+    Ok(())
+}
+
+/// Ensure that a directory exists.
+fn ensure_cache_dir(root: &Path, path: &str) -> Result<PathBuf> {
+    let full_path = root.join(path);
+    atry!(
+        fs::create_dir_all(&full_path);
+        ["failed to create directory `{}` or one of its parents", full_path.display()]
+    );
+    Ok(full_path)
+}
+
+/// Convenience to generate a text filename
+fn make_txt_path(base: &Path, name: &str) -> PathBuf {
+    base.join(&name).with_extension("txt")
+}
diff --git a/crates/bundles/src/dir.rs b/crates/bundles/src/dir.rs
new file mode 100644
index 0000000000..db431ddfa5
--- /dev/null
+++ b/crates/bundles/src/dir.rs
@@ -0,0 +1,76 @@
+// Copyright 2017-2021 the Tectonic Project
+// Licensed under the MIT License.
+
+//! A module for the directory bundle [`DirBundle`].
+
+use std::{
+    fs,
+    path::{Path, PathBuf},
+};
+use tectonic_errors::prelude::*;
+use tectonic_io_base::{filesystem::FilesystemIo, InputHandle, IoProvider, OpenResult};
+use tectonic_status_base::StatusBackend;
+
+use super::Bundle;
+
+/// A "bundle" of a bunch of files in a directory.
+///
+/// This implementation essentially just wraps
+/// [`tectonic_io_base::filesystem::FilesystemIo`], ensuring that it is
+/// read-only, self-contained, and implements the [`Bundle`] trait. The
+/// directory should contain a file named `SHA256SUM` if the bundle fingerprint
+/// will be needed.
+pub struct DirBundle(FilesystemIo);
+
+impl DirBundle {
+    /// Create a new directory bundle.
+    ///
+    /// No validation of the input path is performed, which is why this function
+    /// is infallible.
+    pub fn new<P: AsRef<Path>>(dir: P) -> DirBundle {
+        DirBundle(FilesystemIo::new(
+            dir.as_ref(),
+            false,              // no writes
+            false,              // no absolute paths
+            Default::default(), // no hidden files
+        ))
+    }
+}
+
+impl IoProvider for DirBundle {
+    fn input_open_name(
+        &mut self,
+        name: &str,
+        status: &mut dyn StatusBackend,
+    ) -> OpenResult<InputHandle> {
+        self.0.input_open_name(name, status)
+    }
+
+    fn input_open_name_with_abspath(
+        &mut self,
+        name: &str,
+        status: &mut dyn StatusBackend,
+    ) -> OpenResult<(InputHandle, Option<PathBuf>)> {
+        self.0.input_open_name_with_abspath(name, status)
+    }
+}
+
+impl Bundle for DirBundle {
+    fn all_files(&mut self, _status: &mut dyn StatusBackend) -> Result<Vec<String>> {
+        let mut files = Vec::new();
+
+        // We intentionally do not explore the directory recursively.
+        for entry in fs::read_dir(&self.0.root())? {
+            let entry = entry?;
+
+            // This catches both regular files and symlinks:
+            if !entry.file_type()?.is_dir() {
+                if let Some(s) = entry.file_name().to_str() {
+                    files.push(s.to_owned());
+                }
+            }
+        }
+
+        Ok(files)
+    }
+}
diff --git a/crates/bundles/src/itar.rs b/crates/bundles/src/itar.rs
new file mode 100644
index 0000000000..c486e9c891
--- /dev/null
+++ b/crates/bundles/src/itar.rs
@@ -0,0 +1,195 @@
+// Copyright 2017-2021 the Tectonic Project
+// Licensed under the MIT License.
+
+//! The web-friendly "indexed tar" bundle backend.
+//!
+//! The main type offered by this module is the [`IndexedTarBackend`] struct,
+//! which cannot be used directly as a [`tectonic_io_base::IoProvider`] but is
+//! the default backend for cached web-based bundle access through the
+//! [`crate::cache::CachingBundle`] framework.
+//!
+//! While the on-server file format backing the “indexed tar” backend is indeed
+//! a standard `tar` file, as far as the client is concerned, this backend is
+//! centered on HTTP byte-range requests. For each file contained in the backing
+//! resource, the index file merely contains a byte offset and length that are
+//! then used to construct an HTTP Range request to obtain the file as needed.
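Taken together with the caching layer above, the intended usage of this backend looks roughly like the following sketch. The bundle URL is a placeholder, and `NoopStatusBackend` from `tectonic_status_base` stands in for a real status reporter.

```rust
use tectonic_bundles::{cache::Cache, itar::IndexedTarBackend, Bundle};
use tectonic_errors::Result;
use tectonic_status_base::NoopStatusBackend;

fn open_cached_bundle(url: &str) -> Result<()> {
    let mut status = NoopStatusBackend::default();

    // Open (or create) the per-user cache directory, then bind it to an
    // indexed-tar backend; `false` allows network fetches for uncached files.
    let mut cache = Cache::get_user_default()?;
    let mut bundle = cache.open::<IndexedTarBackend>(url, false, &mut status)?;

    // The digest comes from cached data when possible, avoiding the network.
    println!("bundle digest: {}", bundle.get_digest(&mut status)?);
    Ok(())
}
```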
+
+use flate2::read::GzDecoder;
+use std::{convert::TryInto, io::Read, str::FromStr};
+use tectonic_errors::prelude::*;
+use tectonic_geturl::{DefaultBackend, DefaultRangeReader, GetUrlBackend, RangeReader};
+use tectonic_io_base::digest::{self, DigestData};
+use tectonic_status_base::{tt_note, tt_warning, StatusBackend};
+
+use crate::cache::{BackendPullData, CacheBackend};
+
+const MAX_HTTP_ATTEMPTS: usize = 4;
+
+/// The internal file-information struct used by the [`IndexedTarBackend`].
+#[derive(Clone, Copy, Debug)]
+pub struct FileInfo {
+    offset: u64,
+    length: u64,
+}
+
+/// A simple web-based file backend based on HTTP Range requests.
+///
+/// This type implements the [`CacheBackend`] trait and so can be used for
+/// web-based bundle access through the [`crate::cache::CachingBundle`]
+/// framework.
+#[derive(Debug)]
+pub struct IndexedTarBackend {
+    reader: DefaultRangeReader,
+}
+
+impl CacheBackend for IndexedTarBackend {
+    type FileInfo = FileInfo;
+
+    fn open_with_pull(
+        start_url: &str,
+        status: &mut dyn StatusBackend,
+    ) -> Result<(Self, BackendPullData)> {
+        // Step 1: resolve URL
+        let mut geturl_backend = DefaultBackend::default();
+        let resolved_url = geturl_backend.resolve_url(start_url, status)?;
+
+        // Step 2: fetch index
+        let index = {
+            let mut index = String::new();
+            let index_url = format!("{}.index.gz", &resolved_url);
+            tt_note!(status, "downloading index {}", index_url);
+            GzDecoder::new(geturl_backend.get_url(&index_url, status)?)
+                .read_to_string(&mut index)?;
+            index
+        };
+
+        // Step 3: get digest, setting up instance as we go
+
+        let mut cache_backend = IndexedTarBackend {
+            reader: geturl_backend.open_range_reader(&resolved_url),
+        };
+
+        let digest_info = {
+            let mut digest_info = None;
+
+            for line in index.lines() {
+                if let Ok((name, info)) = Self::parse_index_line(line) {
+                    if name == digest::DIGEST_NAME {
+                        digest_info = Some(info);
+                        break;
+                    }
+                }
+            }
+
+            atry!(
+                digest_info;
+                ["backend does not provide needed {} file", digest::DIGEST_NAME]
+            )
+        };
+
+        let digest_text =
+            String::from_utf8(cache_backend.get_file(digest::DIGEST_NAME, &digest_info, status)?)
+                .map_err(|e| e.utf8_error())?;
+        let digest = DigestData::from_str(&digest_text)?;
+
+        // All done.
+        Ok((
+            cache_backend,
+            BackendPullData {
+                resolved_url,
+                digest,
+                index,
+            },
+        ))
+    }
+
+    fn open_with_quick_check(
+        resolved_url: &str,
+        digest_file_info: &Self::FileInfo,
+        status: &mut dyn StatusBackend,
+    ) -> Result<Option<(Self, DigestData)>> {
+        let mut cache_backend = IndexedTarBackend {
+            reader: DefaultBackend::default().open_range_reader(resolved_url),
+        };
+
+        if let Ok(d) = cache_backend.get_file(digest::DIGEST_NAME, &digest_file_info, status) {
+            if let Ok(d) = String::from_utf8(d) {
+                if let Ok(d) = DigestData::from_str(&d) {
+                    return Ok(Some((cache_backend, d)));
+                }
+            }
+        }
+
+        Ok(None)
+    }
+
+    fn parse_index_line(line: &str) -> Result<(String, Self::FileInfo)> {
+        let mut bits = line.split_whitespace();
+
+        if let (Some(name), Some(offset), Some(length)) = (bits.next(), bits.next(), bits.next()) {
+            Ok((
+                name.to_owned(),
+                FileInfo {
+                    offset: offset.parse::<u64>()?,
+                    length: length.parse::<u64>()?,
+                },
+            ))
+        } else {
+            // TODO: preserve the warning info or something!
+            bail!("malformed index line");
+        }
+    }
+
+    fn get_file(
+        &mut self,
+        name: &str,
+        info: &Self::FileInfo,
+        status: &mut dyn StatusBackend,
+    ) -> Result<Vec<u8>> {
+        tt_note!(status, "downloading {}", name);
+
+        // Historically, sometimes our web service would drop connections when
+        // fetching a bunch of resource files (i.e., on the first invocation).
+        // The error manifested itself in a way that has a not-so-nice user
+        // experience. Our solution: retry the request a few times in case it
+        // was a transient problem.
+
+        let n = info.length.try_into().unwrap();
+        let mut buf = Vec::with_capacity(n);
+        let mut overall_failed = true;
+        let mut any_failed = false;
+
+        for _ in 0..MAX_HTTP_ATTEMPTS {
+            let mut stream = match self.reader.read_range(info.offset, n) {
+                Ok(r) => r,
+                Err(e) => {
+                    tt_warning!(status, "failure requesting \"{}\" from network", name; e);
+                    any_failed = true;
+                    continue;
+                }
+            };
+
+            if let Err(e) = stream.read_to_end(&mut buf) {
+                tt_warning!(status, "failure downloading \"{}\" from network", name; e.into());
+                any_failed = true;
+                continue;
+            }
+
+            overall_failed = false;
+            break;
+        }
+
+        if overall_failed {
+            bail!(
+                "failed to retrieve \"{}\" from the network; \
+                 this most probably is not Tectonic's fault \
+                 -- please check your network connection.",
+                name
+            );
+        } else if any_failed {
+            tt_note!(status, "download succeeded after retry");
+        }
+
+        Ok(buf)
+    }
+}
diff --git a/crates/bundles/src/lib.rs b/crates/bundles/src/lib.rs
new file mode 100644
index 0000000000..a0d40ee2b4
--- /dev/null
+++ b/crates/bundles/src/lib.rs
@@ -0,0 +1,126 @@
+// Copyright 2016-2021 the Tectonic Project
+// Licensed under the MIT License.
+
+#![deny(missing_docs)]
+
+//! Implementations of Tectonic bundle formats.
+//!
+//! A Tectonic “bundle” is a collection of TeX support files. In code, bundles
+//! implement the [`Bundle`] trait defined here, although most of the action in
+//! a bundle will be in its implementation of [`tectonic_io_base::IoProvider`].
+//!
+//! This crate provides the following bundle implementations:
+//!
+//! - [`cache::CachingBundle`] for access to remote bundles with local
+//!   filesystem caching.
+//! - [`dir::DirBundle`] turns a directory full of files into a bundle; it is
+//!   useful for testing and lightweight usage.
+//! - [`zip::ZipBundle`] for a ZIP-format bundle.
+
+use std::{io::Read, str::FromStr};
+use tectonic_errors::{anyhow::bail, atry, Result};
+use tectonic_io_base::{digest, digest::DigestData, IoProvider, OpenResult};
+use tectonic_status_base::StatusBackend;
+
+pub mod cache;
+pub mod dir;
+pub mod itar;
+pub mod zip;
+
+/// A trait for bundles of Tectonic support files.
+///
+/// A “bundle” is an [`IoProvider`] with a few special properties. Bundles are
+/// read-only, and their contents can be enumerated. In principle, a bundle is
+/// completely defined by its file contents, which can be summarized by a
+/// cryptographic digest, obtainable using the [`Self::get_digest`] method: two
+/// bundles with the same digest should contain exactly the same set of files,
+/// and if any aspect of a bundle’s file contents change, so should its digest.
+/// Finally, it is generally expected that a bundle will contain a large number
+/// of TeX support files, and that you can generate one or more TeX format files
+/// using only the files contained in a bundle.
+pub trait Bundle: IoProvider {
+    /// Get a cryptographic digest summarizing this bundle’s contents.
+    ///
+    /// The digest summarizes the exact contents of every file in the bundle. It
+    /// is computed from the sorted names and SHA256 digests of the component
+    /// files [as implemented in the TeXLive bundle builder][x].
+    ///
+    /// [x]: https://github.com/tectonic-typesetting/tectonic-texlive-bundles/blob/master/scripts/ttb_utils.py#L321
+    ///
+    /// The default implementation gets the digest from a file named
+    /// `SHA256SUM`, which is expected to contain the digest in hex-encoded
+    /// format.
+    fn get_digest(&mut self, status: &mut dyn StatusBackend) -> Result<DigestData> {
+        let digest_text = match self.input_open_name(digest::DIGEST_NAME, status) {
+            OpenResult::Ok(h) => {
+                let mut text = String::new();
+                h.take(64).read_to_string(&mut text)?;
+                text
+            }
+
+            OpenResult::NotAvailable => {
+                // Broken or un-cacheable backend.
+                bail!("bundle does not provide needed SHA256SUM file");
+            }
+
+            OpenResult::Err(e) => {
+                return Err(e);
+            }
+        };
+
+        Ok(atry!(DigestData::from_str(&digest_text); ["corrupted SHA256 digest data"]))
+    }
+
+    /// Enumerate the files in this bundle.
+    ///
+    /// This interface is intended to be used for diagnostics, not by anything
+    /// during actual execution of an engine. This should include meta-files
+    /// such as the `SHA256SUM` file. The ordering of the returned filenames is
+    /// unspecified.
+    ///
+    /// To ease implementation, the filenames are returned in one big vector of
+    /// owned strings. For a large bundle, the memory consumed by this operation
+    /// might be fairly substantial (although we are talking megabytes, not
+    /// gigabytes).
+    fn all_files(&mut self, status: &mut dyn StatusBackend) -> Result<Vec<String>>;
+}
+
+impl<B: Bundle + ?Sized> Bundle for Box<B> {
+    fn get_digest(&mut self, status: &mut dyn StatusBackend) -> Result<DigestData> {
+        (**self).get_digest(status)
+    }
+
+    fn all_files(&mut self, status: &mut dyn StatusBackend) -> Result<Vec<String>> {
+        (**self).all_files(status)
+    }
+}
+
+/// The URL of the default bundle.
+///
+/// This is a hardcoded URL of a default bundle that will provide some
+/// "sensible" set of TeX support files. The higher-level `tectonic` crate
+/// provides a configuration mechanism to allow the user to override this
+/// setting, so you should use that if you are in a position to do so.
+///
+/// This URL will be embedded in the binaries that you create, which may be used
+/// for years into the future, so it needs to be durable and reliable. At the
+/// moment, the URL is hosted on `archive.org` and redirects to a web-based
+/// storage service that has changed a few times over the years. Note that
+/// `archive.org` is blocked in China, causing problems for that potential user
+/// base.
+pub const FALLBACK_BUNDLE_URL: &str =
+    "https://archive.org/services/purl/net/pkgwpub/tectonic-default";
+
+/// Open the fallback bundle.
+///
+/// This is essentially the default Tectonic bundle, but the higher-level
+/// `tectonic` crate provides a configuration mechanism to allow the user to
+/// override the [`FALLBACK_BUNDLE_URL`] setting, and that should be preferred
+/// if you’re in a position to use it.
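Before the function itself (just below), a sketch of how a third-party consumer might call it — assuming `tectonic_status_base::NoopStatusBackend` for status reporting; error handling elided, not part of the patch:

```rust
use tectonic_bundles::{get_fallback_bundle, Bundle};
use tectonic_status_base::NoopStatusBackend;

fn list_some_bundle_files() -> tectonic_errors::Result<()> {
    let mut status = NoopStatusBackend::default();

    // `false`: allow network access rather than relying only on the cache.
    let mut bundle = get_fallback_bundle(false, &mut status)?;

    // Print a small sample of the bundle's contents.
    for name in bundle.all_files(&mut status)?.iter().take(10) {
        println!("{}", name);
    }

    Ok(())
}
```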
+pub fn get_fallback_bundle(
+    only_cached: bool,
+    status: &mut dyn StatusBackend,
+) -> Result<cache::CachingBundle<itar::IndexedTarBackend>> {
+    let mut cache = cache::Cache::get_user_default()?;
+    cache.open(FALLBACK_BUNDLE_URL, only_cached, status)
+}
diff --git a/src/io/zipbundle.rs b/crates/bundles/src/zip.rs
similarity index 72%
rename from src/io/zipbundle.rs
rename to crates/bundles/src/zip.rs
index 6d30b801a6..c0d2757d4f 100644
--- a/src/io/zipbundle.rs
+++ b/crates/bundles/src/zip.rs
@@ -1,23 +1,27 @@
-// src/io/zipbundle.rs -- I/O on files in a Zipped-up "bundle"
-// Copyright 2016-2020 the Tectonic Project
+// Copyright 2016-2021 the Tectonic Project
 // Licensed under the MIT License.
 
+//! ZIP files as Tectonic bundles.
+
 use std::{
     fs::File,
     io::{Cursor, Read, Seek},
     path::Path,
 };
+use tectonic_errors::prelude::*;
+use tectonic_io_base::{InputHandle, InputOrigin, IoProvider, OpenResult};
+use tectonic_status_base::StatusBackend;
 use zip::{result::ZipError, ZipArchive};
 
-use super::{Bundle, InputHandle, InputOrigin, IoProvider, OpenResult};
-use crate::errors::Result;
-use crate::status::StatusBackend;
+use crate::Bundle;
 
+/// A bundle backed by a ZIP file.
 pub struct ZipBundle<R: Read + Seek> {
     zip: ZipArchive<R>,
 }
 
 impl<R: Read + Seek> ZipBundle<R> {
+    /// Create a new ZIP bundle for a generic readable and seekable stream.
     pub fn new(reader: R) -> Result<ZipBundle<R>> {
         Ok(ZipBundle {
            zip: ZipArchive::new(reader)?,
@@ -26,6 +30,7 @@ impl<R: Read + Seek> ZipBundle<R> {
 }
 
 impl ZipBundle<File> {
+    /// Open a file on the filesystem as a ZIP bundle.
     pub fn open<P: AsRef<Path>>(path: P) -> Result<ZipBundle<File>> {
         Self::new(File::open(path)?)
     }
@@ -66,4 +71,8 @@ impl<R: Read + Seek> IoProvider for ZipBundle<R> {
     }
 }
 
-impl<R: Read + Seek> Bundle for ZipBundle<R> {}
+impl<R: Read + Seek> Bundle for ZipBundle<R> {
+    fn all_files(&mut self, _status: &mut dyn StatusBackend) -> Result<Vec<String>> {
+        Ok(self.zip.file_names().map(|s| s.to_owned()).collect())
+    }
+}
diff --git a/crates/docmodel/CHANGELOG.md b/crates/docmodel/CHANGELOG.md
new file mode 100644
index 0000000000..167dff323b
--- /dev/null
+++ b/crates/docmodel/CHANGELOG.md
@@ -0,0 +1,5 @@
+# tectonic_docmodel 0.1.0 (2021-06-15)
+
+This crate isolates the file formats used by the Tectonic “document model”,
+primarily `Tectonic.toml`. This makes it possible to interact with these data
+formats without needing to link in with the full Tectonic dependency stack.
diff --git a/crates/docmodel/Cargo.toml b/crates/docmodel/Cargo.toml
new file mode 100644
index 0000000000..fb4a21f7a9
--- /dev/null
+++ b/crates/docmodel/Cargo.toml
@@ -0,0 +1,26 @@
+# Copyright 2020-2021 the Tectonic Project
+# Licensed under the MIT License.
+
+# See README.md for discussion of features (or lack thereof) in this crate.
+
+[package]
+name = "tectonic_docmodel"
+version = "0.1.0"
+authors = ["Peter Williams "]
+description = """
+The Tectonic document model and its serialization into `Tectonic.toml`.
+""" +homepage = "https://tectonic-typesetting.github.io/" +documentation = "https://docs.rs/tectonic_docmodel" +repository = "https://github.com/tectonic-typesetting/tectonic/" +readme = "README.md" +license = "MIT" +edition = "2018" + +[dependencies] +serde = { version = "^1.0", features = ["derive"] } +tectonic_errors = { path = "../errors", version =">=0.2.0,<1"} +toml = { version = "^0.5" } + +[package.metadata.internal_dep_versions] +tectonic_errors = "5c9ba661edf5ef669f24f9904f99cca369d999e7" diff --git a/crates/docmodel/README.md b/crates/docmodel/README.md new file mode 100644 index 0000000000..8aadce4298 --- /dev/null +++ b/crates/docmodel/README.md @@ -0,0 +1,19 @@ +# The `tectonic_docmodel` crate + +[![](http://meritbadge.herokuapp.com/tectonic_docmodel)](https://crates.io/crates/tectonic_docmodel) + +This crate is part of [the Tectonic +project](https://tectonic-typesetting.github.io/en-US/). It implements the +Tectonic document model, including the [`Tectonic.toml`] file. + +[`Tectonic.toml`]: https://tectonic-typesetting.github.io/book/latest/ref/tectonic-toml.html + +- [API documentation](https://docs.rs/tectonic_docmodel/). +- [Main Git repository](https://github.com/tectonic-typesetting/tectonic/). + + +## Cargo features + +This crate does not currently provides any [Cargo features][features]. + +[features]: https://doc.rust-lang.org/cargo/reference/features.html diff --git a/src/document.rs b/crates/docmodel/src/document.rs similarity index 53% rename from src/document.rs rename to crates/docmodel/src/document.rs index cc6efcc035..3f9f17afb9 100644 --- a/src/document.rs +++ b/crates/docmodel/src/document.rs @@ -1,33 +1,43 @@ -// Copyright 2020 the Tectonic Project +// Copyright 2020-2021 the Tectonic Project // Licensed under the MIT License. -//! Tectonic document definitions. +//! A single Tectonic document. +//! +//! Every document is part of a [`crate::workspace::Workspace`]. At the moment +//! workspaces can only contain a single document each, but in the future it +//! might become possible for one workspace to contain multiple documents. +//! +//! This crate, on its own, does not provide document-processing capabilities. +//! The main `tectonic` crate provides extension traits that set up document +//! processing, in the `tectonic::docmodel` module. use std::{ collections::HashMap, - env, - fmt::Write as FmtWrite, - fs, - io::{self, Read, Write}, + env, fs, + io::{Read, Write}, path::{Component, Path, PathBuf}, }; -use tectonic_geturl::{DefaultBackend, GetUrlBackend}; -use url::Url; - -use crate::{ - config, ctry, - driver::{OutputFormat, PassSetting, ProcessingSessionBuilder}, - errmsg, - errors::{ErrorKind, Result}, - io::{cached_itarbundle::CachedITarBundle, dirbundle::DirBundle, zipbundle::ZipBundle, Bundle}, - status::StatusBackend, - test_util, tt_error, tt_note, - workspace::WorkspaceCreator, -}; +use tectonic_errors::prelude::*; + +use crate::workspace::WorkspaceCreator; + +/// The default filesystem name for the "preamble" file of a document. +/// +/// This default can be overridden on an output-by-output basis in +/// `Tectonic.toml`. +pub const DEFAULT_PREAMBLE_FILE: &str = "_preamble.tex"; -const DEFAULT_PREAMBLE_FILE: &str = "_preamble.tex"; -const DEFAULT_INDEX_FILE: &str = "index.tex"; -const DEFAULT_POSTAMBLE_FILE: &str = "_postamble.tex"; +/// The default filesystem name for the main "index" file of a document. +/// +/// This default can be overridden on an output-by-output basis in +/// `Tectonic.toml`. 
+pub const DEFAULT_INDEX_FILE: &str = "index.tex";
+
+/// The default filesystem name for the "postamble" file of a document.
+///
+/// This default can be overridden on an output-by-output basis in
+/// `Tectonic.toml`.
+pub const DEFAULT_POSTAMBLE_FILE: &str = "_postamble.tex";
 
 /// A Tectonic document.
 #[derive(Debug)]
@@ -42,41 +52,29 @@ pub struct Document {
     /// The document name. This will be used to name build artifacts and the
     /// like, and so should be relatively filesystem-friendly. It does not
     /// need to be the same as the document title.
-    name: String,
+    pub name: String,
 
     /// The name of core TeX file bundle upon which this document is based.
     /// Either a URL or a local path.
-    bundle_loc: String,
+    pub bundle_loc: String,
 
     /// The different outputs that are created from the document source. These
     /// may have different formats (e.g., PDF and HTML) or the same format but
     /// different settings (e.g., PDF with A4 paper and PDF with US Letter
     /// paper).
-    outputs: HashMap<String, OutputProfile>,
-}
-
-fn default_outputs() -> HashMap<String, OutputProfile> {
-    let mut outputs = HashMap::new();
-    outputs.insert(
-        "default".to_owned(),
-        OutputProfile {
-            name: "default".to_owned(),
-            target_type: BuildTargetType::Pdf,
-            tex_format: "latex".to_owned(),
-            preamble_file: DEFAULT_PREAMBLE_FILE.to_owned(),
-            index_file: DEFAULT_INDEX_FILE.to_owned(),
-            postamble_file: DEFAULT_POSTAMBLE_FILE.to_owned(),
-            shell_escape: false,
-        },
-    );
-    outputs
+    pub outputs: HashMap<String, OutputProfile>,
 }
 
 impl Document {
-    /// Initialize a Document based on a TOML specification
-    pub(crate) fn new_from_toml<R: Read>(
-        src_dir: PathBuf,
-        build_dir: PathBuf,
+    /// Initialize a Document based on a TOML specification.
+    ///
+    /// This function can initialize a document directly from a TOML-formatted
+    /// data stream. In many circumstances you shouldn’t use it; instead you
+    /// should open a [`crate::workspace::Workspace`] and get a [`Document`]
+    /// through it.
+    pub fn new_from_toml<P1: Into<PathBuf>, P2: Into<PathBuf>, R: Read>(
+        src_dir: P1,
+        build_dir: P2,
         toml_data: &mut R,
     ) -> Result<Self> {
         let mut toml_text = String::new();
@@ -89,93 +87,33 @@ impl Document {
             let output = toml_output.to_runtime();
 
             if outputs.insert(output.name.clone(), output).is_some() {
-                return Err(errmsg!(
+                bail!(
                     "duplicated output name `{}` in TOML specification",
                     &toml_output.name
-                ));
+                );
             }
         }
 
         if outputs.is_empty() {
-            return Err(errmsg!(
-                "TOML specification must define at least one output"
-            ));
+            bail!("TOML specification must define at least one output");
         }
 
         Ok(Document {
-            src_dir,
-            build_dir,
+            src_dir: src_dir.into(),
+            build_dir: build_dir.into(),
             name: doc.doc.name,
             bundle_loc: doc.doc.bundle,
             outputs,
         })
     }
 
-    /// Create a new in-memory Document, based on the settings of a
-    /// WorkspaceCreator object.
-    pub(crate) fn new_for_creator(
-        wc: &WorkspaceCreator,
-        config: &config::PersistentConfig,
-        status: &mut dyn StatusBackend,
-    ) -> Result<Self> {
-        let src_dir = wc.root_dir().to_owned();
-
-        let mut build_dir = src_dir.clone();
-        build_dir.push("build");
-
-        // We're a bit roundabout in how we figure out the name of the
-        // containing src_dir, in an effort to Do The Right Thing with symlinks
-        // and whatnot.
- let name = { - let mut name = "document".to_owned(); - let mut tried_src_path = false; - - if let Some(Component::Normal(t)) = src_dir.components().next_back() { - tried_src_path = true; - - if let Some(s) = t.to_str() { - name = s.to_owned(); - } - } - - if !tried_src_path { - if let Ok(cwd) = env::current_dir() { - let full_path = cwd.join(&src_dir); - - if let Some(Component::Normal(t)) = full_path.components().next_back() { - if let Some(s) = t.to_str() { - name = s.to_owned(); - } - } - } - } - - name - }; - - // Determine the bundle URL that we'll put in as the default. - - let bundle_loc = if config::is_config_test_mode_activated() { - "test-bundle".to_owned() - } else { - let mut gub = DefaultBackend::default(); - gub.resolve_url(config.default_bundle_loc(), status)? - }; - - // All done. - Ok(Document { - src_dir, - build_dir, - name, - bundle_loc, - outputs: default_outputs(), - }) - } - - /// Write out this document's state as a new TOML file. This should only be - /// used when creating a totally new document; otherwise TOML rewriting - /// should be used. - pub(crate) fn create_toml(&self) -> Result<()> { + /// Write out this document's state as a fresh `Tectonic.toml` file in the + /// document’s [`Self::src_dir`]. + /// + /// This should only be used when creating a totally new document. Otherwise + /// TOML rewriting should be used, to preserve the user's file structure, + /// comments, etc. + pub fn create_toml(&self) -> Result<()> { let outputs = self .outputs .values() @@ -195,82 +133,36 @@ impl Document { let mut toml_path = self.src_dir.clone(); toml_path.push("Tectonic.toml"); - let mut toml_file = ctry!(fs::OpenOptions::new() + let mut toml_file = atry!(fs::OpenOptions::new() .create_new(true) .write(true) .open(&toml_path); - "couldn\'t create `{}`", toml_path.display() + ["couldn\'t create `{}`", toml_path.display()] ); toml_file.write_all(toml_text.as_bytes())?; Ok(()) } -} - -/// Persistent settings for a document build. -#[derive(Clone, Debug)] -pub struct OutputProfile { - name: String, - target_type: BuildTargetType, - tex_format: String, - preamble_file: String, - index_file: String, - postamble_file: String, - shell_escape: bool, -} - -/// The output target type of a document build. -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub enum BuildTargetType { - /// Output to the Portable Document Format (PDF). - Pdf, -} - -/// Temporary options for a document build. -#[derive(Clone, Debug, Default)] -pub struct BuildOptions { - format_cache_path: Option, - only_cached: bool, - keep_intermediates: bool, - keep_logs: bool, - print_stdout: bool, - open: bool, -} - -impl BuildOptions { - pub fn format_cache_path>(&mut self, p: P) -> &mut Self { - self.format_cache_path = Some(p.as_ref().to_owned()); - self - } - - pub fn only_cached(&mut self, value: bool) -> &mut Self { - self.only_cached = value; - self - } - pub fn keep_intermediates(&mut self, value: bool) -> &mut Self { - self.keep_intermediates = value; - self - } - - pub fn keep_logs(&mut self, value: bool) -> &mut Self { - self.keep_logs = value; - self - } - - pub fn print_stdout(&mut self, value: bool) -> &mut Self { - self.print_stdout = value; - self + /// Get this document's toplevel source directory. + /// + /// Note that this directory is the one containing the file `Tectonic.toml`. + /// The actual document source is contained in a subdirectory named `src`. 
+ pub fn src_dir(&self) -> &Path { + &self.src_dir } - pub fn open(&mut self, value: bool) -> &mut Self { - self.open = value; - self + /// Get this document's build directory. + /// + /// This is the directory where persistent files associated with the + /// document build are stored. By default, it is a subdirectory of + /// [`Self::src_dir`] named `build`. + pub fn build_dir(&self) -> &Path { + &self.build_dir } -} -impl Document { /// Iterate over the names of the output profiles defined for this document. + /// /// These may have different formats (e.g., PDF and HTML) or the same format /// but different settings (e.g., PDF with A4 paper and PDF with US Letter /// paper). @@ -278,153 +170,125 @@ impl Document { self.outputs.keys().map(|k| k.as_ref()) } - /// Get default the build options associated with an output profile. + /// Get the path of the "main" output file for the given output profile. /// - /// Panics if the output name is not one of the ones associated with this - /// document. - pub fn build_options_for(&self, _output_profile: &str) -> BuildOptions { - BuildOptions::default() + /// The exact meaning of "main" will depend on the output format. + pub fn output_main_file(&self, profile_name: &str) -> PathBuf { + let profile = self.outputs.get(profile_name).unwrap(); + + let mut p = self.build_dir.clone(); + p.push(&profile.name); + p.push(&profile.name); + p.set_extension(match profile.target_type { + BuildTargetType::Pdf => "pdf", + }); + p } +} - /// Get the bundle used by this document. - pub fn bundle( - &self, - only_cached: bool, - status: &mut dyn StatusBackend, - ) -> Result> { - fn bundle_from_path(p: PathBuf) -> Result> { - if p.is_dir() { - Ok(Box::new(DirBundle::new(p))) - } else { - Ok(Box::new(ZipBundle::open(p)?)) - } - } +/// Persistent settings for a document build. +#[derive(Clone, Debug)] +pub struct OutputProfile { + /// The name of this profile. + pub name: String, - if config::is_config_test_mode_activated() { - Ok(Box::new(test_util::TestBundle::default())) - } else if let Ok(url) = Url::parse(&self.bundle_loc) { - if url.scheme() != "file" { - let bundle = CachedITarBundle::new(&self.bundle_loc, only_cached, None, status)?; - Ok(Box::new(bundle)) - } else { - let file_path = url.to_file_path().map_err(|_| { - io::Error::new(io::ErrorKind::InvalidInput, "failed to parse local path") - })?; - bundle_from_path(file_path) - } - } else { - bundle_from_path(Path::new(&self.bundle_loc).to_owned()) - } - } + /// The type of output targeted by this profile. + pub target_type: BuildTargetType, - /// Build one of the document’s outputs. - pub fn build( - &self, - output_profile: &str, - options: &BuildOptions, - status: &mut dyn StatusBackend, - ) -> Result { - let profile = self.outputs.get(output_profile).unwrap(); - - let output_format = match profile.target_type { - BuildTargetType::Pdf => OutputFormat::Pdf, - }; + /// The name of the TeX format used by this profile. + pub tex_format: String, - let mut input_buffer = String::new(); - if !profile.preamble_file.is_empty() { - writeln!(input_buffer, "\\input{{{}}}", profile.preamble_file)?; - } - if !profile.index_file.is_empty() { - writeln!(input_buffer, "\\input{{{}}}", profile.index_file)?; - } - if !profile.postamble_file.is_empty() { - writeln!(input_buffer, "\\input{{{}}}", profile.postamble_file)?; - } + /// The name of the preamble file within the `src` directory. 
+ pub preamble_file: String, - let mut sess_builder = ProcessingSessionBuilder::default(); - sess_builder - .output_format(output_format) - .format_name(&profile.tex_format) - .build_date(std::time::SystemTime::now()) - .pass(PassSetting::Default) - .primary_input_buffer(input_buffer.as_bytes()) - .tex_input_name(output_profile) - .keep_logs(options.keep_logs) - .keep_intermediates(options.keep_intermediates) - .print_stdout(options.print_stdout); - - if profile.shell_escape { - // For now, this is the only option we allow. - sess_builder.shell_escape_with_temp_dir(); - } + /// The name of the index (main) file within the `src` directory. + pub index_file: String, - if options.only_cached { - tt_note!(status, "using only cached resource files"); - } - sess_builder.bundle(self.bundle(options.only_cached, status)?); + /// The name of the postamble file within the `src` directory. + pub postamble_file: String, - // keep intermed, keep logs, print stdout + /// Whether TeX's shell-escape feature should be activated in this profile. + /// + /// Note that besides creating portability and reproducibility issues, + /// shell-escape opens enormous security holes. It should only ever be + /// activated with fully trusted input. + pub shell_escape: bool, +} - if let Some(ref p) = options.format_cache_path { - sess_builder.format_cache_path(p); - } +/// The output target type of a document build. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum BuildTargetType { + /// Output to the Portable Document Format (PDF). + Pdf, +} - let mut tex_dir = self.src_dir.clone(); - tex_dir.push("src"); - sess_builder.filesystem_root(&tex_dir); +impl Document { + /// Create a new in-memory Document, based on the settings of a + /// WorkspaceCreator object. + pub(crate) fn create_for(wc: &WorkspaceCreator, bundle_loc: String) -> Result { + let src_dir = wc.root_dir.clone(); - let mut output_dir = self.build_dir.clone(); - output_dir.push(output_profile); - ctry!( - fs::create_dir_all(&output_dir); - "couldn\'t create output directory `{}`", output_dir.display() - ); - sess_builder.output_dir(&output_dir); - - let mut sess = sess_builder.create(status)?; - let result = sess.run(status); - - if let Err(e) = &result { - if let ErrorKind::EngineError(engine) = e.kind() { - let output = sess.get_stdout_content(); - - if output.is_empty() { - tt_error!( - status, - "something bad happened inside {}, but no output was logged", - engine - ); - } else { - tt_error!( - status, - "something bad happened inside {}; its output follows:\n", - engine - ); - status.dump_error_logs(&output); + let mut build_dir = src_dir.clone(); + build_dir.push("build"); + + // We're a bit roundabout in how we figure out the name of the + // containing src_dir, in an effort to Do The Right Thing with symlinks + // and whatnot. 
+ let name = { + let mut name = "document".to_owned(); + let mut tried_src_path = false; + + if let Some(Component::Normal(t)) = src_dir.components().next_back() { + tried_src_path = true; + + if let Some(s) = t.to_str() { + name = s.to_owned(); } } - } else if options.open { - let out_file = - output_dir - .join(&profile.name) - .with_extension(match profile.target_type { - BuildTargetType::Pdf => "pdf", - }); - tt_note!(status, "opening `{}`", out_file.display()); - if let Err(e) = open::that(&out_file) { - tt_error!( - status, - "failed to open `{}` with system handler", - out_file.display(); - e.into() - ) + + if !tried_src_path { + if let Ok(cwd) = env::current_dir() { + let full_path = cwd.join(&src_dir); + + if let Some(Component::Normal(t)) = full_path.components().next_back() { + if let Some(s) = t.to_str() { + name = s.to_owned(); + } + } + } } - } - result.map(|_| 0) + name + }; + + // All done. + Ok(Document { + src_dir, + build_dir, + name, + bundle_loc, + outputs: crate::document::default_outputs(), + }) } } +pub(crate) fn default_outputs() -> HashMap { + let mut outputs = HashMap::new(); + outputs.insert( + "default".to_owned(), + OutputProfile { + name: "default".to_owned(), + target_type: BuildTargetType::Pdf, + tex_format: "latex".to_owned(), + preamble_file: DEFAULT_PREAMBLE_FILE.to_owned(), + index_file: DEFAULT_INDEX_FILE.to_owned(), + postamble_file: DEFAULT_POSTAMBLE_FILE.to_owned(), + shell_escape: false, + }, + ); + outputs +} + /// The concrete syntax for saving document state, wired up via serde. mod syntax { use super::{DEFAULT_INDEX_FILE, DEFAULT_POSTAMBLE_FILE, DEFAULT_PREAMBLE_FILE}; diff --git a/crates/docmodel/src/lib.rs b/crates/docmodel/src/lib.rs new file mode 100644 index 0000000000..c4fa9b8334 --- /dev/null +++ b/crates/docmodel/src/lib.rs @@ -0,0 +1,22 @@ +// Copyright 2020-2021 the Tectonic Project +// Licensed under the MIT License. + +#![deny(missing_docs)] + +//! The Tectonic document model and its serialization into `Tectonic.toml`. +//! +//! This crate provides data structures and serialization support for the +//! Tectonic document model and its expression in the `Tectonic.toml` file. It +//! only provides data access: you can use this crate without needing to link +//! with the full Tectonic engines and all of the dependencies they drag in. The +//! main `tectonic` crate provides extension traits that attach actual +//! document-processing capabilities to these data structures. +//! +//! Your primary entrypoint to this crate will likely be +//! [`workspace::Workspace::open_from_environment`], which will attempt to load +//! up a workspace by searching the process’ current directory and parents for a +//! `Tectonic.toml` file. There is also [`workspace::WorkspaceCreator`] for +//! creating new workspaces from scratch. + +pub mod document; +pub mod workspace; diff --git a/src/workspace.rs b/crates/docmodel/src/workspace.rs similarity index 68% rename from src/workspace.rs rename to crates/docmodel/src/workspace.rs index 43f716445f..ac9dcdb1b0 100644 --- a/src/workspace.rs +++ b/crates/docmodel/src/workspace.rs @@ -1,4 +1,4 @@ -// Copyright 2020 the Tectonic Project +// Copyright 2020-2021 the Tectonic Project // Licensed under the MIT License. //! A Tectonic document-build workspace. @@ -8,17 +8,24 @@ //! world where one workspace can contain multiple documents. 
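The `NoWorkspaceFoundError` type introduced just below makes the "no workspace" case programmatically detectable. A sketch of the calling pattern this enables — assuming the workspace's `first_document` accessor; not part of the patch:

```rust
use tectonic_docmodel::workspace::{NoWorkspaceFoundError, Workspace};

fn main() {
    match Workspace::open_from_environment() {
        Ok(ws) => {
            let doc = ws.first_document();
            println!("document `{}` uses bundle `{}`", doc.name, doc.bundle_loc);
        }
        // The error is an anyhow-style error, so we can downcast it.
        Err(e) if e.downcast_ref::<NoWorkspaceFoundError>().is_some() => {
            eprintln!("no `Tectonic.toml` in this directory or any parent");
        }
        Err(e) => eprintln!("error: {}", e),
    }
}
```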
 use std::{
-    env, fs,
+    env,
+    error::Error,
+    fmt, fs,
     io::{self, Write},
-    path::{Path, PathBuf},
+    path::PathBuf,
 };
+use tectonic_errors::prelude::*;
 
-use crate::{
-    config::PersistentConfig, ctry, document::Document, errmsg, errors::Result,
-    status::StatusBackend,
-};
+use crate::document::Document;
 
 /// A Tectonic workspace.
+///
+/// For the time being, a Workspace is just a thin wrapper to provide access to
+/// a `Document` instance. In the future, it might become possible for one
+/// workspace to contain multiple documents.
+///
+/// In most cases, you will want to create a [`Workspace`] by opening an
+/// existing one using [`Workspace::open_from_environment`].
 #[derive(Debug)]
 pub struct Workspace {
     /// The root directory of the workspace.
@@ -45,7 +52,13 @@ impl Workspace {
         &mut self.doc
     }
 
-    /// Open up a workspace baced on the current process environment.
+    /// Open up a workspace based on the current process environment.
+    ///
+    /// This function searches the current directory and its parents for a
+    /// `Tectonic.toml` file. Because workspaces can currently only contain a
+    /// single document, the search stops when the first such file is found. If
+    /// no such file is found, an error downcastable into
+    /// [`NoWorkspaceFoundError`] is returned.
     pub fn open_from_environment() -> Result<Self> {
         let mut root_dir = env::current_dir()?;
         root_dir.push("tmp"); // simplifies loop logic
@@ -70,17 +83,28 @@ impl Workspace {
             return Ok(Workspace { root_dir, doc });
         }
 
-        Err(errmsg!(
-            "No `Tectonic.toml` found in current directory or any of its parents"
-        ))
+        Err(NoWorkspaceFoundError {}.into())
     }
 }
 
+/// An error for when the environment does not seem to contain a Tectonic
+/// workspace.
+#[derive(Debug)]
+pub struct NoWorkspaceFoundError {}
+
+impl fmt::Display for NoWorkspaceFoundError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> StdResult<(), fmt::Error> {
+        write!(
+            f,
+            "no `Tectonic.toml` found in the current directory or any of its parents"
+        )
+    }
+}
+
+impl Error for NoWorkspaceFoundError {}
+
 /// A type for creating a new workspace.
 #[derive(Debug)]
 pub struct WorkspaceCreator {
     /// The root directory of the workspace to be created.
-    root_dir: PathBuf,
+    pub(crate) root_dir: PathBuf,
 }
 
 impl WorkspaceCreator {
@@ -92,19 +116,15 @@ impl WorkspaceCreator {
     }
 
     /// Consume this object and attempt to create the new workspace.
-    pub fn create(
-        self,
-        config: &PersistentConfig,
-        status: &mut dyn StatusBackend,
-    ) -> Result<Workspace> {
-        let doc = Document::new_for_creator(&self, config, status)?;
+    pub fn create(self, bundle_loc: String) -> Result<Workspace> {
+        let doc = Document::create_for(&self, bundle_loc)?;
 
         let mut tex_dir = self.root_dir.clone();
         tex_dir.push("src");
 
-        ctry!(
+        atry!(
             fs::create_dir_all(&tex_dir);
-            "couldn\'t create workspace directory `{}`", tex_dir.display()
+            ["couldn\'t create workspace directory `{}`", tex_dir.display()]
         );
 
         doc.create_toml()?;
@@ -150,9 +170,4 @@ impl WorkspaceCreator {
             doc,
         })
     }
-
-    /// Get the root directory of the workspace.
-    pub fn root_dir(&self) -> &Path {
-        &self.root_dir
-    }
 }
diff --git a/crates/engine_xetex/CHANGELOG.md b/crates/engine_xetex/CHANGELOG.md
index 7f3d8f3010..e8a6296601 100644
--- a/crates/engine_xetex/CHANGELOG.md
+++ b/crates/engine_xetex/CHANGELOG.md
@@ -1,3 +1,15 @@
+# tectonic_engine_xetex 0.1.1 (2021-06-15)
+
+- Fix SyncTeX output (@hulloanson, @pkgw, #720, #744). We needed to include
+  absolute paths and properly deal with file renames, etc.
The only way to + really do this right is to have the I/O backend provide filesystem paths when + it has them, so we've extended the lower-level crates to make this possible. +- Fix the implementation of some special XeTeX commands, reported by @burrbull + (@pkgw, #714, #783). This requires a bump in the format file serial number. We + believe that this fix includes a fix to an upstream XeTeX bug, which has been + reported. + + # tectonic_engine_xetex 0.1.0 (2021-06-03) This crate introduces the XeTeX engine as a standalone crate, building on the diff --git a/crates/engine_xetex/Cargo.toml b/crates/engine_xetex/Cargo.toml index 89e1e1924e..99eeb8b665 100644 --- a/crates/engine_xetex/Cargo.toml +++ b/crates/engine_xetex/Cargo.toml @@ -5,7 +5,7 @@ [package] name = "tectonic_engine_xetex" -version = "0.1.0" +version = "0.1.1" authors = ["Peter Williams "] description = """ The [XeTeX](http://xetex.sourceforge.net/) engine as a reusable crate. @@ -20,7 +20,7 @@ links = "tectonic_engine_xetex" [dependencies] libc = "^0.2" -tectonic_bridge_core = { path = "../bridge_core", version =">=0.1.0,<1"} +tectonic_bridge_core = { path = "../bridge_core", version =">=0.2.0,<1"} tectonic_bridge_flate = { path = "../bridge_flate", version =">=0.1.1,<1"} tectonic_bridge_graphite2 = { path = "../bridge_graphite2", version =">=0.2.0,<1"} tectonic_bridge_harfbuzz = { path = "../bridge_harfbuzz", version =">=0.2.0,<1"} @@ -40,7 +40,7 @@ external-harfbuzz = [ ] [package.metadata.internal_dep_versions] -tectonic_bridge_core = "thiscommit:2021-06-02:ieXoo6ne" +tectonic_bridge_core = "4e16bf963700aae59772a6fb223981ceaa9b5f57" tectonic_bridge_flate = "5933308152efb6ba206b4dc01ab6814063b835c0" tectonic_bridge_graphite2 = "2c1ffcd702a662c003bd3d7d0ca4d169784cb6ad" tectonic_bridge_harfbuzz = "2c1ffcd702a662c003bd3d7d0ca4d169784cb6ad" diff --git a/crates/engine_xetex/src/lib.rs b/crates/engine_xetex/src/lib.rs index 159209883d..4df939cc8f 100644 --- a/crates/engine_xetex/src/lib.rs +++ b/crates/engine_xetex/src/lib.rs @@ -32,7 +32,7 @@ use tectonic_errors::prelude::*; /// should munge this serial number in the filename, or something along those /// lines, to make sure that when the engine is updated you don’t attempt to /// reuse old files. -pub const FORMAT_SERIAL: u32 = 29; +pub const FORMAT_SERIAL: u32 = 30; /// A possible outcome from a (Xe)TeX engine invocation. 
/// diff --git a/crates/engine_xetex/xetex/xetex-ext.c b/crates/engine_xetex/xetex/xetex-ext.c index d214716e49..93f039325b 100644 --- a/crates/engine_xetex/xetex/xetex-ext.c +++ b/crates/engine_xetex/xetex/xetex-ext.c @@ -1021,10 +1021,10 @@ gr_print_font_name(int32_t what, void* pEngine, int32_t param1, int32_t param2) char* name = NULL; XeTeXLayoutEngine engine = (XeTeXLayoutEngine)pEngine; switch (what) { - case XeTeX_feature_name: + case XETEX_FEATURE_NAME_CODE: name = getGraphiteFeatureLabel(engine, param1); break; - case XeTeX_selector_name: + case XETEX_SELECTOR_NAME_CODE: name = getGraphiteFeatureSettingLabel(engine, param1, param2); break; } @@ -2069,9 +2069,10 @@ aat_font_get_named_1(int what, CFDictionaryRef attributes, int param) void aat_print_font_name(int what, CFDictionaryRef attributes, int param1, int param2) { + /* Tectonic: this function is called for XETEX_VARIATION_NAME_CODE but doesn't handle it */ #ifdef XETEX_MAC CFStringRef name = NULL; - if (what == XeTeX_feature_name || what == XeTeX_selector_name) { + if (what == XETEX_FEATURE_NAME_CODE || what == XETEX_SELECTOR_NAME_CODE) { CTFontRef font = fontFromAttributes(attributes); CFArrayRef features = CTFontCopyFeatures(font); if (features) { @@ -2079,7 +2080,7 @@ aat_print_font_name(int what, CFDictionaryRef attributes, int param1, int param2 kCTFontFeatureTypeIdentifierKey, param1); if (feature) { - if (what == XeTeX_feature_name) + if (what == XETEX_FEATURE_NAME_CODE) name = CFDictionaryGetValue(feature, kCTFontFeatureTypeNameKey); else { CFArrayRef selectors = CFDictionaryGetValue(feature, kCTFontFeatureTypeSelectorsKey); diff --git a/crates/engine_xetex/xetex/xetex-ext.h b/crates/engine_xetex/xetex/xetex-ext.h index b0872b3f9a..78f15bb1cc 100644 --- a/crates/engine_xetex/xetex/xetex-ext.h +++ b/crates/engine_xetex/xetex/xetex-ext.h @@ -58,25 +58,22 @@ typedef void* CFDictionaryRef; /* dummy declaration just so the stubs can compil #define pdfbox_none 6 /* command codes for XeTeX extension commands */ -#define XeTeX_count_glyphs 1 -#define XeTeX_count_features 8 -#define XeTeX_feature_code 9 -#define XeTeX_find_feature_by_name 10 -#define XeTeX_is_exclusive_feature 11 -#define XeTeX_count_selectors 12 -#define XeTeX_selector_code 13 -#define XeTeX_find_selector_by_name 14 -#define XeTeX_is_default_selector 15 -#define XeTeX_OT_count_scripts 16 -#define XeTeX_OT_count_languages 17 -#define XeTeX_OT_count_features 18 -#define XeTeX_OT_script_code 19 -#define XeTeX_OT_language_code 20 -#define XeTeX_OT_feature_code 21 -#define XeTeX_map_char_to_glyph_code 22 - -#define XeTeX_feature_name 8 -#define XeTeX_selector_name 9 +#define XeTeX_count_glyphs (XETEX_COUNT_GLYPHS_CODE - XETEX_INT) +#define XeTeX_count_features (XETEX_COUNT_FEATURES_CODE - XETEX_INT) +#define XeTeX_feature_code (XETEX_FEATURE_CODE_CODE - XETEX_INT) +#define XeTeX_find_feature_by_name (XETEX_FIND_FEATURE_BY_NAME_CODE - XETEX_INT) +#define XeTeX_is_exclusive_feature (XETEX_IS_EXCLUSIVE_FEATURE_CODE - XETEX_INT) +#define XeTeX_count_selectors (XETEX_COUNT_SELECTORS_CODE - XETEX_INT) +#define XeTeX_selector_code (XETEX_SELECTOR_CODE_CODE - XETEX_INT) +#define XeTeX_find_selector_by_name (XETEX_FIND_SELECTOR_BY_NAME_CODE - XETEX_INT) +#define XeTeX_is_default_selector (XETEX_IS_DEFAULT_SELECTOR_CODE - XETEX_INT) +#define XeTeX_OT_count_scripts (XETEX_OT_COUNT_SCRIPTS_CODE - XETEX_INT) +#define XeTeX_OT_count_languages (XETEX_OT_COUNT_LANGUAGES_CODE - XETEX_INT) +#define XeTeX_OT_count_features (XETEX_OT_COUNT_FEATURES_CODE - XETEX_INT) +#define 
XeTeX_OT_script_code (XETEX_OT_SCRIPT_CODE - XETEX_INT) +#define XeTeX_OT_language_code (XETEX_OT_LANGUAGE_CODE - XETEX_INT) +#define XeTeX_OT_feature_code (XETEX_OT_FEATURE_CODE - XETEX_INT) +#define XeTeX_map_char_to_glyph_code (XETEX_MAP_CHAR_TO_GLYPH_CODE - XETEX_INT) /* accessing info in a native_word_node */ #define width_offset 1 diff --git a/crates/engine_xetex/xetex/xetex-ini.c b/crates/engine_xetex/xetex/xetex-ini.c index b58904e6bf..3039e27965 100644 --- a/crates/engine_xetex/xetex/xetex-ini.c +++ b/crates/engine_xetex/xetex/xetex-ini.c @@ -4199,8 +4199,8 @@ tt_run_engine(const char *dump_name, const char *input_file_name, time_t build_d primitive("XeTeXisdefaultselector", LAST_ITEM, XETEX_IS_DEFAULT_SELECTOR_CODE); primitive("XeTeXvariationname", CONVERT, XETEX_VARIATION_NAME_CODE); - primitive("XeTeXfeaturename", CONVERT, XeTeX_feature_name); - primitive("XeTeXselectorname", CONVERT, XeTeX_selector_name); + primitive("XeTeXfeaturename", CONVERT, XETEX_FEATURE_NAME_CODE); + primitive("XeTeXselectorname", CONVERT, XETEX_SELECTOR_NAME_CODE); primitive("XeTeXOTcountscripts", LAST_ITEM, XETEX_OT_COUNT_SCRIPTS_CODE); primitive("XeTeXOTcountlanguages", LAST_ITEM, XETEX_OT_COUNT_LANGUAGES_CODE); diff --git a/crates/engine_xetex/xetex/xetex-io.c b/crates/engine_xetex/xetex/xetex-io.c index 56c989005e..b84ce90bf4 100644 --- a/crates/engine_xetex/xetex/xetex-io.c +++ b/crates/engine_xetex/xetex/xetex-io.c @@ -13,6 +13,11 @@ char *name_of_input_file = NULL; +// Tectonic: This buffer is used for SyncTeX, which needs to emit absolute +// filesystem paths -- which are difficult to derive in our virtualized I/O +// system. The most backwards-compatible way to expose this information to the +// engine was to add the `ttstub_get_last_input_abspath()` API used below. +char abspath_of_input_file[1024] = ""; rust_input_handle_t tt_xetex_open_input (int filefmt) @@ -27,6 +32,10 @@ tt_xetex_open_input (int filefmt) if (handle == NULL) return NULL; + if (ttstub_get_last_input_abspath(abspath_of_input_file, sizeof(abspath_of_input_file)) < 1) { + abspath_of_input_file[0] = '\0'; + } + name_length = strlen(name_of_file); free(name_of_input_file); name_of_input_file = xstrdup(name_of_file); diff --git a/crates/engine_xetex/xetex/xetex-io.h b/crates/engine_xetex/xetex/xetex-io.h index e367840e0d..6e66061c1e 100644 --- a/crates/engine_xetex/xetex/xetex-io.h +++ b/crates/engine_xetex/xetex/xetex-io.h @@ -21,6 +21,7 @@ typedef struct { BEGIN_EXTERN_C extern char *name_of_input_file; +extern char abspath_of_input_file[]; extern const uint32_t offsetsFromUTF8[6]; extern const uint8_t bytesFromUTF8[256]; extern const uint8_t firstByteMark[7]; diff --git a/crates/engine_xetex/xetex/xetex-synctex.c b/crates/engine_xetex/xetex/xetex-synctex.c index e09ca40a9e..0403d790a6 100644 --- a/crates/engine_xetex/xetex/xetex-synctex.c +++ b/crates/engine_xetex/xetex/xetex-synctex.c @@ -9,6 +9,7 @@ #include "tectonic_bridge_core.h" #include +#include #include #define SYNCTEX_VERSION 1 @@ -129,14 +130,9 @@ static struct { static char * get_current_name (void) { - /* This used to always make the pathname absolute but I'm getting rid of - * that since it ends up adding dependencies on a bunch of functions I - * don't want to have to deal with. */ - - if (!name_of_input_file) - return xstrdup(""); - - return xstrdup(name_of_input_file); + /* Tectonic: this used to make pathnames absolute, but in the virtualized + * I/O system that information has to be provided externally. 
*/ + return xstrdup(abspath_of_input_file); } diff --git a/crates/geturl/CHANGELOG.md b/crates/geturl/CHANGELOG.md index 9fc44c510c..737b18e0da 100644 --- a/crates/geturl/CHANGELOG.md +++ b/crates/geturl/CHANGELOG.md @@ -1,3 +1,8 @@ +# tectonic_geturl 0.2.1 (2021-06-15) + +- Fix a deprecation warning in the latest version of `reqwest`. + + # tectonic_geturl 0.2.0 (2021-06-03) - Expose a new `native-tls-vendored` Cargo feature, to allow people to control diff --git a/crates/geturl/Cargo.toml b/crates/geturl/Cargo.toml index 22bf83a481..e438afe2d2 100644 --- a/crates/geturl/Cargo.toml +++ b/crates/geturl/Cargo.toml @@ -3,7 +3,7 @@ [package] name = "tectonic_geturl" -version = "0.2.0" +version = "0.2.1" authors = ["Peter Williams "] description = """ A generic interface for HTTP GETs and byte-range requests, with pluggable backends. diff --git a/crates/io_base/CHANGELOG.md b/crates/io_base/CHANGELOG.md index edfdda8682..57a47c90b6 100644 --- a/crates/io_base/CHANGELOG.md +++ b/crates/io_base/CHANGELOG.md @@ -1,3 +1,18 @@ +# tectonic_io_base 0.3.0 (2021-06-15) + +- Add new "abspath" methods to the IoProvider trait. We need a new API to + generate proper SyncTeX output in the XeTeX engine, and this is the best + approach that we could devise that does a good job of maintaining backwards + compatibility. However, implementors of the IoProvider trait that delegate to + inner implementations will need to make sure to explicitly implement the new + methods in order to provide correct behavior (#762). +- Add a new `app_dirs` module for system-wide knowledge of per-user directories + (@pkgw, #768). It's valuable to put this low in the dependency stack so that + higher-level crates can just "know" where to go for per-user files such as the + bundle cache. +- Correct some broken internal links in the docs. + + # tectonic_io_base 0.2.0 (2021-06-03) - BREAKING: use `&str` for TeX paths rather than `OsStr`. In principle this diff --git a/crates/io_base/Cargo.toml b/crates/io_base/Cargo.toml index 8f6ace6185..4cf5b635f6 100644 --- a/crates/io_base/Cargo.toml +++ b/crates/io_base/Cargo.toml @@ -3,7 +3,7 @@ [package] name = "tectonic_io_base" -version = "0.2.0" +version = "0.3.0" authors = ["Peter Williams "] description = """ Basic types for Tectonic's pluggable I/O backend system @@ -16,6 +16,7 @@ license = "MIT" edition = "2018" [dependencies] +app_dirs2 = "^2.3" flate2 = { version = "^1.0.19", default-features = false, features = ["zlib"] } libc = "^0.2" # for EISDIR :-( sha2 = "^0.9" # for digest computations diff --git a/crates/io_base/src/app_dirs.rs b/crates/io_base/src/app_dirs.rs new file mode 100644 index 0000000000..363985fb06 --- /dev/null +++ b/crates/io_base/src/app_dirs.rs @@ -0,0 +1,66 @@ +// Copyright 2019-2021 the Tectonic Project +// Licensed under the MIT License. + +//! Default directories for per-user Tectonic files. +//! +//! If you want to use your own directories for your own application, that's OK, +//! but if you want to look at Tectonic’s default configuration and/or cache +//! data, these are the places to go. + +use app_dirs2::AppDataType; +use std::path::PathBuf; +use tectonic_errors::prelude::*; + +/// The instance of the `app_dirs2` crate that this crate links to. +pub use app_dirs2; + +/// Maybe we should just make this public? But we preserve some flexibility by +/// not doing so just yet. +const APP_INFO: app_dirs2::AppInfo = app_dirs2::AppInfo { + name: "Tectonic", + author: "TectonicProject", +}; + +/// Get the directory for per-user Tectonic configuration files. 
+/// +/// This constructs the path but does not ensure that the directory actually +/// exists. The function [`ensure_user_config`] makes sure that the directory is +/// created. +/// +/// This function is currently implemented with [`app_dirs2::get_app_root`] using +/// the `UserConfig` data type. Return values have the form: +/// +/// - Windows: `%APPDATA%\TectonicProject\Tectonic`, where `%APPDATA%` is +/// something like `C:\Users\knuth\AppData\Roaming`. +/// - macOS: `$HOME/Library/Application Support/Tectonic` +/// - Others: `$XDG_CONFIG_HOME/Tectonic` if defined, otherwise +/// `$HOME/.config/Tectonic` +pub fn get_user_config() -> Result { + Ok(app_dirs2::get_app_root(AppDataType::UserConfig, &APP_INFO)?) +} + +/// Get the directory for per-user Tectonic configuration files, creating it if needed. +/// +/// This is largely the same as [`get_user_config`], but ensures that the +/// returned directory actually exists. +pub fn ensure_user_config() -> Result { + Ok(app_dirs2::app_root(AppDataType::UserConfig, &APP_INFO)?) +} + +/// Get a directory for per-user Tectonic cache files, creating it if needed. +/// +/// The *path* argument may include subdirectories, but the directory separator +/// should be a forward slash on all platforms. It may be an empty string if you +/// want to get the toplevel user cache directory. +/// +/// This function is currently implemented with [`app_dirs2::app_dir`] using the +/// `UserCache` data type. Return values have the form: +/// +/// - Windows: `%LOCALAPPDATA%\TectonicProject\Tectonic`, where `%LOCALAPPDATA%` +/// is something like `C:\Users\knuth\AppData\Local`. +/// - macOS: `$HOME/Library/Caches/Tectonic` +/// - Others: `$XDG_CACHE_HOME/Tectonic` if defined, otherwise +/// `$HOME/.cache/Tectonic` +pub fn ensure_user_cache_dir(path: &str) -> Result { + Ok(app_dirs2::app_dir(AppDataType::UserCache, &APP_INFO, path)?) +} diff --git a/crates/io_base/src/filesystem.rs b/crates/io_base/src/filesystem.rs index f867fc3832..ea7f24bd81 100644 --- a/crates/io_base/src/filesystem.rs +++ b/crates/io_base/src/filesystem.rs @@ -1,10 +1,11 @@ -// Copyright 2016-2020 the Tectonic Project +// Copyright 2016-2021 the Tectonic Project // Licensed under the MIT License. //! Tectonic I/O implementations for `std::fs` types. 
 use std::{
     collections::HashSet,
+    env,
     fs::File,
     io::{self, BufReader, Seek, SeekFrom},
     path::{Path, PathBuf},
@@ -42,18 +43,32 @@ impl FilesystemPrimaryInputIo {
 }
 
 impl IoProvider for FilesystemPrimaryInputIo {
-    fn input_open_primary(&mut self, _status: &mut dyn StatusBackend) -> OpenResult<InputHandle> {
+    fn input_open_primary(&mut self, status: &mut dyn StatusBackend) -> OpenResult<InputHandle> {
+        match self.input_open_primary_with_abspath(status) {
+            OpenResult::Ok((ih, _path)) => OpenResult::Ok(ih),
+            OpenResult::Err(e) => OpenResult::Err(e),
+            OpenResult::NotAvailable => OpenResult::NotAvailable,
+        }
+    }
+
+    fn input_open_primary_with_abspath(
+        &mut self,
+        _status: &mut dyn StatusBackend,
+    ) -> OpenResult<(InputHandle, Option<PathBuf>)> {
         let f = match try_open_file(&self.path) {
             OpenResult::Ok(f) => f,
             OpenResult::NotAvailable => return OpenResult::NotAvailable,
             OpenResult::Err(e) => return OpenResult::Err(e),
         };
 
-        OpenResult::Ok(InputHandle::new(
-            "",
-            BufReader::new(f),
-            InputOrigin::Filesystem,
-        ))
+        let handle = InputHandle::new("", BufReader::new(f), InputOrigin::Filesystem);
+
+        let path = match make_abspath(&self.path) {
+            Ok(m) => m,
+            Err(e) => return OpenResult::Err(e),
+        };
+
+        OpenResult::Ok((handle, Some(path)))
     }
 }
 
@@ -131,8 +146,20 @@ impl IoProvider for FilesystemIo {
     fn input_open_name(
         &mut self,
         name: &str,
-        _status: &mut dyn StatusBackend,
+        status: &mut dyn StatusBackend,
     ) -> OpenResult<InputHandle> {
+        match self.input_open_name_with_abspath(name, status) {
+            OpenResult::Ok((h, _path)) => OpenResult::Ok(h),
+            OpenResult::Err(e) => OpenResult::Err(e),
+            OpenResult::NotAvailable => OpenResult::NotAvailable,
+        }
+    }
+
+    fn input_open_name_with_abspath(
+        &mut self,
+        name: &str,
+        _status: &mut dyn StatusBackend,
+    ) -> OpenResult<(InputHandle, Option<PathBuf>)> {
         let path = match self.construct_path(name) {
             Ok(p) => p,
             Err(e) => return OpenResult::Err(e),
@@ -142,7 +169,7 @@ impl IoProvider for FilesystemIo {
             return OpenResult::NotAvailable;
         }
 
-        let f = match File::open(path) {
+        let f = match File::open(&path) {
             Ok(f) => f,
             Err(e) => {
                 return if e.kind() == io::ErrorKind::NotFound {
@@ -173,12 +200,15 @@ impl IoProvider for FilesystemIo {
             return OpenResult::NotAvailable;
         }
 
+        // SyncTeX requires absolute paths.
+        let path = match make_abspath(path) {
+            Ok(m) => m,
+            Err(e) => return OpenResult::Err(e),
+        };
+
         // Good to go.
-        OpenResult::Ok(InputHandle::new(
-            name,
-            BufReader::new(f),
-            InputOrigin::Filesystem,
-        ))
+        let handle = InputHandle::new(name, BufReader::new(f), InputOrigin::Filesystem);
+        OpenResult::Ok((handle, Some(path)))
     }
 }
 
@@ -216,3 +246,11 @@ impl InputFeatures for BufReader<File> {
         Ok(self.seek(pos)?)
     }
 }
+
+/// For SyncTeX paths we need to make sure that we return an absolute
+/// path. `std::fs::canonicalize` is a bit overkill and prefixes all of
+/// our paths with `\\?\` on Windows.
+fn make_abspath<P: AsRef<Path>>(path: P) -> Result<PathBuf> {
+    let cwd = env::current_dir()?;
+    Ok(cwd.join(path.as_ref()))
+}
diff --git a/crates/io_base/src/lib.rs b/crates/io_base/src/lib.rs
index 53e4da213b..73b9569d4f 100644
--- a/crates/io_base/src/lib.rs
+++ b/crates/io_base/src/lib.rs
@@ -28,6 +28,7 @@ use thiserror::Error as ThisError;
 
 use crate::digest::DigestData;
 
+pub mod app_dirs;
 pub mod digest;
 pub mod filesystem;
 pub mod flate2;
@@ -450,6 +451,33 @@ pub trait IoProvider: AsIoProviderMut {
         OpenResult::NotAvailable
     }
 
+    /// Open the named file for input and return filesystem path information.
+    ///
+    /// This method extends [`Self::input_open_name`] to help support SyncTeX output.
+    /// While SyncTeX output files should contain absolute source file paths,
+    /// Tectonic’s pluggable I/O system makes it so that the mapping between
+    /// input names and filesystem paths is not well-defined. This optional
+    /// interface enables backends to provide filesystem information at the time
+    /// of opening.
+    ///
+    /// The default implementation returns None for the path information, to
+    /// preserve backwards compatibility. If you are implementing a new backend
+    /// that might provide path information, or you are implementing an I/O
+    /// provider that delegates to other I/O providers, you should implement
+    /// this function fully, and then provide a simple implementation of
+    /// [`Self::input_open_name`] that drops the pathing information.
+    fn input_open_name_with_abspath(
+        &mut self,
+        name: &str,
+        status: &mut dyn StatusBackend,
+    ) -> OpenResult<(InputHandle, Option<PathBuf>)> {
+        match self.input_open_name(name, status) {
+            OpenResult::Ok(h) => OpenResult::Ok((h, None)),
+            OpenResult::Err(e) => OpenResult::Err(e),
+            OpenResult::NotAvailable => OpenResult::NotAvailable,
+        }
+    }
+
     /// Open the "primary" input file, which in the context of TeX is the main
     /// input that it's given. When the build is being done using the
     /// filesystem and the input is a file on the filesystem, this function
@@ -459,6 +487,21 @@ pub trait IoProvider: AsIoProviderMut {
         OpenResult::NotAvailable
     }
 
+    /// Open the primary input and return filesystem path information.
+    ///
+    /// This method is to [`Self::input_open_primary`] as
+    /// [`Self::input_open_name_with_abspath`] is to [`Self::input_open_name`].
+    fn input_open_primary_with_abspath(
+        &mut self,
+        status: &mut dyn StatusBackend,
+    ) -> OpenResult<(InputHandle, Option<PathBuf>)> {
+        match self.input_open_primary(status) {
+            OpenResult::Ok(h) => OpenResult::Ok((h, None)),
+            OpenResult::Err(e) => OpenResult::Err(e),
+            OpenResult::NotAvailable => OpenResult::NotAvailable,
+        }
+    }
+
     /// Open a format file with the specified name. Format files have a
     /// specialized entry point because IOProviders may wish to handle them
     /// specially: namely, to munge the filename to one that includes the
@@ -502,10 +545,25 @@ impl<P: IoProvider + ?Sized> IoProvider for Box<P> {
     fn input_open_name(
         &mut self,
         name: &str,
         status: &mut dyn StatusBackend,
     ) -> OpenResult<InputHandle> {
         (**self).input_open_name(name, status)
     }
 
+    fn input_open_name_with_abspath(
+        &mut self,
+        name: &str,
+        status: &mut dyn StatusBackend,
+    ) -> OpenResult<(InputHandle, Option<PathBuf>)> {
+        (**self).input_open_name_with_abspath(name, status)
+    }
+
     fn input_open_primary(&mut self, status: &mut dyn StatusBackend) -> OpenResult<InputHandle> {
         (**self).input_open_primary(status)
     }
 
+    fn input_open_primary_with_abspath(
+        &mut self,
+        status: &mut dyn StatusBackend,
+    ) -> OpenResult<(InputHandle, Option<PathBuf>)> {
+        (**self).input_open_primary_with_abspath(status)
+    }
+
     fn input_open_format(
         &mut self,
         name: &str,
diff --git a/crates/io_base/src/stack.rs b/crates/io_base/src/stack.rs
index da8380125b..ebbbe9a1d7 100644
--- a/crates/io_base/src/stack.rs
+++ b/crates/io_base/src/stack.rs
@@ -1,9 +1,10 @@
-// Copyright 2016-2020 the Tectonic Project
+// Copyright 2016-2021 the Tectonic Project
 // Licensed under the MIT License.
 
 //! An "I/O stack" is an I/O provider that delegates requests to
 //! a series of sub-providers in turn.
 
+use std::path::PathBuf;
 use tectonic_status_base::StatusBackend;
 
 use super::{InputHandle, IoProvider, OpenResult, OutputHandle};
@@ -68,6 +69,23 @@ impl<'a> IoProvider for IoStack<'a> {
         OpenResult::NotAvailable
     }
 
+    fn input_open_name_with_abspath(
+        &mut self,
+        name: &str,
+        status: &mut dyn StatusBackend,
+    ) -> OpenResult<(InputHandle, Option<PathBuf>)> {
+        for item in &mut self.items {
+            let r = item.input_open_name_with_abspath(name, status);
+
+            match r {
+                OpenResult::NotAvailable => continue,
+                _ => return r,
+            };
+        }
+
+        OpenResult::NotAvailable
+    }
+
     fn input_open_primary(&mut self, status: &mut dyn StatusBackend) -> OpenResult<InputHandle> {
         for item in &mut self.items {
             let r = item.input_open_primary(status);
@@ -81,6 +99,22 @@ impl<'a> IoProvider for IoStack<'a> {
         OpenResult::NotAvailable
     }
 
+    fn input_open_primary_with_abspath(
+        &mut self,
+        status: &mut dyn StatusBackend,
+    ) -> OpenResult<(InputHandle, Option<PathBuf>)> {
+        for item in &mut self.items {
+            let r = item.input_open_primary_with_abspath(status);
+
+            match r {
+                OpenResult::NotAvailable => continue,
+                _ => return r,
+            };
+        }
+
+        OpenResult::NotAvailable
+    }
+
     fn input_open_format(
         &mut self,
         name: &str,
diff --git a/crates/status_base/CHANGELOG.md b/crates/status_base/CHANGELOG.md
index 95ec1e26ca..b33861f860 100644
--- a/crates/status_base/CHANGELOG.md
+++ b/crates/status_base/CHANGELOG.md
@@ -1,3 +1,12 @@
+# tectonic_status_base 0.2.0 (2021-06-15)
+
+- Add `PlainStatusBackend.always_stderr()`, allowing users to specify that
+  status-reporting output in this backend should always go to standard error
+  rather than standard output. This is useful in cases where a program's output
+  to stdout needs to be machine-parseable, since the status-reporting could
+  potentially interfere with that if not directed elsewhere (@pkgw, #768).
+
+
 # tectonic_status_base 0.1.0 (2021-01-15)
 
 Initial release: a new crate with basic Tectonic status-reporting traits.
diff --git a/crates/status_base/Cargo.toml b/crates/status_base/Cargo.toml
index 6254f2655e..b5f4b1a25c 100644
--- a/crates/status_base/Cargo.toml
+++ b/crates/status_base/Cargo.toml
@@ -3,7 +3,7 @@
 [package]
 name = "tectonic_status_base"
-version = "0.1.0"
+version = "0.2.0"
 authors = ["Peter Williams "]
 description = """
 Basic types for reporting status messages to a user.
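A sketch of the `always_stderr` usage pattern that the changelog entry above describes (the `plain` module path is assumed from the file layout; not part of the patch):

```rust
use tectonic_status_base::plain::PlainStatusBackend;
use tectonic_status_base::{tt_note, ChatterLevel, StatusBackend};

fn main() {
    let mut status = PlainStatusBackend::new(ChatterLevel::Normal);
    status.always_stderr(true);

    // Notes now go to stderr...
    tt_note!(status, "starting up");

    // ...so stdout stays clean for machine-readable output.
    println!("{{\"status\": \"ok\"}}");
}
```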
diff --git a/crates/status_base/src/plain.rs b/crates/status_base/src/plain.rs index ee330cb542..79b7c6f30e 100644 --- a/crates/status_base/src/plain.rs +++ b/crates/status_base/src/plain.rs @@ -15,12 +15,29 @@ use super::{ChatterLevel, MessageKind, StatusBackend}; #[derive(Clone, Debug, Default)] pub struct PlainStatusBackend { chatter: ChatterLevel, + always_stderr: bool, } impl PlainStatusBackend { /// Create a new backend with the specified chatter level. + /// + /// To use the default chatter level, you can also use [`Self::default`]. pub fn new(chatter: ChatterLevel) -> Self { - PlainStatusBackend { chatter } + PlainStatusBackend { + chatter, + always_stderr: false, + } + } + + /// Configure this backend to always print to the standard error stream. + /// + /// This setting can be useful if you have a program that is printing output + /// to standard output that needs to be machine-parsable. By activating it + /// you can ensure that any status reports don't get mixed in with your + /// stdout output. + pub fn always_stderr(&mut self, setting: bool) -> &mut Self { + self.always_stderr = setting; + self } } @@ -36,7 +53,7 @@ impl StatusBackend for PlainStatusBackend { MessageKind::Error => "error:", }; - if kind == MessageKind::Note { + if kind == MessageKind::Note && !self.always_stderr { println!("{} {}", prefix, args); } else { eprintln!("{} {}", prefix, args); diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md index 407fce6ff0..47266e17da 100644 --- a/docs/src/SUMMARY.md +++ b/docs/src/SUMMARY.md @@ -12,8 +12,11 @@ # “V2” Command-Line Interface - [`tectonic -X build`](v2cli/build.md) +- [`tectonic -X bundle`](v2cli/bundle.md) - [`tectonic -X compile`](v2cli/compile.md) - [`tectonic -X new`](v2cli/new.md) +- [`tectonic -X show`](v2cli/show.md) +- [`tectonic -X watch`](v2cli/watch.md) # Concept Reference diff --git a/docs/src/v2cli/build.md b/docs/src/v2cli/build.md index 74305d7d4c..ab5330a592 100644 --- a/docs/src/v2cli/build.md +++ b/docs/src/v2cli/build.md @@ -17,6 +17,7 @@ tectonic -X build [--only-cached] [--print] [--open] + [--untrusted] ``` #### Remarks @@ -52,3 +53,15 @@ identical to, the contents of the log file. By default, this output is only printed if the engine encounteres a fatal error. The `--open` option will open the built document using the system handler. + +Use the `--untrusted` option if building untrusted content. This is not the +default because in most cases you *will* trust the document that you’re +building, probably because you have created it yourself, and it would be very +annoying to have to pass `--trusted` every time you build a document that uses +shell-escape. See the security discussion in the documentation of the +[compile](./compile.md) command for details. In actual usage, it would obviously +be easy to forget to use this option; in cases where untrusted inputs are a +genuine concern, we recommend setting the environment variable +`TECTONIC_UNTRUSTED_MODE` to a non-empty value. This has the same effect as the +`--untrusted` option. Note, however, that a hostile shell user can trivially +clear this variable. \ No newline at end of file diff --git a/docs/src/v2cli/bundle.md b/docs/src/v2cli/bundle.md new file mode 100644 index 0000000000..e294c1a7e1 --- /dev/null +++ b/docs/src/v2cli/bundle.md @@ -0,0 +1,73 @@ +# tectonic -X bundle + +Commands relating to Tectonic’s “bundles” of support files. + +***This is a [V2 CLI][v2cli-ref] command. 
For information on the original (“V1” +CLI), see [its reference page][v1cli-ref].*** + +[v2cli-ref]: ../ref/v2cli.md +[v1cli-ref]: ../ref/v1cli.md + +The `bundle` subcommands are: + +- [`tectonic -X bundle cat`](#tectonic--x-bundle-cat) +- [`tectonic -X bundle search`](#tectonic--x-bundle-search) + + +## tectonic -X bundle cat + +Print out a file stored in the current document’s backing bundle. + +#### Usage Synopsis + +```sh +tectonic -X bundle cat +``` + +#### Example + +```sh +$ tectonic -X bundle cat latex.ltx +%% +%% This is file `latex.ltx', +%% generated with the docstrip utility. +... +``` + +#### Remarks + +If this command is run outside of a [document workspace](../ref/workspaces.md), +the system default bundle will be used. + + +## tectonic -X bundle search + +Print out the names of files in the current document’s backing bundle, +potentially with filtering. + +#### Usage Synopsis + +```sh +tectonic -X bundle search [TERM] +``` + +#### Example + +```sh +$ tectonic -X bundle search minted +minted1.sty +tcbminted.code.tex +minted.4ht +minted.sty +``` + +#### Remarks + +If no term is specified, *all* of the files in the bundle are printed. The +ordering of those filenames is unspecified. + +The default search method is to use simple substring matching. Other methods may +be added in the future, activated by additional options. + +If this command is run outside of a [document workspace](../ref/workspaces.md), +the system default bundle will be used. diff --git a/docs/src/v2cli/compile.md b/docs/src/v2cli/compile.md index 9378097e2d..5985e1a738 100644 --- a/docs/src/v2cli/compile.md +++ b/docs/src/v2cli/compile.md @@ -36,6 +36,7 @@ tectonic -X compile # full form [--print] [-p] [--reruns COUNT] [-r COUNT] [--synctex] + [--untrusted] [--web-bundle URL] [-w] [-Z UNSTABLE-OPTION] TEXPATH @@ -63,6 +64,27 @@ This will compile the file and create `myfile.pdf` if nothing went wrong. You can use an input filename of `-` to have Tectonic process standard input. (In this case, the output file will be named `texput.pdf`.) +##### Security + +By default, the document is compiled in a “trusted” mode. This means that the +calling user can request to enable certain engine features that could raise +security concerns if used with untrusted input: the classic example of this +being TeX's “shell-escape” functionality. These features are *not* enabled by +default, but they can be enabled on the command line; in the case of +shell-escape, this is done with `-Z shell-escape`. + +If the command-line argument `--untrusted` is provided, these features cannot be +enabled, regardless of other settings such as `-Z shell-escape`. So if you are +going to process untrusted input in a command-line script, as long as you make +sure that `--untrusted` is provided, the known-dangerous features will be +disabled. + +Furthermore, if the environment variable `TECTONIC_UNTRUSTED_MODE` is set to a +non-empty value, Tectonic will behave as if `--untrusted` were specified, +regardless of the actual command-line arguments. Setting this variable can +provide a modest extra layer of protection if the Tectonic engine is being run +outside of its CLI form. Keep in mind that untrusted shell scripts and the like +can trivially defeat this by explicitly clearing the environment variable. #### Options @@ -87,6 +109,7 @@ The following are the available flags. 
| `-p` | `--print` | Print the engine's chatter during processing | | `-r` | `--reruns ` | Rerun the TeX engine exactly this many times after the first | | | `--synctex` | Generate SyncTeX data | +| | `--untrusted` | Input is untrusted: disable all known-insecure features | | `-V` | `--version` | Prints version information | | `-w` | `--web-bundle ` | Use this URL find resource files instead of the default | | `-Z` | `-Z ` | Activate experimental “unstable” options | @@ -102,5 +125,5 @@ the set of unstable options is subject to change at any time. | `-Z continue-on-errors` | Keep compiling even when severe errors occur | | `-Z min-crossrefs=` | Equivalent to bibtex's `-min-crossrefs` flag. Default vaue: 2 | | `-Z paper-size=` | Change the initial paper size. Default: `letter` | -| `-Z shell-escape` | Enable `\write18` | +| `-Z shell-escape` | Enable `\write18` (unless `--untrusted` has been specified) | diff --git a/docs/src/v2cli/show.md b/docs/src/v2cli/show.md new file mode 100644 index 0000000000..4697513674 --- /dev/null +++ b/docs/src/v2cli/show.md @@ -0,0 +1,36 @@ +# tectonic -X show + +Display various useful pieces of information. + +***This is a [V2 CLI][v2cli-ref] command. For information on the original (“V1” +CLI), see [its reference page][v1cli-ref].*** + +[v2cli-ref]: ../ref/v2cli.md +[v1cli-ref]: ../ref/v1cli.md + +The `show` subcommands are: + +- [`tectonic -X show user-cache-dir`](#tectonic--x-show-user-cache-dir) + +## tectonic -X show user-cache-dir + +Print out the location of Tectonic’s default per-user cache directory. + +#### Usage Synopsis + +```sh +tectonic -X show user-cache-dir +``` + +#### Example + +```sh +$ tectonic -X show user-cache-dir +/home/knuth/.cache/Tectonic # Unix + +$ tectonic -X show user-cache-dir +/home/knuth/Library/Caches/Tectonic # macOS + +$ tectonic -X show user-cache-dir +C:\Users\knuth\AppData\Local\TectonicProject\Tectonic # Windows +``` diff --git a/docs/src/v2cli/watch.md b/docs/src/v2cli/watch.md new file mode 100644 index 0000000000..97b3368391 --- /dev/null +++ b/docs/src/v2cli/watch.md @@ -0,0 +1,29 @@ +# tectonic -X watch + +Build the current document and rebuild it as input files change. + +***This is a [V2 CLI][v2cli-ref] command. For information on the original (“V1” +CLI), see [its reference page][v1cli-ref].*** + +[v2cli-ref]: ../ref/v2cli.md +[v1cli-ref]: ../ref/v1cli.md + +#### Usage Synopsis + +```sh +tectonic -X watch + [--exec COMMAND] [-x COMMAND] +``` + +#### Remarks + +This command builds the current document in the same fashion as [`tectonic -X +build`](./build.md), and then stays running and watches for changes to the input +files. It rebuilds the document when changes are detected. + +#### Command-Line Options + +The `--exec` option (or `-x` for short) configures the command that is used to +run the document build. The value of this option is appended to `tectonic -X` +and defaults to `build`. If you want to pass options to the build command, this +is the way to do so. diff --git a/src/app_dirs.rs b/src/app_dirs.rs deleted file mode 100644 index 62219ad0ee..0000000000 --- a/src/app_dirs.rs +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2019 the Tectonic Project -// Licensed under the MIT License. 
- -use crate::errors::Result; -use app_dirs::AppDataType; -use std::path::PathBuf; - -pub use app_dirs::sanitized; - -const APP_INFO: app_dirs::AppInfo = app_dirs::AppInfo { - name: "Tectonic", - author: "TectonicProject", -}; - -#[cfg(feature = "serialization")] -pub fn user_config() -> Result { - Ok(app_dirs::app_root(AppDataType::UserConfig, &APP_INFO)?) -} - -#[cfg(feature = "serialization")] -pub fn get_user_config() -> Result { - Ok(app_dirs::get_app_root(AppDataType::UserConfig, &APP_INFO)?) -} - -pub fn user_cache_dir(path: &str) -> Result { - Ok(app_dirs::app_dir(AppDataType::UserCache, &APP_INFO, path)?) -} diff --git a/src/bin/tectonic/compile.rs b/src/bin/tectonic/compile.rs index 5a197cfae1..069914f1c3 100644 --- a/src/bin/tectonic/compile.rs +++ b/src/bin/tectonic/compile.rs @@ -1,18 +1,18 @@ -// Copyright 2016-2020 the Tectonic Project +// Copyright 2016-2021 the Tectonic Project // Licensed under the MIT License. //! Standalone compilation of TeX documents. This implements the "classic" / //! "V1" / "rustc-like" Tectonic command-line interface, as well as the //! `compile` subcommand of the "V2" / "cargo-like" interface. -use structopt::StructOpt; - use std::{ env, path::{Path, PathBuf}, str::FromStr, time, }; +use structopt::StructOpt; +use tectonic_bridge_core::{SecuritySettings, SecurityStance}; use tectonic::{ config::PersistentConfig, @@ -87,6 +87,10 @@ pub struct CompileOptions { #[structopt(name = "outdir", short, long, parse(from_os_str))] outdir: Option, + /// Input is untrusted -- disable all known-insecure features + #[structopt(long)] + untrusted: bool, + /// Unstable options. Pass -Zhelp to show a list // TODO we can't pass -Zhelp without also passing #[structopt(name = "option", short = "Z", number_of_values = 1)] @@ -97,7 +101,18 @@ impl CompileOptions { pub fn execute(self, config: PersistentConfig, status: &mut dyn StatusBackend) -> Result { let unstable = UnstableOptions::from_unstable_args(self.unstable.into_iter()); - let mut sess_builder = ProcessingSessionBuilder::default(); + // Default to allowing insecure since it would be super duper annoying + // to have to pass `--trusted` every time to build a personal document + // that uses shell-escape! This default can be overridden by setting the + // environment variable TECTONIC_UNTRUSTED_MODE to a nonempty value. 
+ let stance = if self.untrusted { + SecurityStance::DisableInsecures + } else { + SecurityStance::MaybeAllowInsecures + }; + + let mut sess_builder = + ProcessingSessionBuilder::new_with_security(SecuritySettings::new(stance)); let format_path = self.format; sess_builder .unstables(unstable) @@ -197,31 +212,37 @@ impl CompileOptions { None => time::SystemTime::now(), }; sess_builder.build_date(build_date); + run_and_report(sess_builder, status) + } +} - let mut sess = sess_builder.create(status)?; - let result = sess.run(status); - - if let Err(e) = &result { - if let ErrorKind::EngineError(engine) = e.kind() { - let output = sess.get_stdout_content(); - - if output.is_empty() { - tt_error!( - status, - "something bad happened inside {}, but no output was logged", - engine - ); - } else { - tt_error!( - status, - "something bad happened inside {}; its output follows:\n", - engine - ); - status.dump_error_logs(&output); - } +pub(crate) fn run_and_report( + sess_builder: ProcessingSessionBuilder, + status: &mut dyn StatusBackend, +) -> Result { + let mut sess = sess_builder.create(status)?; + let result = sess.run(status); + + if let Err(e) = &result { + if let ErrorKind::EngineError(engine) = e.kind() { + let output = sess.get_stdout_content(); + + if output.is_empty() { + tt_error!( + status, + "something bad happened inside {}, but no output was logged", + engine + ); + } else { + tt_error!( + status, + "something bad happened inside {}; its output follows:\n", + engine + ); + status.dump_error_logs(&output); } } - - result.map(|_| 0) } + + result.map(|_| 0) } diff --git a/src/bin/tectonic/main.rs b/src/bin/tectonic/main.rs index 4bc0a56462..436865ae55 100644 --- a/src/bin/tectonic/main.rs +++ b/src/bin/tectonic/main.rs @@ -11,7 +11,6 @@ use tectonic::{ errors::SyncError, status::termcolor::TermcolorStatusBackend, status::{ChatterLevel, StatusBackend}, - tt_note, }; mod compile; @@ -128,13 +127,6 @@ fn main() { Box::new(PlainStatusBackend::new(chatter_level)) as Box }; - // For now ... - - tt_note!( - status, - "this is a BETA release; ask questions and report bugs at https://tectonic.newton.cx/" - ); - // Now that we've got colorized output, pass off to the inner function ... // all so that we can print out the word "error:" in red. This code // parallels various bits of the `error_chain` crate. diff --git a/src/bin/tectonic/v2cli.rs b/src/bin/tectonic/v2cli.rs index 22c045121b..2a52397246 100644 --- a/src/bin/tectonic/v2cli.rs +++ b/src/bin/tectonic/v2cli.rs @@ -10,11 +10,14 @@ use tectonic::{ self, config::PersistentConfig, ctry, + docmodel::{DocumentExt, DocumentSetupOptions, WorkspaceCreatorExt}, errors::{Result, SyncError}, status::{termcolor::TermcolorStatusBackend, ChatterLevel, StatusBackend}, tt_error, tt_note, - workspace::{self, Workspace}, }; +use tectonic_bridge_core::{SecuritySettings, SecurityStance}; +use tectonic_bundles::Bundle; +use tectonic_docmodel::workspace::{Workspace, WorkspaceCreator}; use tectonic_errors::Error as NewError; use tectonic_status_base::plain::PlainStatusBackend; @@ -49,6 +52,13 @@ struct V2CliOptions { command: Commands, } +/// A semi-hack to allow command-specific customizations of the centralized app +/// initialization. +#[derive(Debug, Default)] +struct CommandCustomizations { + always_stderr: bool, +} + /// The main function for the Cargo-like, "V2" CLI. This intentionally /// duplicates a lot of the "old" main() function, so that the implementation /// can drift over time as needed. 
@@ -72,6 +82,13 @@ pub fn v2_main(effective_args: &[OsString]) { let args = V2CliOptions::from_iter(effective_args); + // Command-specific customizations before we do our centralized setup. + // This is a semi-hack so that we can set up certain commands to ensure + // that status info is always printed to stderr. + + let mut customizations = CommandCustomizations::default(); + args.command.customize(&mut customizations); + // Set up colorized output. let chatter_level = ChatterLevel::from_str(&args.chatter_level).unwrap(); @@ -83,9 +100,13 @@ pub fn v2_main(effective_args: &[OsString]) { }; let mut status = if use_cli_color { - Box::new(TermcolorStatusBackend::new(chatter_level)) as Box + let mut sb = TermcolorStatusBackend::new(chatter_level); + sb.always_stderr(customizations.always_stderr); + Box::new(sb) as Box } else { - Box::new(PlainStatusBackend::new(chatter_level)) as Box + let mut sb = PlainStatusBackend::new(chatter_level); + sb.always_stderr(customizations.always_stderr); + Box::new(sb) as Box }; // For now ... @@ -110,26 +131,47 @@ enum Commands { /// Build a document Build(BuildCommand), + #[structopt(name = "bundle")] + /// Commands relating to this document’s TeX file bundle + Bundle(BundleCommand), + #[structopt(name = "compile")] /// Run a standalone (La)TeX compilation Compile(crate::compile::CompileOptions), - #[structopt(name = "watch")] - /// Watch input files and execute commands on change - Watch(WatchCommand), - #[structopt(name = "new")] /// Create a new document New(NewCommand), + + #[structopt(name = "show")] + /// Display various useful pieces of information + Show(ShowCommand), + + #[structopt(name = "watch")] + /// Watch input files and execute commands on change + Watch(WatchCommand), } impl Commands { + fn customize(&self, cc: &mut CommandCustomizations) { + match self { + Commands::Build(o) => o.customize(cc), + Commands::Bundle(o) => o.customize(cc), + Commands::Compile(_) => {} // avoid namespacing/etc issues + Commands::New(o) => o.customize(cc), + Commands::Show(o) => o.customize(cc), + Commands::Watch(o) => o.customize(cc), + } + } + fn execute(self, config: PersistentConfig, status: &mut dyn StatusBackend) -> Result { match self { Commands::Build(o) => o.execute(config, status), + Commands::Bundle(o) => o.execute(config, status), Commands::Compile(o) => o.execute(config, status), - Commands::Watch(o) => o.execute(config, status), Commands::New(o) => o.execute(config, status), + Commands::Show(o) => o.execute(config, status), + Commands::Watch(o) => o.execute(config, status), } } } @@ -137,6 +179,10 @@ impl Commands { /// `build`: Build a document #[derive(Debug, PartialEq, StructOpt)] pub struct BuildCommand { + /// Document is untrusted -- disable all known-insecure features + #[structopt(long)] + untrusted: bool, + /// Use only resource files cached locally #[structopt(short = "C", long)] only_cached: bool, @@ -159,19 +205,176 @@ pub struct BuildCommand { } impl BuildCommand { + fn customize(&self, _cc: &mut CommandCustomizations) {} + fn execute(self, config: PersistentConfig, status: &mut dyn StatusBackend) -> Result { let ws = Workspace::open_from_environment()?; let doc = ws.first_document(); + // Default to allowing insecure since it would be super duper annoying + // to have to pass `--trusted` every time to build a personal document + // that uses shell-escape! This default can be overridden by setting the + // environment variable TECTONIC_UNTRUSTED_MODE to a nonempty value. 
+ let stance = if self.untrusted { + SecurityStance::DisableInsecures + } else { + SecurityStance::MaybeAllowInsecures + }; + + let mut setup_options = + DocumentSetupOptions::new_with_security(SecuritySettings::new(stance)); + setup_options.only_cached(self.only_cached); + for output_name in doc.output_names() { - let mut opts = doc.build_options_for(output_name); - opts.format_cache_path(config.format_cache_path()?) - .only_cached(self.only_cached) + let mut builder = doc.setup_session(output_name, &setup_options, status)?; + + builder + .format_cache_path(config.format_cache_path()?) .keep_intermediates(self.keep_intermediates) .keep_logs(self.keep_logs) - .print_stdout(self.print_stdout) - .open(self.open); - doc.build(output_name, &opts, status)?; + .print_stdout(self.print_stdout); + + crate::compile::run_and_report(builder, status)?; + + if self.open { + let out_file = doc.output_main_file(output_name); + tt_note!(status, "opening `{}`", out_file.display()); + if let Err(e) = open::that(&out_file) { + tt_error!( + status, + "failed to open `{}` with system handler", + out_file.display(); + e.into() + ) + } + } + } + + Ok(0) + } +} + +/// `bundle`: Commands relating to Tectonic bundles +#[derive(Debug, PartialEq, StructOpt)] +pub struct BundleCommand { + #[structopt(subcommand)] + command: BundleCommands, +} + +#[derive(Debug, PartialEq, StructOpt)] +enum BundleCommands { + #[structopt(name = "cat")] + /// Dump the contents of a file in the bundle + Cat(BundleCatCommand), + + #[structopt(name = "search")] + /// Filter the list of filenames contained in the bundle + Search(BundleSearchCommand), +} + +impl BundleCommand { + fn customize(&self, cc: &mut CommandCustomizations) { + match &self.command { + BundleCommands::Cat(c) => c.customize(cc), + BundleCommands::Search(c) => c.customize(cc), + } + } + + fn execute(self, config: PersistentConfig, status: &mut dyn StatusBackend) -> Result { + match self.command { + BundleCommands::Cat(c) => c.execute(config, status), + BundleCommands::Search(c) => c.execute(config, status), + } + } +} + +fn get_a_bundle( + _config: PersistentConfig, + only_cached: bool, + status: &mut dyn StatusBackend, +) -> Result> { + use tectonic_docmodel::workspace::NoWorkspaceFoundError; + + match Workspace::open_from_environment() { + Ok(ws) => { + let doc = ws.first_document(); + let mut options: DocumentSetupOptions = Default::default(); + options.only_cached(only_cached); + doc.bundle(&options, status) + } + + Err(e) => { + if e.downcast_ref::().is_none() { + Err(e.into()) + } else { + tt_note!( + status, + "not in a document workspace; using the built-in default bundle" + ); + Ok(Box::new(tectonic_bundles::get_fallback_bundle( + only_cached, + status, + )?)) + } + } + } +} + +#[derive(Debug, PartialEq, StructOpt)] +struct BundleCatCommand { + /// Use only resource files cached locally + #[structopt(short = "C", long)] + only_cached: bool, + + #[structopt(help = "The name of the file to dump")] + filename: String, +} + +impl BundleCatCommand { + fn customize(&self, cc: &mut CommandCustomizations) { + cc.always_stderr = true; + } + + fn execute(self, config: PersistentConfig, status: &mut dyn StatusBackend) -> Result { + let mut bundle = get_a_bundle(config, self.only_cached, status)?; + let mut ih = bundle + .input_open_name(&self.filename, status) + .must_exist()?; + std::io::copy(&mut ih, &mut std::io::stdout())?; + Ok(0) + } +} + +#[derive(Debug, PartialEq, StructOpt)] +struct BundleSearchCommand { + /// Use only resource files cached locally + 
#[structopt(short = "C", long)] + only_cached: bool, + + #[structopt(help = "The search term")] + term: Option, +} + +impl BundleSearchCommand { + fn customize(&self, cc: &mut CommandCustomizations) { + cc.always_stderr = true; + } + + fn execute(self, config: PersistentConfig, status: &mut dyn StatusBackend) -> Result { + let mut bundle = get_a_bundle(config, self.only_cached, status)?; + let files = bundle.all_files(status)?; + + // Is there a better way to do this? + let filter: Box bool> = if let Some(t) = self.term { + Box::new(move |s: &str| s.contains(&t)) + } else { + Box::new(|_: &str| true) + }; + + for filename in &files { + if filter(filename) { + println!("{}", filename); + } } Ok(0) @@ -187,6 +390,8 @@ pub struct WatchCommand { } impl WatchCommand { + fn customize(&self, _cc: &mut CommandCustomizations) {} + fn execute(self, _config: PersistentConfig, status: &mut dyn StatusBackend) -> Result { let exe_name = crate::watch::get_trimmed_exe_name() .into_os_string() @@ -257,6 +462,8 @@ pub struct NewCommand { } impl NewCommand { + fn customize(&self, _cc: &mut CommandCustomizations) {} + fn execute(self, config: PersistentConfig, status: &mut dyn StatusBackend) -> Result { tt_note!( status, @@ -264,11 +471,55 @@ impl NewCommand { self.path.display() ); - let wc = workspace::WorkspaceCreator::new(self.path); + let wc = WorkspaceCreator::new(self.path); ctry!( - wc.create(&config, status); + wc.create_defaulted(&config, status); "failed to create the new Tectonic workspace" ); Ok(0) } } + +/// `show`: Show various useful pieces of information. +#[derive(Debug, PartialEq, StructOpt)] +pub struct ShowCommand { + #[structopt(subcommand)] + command: ShowCommands, +} + +#[derive(Debug, PartialEq, StructOpt)] +enum ShowCommands { + #[structopt(name = "user-cache-dir")] + /// Print the location of the default per-user cache directory + UserCacheDir(ShowUserCacheDirCommand), +} + +impl ShowCommand { + fn customize(&self, cc: &mut CommandCustomizations) { + match &self.command { + ShowCommands::UserCacheDir(c) => c.customize(cc), + } + } + + fn execute(self, config: PersistentConfig, status: &mut dyn StatusBackend) -> Result { + match self.command { + ShowCommands::UserCacheDir(c) => c.execute(config, status), + } + } +} + +#[derive(Debug, PartialEq, StructOpt)] +struct ShowUserCacheDirCommand {} + +impl ShowUserCacheDirCommand { + fn customize(&self, cc: &mut CommandCustomizations) { + cc.always_stderr = true; + } + + fn execute(self, _config: PersistentConfig, _status: &mut dyn StatusBackend) -> Result { + use tectonic_bundles::cache::Cache; + let cache = Cache::get_user_default()?; + println!("{}", cache.root().display()); + Ok(0) + } +} diff --git a/src/config.rs b/src/config.rs index 1044a0acbb..3efe1d6c37 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,5 +1,4 @@ -// src/config.rs -- configuration for the Tectonic library. -// Copyright 2016-2020 the Tectonic Project +// Copyright 2016-2021 the Tectonic Project // Licensed under the MIT License. //! User configuration settings for the Tectonic engine. 
@@ -16,15 +15,14 @@ use std::{ path::{Path, PathBuf}, sync::atomic::{AtomicBool, Ordering}, }; +use tectonic_bundles::{ + cache::Cache, dir::DirBundle, itar::IndexedTarBackend, zip::ZipBundle, Bundle, +}; +use tectonic_io_base::app_dirs; use url::Url; use crate::{ - app_dirs, errors::{ErrorKind, Result}, - io::cached_itarbundle::CachedITarBundle, - io::dirbundle::DirBundle, - io::zipbundle::ZipBundle, - io::Bundle, status::StatusBackend, }; @@ -74,7 +72,7 @@ impl PersistentConfig { }; let mut cfg_path = if auto_create_config_file { - app_dirs::user_config()? + app_dirs::ensure_user_config()? } else { app_dirs::get_user_config()? }; @@ -124,8 +122,13 @@ impl PersistentConfig { custom_cache_root: Option<&Path>, status: &mut dyn StatusBackend, ) -> Result> { - let bundle = CachedITarBundle::new(url, only_cached, custom_cache_root, status)?; + let mut cache = if let Some(root) = custom_cache_root { + Cache::get_for_custom_directory(root) + } else { + Cache::get_user_default()? + }; + let bundle = cache.open::(url, only_cached, status)?; Ok(Box::new(bundle) as _) } @@ -182,7 +185,7 @@ impl PersistentConfig { if CONFIG_TEST_MODE_ACTIVATED.load(Ordering::SeqCst) { Ok(crate::test_util::test_path(&[])) } else { - Ok(app_dirs::user_cache_dir("formats")?) + Ok(app_dirs::ensure_user_cache_dir("formats")?) } } } @@ -191,7 +194,7 @@ impl Default for PersistentConfig { fn default() -> Self { PersistentConfig { default_bundles: vec![BundleInfo { - url: String::from("https://archive.org/services/purl/net/pkgwpub/tectonic-default"), + url: String::from(tectonic_bundles::FALLBACK_BUNDLE_URL), }], } } diff --git a/src/docmodel.rs b/src/docmodel.rs new file mode 100644 index 0000000000..0a23b3a214 --- /dev/null +++ b/src/docmodel.rs @@ -0,0 +1,219 @@ +// Copyright 2020-2021 the Tectonic Project +// Licensed under the MIT License. + +//! Connecting the Tectonic document model to the engines. +//! +//! This module extends the document model types provided by the +//! `tectonic_docmodel` crate with the actual document-processing capabilities +//! provided by the processing engines. + +use std::{ + fmt::Write as FmtWrite, + fs, io, + path::{Path, PathBuf}, +}; +use tectonic_bridge_core::SecuritySettings; +use tectonic_bundles::{ + cache::Cache, dir::DirBundle, itar::IndexedTarBackend, zip::ZipBundle, Bundle, +}; +use tectonic_docmodel::{ + document::{BuildTargetType, Document}, + workspace::{Workspace, WorkspaceCreator}, +}; +use tectonic_geturl::{DefaultBackend, GetUrlBackend}; +use url::Url; + +use crate::{ + config, ctry, + driver::{OutputFormat, PassSetting, ProcessingSessionBuilder}, + errors::{ErrorKind, Result}, + status::StatusBackend, + test_util, tt_note, +}; + +/// Options for setting up [`Document`] instances with the driver +#[derive(Clone, Debug, Default)] +pub struct DocumentSetupOptions { + /// Disable requests to the network, if the document’s bundle happens to be + /// network-based. + only_cached: bool, + + /// Security settings for engine features. + security: SecuritySettings, +} + +impl DocumentSetupOptions { + /// Create a new set of document setup options with custom security + /// settings. + pub fn new_with_security(security: SecuritySettings) -> Self { + DocumentSetupOptions { + only_cached: false, + security, + } + } + + /// Specify whether any requests to the network will be made for bundle + /// resources. + /// + /// If the document’s backing bundle is not network-based, this setting will + /// have no effect. 
+ pub fn only_cached(&mut self, s: bool) -> &mut Self { + self.only_cached = s; + self + } +} + +pub trait DocumentExt { + /// Get the bundle used by this document. + /// + /// This parses [`Document::bundle_loc`] and turns it into the appropriate + /// bundle backend. + fn bundle( + &self, + setup_options: &DocumentSetupOptions, + status: &mut dyn StatusBackend, + ) -> Result>; + + /// Set up a [`ProcessingSessionBuilder`] for one of the outputs. + /// + /// The *output_profile* argument gives the name of the document’s output profile to + /// use. + fn setup_session( + &self, + output_profile: &str, + setup_options: &DocumentSetupOptions, + status: &mut dyn StatusBackend, + ) -> Result; +} + +impl DocumentExt for Document { + fn bundle( + &self, + setup_options: &DocumentSetupOptions, + status: &mut dyn StatusBackend, + ) -> Result> { + fn bundle_from_path(p: PathBuf) -> Result> { + if p.is_dir() { + Ok(Box::new(DirBundle::new(p))) + } else { + Ok(Box::new(ZipBundle::open(p)?)) + } + } + + if config::is_config_test_mode_activated() { + Ok(Box::new(test_util::TestBundle::default())) + } else if let Ok(url) = Url::parse(&self.bundle_loc) { + if url.scheme() != "file" { + let mut cache = Cache::get_user_default()?; + let bundle = cache.open::( + &self.bundle_loc, + setup_options.only_cached, + status, + )?; + Ok(Box::new(bundle)) + } else { + let file_path = url.to_file_path().map_err(|_| { + io::Error::new(io::ErrorKind::InvalidInput, "failed to parse local path") + })?; + bundle_from_path(file_path) + } + } else { + bundle_from_path(Path::new(&self.bundle_loc).to_owned()) + } + } + + fn setup_session( + &self, + output_profile: &str, + setup_options: &DocumentSetupOptions, + status: &mut dyn StatusBackend, + ) -> Result { + let profile = self.outputs.get(output_profile).ok_or_else(|| { + ErrorKind::Msg(format!( + "unrecognized output profile name \"{}\"", + output_profile + )) + })?; + + let output_format = match profile.target_type { + BuildTargetType::Pdf => OutputFormat::Pdf, + }; + + let mut input_buffer = String::new(); + if !profile.preamble_file.is_empty() { + writeln!(input_buffer, "\\input{{{}}}", profile.preamble_file)?; + } + if !profile.index_file.is_empty() { + writeln!(input_buffer, "\\input{{{}}}", profile.index_file)?; + } + if !profile.postamble_file.is_empty() { + writeln!(input_buffer, "\\input{{{}}}", profile.postamble_file)?; + } + + let mut sess_builder = + ProcessingSessionBuilder::new_with_security(setup_options.security.clone()); + + sess_builder + .output_format(output_format) + .format_name(&profile.tex_format) + .build_date(std::time::SystemTime::now()) + .pass(PassSetting::Default) + .primary_input_buffer(input_buffer.as_bytes()) + .tex_input_name(output_profile); + + if profile.shell_escape { + // For now, this is the only option we allow. + sess_builder.shell_escape_with_temp_dir(); + } + + if setup_options.only_cached { + tt_note!(status, "using only cached resource files"); + } + sess_builder.bundle(self.bundle(setup_options, status)?); + + let mut tex_dir = self.src_dir().to_owned(); + tex_dir.push("src"); + sess_builder.filesystem_root(&tex_dir); + + let mut output_dir = self.build_dir().to_owned(); + output_dir.push(output_profile); + ctry!( + fs::create_dir_all(&output_dir); + "couldn\'t create output directory `{}`", output_dir.display() + ); + sess_builder.output_dir(&output_dir); + + Ok(sess_builder) + } +} + +/// Extension methods for [`WorkspaceCreator`]. 
+pub trait WorkspaceCreatorExt { + /// Create the new workspace with a good default for the bundle location. + /// + /// This method is a thin wrapper on [`WorkspaceCreator::create`] that uses + /// the current configuration to determine a good default bundle location + /// for the main document. + fn create_defaulted( + self, + config: &config::PersistentConfig, + status: &mut dyn StatusBackend, + ) -> Result; +} + +impl WorkspaceCreatorExt for WorkspaceCreator { + fn create_defaulted( + self, + config: &config::PersistentConfig, + status: &mut dyn StatusBackend, + ) -> Result { + let bundle_loc = if config::is_config_test_mode_activated() { + "test-bundle://".to_owned() + } else { + let mut gub = DefaultBackend::default(); + gub.resolve_url(config.default_bundle_loc(), status)? + }; + + Ok(self.create(bundle_loc)?) + } +} diff --git a/src/driver.rs b/src/driver.rs index b9cd9c30ec..5da8d127b6 100644 --- a/src/driver.rs +++ b/src/driver.rs @@ -5,7 +5,11 @@ //! This module contains the high-level interface that ties together the various //! engines. The main struct is [`ProcessingSession`], which knows how to run -//! (and re-run if necessary) the various engines in the right order. +//! (and re-run if necessary) the various engines in the right order. Such a +//! session can be created with a [`ProcessingSessionBuilder`], which you might +//! obtain from a [`tectonic_docmodel::document::Document`] using the +//! [`crate::docmodel::DocumentExt::setup_session`] extension method, if you’re +//! using the Tectonic document model. //! //! For an example of how to use this module, see `src/bin/tectonic.rs`, which //! contains tectonic's main CLI program. @@ -22,7 +26,8 @@ use std::{ str::FromStr, time::SystemTime, }; -use tectonic_bridge_core::{CoreBridgeLauncher, DriverHooks, SystemRequestError}; +use tectonic_bridge_core::{CoreBridgeLauncher, DriverHooks, SecuritySettings, SystemRequestError}; +use tectonic_bundles::Bundle; use tectonic_io_base::{ digest::DigestData, filesystem::{FilesystemIo, FilesystemPrimaryInputIo}, @@ -36,7 +41,7 @@ use crate::{ io::{ format_cache::FormatCache, memory::{MemoryFileCollection, MemoryIo}, - Bundle, InputOrigin, + InputOrigin, }, status::StatusBackend, tt_error, tt_note, tt_warning, @@ -382,12 +387,24 @@ impl IoProvider for BridgeState { name: &str, status: &mut dyn StatusBackend, ) -> OpenResult { + match self.input_open_name_with_abspath(name, status) { + OpenResult::Ok((ih, _path)) => OpenResult::Ok(ih), + OpenResult::Err(e) => OpenResult::Err(e), + OpenResult::NotAvailable => OpenResult::NotAvailable, + } + } + + fn input_open_name_with_abspath( + &mut self, + name: &str, + status: &mut dyn StatusBackend, + ) -> OpenResult<(InputHandle, Option)> { let r = (|| { - bridgestate_ioprovider_cascade!(self, input_open_name(name, status)); + bridgestate_ioprovider_cascade!(self, input_open_name_with_abspath(name, status)); })(); match r { - OpenResult::Ok(ref ih) => { + OpenResult::Ok((ref ih, ref _path)) => { if let Some(summ) = self.events.get_mut(name) { summ.access_pattern = match summ.access_pattern { AccessPattern::Written => AccessPattern::WrittenThenRead, @@ -430,7 +447,18 @@ impl IoProvider for BridgeState { } fn input_open_primary(&mut self, status: &mut dyn StatusBackend) -> OpenResult { - bridgestate_ioprovider_cascade!(self, input_open_primary(status)); + match self.input_open_primary_with_abspath(status) { + OpenResult::Ok((ih, _path)) => OpenResult::Ok(ih), + OpenResult::Err(e) => OpenResult::Err(e), + OpenResult::NotAvailable => 
OpenResult::NotAvailable, + } + } + + fn input_open_primary_with_abspath( + &mut self, + status: &mut dyn StatusBackend, + ) -> OpenResult<(InputHandle, Option)> { + bridgestate_ioprovider_cascade!(self, input_open_primary_with_abspath(status)); } fn input_open_format( @@ -609,8 +637,15 @@ impl Default for ShellEscapeMode { } /// A builder-style interface for creating a [`ProcessingSession`]. +/// +/// This uses standard builder patterns. The `Default` implementation defaults +/// to restrictive security settings that disable all known-insecure features +/// that could be abused by untrusted inputs. Use +/// [`ProcessingSessionBuilder::new_with_security()`] in order to have the +/// option to enable potentially-insecure features such as shell-escape. #[derive(Default)] pub struct ProcessingSessionBuilder { + security: SecuritySettings, primary_input: PrimaryInputMode, tex_input_name: Option, output_dest: OutputDestination, @@ -633,6 +668,14 @@ pub struct ProcessingSessionBuilder { } impl ProcessingSessionBuilder { + /// Create a new builder with customized security settings. + pub fn new_with_security(security: SecuritySettings) -> Self { + ProcessingSessionBuilder { + security, + ..Default::default() + } + } + /// Sets the path to the primary input file. /// /// If a primary input path is not specified, we will default to reading it from stdin. @@ -798,7 +841,10 @@ impl ProcessingSessionBuilder { /// disable shell-escape unless the [`UnstableOptions`] say otherwise, /// in which case a driver-managed temporary directory will be used. pub fn shell_escape_with_work_dir>(&mut self, path: P) -> &mut Self { - self.shell_escape_mode = ShellEscapeMode::ExternallyManagedDir(path.as_ref().to_owned()); + if self.security.allow_shell_escape() { + self.shell_escape_mode = + ShellEscapeMode::ExternallyManagedDir(path.as_ref().to_owned()); + } self } @@ -807,7 +853,9 @@ impl ProcessingSessionBuilder { /// unless the [`UnstableOptions`] say otherwise, in which case a /// driver-managed temporary directory will be used. pub fn shell_escape_with_temp_dir(&mut self) -> &mut Self { - self.shell_escape_mode = ShellEscapeMode::TempDir; + if self.security.allow_shell_escape() { + self.shell_escape_mode = ShellEscapeMode::TempDir; + } self } @@ -919,19 +967,24 @@ impl ProcessingSessionBuilder { let mut pdf_path = aux_path.clone(); pdf_path.set_extension("pdf"); - let shell_escape_mode = match self.shell_escape_mode { - ShellEscapeMode::Defaulted => { - if self.unstables.shell_escape { - ShellEscapeMode::TempDir - } else { - ShellEscapeMode::Disabled + let shell_escape_mode = if !self.security.allow_shell_escape() { + ShellEscapeMode::Disabled + } else { + match self.shell_escape_mode { + ShellEscapeMode::Defaulted => { + if self.unstables.shell_escape { + ShellEscapeMode::TempDir + } else { + ShellEscapeMode::Disabled + } } - } - other => other, + other => other, + } }; Ok(ProcessingSession { + security: self.security, bs, pass: self.pass, primary_input_path, @@ -964,6 +1017,9 @@ enum RerunReason { /// processing a file. It understands, for example, the need to re-run the TeX /// engine if the `.aux` file changed. pub struct ProcessingSession { + // Security settings. + security: SecuritySettings, + /// The subset of the session state that's can be mutated while the C/C++ /// engines are running. Importantly, this includes the full I/O stack. 
bs: BridgeState, @@ -1456,7 +1512,8 @@ impl ProcessingSession { let result = { self.bs .enter_format_mode(&format!("tectonic-format-{}.tex", stem)); - let mut launcher = CoreBridgeLauncher::new(&mut self.bs, status); + let mut launcher = + CoreBridgeLauncher::new_with_security(&mut self.bs, status, self.security.clone()); let r = TexEngine::default() .halt_on_error_mode(true) .initex_mode(true) @@ -1518,7 +1575,8 @@ impl ProcessingSession { status.note_highlighted("Running ", "TeX", " ..."); } - let mut launcher = CoreBridgeLauncher::new(&mut self.bs, status); + let mut launcher = + CoreBridgeLauncher::new_with_security(&mut self.bs, status, self.security.clone()); TexEngine::default() .halt_on_error_mode(true) @@ -1551,7 +1609,8 @@ impl ProcessingSession { fn bibtex_pass(&mut self, status: &mut dyn StatusBackend) -> Result { let result = { status.note_highlighted("Running ", "BibTeX", " ..."); - let mut launcher = CoreBridgeLauncher::new(&mut self.bs, status); + let mut launcher = + CoreBridgeLauncher::new_with_security(&mut self.bs, status, self.security.clone()); let mut engine = BibtexEngine::new(); engine.process(&mut launcher, &self.tex_aux_path, &self.unstables) }; @@ -1583,7 +1642,8 @@ impl ProcessingSession { { status.note_highlighted("Running ", "xdvipdfmx", " ..."); - let mut launcher = CoreBridgeLauncher::new(&mut self.bs, status); + let mut launcher = + CoreBridgeLauncher::new_with_security(&mut self.bs, status, self.security.clone()); let mut engine = XdvipdfmxEngine::default(); engine.build_date(self.build_date); diff --git a/src/errors.rs b/src/errors.rs index d4702c3371..f0b665bbc5 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -60,7 +60,6 @@ error_chain! { } foreign_links { - AppDirs(app_dirs::AppDirsError); Io(io::Error); Fmt(fmt::Error); Nul(ffi::NulError); diff --git a/src/io/cached_itarbundle.rs b/src/io/cached_itarbundle.rs deleted file mode 100644 index 976916059c..0000000000 --- a/src/io/cached_itarbundle.rs +++ /dev/null @@ -1,619 +0,0 @@ -// Copyright 2017-2020 the Tectonic Project -// Licensed under the MIT License. - -use flate2::read::GzDecoder; -use fs2::FileExt; -use std::{ - collections::HashMap, - fs::{self, File}, - io::{BufRead, BufReader, Error as IoError, ErrorKind as IoErrorKind, Read, Write}, - path::{Path, PathBuf}, - str::FromStr, -}; -use tectonic_errors::{anyhow::bail, atry, Result}; -use tectonic_geturl::{DefaultBackend, DefaultRangeReader, GetUrlBackend, RangeReader}; - -use super::{try_open_file, Bundle, InputHandle, InputOrigin, IoProvider, OpenResult}; -use crate::app_dirs; -use crate::digest::{self, Digest, DigestData}; -use crate::errors::SyncError; -use crate::status::StatusBackend; -use crate::{tt_note, tt_warning}; - -const MAX_HTTP_ATTEMPTS: usize = 4; - -#[derive(Clone, Copy, Debug)] -struct FileInfo { - offset: u64, - length: u64, -} - -#[derive(Clone, Copy, Debug)] -struct LocalCacheItem { - _length: u64, - digest: DigestData, -} - -/// Attempts to download a file from the bundle. -fn get_file( - data: &mut DefaultRangeReader, - name: &str, - offset: u64, - length: usize, - status: &mut dyn StatusBackend, -) -> Result> { - // In principle it'd be cool to return a handle right to the HTTP - // response, but those can't be seekable, and doing so introduces - // lifetime-related issues. So for now we just slurp the whole thing - // into RAM. - - tt_note!(status, "downloading {}", name); - - // When fetching a bunch of resource files (i.e., on the first - // invocation), bintray will sometimes drop connections. 
The error - // manifests itself in a way that has a not-so-nice user experience. - // Our solution: retry the HTTP a few times in case it was a transient - // problem. - - let mut buf = Vec::with_capacity(length); - let mut overall_failed = true; - let mut any_failed = false; - - for _ in 0..MAX_HTTP_ATTEMPTS { - let mut stream = match data.read_range(offset, length) { - Ok(r) => r, - Err(e) => { - tt_warning!(status, "failure requesting \"{}\" from network", name; e); - any_failed = true; - continue; - } - }; - - if let Err(e) = stream.read_to_end(&mut buf) { - tt_warning!(status, "failure downloading \"{}\" from network", name; e.into()); - any_failed = true; - continue; - } - - overall_failed = false; - break; - } - - if overall_failed { - bail!( - "failed to retrieve \"{}\" from the network; \ - this most probably is not Tectonic's fault \ - -- please check your network connection.", - name - ); - } else if any_failed { - tt_note!(status, "download succeeded after retry"); - } - - Ok(buf) -} - -fn parse_index_line(line: &str) -> Result> { - let mut bits = line.split_whitespace(); - - if let (Some(name), Some(offset), Some(length)) = (bits.next(), bits.next(), bits.next()) { - Ok(Some(( - name.to_owned(), - FileInfo { - offset: offset.parse::()?, - length: length.parse::()?, - }, - ))) - } else { - // TODO: preserve the warning info or something! - Ok(None) - } -} - -/// Attempts to find the redirected url, download the index and digest. -fn get_everything( - backend: &mut DefaultBackend, - url: &str, - status: &mut dyn StatusBackend, -) -> Result<(String, String, String)> { - let url = backend.resolve_url(url, status)?; - - let index = { - let mut index = String::new(); - let index_url = format!("{}.index.gz", &url); - tt_note!(status, "downloading index {}", index_url); - GzDecoder::new(backend.get_url(&index_url, status)?).read_to_string(&mut index)?; - index - }; - - let digest_text = { - // Find the location of the digest file. - let digest_info = { - let mut digest_info = None; - for line in index.lines() { - if let Some((name, info)) = parse_index_line(line)? { - if name == digest::DIGEST_NAME { - digest_info = Some(info); - break; - } - } - } - atry!(digest_info; ["backend does not provide needed {} file", digest::DIGEST_NAME]) - }; - - let mut range_reader = backend.open_range_reader(&url); - String::from_utf8(get_file( - &mut range_reader, - digest::DIGEST_NAME, - digest_info.offset, - digest_info.length as usize, - status, - )?) - .map_err(|e| e.utf8_error())? - }; - - Ok((digest_text, index, url)) -} - -#[derive(Clone, Debug)] -struct CacheContent { - digest_text: String, - redirect_url: String, - index: HashMap, -} - -/// Load cached data. -/// -/// If any of the files is not found return None. -fn load_cache( - digest_path: &Path, - redirect_base: &Path, - index_base: &Path, -) -> Result> { - // Convert file-not-found errors into None. - match load_cache_inner(digest_path, redirect_base, index_base) { - Ok(r) => Ok(Some(r)), - Err(e) => { - if let Some(ioe) = e.downcast_ref::() { - if ioe.kind() == IoErrorKind::NotFound { - return Ok(None); - } - } - - Err(e) - } - } -} - -/// See `load_cache`. 
-fn load_cache_inner( - digest_path: &Path, - redirect_base: &Path, - index_base: &Path, -) -> Result { - let digest_text = { - let f = File::open(digest_path)?; - let mut digest_text = String::with_capacity(digest::DIGEST_LEN); - f.take(digest::DIGEST_LEN as u64) - .read_to_string(&mut digest_text)?; - digest_text - }; - - let redirect_path = make_txt_path(redirect_base, &digest_text); - let redirect_url = fs::read_to_string(redirect_path)?; - - let index_path = make_txt_path(index_base, &digest_text); - - let index = { - let f = File::open(index_path)?; - let mut index = HashMap::new(); - for line in BufReader::new(f).lines() { - if let Some((name, info)) = parse_index_line(&line?)? { - index.insert(name, info); - } - } - index - }; - Ok(CacheContent { - digest_text, - redirect_url, - index, - }) -} - -fn make_txt_path(base: &Path, digest_text: &str) -> PathBuf { - base.join(&digest_text).with_extension("txt") -} - -/// Bundle provided by an indexed tar file over http with a local cache. -#[derive(Debug)] -pub struct CachedITarBundle { - url: String, - redirect_url: String, - digest_path: PathBuf, - cached_digest: DigestData, - checked_digest: bool, - redirect_base: PathBuf, - manifest_path: PathBuf, - data_base: PathBuf, - contents: HashMap, - only_cached: bool, - - tar_data: DefaultRangeReader, - index: HashMap, -} - -impl CachedITarBundle { - pub fn new( - url: &str, - only_cached: bool, - custom_cache_root: Option<&Path>, - status: &mut dyn StatusBackend, - ) -> Result { - let mut backend = DefaultBackend::default(); - let digest_path = cache_dir("urls", custom_cache_root)?.join(app_dirs::sanitized(url)); - - let redirect_base = &cache_dir("redirects", custom_cache_root)?; - let index_base = &cache_dir("indexes", custom_cache_root)?; - let manifest_base = &cache_dir("manifests", custom_cache_root)?; - let data_base = &cache_dir("files", custom_cache_root)?; - - let mut checked_digest = false; - let CacheContent {digest_text, redirect_url, index} = - // Try loading the cached files. - match load_cache(&digest_path, &redirect_base, &index_base)? { - Some(c) => c, - None => { - // At least one of the cached files does not exists. We fetch everything from - // scratch and save the files. - let (digest_text, index, redirect_url) = get_everything(&mut backend, url, status)?; - let _ = DigestData::from_str(&digest_text)?; - checked_digest = true; - - file_create_write(&digest_path, |f| writeln!(f, "{}", digest_text))?; - file_create_write(make_txt_path(&redirect_base, &digest_text), |f| f.write_all(redirect_url.as_bytes()))?; - file_create_write(make_txt_path(&index_base, &digest_text), |f| f.write_all(index.as_bytes()))?; - - // Reload the cached files now when they were saved. - atry!(load_cache(&digest_path, &redirect_base, &index_base)?; ["cache files missing even after they were created"]) - } - }; - - let cached_digest = DigestData::from_str(&digest_text)?; - - // We can now figure out which manifest to use. - let manifest_path = make_txt_path(manifest_base, &digest_text); - - // Read it in, if it exists. - - let mut contents = HashMap::new(); - - match try_open_file(&manifest_path) { - OpenResult::NotAvailable => {} - OpenResult::Err(e) => { - return Err(e); - } - OpenResult::Ok(mfile) => { - // Note that the lock is released when the file is closed, - // which is good since BufReader::new() and BufReader::lines() - // consume their objects. 
- if let Err(e) = mfile.lock_shared() { - tt_warning!(status, "failed to lock manifest file \"{}\" for reading; this might be fine", - manifest_path.display(); e.into()); - } - - let f = BufReader::new(mfile); - - for res in f.lines() { - let line = res?; - let mut bits = line.rsplitn(3, ' '); - - let (original_name, length, digest) = - match (bits.next(), bits.next(), bits.next(), bits.next()) { - (Some(s), Some(t), Some(r), None) => (r, t, s), - _ => continue, - }; - - let name = original_name.to_owned(); - - let length = match length.parse::() { - Ok(l) => l, - Err(_) => continue, - }; - - let digest = if digest == "-" { - continue; - } else { - match DigestData::from_str(&digest) { - Ok(d) => d, - Err(e) => { - tt_warning!(status, "ignoring bad digest data \"{}\" for \"{}\" in \"{}\"", - &digest, original_name, manifest_path.display() ; e); - continue; - } - } - }; - - contents.insert( - name, - LocalCacheItem { - _length: length, - digest, - }, - ); - } - } - } - - // All set. - - let tar_data = backend.open_range_reader(&redirect_url); - - Ok(CachedITarBundle { - url: url.to_owned(), - redirect_url, - digest_path, - cached_digest, - checked_digest, - manifest_path, - data_base: data_base.to_owned(), - redirect_base: redirect_base.to_owned(), - contents, - only_cached, - tar_data, - index, - }) - } - - fn record_cache_result(&mut self, name: &str, length: u64, digest: DigestData) -> Result<()> { - let digest_text = digest.to_string(); - - // Due to a quirk about permissions for file locking on Windows, we - // need to add `.read(true)` to be able to lock a file opened in - // append mode. - - let mut man = fs::OpenOptions::new() - .append(true) - .create(true) - .read(true) - .open(&self.manifest_path)?; - - // Lock will be released when file is closed at the end of this function. - atry!(man.lock_exclusive(); ["failed to lock manifest file \"{}\" for writing", self.manifest_path.display()]); - - if !name.contains(|c| c == '\n' || c == '\r') { - writeln!(man, "{} {} {}", name, length, digest_text)?; - } - self.contents.insert( - name.to_owned(), - LocalCacheItem { - _length: length, - digest, - }, - ); - Ok(()) - } - - /// If we're going to make a request of the backend, we should check that - /// its digest is what we expect. If not, we do a lame thing where we - /// error out but set things up so that things should succeed if the - /// program is re-run. Exactly the lame TeX user experience that I've been - /// trying to avoid! - fn check_digest(&mut self, status: &mut dyn StatusBackend) -> Result<()> { - if self.checked_digest { - return Ok(()); - } - - // Do a quick and dirty check first and ignore errors. - if let Some(info) = self.index.get(digest::DIGEST_NAME) { - if let Ok(d) = get_file( - &mut self.tar_data, - digest::DIGEST_NAME, - info.offset, - info.length as usize, - status, - ) { - if let Ok(d) = String::from_utf8(d) { - if let Ok(d) = DigestData::from_str(&d) { - if self.cached_digest == d { - // We managed to pull some data that match the digest. - // We can be quite confident that the bundle is what we expect it to be. - self.checked_digest = true; - return Ok(()); - } - } - } - } - } - - // The quick check failed. Try to pull all data to make sure that it wasn't a network - // error or that the redirect url hasn't been updated. 
- let mut backend = DefaultBackend::default(); - let (digest_text, _index, redirect_url) = get_everything(&mut backend, &self.url, status)?; - - let current_digest = - atry!(DigestData::from_str(&digest_text); ["bad SHA256 digest from bundle"]); - - if self.cached_digest != current_digest { - // Crap! The backend isn't what we thought it was. Rewrite the - // digest file so that next time we'll start afresh. - - file_create_write(&self.digest_path, |f| { - writeln!(f, "{}", current_digest.to_string()) - })?; - bail!("backend digest changed; rerun tectonic to use updated information"); - } - - if self.redirect_url != redirect_url { - // The redirect url has changed, let's update it. - let redirect_path = make_txt_path(&self.redirect_base, &digest_text); - file_create_write(&redirect_path, |f| f.write_all(redirect_url.as_bytes()))?; - - self.redirect_url = redirect_url; - } - - // Index should've changed as the digest hasn't. - - // Phew, the backend hasn't changed. Don't check again. - self.checked_digest = true; - Ok(()) - } - - /// Find the path in the local cache for the provided file. Download the file first if it is - /// not in the local cache already. - fn path_for_name(&mut self, name: &str, status: &mut dyn StatusBackend) -> OpenResult { - if let Some(info) = self.contents.get(name) { - return match info.digest.create_two_part_path(&self.data_base) { - Ok(p) => OpenResult::Ok(p), - Err(e) => OpenResult::Err(e), - }; - } - - // The file is not in the cache and we are asked not to try to fetch it. - if self.only_cached { - return OpenResult::NotAvailable; - } - - let info = match self.index.get(name).cloned() { - Some(info) => info, - None => return OpenResult::NotAvailable, - }; - - // Bummer, we haven't seen this file before. We need to (try to) fetch - // the item from the backend, saving it to disk and calculating its - // digest ourselves, then enter it in the cache and in our manifest. - // Fun times. Because we're touching the backend, we need to verify that - // its digest is what we think. - - if let Err(e) = self.check_digest(status) { - return OpenResult::Err(e); - } - - // The bundle's overall digest is OK. Now try open the file. If it's - // not available, cache that result, since LaTeX compilations commonly - // touch nonexistent files. If we didn't maintain the negative cache, - // we'd have to touch the network for virtually every compilation. - - let content = match get_file( - &mut self.tar_data, - name, - info.offset, - info.length as usize, - status, - ) { - Ok(c) => c, - Err(e) => return OpenResult::Err(e), - }; - - // OK, we can stream the file to a temporary location on disk, - // computing its SHA256 as we go. - - let length = content.len(); - - let mut digest_builder = digest::create(); - digest_builder.update(&content); - - let digest = DigestData::from(digest_builder); - - let final_path = match digest.create_two_part_path(&self.data_base) { - Ok(p) => p, - Err(e) => return OpenResult::Err(e), - }; - - // Perform a racy check for the destination existing, because this - // matters on Windows: if the destination is already there, we'll get - // an error because the destination is marked read-only. Assuming - // non-pathological filesystem manipulation, though, we'll only be - // subject to the race once. - - if !final_path.exists() { - if let Err(e) = file_create_write(&final_path, |f| f.write_all(&content)) { - return OpenResult::Err(e); - } - - // Now we can make the file readonly. 
It would be nice to set the - // permissions using the already-open file handle owned by the - // tempfile, but mkstemp doesn't give us access. - let mut perms = match fs::metadata(&final_path) { - Ok(p) => p, - Err(e) => { - return OpenResult::Err(e.into()); - } - } - .permissions(); - perms.set_readonly(true); - - if let Err(e) = fs::set_permissions(&final_path, perms) { - return OpenResult::Err(e.into()); - } - } - - // And finally add a record of this file to our manifest. Note that - // we're opening and closing this file every time we load a new file; - // not so efficient, but whatever. - - if let Err(e) = self.record_cache_result(name, length as u64, digest) { - return OpenResult::Err(e); - } - - OpenResult::Ok(final_path) - } -} - -impl IoProvider for CachedITarBundle { - fn input_open_name( - &mut self, - name: &str, - status: &mut dyn StatusBackend, - ) -> OpenResult { - let path = match self.path_for_name(name, status) { - OpenResult::Ok(p) => p, - OpenResult::NotAvailable => return OpenResult::NotAvailable, - OpenResult::Err(e) => return OpenResult::Err(e), - }; - - let f = match File::open(&path) { - Ok(f) => f, - Err(e) => return OpenResult::Err(e.into()), - }; - - OpenResult::Ok(InputHandle::new_read_only( - name, - BufReader::new(f), - InputOrigin::Other, - )) - } -} - -impl Bundle for CachedITarBundle { - fn get_digest(&mut self, _status: &mut dyn StatusBackend) -> Result { - Ok(self.cached_digest) - } -} - -/// A convenience method to provide a better error message when writing to a created file. -fn file_create_write(path: P, write_fn: F) -> Result<()> -where - P: AsRef, - F: FnOnce(&mut File) -> std::result::Result<(), E>, - E: std::error::Error + 'static + Sync + Send, -{ - let path = path.as_ref(); - let mut f = atry!(File::create(path); ["couldn't open {} for writing", - path.display()]); - atry!(write_fn(&mut f); ["couldn't write to {}", path.display()]); - Ok(()) -} - -fn cache_dir(path: &str, custom_cache_root: Option<&Path>) -> Result { - if let Some(root) = custom_cache_root { - if !root.is_dir() { - bail!("Custom cache path {} is not a directory", root.display()); - } - let full_path = root.join(path); - atry!(fs::create_dir_all(&full_path); ["failed to create directory {}", full_path.display()]); - Ok(full_path) - } else { - Ok(app_dirs::user_cache_dir(path).map_err(SyncError::new)?) - } -} diff --git a/src/io/dirbundle.rs b/src/io/dirbundle.rs deleted file mode 100644 index 3a898bcde5..0000000000 --- a/src/io/dirbundle.rs +++ /dev/null @@ -1,40 +0,0 @@ -use std::{fs::File, io::BufReader, path::PathBuf}; - -use super::{Bundle, InputHandle, InputOrigin, IoProvider, OpenResult}; -use crate::status::StatusBackend; - -pub struct DirBundle { - dir: PathBuf, -} - -impl DirBundle { - pub fn new(dir: PathBuf) -> DirBundle { - DirBundle { dir } - } -} - -impl IoProvider for DirBundle { - fn input_open_name( - &mut self, - name: &str, - _status: &mut dyn StatusBackend, - ) -> OpenResult { - let mut path = self.dir.clone(); - path.push(name); - - if path.is_file() { - match File::open(path) { - Err(e) => OpenResult::Err(e.into()), - Ok(f) => OpenResult::Ok(InputHandle::new( - name, - BufReader::new(f), - InputOrigin::Filesystem, - )), - } - } else { - OpenResult::NotAvailable - } - } -} - -impl Bundle for DirBundle {} diff --git a/src/io/mod.rs b/src/io/mod.rs index f6a4a6760c..c18997f5dc 100644 --- a/src/io/mod.rs +++ b/src/io/mod.rs @@ -3,15 +3,10 @@ //! Extensions to Tectonic’s pluggable I/O backend. 
diff --git a/src/io/mod.rs b/src/io/mod.rs
index f6a4a6760c..c18997f5dc 100644
--- a/src/io/mod.rs
+++ b/src/io/mod.rs
@@ -3,15 +3,10 @@
 
 //! Extensions to Tectonic’s pluggable I/O backend.
 
-use std::{io::Read, str::FromStr};
-use tectonic_errors::{anyhow::bail, atry, Result};
 use tectonic_status_base::StatusBackend;
 
-pub mod cached_itarbundle;
-pub mod dirbundle;
 pub mod format_cache;
 pub mod memory;
-pub mod zipbundle;
 
 // Convenience re-exports.
 
@@ -28,53 +23,6 @@ pub use tectonic_io_base::{
 
 pub use self::memory::MemoryIo;
 
-/// A special IoProvider that can make TeX format files.
-///
-/// A “bundle” is expected to contain a large number of TeX support files —
-/// for instance, a compilation of a TeXLive distribution. In terms of the
-/// software architecture, though, what is special about a bundle is that one
-/// can generate one or more TeX format files from its contents without
-/// reference to any other I/O resources.
-pub trait Bundle: IoProvider {
-    /// Get a cryptographic digest summarizing this bundle’s contents.
-    ///
-    /// The digest summarizes the exact contents of every file in the bundle.
-    /// It is computed from the sorted names and SHA256 digests of the
-    /// component files [as implemented in the script
-    /// builder/make-zipfile.py](https://github.com/tectonic-typesetting/tectonic-staging/blob/master/builder/make-zipfile.py#L138)
-    /// in the `tectonic-staging` module.
-    ///
-    /// The default implementation gets the digest from a file named
-    /// `SHA256SUM`, which is expected to contain the digest in hex-encoded
-    /// format.
-    fn get_digest(&mut self, status: &mut dyn StatusBackend) -> Result<DigestData> {
-        let digest_text = match self.input_open_name(digest::DIGEST_NAME, status) {
-            OpenResult::Ok(h) => {
-                let mut text = String::new();
-                h.take(64).read_to_string(&mut text)?;
-                text
-            }
-
-            OpenResult::NotAvailable => {
-                // Broken or un-cacheable backend.
-                bail!("bundle does not provide needed SHA256SUM file");
-            }
-
-            OpenResult::Err(e) => {
-                return Err(e);
-            }
-        };
-
-        Ok(atry!(DigestData::from_str(&digest_text); ["corrupted SHA256 digest data"]))
-    }
-}
-
-impl<B: Bundle + ?Sized> Bundle for Box<B> {
-    fn get_digest(&mut self, status: &mut dyn StatusBackend) -> Result<DigestData> {
-        (**self).get_digest(status)
-    }
-}
-
 // Helper for testing. FIXME: I want this to be conditionally compiled with
 // #[cfg(test)] but things break if I do that.
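The removed doc comment above describes how a bundle digest is derived: from the sorted names and SHA256 digests of the component files. The sketch below illustrates that scheme; the authoritative byte-level framing is `builder/make-zipfile.py` in tectonic-staging, so the separator and newline choices here are guesses:

```rust
use sha2::{Digest, Sha256};

/// Sketch of the digest-over-sorted-entries scheme described above.
/// `entries` pairs each file name with that file's hex-encoded SHA256.
/// The "  " separator and trailing newline are illustrative assumptions.
fn bundle_digest(mut entries: Vec<(String, String)>) -> String {
    entries.sort(); // sort by file name, per the removed doc comment

    let mut hasher = Sha256::new();
    for (name, file_digest_hex) in &entries {
        hasher.update(file_digest_hex.as_bytes());
        hasher.update(b"  ");
        hasher.update(name.as_bytes());
        hasher.update(b"\n");
    }

    // Hex-encode the final digest by hand to avoid extra dependencies.
    hasher
        .finalize()
        .iter()
        .map(|b| format!("{:02x}", b))
        .collect()
}
```

The point of hashing sorted entries rather than raw archive bytes is that the digest stays stable across re-packaging, as long as the logical file contents are unchanged.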
diff --git a/src/lib.rs b/src/lib.rs
index 0afe9bf1c1..e0eb423572 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -75,19 +75,16 @@
 //! The [`driver`] module provides a high-level interface for driving the
 //! engines in more realistic circumstances.
 
-mod app_dirs;
 pub mod config;
 pub mod digest;
 #[cfg(feature = "serialization")]
-pub mod document;
+pub mod docmodel;
 pub mod driver;
 pub mod engines;
 pub mod errors;
 pub mod io;
 pub mod status;
 pub mod unstable_opts;
-#[cfg(feature = "serialization")]
-pub mod workspace;
 
 // Note: this module is intentionally *not* gated by #[cfg(test)] -- see its
 // docstring for details.
@@ -101,13 +98,9 @@ pub use crate::engines::xdvipdfmx::XdvipdfmxEngine;
 pub use crate::errors::{Error, ErrorKind, Result};
 
 // Convenience re-exports for migration into our multi-crate setup
+pub use tectonic_engine_xetex::FORMAT_SERIAL;
 pub use tectonic_status_base::{tt_error, tt_note, tt_warning};
 
-// Increase this whenever the engine internals change such that the contents
-// of the "format" files must be regenerated.
-
-pub const FORMAT_SERIAL: u32 = 29;
-
 /// Compile LaTeX text to a PDF.
 ///
 /// This function is an all-in-one interface to the main Tectonic workflow. Given
diff --git a/src/status/termcolor.rs b/src/status/termcolor.rs
index 42764c6497..f2151be78d 100644
--- a/src/status/termcolor.rs
+++ b/src/status/termcolor.rs
@@ -15,6 +15,7 @@ use super::{ChatterLevel, MessageKind, StatusBackend};
 
 pub struct TermcolorStatusBackend {
     chatter: ChatterLevel,
+    always_stderr: bool,
     stdout: StandardStream,
     stderr: StandardStream,
    note_spec: ColorSpec,
@@ -39,6 +40,7 @@ impl TermcolorStatusBackend {
 
         TermcolorStatusBackend {
             chatter,
+            always_stderr: false,
             stdout: StandardStream::stdout(ColorChoice::Auto),
             stderr: StandardStream::stderr(ColorChoice::Auto),
             note_spec,
@@ -48,6 +50,11 @@ impl TermcolorStatusBackend {
         }
     }
 
+    pub fn always_stderr(&mut self, setting: bool) -> &mut Self {
+        self.always_stderr = setting;
+        self
+    }
+
     fn styled<F>(&mut self, kind: MessageKind, f: F)
     where
         F: FnOnce(&mut StandardStream),
@@ -57,7 +64,13 @@ impl TermcolorStatusBackend {
         }
 
         let (spec, stream) = match kind {
-            MessageKind::Note => (&self.note_spec, &mut self.stdout),
+            MessageKind::Note => {
+                if self.always_stderr {
+                    (&self.note_spec, &mut self.stderr)
+                } else {
+                    (&self.note_spec, &mut self.stdout)
+                }
+            }
             MessageKind::Warning => (&self.warning_spec, &mut self.stderr),
             MessageKind::Error => (&self.error_spec, &mut self.stderr),
         };
@@ -76,7 +89,13 @@ impl TermcolorStatusBackend {
         }
 
         let stream = match kind {
-            MessageKind::Note => &mut self.stdout,
+            MessageKind::Note => {
+                if self.always_stderr {
+                    &mut self.stderr
+                } else {
+                    &mut self.stdout
+                }
+            }
             MessageKind::Warning => &mut self.stderr,
             MessageKind::Error => &mut self.stderr,
         };
@@ -108,7 +127,11 @@ impl TermcolorStatusBackend {
 
     pub fn note_styled(&mut self, args: Arguments) {
         if self.chatter > ChatterLevel::Minimal {
-            writeln!(self.stdout, "{}", args).expect("write to stdout failed");
+            if self.always_stderr {
+                writeln!(self.stderr, "{}", args).expect("write to stderr failed");
+            } else {
+                writeln!(self.stdout, "{}", args).expect("write to stdout failed");
+            }
         }
     }
 
@@ -166,13 +189,19 @@ impl StatusBackend for TermcolorStatusBackend {
 
     fn note_highlighted(&mut self, before: &str, highlighted: &str, after: &str) {
         if self.chatter > ChatterLevel::Minimal {
-            write!(self.stdout, "{}", before).expect("write to stdout failed");
-            self.stdout
+            let stream = if self.always_stderr {
+                &mut self.stderr
+            } else {
+                &mut self.stdout
+            };
+
+            write!(stream, "{}", before).expect("write failed");
+            stream
                 .set_color(&self.highlight_spec)
-                .expect("write to stdout failed");
-            write!(self.stdout, "{}", highlighted).expect("write to stdout failed");
-            self.stdout.reset().expect("write to stdout failed");
-            writeln!(self.stdout, "{}", after).expect("write to stdout failed");
+                .expect("write failed");
+            write!(stream, "{}", highlighted).expect("write failed");
+            stream.reset().expect("write failed");
+            writeln!(stream, "{}", after).expect("write failed");
         }
     }
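The new `always_stderr` switch added above is a chainable setter. A minimal usage sketch, assuming `new()` takes only the chatter level (the constructor's full signature isn't shown in this diff):

```rust
use tectonic::status::{termcolor::TermcolorStatusBackend, ChatterLevel};

fn main() {
    let mut status = TermcolorStatusBackend::new(ChatterLevel::Normal);
    // Route notes to stderr along with warnings and errors, so that stdout
    // stays clean -- useful when a command's real output is being piped.
    status.always_stderr(true);
}
```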
diff --git a/src/test_util.rs b/src/test_util.rs
index e2fc2e8525..1a37b116e8 100644
--- a/src/test_util.rs
+++ b/src/test_util.rs
@@ -35,12 +35,13 @@
 //! That call simultaneously tells this module where to find the test assets,
 //! and also activates the test mode.
 
-use std::{collections::HashSet, env, ffi::OsStr, path::PathBuf};
+use std::{env, ffi::OsStr, path::PathBuf};
+use tectonic_bundles::{dir::DirBundle, Bundle};
 use tectonic_errors::Result;
 
 use crate::{
     digest::DigestData,
-    io::{Bundle, FilesystemIo, InputHandle, IoProvider, OpenResult},
+    io::{InputHandle, IoProvider, OpenResult},
     status::StatusBackend,
 };
 
@@ -101,16 +102,15 @@ pub fn test_path(parts: &[&str]) -> PathBuf {
 }
 
 /// Utility for being able to treat the "assets/" directory as a bundle.
-pub struct TestBundle(FilesystemIo);
+///
+/// I think we want to always wrap DirBundle so that we can override
+/// `get_digest()`? But once DirBundle implements `get_digest()` for real we
+/// could consider just dropping this type altogether.
+pub struct TestBundle(DirBundle);
 
 impl Default for TestBundle {
     fn default() -> Self {
-        TestBundle(FilesystemIo::new(
-            &test_path(&["assets"]),
-            false,
-            false,
-            HashSet::new(),
-        ))
+        TestBundle(DirBundle::new(&test_path(&["assets"])))
     }
 }
 
@@ -129,4 +129,8 @@ impl Bundle for TestBundle {
     fn get_digest(&mut self, _status: &mut dyn StatusBackend) -> Result<DigestData> {
         Ok(DigestData::zeros())
     }
+
+    fn all_files(&mut self, status: &mut dyn StatusBackend) -> Result<Vec<String>> {
+        self.0.all_files(status)
+    }
 }
diff --git a/tests/executable.rs b/tests/executable.rs
index 062d8e2049..339b17c522 100644
--- a/tests/executable.rs
+++ b/tests/executable.rs
@@ -567,6 +567,16 @@ fn v2_new_build_multiple_outputs() {
     success_or_panic(output);
 }
 
+const SHELL_ESCAPE_TEST_DOC: &str = r#"\immediate\write18{mkdir shellwork}
+\immediate\write18{echo 123 >shellwork/persist}
+\ifnum123=\input{shellwork/persist}
+a
+\else
+\ohnotheshellescapedidntwork
+\fi
+\bye
+"#;
+
 /// Test that shell escape actually runs the commands
 #[test]
 fn shell_escape() {
@@ -576,15 +586,49 @@ fn shell_escape() {
     let output = run_tectonic_with_stdin(
         tempdir.path(),
         &[&fmt_arg, "-", "-Zshell-escape"],
-        r#"\immediate\write18{mkdir shellwork}
-           \immediate\write18{echo 123 >shellwork/persist}
-           \ifnum123=\input{shellwork/persist}
-           a
-           \else
-           \ohnotheshellescapedidntwork
-           \fi
-           \bye
-           "#,
+        SHELL_ESCAPE_TEST_DOC,
     );
     success_or_panic(output);
 }
+
+/// Test that shell-escape can be killed by a command-line option
+#[test]
+fn shell_escape_cli_override() {
+    let fmt_arg = get_plain_format_arg();
+    let tempdir = setup_and_copy_files(&[]);
+
+    let output = run_tectonic_with_stdin(
+        tempdir.path(),
+        &[&fmt_arg, "--untrusted", "-", "-Zshell-escape"],
+        SHELL_ESCAPE_TEST_DOC,
+    );
+    error_or_panic(output);
+}
+
+/// Test that shell-escape can be killed by an environment variable
+#[test]
+fn shell_escape_env_override() {
+    let fmt_arg = get_plain_format_arg();
+    let tempdir = setup_and_copy_files(&[]);
+
+    // Note that we intentionally set the variable to 0 below -- it takes
+    // effect if it has ANY value, not just a "truthy" one.
+
+    let mut command = prep_tectonic(tempdir.path(), &[&fmt_arg, "-", "-Zshell-escape"]);
+    command
+        .stdin(Stdio::piped())
+        .stdout(Stdio::piped())
+        .stderr(Stdio::piped())
+        .env("TECTONIC_UNTRUSTED_MODE", "0");
+
+    println!("running {:?}", command);
+    let mut child = command.spawn().expect("tectonic failed to start");
+    write!(child.stdin.as_mut().unwrap(), "{}", SHELL_ESCAPE_TEST_DOC)
+        .expect("failed to send data to tectonic subprocess");
+
+    let output = child
+        .wait_with_output()
+        .expect("failed to wait on tectonic subprocess");
+
+    error_or_panic(output);
+}
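The env-override test above relies on `$TECTONIC_UNTRUSTED_MODE` taking effect even when set to `"0"`. A sketch of those semantics, using a hypothetical helper (the real check lives inside the tectonic CLI, not in this function):

```rust
use std::env;

/// Hypothetical illustration: per the changelog, any non-empty value of the
/// variable triggers untrusted mode -- "0" included -- so what matters is
/// presence, not truthiness.
fn untrusted_mode_requested() -> bool {
    env::var_os("TECTONIC_UNTRUSTED_MODE").map_or(false, |v| !v.is_empty())
}
```

This presence-based design means a sandboxing wrapper can set the variable once and be confident that no downstream command-line argument will re-enable shell-escape.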
diff --git a/tests/formats.rs b/tests/formats.rs
index f625a0d4f2..f3dffa5c73 100644
--- a/tests/formats.rs
+++ b/tests/formats.rs
@@ -196,6 +196,6 @@ fn plain_format() {
     test_format_generation(
         "plain.tex",
         "plain.fmt",
-        "8e33c4c9af66ddb064a36749db1e0ba681bbebd1a896d2886745a0efa9a745a1",
+        "7012eeebbbcec81f6ce2c4d232013e306898f211fa252685434a8624ac7323d4",
     )
 }
diff --git a/tests/tex-outputs.rs b/tests/tex-outputs.rs
index f0c625ab55..8fb34c3ca0 100644
--- a/tests/tex-outputs.rs
+++ b/tests/tex-outputs.rs
@@ -166,7 +166,8 @@ impl TestCase {
         }
 
         if self.check_synctex {
-            ExpectedInfo::read_with_extension_gz(&mut p, "synctex.gz").test_from_collection(&files);
+            ExpectedInfo::read_with_extension_rooted_gz(&mut p, "synctex.gz")
+                .test_from_collection(&files);
         }
 
         if self.check_pdf {
diff --git a/tests/tex-outputs/synctex.synctex.gz b/tests/tex-outputs/synctex.synctex.gz
index 905b039b0a..4803c00843 100644
Binary files a/tests/tex-outputs/synctex.synctex.gz and b/tests/tex-outputs/synctex.synctex.gz differ
diff --git a/tests/util/mod.rs b/tests/util/mod.rs
index 6e1bec5466..559787fddb 100644
--- a/tests/util/mod.rs
+++ b/tests/util/mod.rs
@@ -131,14 +131,33 @@ impl ExpectedInfo {
         Self::read(pbase)
     }
 
-    pub fn read_with_extension_gz(pbase: &mut PathBuf, extension: &str) -> Self {
+    /// Special handling for SyncTeX files -- we need to decode the gzip data
+    /// and fill in the absolute paths of the output files (cf. #720).
+    pub fn read_with_extension_rooted_gz(pbase: &mut PathBuf, extension: &str) -> Self {
         pbase.set_extension(extension);
         let name = pbase.file_name().unwrap().to_str().unwrap().to_owned();
 
-        let mut dec = GzDecoder::new(File::open(pbase).unwrap());
+        let mut dec = GzDecoder::new(File::open(&pbase).unwrap());
         let mut contents = Vec::new();
         dec.read_to_end(&mut contents).unwrap();
 
+        // Special SyncTeX rooting. We need a *mega* hack since there is a
+        // byte-offset field whose value depends on the length of the file
+        // prefix.
+        let root = format!(
+            "{}{}",
+            pbase.parent().unwrap().to_str().unwrap(),
+            std::path::MAIN_SEPARATOR
+        );
+        let contents = String::from_utf8(contents)
+            .unwrap()
+            .replace("${ROOT}", &root)
+            .replace(
+                "${len(ROOT)+106}",
+                &(root.as_bytes().len() + 106).to_string(),
+            )
+            .into_bytes();
+
         ExpectedInfo {
             name,
            contents,