diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1e7caa9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +Cargo.lock +target/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e56f1a..a2849f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +# 1.0.0-rc.0 - 2025-03-25 + +- Add two crate level decoding functions (for `Vec` and const generic arrays) +- Rename the error types to convey purpose rather than type they are decoding +- Add `newer-rust-version` feature to enable `core::error::Error` +- Remove everything except the decoding functions and associated error types, so that types can be stabilized quickly +- Prepare the crate for stabilization!!! + # 0.3.0 - 2024-09-18 - Re-implement `HexWriter` [#113](https://github.com/rust-bitcoin/hex-conservative/pull/113) diff --git a/Cargo-minimal.lock b/Cargo-minimal.lock index 1ee8980..89c344d 100644 --- a/Cargo-minimal.lock +++ b/Cargo-minimal.lock @@ -4,4 +4,13 @@ version = 3 [[package]] name = "hex-conservative" -version = "0.3.0" +version = "1.0.0-rc.0" +dependencies = [ + "if_rust_version", +] + +[[package]] +name = "if_rust_version" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46dbcb333e86939721589d25a3557e180b52778cb33c7fdfe9e0158ff790d5ec" diff --git a/Cargo-recent.lock b/Cargo-recent.lock index 1ee8980..89c344d 100644 --- a/Cargo-recent.lock +++ b/Cargo-recent.lock @@ -4,4 +4,13 @@ version = 3 [[package]] name = "hex-conservative" -version = "0.3.0" +version = "1.0.0-rc.0" +dependencies = [ + "if_rust_version", +] + +[[package]] +name = "if_rust_version" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46dbcb333e86939721589d25a3557e180b52778cb33c7fdfe9e0158ff790d5ec" diff --git a/Cargo.lock b/Cargo.lock deleted file mode 100644 index 1ee8980..0000000 --- a/Cargo.lock +++ /dev/null @@ -1,7 +0,0 @@ -# This file is automatically @generated by Cargo. 
-# It is not intended for manual editing. -version = 3 - -[[package]] -name = "hex-conservative" -version = "0.3.0" diff --git a/Cargo.toml b/Cargo.toml index bb15997..c28c4c5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "hex-conservative" -version = "0.3.0" +version = "1.0.0-rc.0" authors = ["Martin Habovštiak ", "Andrew Poelstra "] license = "CC0-1.0" repository = "https://github.com/rust-bitcoin/hex-conservative" @@ -19,10 +19,13 @@ rustdoc-args = ["--cfg", "docsrs"] [features] default = ["std"] +# Activates the usage of standard library, namely implementations of std::error::Error std = ["alloc"] +# Activates the usage of standard alloc library (enables Vec) alloc = [] +# Enables detection of newer rust versions to provide additional features +# Turning it on may pull in dependencies that run build scripts and prolong compile time. +newer-rust-version = ["dep:if_rust_version"] [dependencies] - - -[dev-dependencies] +if_rust_version = { version = "1.0.0", optional = true } diff --git a/README.md b/README.md index e1cd8d1..2fba07b 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,43 @@ -# Bitcoin Hexadecimal Library +# A Rust hexadecimal decoding library -General purpose hex encoding/decoding library with a conservative MSRV and dependency policy. +General purpose hex decoding library with a conservative MSRV and dependency policy. + +You're currently looking at the stable crate which has advanced features removed to make +stabilization quicker and thus allowing downstream crates to stabilize quicker too. To get the +full feature set check the lower (0.x.y) versions. + +## Stabilization strategy + +Because downstream crates may need to return hex errors in their APIs and they need to be +stabilized soon, this crate only exposes the errors and two basic decoding functions. 
This +should already help with the vast majority of the cases and we're sufficiently confident that +these errors won't have a breaking change any time soon (possibly never). + +If you're writing a binary you don't need to worry about any of this and just use the unstable +version for now. If you're writing a library you should use these stable errors in the API but +you may internally depend on the unstable crate version to get the advanced features that won't +affect your API. This way your API can stabilize before all features in this crate are fully +stable and you still can use all of them. + +## Crate features + +* `std` - enables the standard library, on by default. +* `alloc` - enables features that require allocation such as decoding into `Vec`, implied +by `std`. +* `newer-rust-version` - enables Rust version detection and thus newer features, may add + dependency on a feature detection crate to reduce compile times. This + feature is expected to do nothing once the native detection is in Rust + and our MSRV is at least that version. We may also remove the feature + gate in 2.0 with semver trick once that happens. + +## MSRV policy + +The MSRV of the crate is currently 1.63.0 and we don't intend to bump it until the newer Rust +version is at least two years old and also included in Debian stable (1.63 is in Debian 12 at +the moment). + +Note though that the dependencies may have looser policy. This is not considered breaking/wrong +- you would just need to pin them in `Cargo.lock` (not `.toml`). ## Minimum Supported Rust Version (MSRV) @@ -8,7 +45,7 @@ This library should compile with almost any combination of features on **Rust 1. reserve the right to use features to guard compiler specific code so `--all-features` may not work using the MSRV toolchain. -### Githooks +## Githooks To assist devs in catching errors _before_ running CI we provide some githooks. 
If you do not already have locally configured githooks you can use the ones in this repository by running, in the diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..04ce0c2 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,364 @@ +// SPDX-License-Identifier: CC0-1.0 + +//! Error code for the `hex-conservative` crate. + +use core::convert::Infallible; +use core::fmt; +#[cfg(feature = "std")] +use std::error::Error as StdError; +#[cfg(all(not(feature = "std"), feature = "newer-rust-version"))] +if_rust_version::if_rust_version! { + >= 1.81 { + use core::error::Error as StdError; + } +} + +#[cfg(feature = "std")] +macro_rules! if_std_error { + ({ $($if_yes:tt)* } $(else { $($if_not:tt)* })?) => { + #[cfg_attr(docsrs, doc(cfg(any(feature = "std", all(feature = "newer-rust-version", rust_version = ">= 1.81.0")))))] + $($if_yes)* + } +} + +#[cfg(all(not(feature = "std"), feature = "newer-rust-version"))] +macro_rules! if_std_error { + ({ $($if_yes:tt)* } $(else { $($if_not:tt)* })?) => { + if_rust_version::if_rust_version! { + >= 1.81 { + #[cfg_attr(docsrs, doc(cfg(any(feature = "std", all(feature = "newer-rust-version", rust_version = ">= 1.81.0")))))] + $($if_yes)* + } $(else { $($if_not)* })? + } + } +} + +#[cfg(all(not(feature = "std"), not(feature = "newer-rust-version")))] +macro_rules! if_std_error { + ({ $($if_yes:tt)* } $(else { $($if_not:tt)* })?) => { + $($($if_not)*)? + } +} + +/// Formats error. +/// +/// If `std` feature is OFF appends error source (delimited by `: `). We do this because +/// `e.source()` is only available in std builds, without this macro the error source is lost for +/// no-std builds. +macro_rules! write_err { + ($writer:expr, $string:literal $(, $args:expr)*; $source:expr) => { + { + if_std_error! { + { + { + let _ = &$source; // Prevents clippy warnings. 
+ write!($writer, $string $(, $args)*) + } + } else { + { + write!($writer, concat!($string, ": {}") $(, $args)*, $source) + } + } + } + } + } +} +pub(crate) use write_err; + +/// Error returned when hex decoding a hex string with variable length. +/// +/// This represents the first error encountered during decoding, however we may add the remaining +/// ones in the future. +/// +/// This error differs from `DecodeFixedSizedBytesError` in that the number of bytes is only known +/// at run time - e.g. when decoding `Vec`. +#[cfg(feature = "alloc")] +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum DecodeDynSizedBytesError { + /// Non-hexadecimal character. + InvalidChar(InvalidCharError), + /// Purported hex string had odd (not even) length. + OddLengthString(OddLengthStringError), +} + +#[cfg(feature = "alloc")] +impl DecodeDynSizedBytesError { + /// Adds `by_bytes` to all character positions stored inside. + /// + /// If you're parsing a larger string that consists of multiple hex sub-strings and want to + /// return `InvalidCharError` you may need to use this function so that the callers of your + /// parsing function can tell the exact position where decoding failed relative to the start of + /// the string passed into your parsing function. + /// + /// Note that this function has the standard Rust overflow behavior because you should only + /// ever pass in the position of the parsed hex string relative to the start of the entire + /// input. In that case overflow is impossible. + /// + /// This method is specifically designed to be used with [`map_err`](Result::map_err) method of + /// [`Result`]. 
+ #[inline] + pub fn offset(self, by_bytes: usize) -> Self { + match self { + DecodeDynSizedBytesError::InvalidChar(error) => { + DecodeDynSizedBytesError::InvalidChar(error.offset(by_bytes)) + }, + DecodeDynSizedBytesError::OddLengthString(error) => { + DecodeDynSizedBytesError::OddLengthString(error) + }, + } + } +} + +#[cfg(feature = "alloc")] +impl From for DecodeDynSizedBytesError { + #[inline] + fn from(never: Infallible) -> Self { match never {} } +} + +#[cfg(feature = "alloc")] +impl fmt::Display for DecodeDynSizedBytesError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use DecodeDynSizedBytesError as E; + + match *self { + E::InvalidChar(ref e) => + write_err!(f, "failed to decode hex"; e), + E::OddLengthString(ref e) => + write_err!(f, "failed to decode hex"; e), + } + } +} + +#[cfg(feature = "alloc")] +if_std_error! {{ + impl StdError for DecodeDynSizedBytesError { + fn source(&self) -> Option<&(dyn StdError + 'static)> { + use DecodeDynSizedBytesError as E; + + match *self { + E::InvalidChar(ref e) => Some(e), + E::OddLengthString(ref e) => Some(e), + } + } + } +}} + +#[cfg(feature = "alloc")] +impl From for DecodeDynSizedBytesError { + #[inline] + fn from(e: InvalidCharError) -> Self { Self::InvalidChar(e) } +} + +#[cfg(feature = "alloc")] +impl From for DecodeDynSizedBytesError { + #[inline] + fn from(e: OddLengthStringError) -> Self { Self::OddLengthString(e) } +} + +/// Error returned when hex decoding bytes whose length is known at compile time. +/// +/// This error differs from `DecodeDynSizedBytesError` in that the number of bytes is known at +/// compile time - e.g. when decoding to an array of bytes. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum DecodeFixedSizedBytesError { + /// Non-hexadecimal character. + InvalidChar(InvalidCharError), + /// Tried to parse fixed-length hash from a string with the wrong length. 
+ InvalidLength(InvalidLengthError), +} + +impl DecodeFixedSizedBytesError { + /// Adds `by_bytes` to all character positions stored inside. + /// + /// If you're parsing a larger string that consists of multiple hex sub-strings and want to + /// return `InvalidCharError` you may need to use this function so that the callers of your + /// parsing function can tell the exact position where decoding failed relative to the start of + /// the string passed into your parsing function. + /// + /// Note that this function has the standard Rust overflow behavior because you should only + /// ever pass in the position of the parsed hex string relative to the start of the entire + /// input. In that case overflow is impossible. + /// + /// This method is specifically designed to be used with [`map_err`](Result::map_err) method of + /// [`Result`]. + #[inline] + pub fn offset(self, by_bytes: usize) -> Self { + match self { + DecodeFixedSizedBytesError::InvalidChar(error) => { + DecodeFixedSizedBytesError::InvalidChar(error.offset(by_bytes)) + }, + DecodeFixedSizedBytesError::InvalidLength(error) => { + DecodeFixedSizedBytesError::InvalidLength(error) + }, + } + } +} + +impl From for DecodeFixedSizedBytesError { + #[inline] + fn from(never: Infallible) -> Self { match never {} } +} + +impl fmt::Display for DecodeFixedSizedBytesError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use DecodeFixedSizedBytesError as E; + + match *self { + E::InvalidChar(ref e) => write_err!(f, "failed to parse hex digit"; e), + E::InvalidLength(ref e) => write_err!(f, "failed to parse hex"; e), + } + } +} + +if_std_error! 
{{ + impl StdError for DecodeFixedSizedBytesError { + fn source(&self) -> Option<&(dyn StdError + 'static)> { + use DecodeFixedSizedBytesError as E; + + match *self { + E::InvalidChar(ref e) => Some(e), + E::InvalidLength(ref e) => Some(e), + } + } + } +}} + +impl From for DecodeFixedSizedBytesError { + #[inline] + fn from(e: InvalidCharError) -> Self { Self::InvalidChar(e) } +} + +impl From for DecodeFixedSizedBytesError { + #[inline] + fn from(e: InvalidLengthError) -> Self { Self::InvalidLength(e) } +} + +/// Invalid hex character. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct InvalidCharError { + pub(crate) invalid: u8, + pub(crate) pos: usize, +} + +impl From for InvalidCharError { + #[inline] + fn from(never: Infallible) -> Self { match never {} } +} + +impl InvalidCharError { + /// Returns the invalid character byte. + #[inline] + // We do not expose this because we want to eventually return a `char`. + // https://github.com/rust-bitcoin/hex-conservative/issues/100 + pub(crate) fn invalid_char(&self) -> u8 { self.invalid } + /// Returns the position of the invalid character byte. + #[inline] + pub fn pos(&self) -> usize { self.pos } + + /// Adds `by_bytes` to all character positions stored inside. + /// + /// **Important**: if you have `DecodeDynSizedBytesError` or `DecodeFixedSizedBytesError` you + /// should call the method *on them* - do not match them and manually call this method. Doing + /// so may lead to broken behavior in the future. + /// + /// If you're parsing a larger string that consists of multiple hex sub-strings and want to + /// return `InvalidCharError` you may need to use this function so that the callers of your + /// parsing function can tell the exact position where decoding failed relative to the start of + /// the string passed into your parsing function. 
+ /// + /// Note that this function has the standard Rust overflow behavior because you should only + /// ever pass in the position of the parsed hex string relative to the start of the entire + /// input. In that case overflow is impossible. + /// + /// This method is specifically designed to be used with [`map_err`](Result::map_err) method of + /// [`Result`]. + #[inline] + pub fn offset(mut self, by_bytes: usize) -> Self { + self.pos += by_bytes; + self + } +} + +impl fmt::Display for InvalidCharError { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "invalid hex char {} at pos {}", self.invalid_char(), self.pos()) + } +} + +if_std_error! {{ + impl StdError for InvalidCharError {} +}} + +/// Purported hex string had odd length. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct OddLengthStringError { + pub(crate) len: usize, +} + +impl From for OddLengthStringError { + #[inline] + fn from(never: Infallible) -> Self { match never {} } +} + +impl OddLengthStringError { + /// Returns the odd length of the input string. + #[inline] + pub fn length(&self) -> usize { self.len } +} + +impl fmt::Display for OddLengthStringError { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "the hex string is {} bytes long which is not an even number", self.length()) + } +} + +if_std_error! {{ + impl StdError for OddLengthStringError {} +}} + +/// Tried to parse fixed-length hash from a string with the wrong length. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct InvalidLengthError { + /// The expected length. + pub(crate) expected: usize, + /// The invalid length. + pub(crate) invalid: usize, +} + +impl From for InvalidLengthError { + #[inline] + fn from(never: Infallible) -> Self { match never {} } +} + +impl InvalidLengthError { + /// Returns the expected length. 
+ /// + /// Note that this represents both the number of bytes and the number of characters that needs + /// to be passed into the decoder, since the hex digits are ASCII and thus always 1-byte long. + #[inline] + pub fn expected_length(&self) -> usize { self.expected } + + /// Returns the number of *hex bytes* passed to the hex decoder. + /// + /// Note that this does not imply the number of characters nor hex digits since they may be + /// invalid (wide unicode chars). + #[inline] + pub fn invalid_length(&self) -> usize { self.invalid } +} + +impl fmt::Display for InvalidLengthError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "the hex string is {} bytes long but exactly {} bytes was required", + self.invalid_length(), + self.expected_length() + ) + } +} + +if_std_error! {{ + impl StdError for InvalidLengthError {} +}} diff --git a/src/iter.rs b/src/iter.rs new file mode 100644 index 0000000..34a67c8 --- /dev/null +++ b/src/iter.rs @@ -0,0 +1,422 @@ +// SPDX-License-Identifier: CC0-1.0 + +//! Iterator that converts hex to bytes. + +use core::convert::TryInto; +use core::iter::FusedIterator; +use core::str; +#[cfg(feature = "alloc")] +use crate::alloc::vec::Vec; +use crate::error::InvalidCharError; +#[cfg(feature = "alloc")] +use crate::error::OddLengthStringError; + +/// Iterator yielding bytes decoded from an iterator of pairs of hex digits. +#[derive(Debug)] +pub(crate) struct HexToBytesIter +where + I: Iterator, +{ + iter: I, + original_len: usize, +} + +impl<'a> HexToBytesIter> { + /// Constructs a new `HexToBytesIter` from a string slice. + /// + /// # Errors + /// + /// If the input string is of odd length. 
+ #[inline] + #[cfg(feature = "alloc")] + pub(crate) fn new(s: &'a str) -> Result { + if s.len() % 2 != 0 { + Err(OddLengthStringError { len: s.len() }) + } else { + Ok(Self::new_unchecked(s)) + } + } + + pub(crate) fn new_unchecked(s: &'a str) -> Self { + Self::from_pairs(HexDigitsIter::new_unchecked(s.as_bytes())) + } + + /// Writes all the bytes yielded by this `HexToBytesIter` to the provided slice. + /// + /// Stops writing if this `HexToBytesIter` yields an `InvalidCharError`. + /// + /// # Panics + /// + /// Panics if the length of this `HexToBytesIter` is not equal to the length of the provided + /// slice. + pub(crate) fn drain_to_slice(self, buf: &mut [u8]) -> Result<(), InvalidCharError> { + assert_eq!(self.len(), buf.len()); + let mut ptr = buf.as_mut_ptr(); + for byte in self { + // SAFETY: for loop iterates `len` times, and `buf` has length `len` + unsafe { + core::ptr::write(ptr, byte?); + ptr = ptr.add(1); + } + } + Ok(()) + } + + /// Writes all the bytes yielded by this `HexToBytesIter` to a `Vec`. + /// + /// This is equivalent to the combinator chain `iter().map().collect()` but was found by + /// benchmarking to be faster. + #[cfg(feature = "alloc")] + pub(crate) fn drain_to_vec(self) -> Result, InvalidCharError> { + let len = self.len(); + let mut ret = Vec::with_capacity(len); + let mut ptr = ret.as_mut_ptr(); + for byte in self { + // SAFETY: for loop iterates `len` times, and `ret` has a capacity of at least `len` + unsafe { + // docs: "`core::ptr::write` is appropriate for initializing uninitialized memory" + core::ptr::write(ptr, byte?); + ptr = ptr.add(1); + } + } + // SAFETY: `len` elements have been initialized, and `ret` has a capacity of at least `len` + unsafe { + ret.set_len(len); + } + Ok(ret) + } +} + +impl HexToBytesIter +where + I: Iterator + ExactSizeIterator, +{ + /// Constructs a custom hex decoding iterator from another iterator. 
+ #[inline] + pub(crate) fn from_pairs(iter: I) -> Self { Self { original_len: iter.len(), iter } } +} + +impl Iterator for HexToBytesIter +where + I: Iterator + ExactSizeIterator, +{ + type Item = Result; + + #[inline] + fn next(&mut self) -> Option { + let [hi, lo] = self.iter.next()?; + Some(hex_chars_to_byte(hi, lo).map_err(|(c, is_high)| InvalidCharError { + invalid: c, + pos: if is_high { + (self.original_len - self.iter.len() - 1) * 2 + } else { + (self.original_len - self.iter.len() - 1) * 2 + 1 + }, + })) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { self.iter.size_hint() } + + #[inline] + fn nth(&mut self, n: usize) -> Option { + let [hi, lo] = self.iter.nth(n)?; + Some(hex_chars_to_byte(hi, lo).map_err(|(c, is_high)| InvalidCharError { + invalid: c, + pos: if is_high { + (self.original_len - self.iter.len() - 1) * 2 + } else { + (self.original_len - self.iter.len() - 1) * 2 + 1 + }, + })) + } +} + +impl DoubleEndedIterator for HexToBytesIter +where + I: Iterator + DoubleEndedIterator + ExactSizeIterator, +{ + #[inline] + fn next_back(&mut self) -> Option { + let [hi, lo] = self.iter.next_back()?; + Some(hex_chars_to_byte(hi, lo).map_err(|(c, is_high)| InvalidCharError { + invalid: c, + pos: if is_high { self.iter.len() * 2 } else { self.iter.len() * 2 + 1 }, + })) + } + + #[inline] + fn nth_back(&mut self, n: usize) -> Option { + let [hi, lo] = self.iter.nth_back(n)?; + Some(hex_chars_to_byte(hi, lo).map_err(|(c, is_high)| InvalidCharError { + invalid: c, + pos: if is_high { self.iter.len() * 2 } else { self.iter.len() * 2 + 1 }, + })) + } +} + +impl ExactSizeIterator for HexToBytesIter where I: Iterator + ExactSizeIterator +{} + +impl FusedIterator for HexToBytesIter where + I: Iterator + ExactSizeIterator + FusedIterator +{ +} + +/// An internal iterator returning hex digits from a string. 
+/// +/// Generally you shouldn't need to refer to this or bother with it and just use +/// [`HexToBytesIter::new`] consuming the returned value and use `HexSliceToBytesIter` if you need +/// to refer to the iterator in your types. +#[derive(Debug)] +pub(crate) struct HexDigitsIter<'a> { + // Invariant: the length of the chunks is 2. + // Technically, this is `iter::Map` but we can't use it because fn is anonymous. + // We can swap this for actual `ArrayChunks` once it's stable. + iter: core::slice::ChunksExact<'a, u8>, +} + +impl<'a> HexDigitsIter<'a> { + #[inline] + fn new_unchecked(digits: &'a [u8]) -> Self { Self { iter: digits.chunks_exact(2) } } +} + +impl Iterator for HexDigitsIter<'_> { + type Item = [u8; 2]; + + #[inline] + fn next(&mut self) -> Option { + self.iter.next().map(|digits| digits.try_into().expect("HexDigitsIter invariant")) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { self.iter.size_hint() } + + #[inline] + fn nth(&mut self, n: usize) -> Option { + self.iter.nth(n).map(|digits| digits.try_into().expect("HexDigitsIter invariant")) + } +} + +impl DoubleEndedIterator for HexDigitsIter<'_> { + #[inline] + fn next_back(&mut self) -> Option { + self.iter.next_back().map(|digits| digits.try_into().expect("HexDigitsIter invariant")) + } + + #[inline] + fn nth_back(&mut self, n: usize) -> Option { + self.iter.nth_back(n).map(|digits| digits.try_into().expect("HexDigitsIter invariant")) + } +} + +impl ExactSizeIterator for HexDigitsIter<'_> {} + +impl core::iter::FusedIterator for HexDigitsIter<'_> {} + +/// `hi` and `lo` are bytes representing hex characters. +/// +/// Returns the valid byte or the invalid input byte and a bool indicating error for `hi` or `lo`. 
+fn hex_chars_to_byte(hi: u8, lo: u8) -> Result { + let hih = (hi as char).to_digit(16).ok_or((hi, true))?; + let loh = (lo as char).to_digit(16).ok_or((lo, false))?; + + let ret = (hih << 4) + loh; + Ok(ret as u8) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[cfg(feature = "alloc")] + #[test] + fn decode_iter_forward() { + let hex = "deadbeef"; + let bytes = [0xde, 0xad, 0xbe, 0xef]; + + for (i, b) in HexToBytesIter::new(hex).unwrap().enumerate() { + assert_eq!(b.unwrap(), bytes[i]); + } + + let mut iter = HexToBytesIter::new(hex).unwrap(); + for i in (0..=bytes.len()).rev() { + assert_eq!(iter.len(), i); + let _ = iter.next(); + } + } + + #[cfg(feature = "alloc")] + #[test] + fn decode_iter_backward() { + let hex = "deadbeef"; + let bytes = [0xef, 0xbe, 0xad, 0xde]; + + for (i, b) in HexToBytesIter::new(hex).unwrap().rev().enumerate() { + assert_eq!(b.unwrap(), bytes[i]); + } + + let mut iter = HexToBytesIter::new(hex).unwrap().rev(); + for i in (0..=bytes.len()).rev() { + assert_eq!(iter.len(), i); + let _ = iter.next(); + } + } + + #[test] + fn hex_to_digits_size_hint() { + let hex = "deadbeef"; + let iter = HexDigitsIter::new_unchecked(hex.as_bytes()); + // HexDigitsIter yields two digits at a time `[u8; 2]`. 
+ assert_eq!(iter.size_hint(), (4, Some(4))); + } + + #[test] + fn hex_to_bytes_size_hint() { + let hex = "deadbeef"; + let iter = HexToBytesIter::new_unchecked(hex); + assert_eq!(iter.size_hint(), (4, Some(4))); + } + + #[test] + fn hex_to_bytes_slice_drain() { + let hex = "deadbeef"; + let want = [0xde, 0xad, 0xbe, 0xef]; + let iter = HexToBytesIter::new_unchecked(hex); + let mut got = [0u8; 4]; + iter.drain_to_slice(&mut got).unwrap(); + assert_eq!(got, want); + + let hex = ""; + let want = []; + let iter = HexToBytesIter::new_unchecked(hex); + let mut got = []; + iter.drain_to_slice(&mut got).unwrap(); + assert_eq!(got, want); + } + + #[test] + #[should_panic] + fn hex_to_bytes_slice_drain_panic_empty() { + let hex = "deadbeef"; + let iter = HexToBytesIter::new_unchecked(hex); + let mut got = []; + iter.drain_to_slice(&mut got).unwrap(); + } + + #[test] + #[should_panic] + fn hex_to_bytes_slice_drain_panic_too_small() { + let hex = "deadbeef"; + let iter = HexToBytesIter::new_unchecked(hex); + let mut got = [0u8; 3]; + iter.drain_to_slice(&mut got).unwrap(); + } + + #[test] + #[should_panic] + fn hex_to_bytes_slice_drain_panic_too_big() { + let hex = "deadbeef"; + let iter = HexToBytesIter::new_unchecked(hex); + let mut got = [0u8; 5]; + iter.drain_to_slice(&mut got).unwrap(); + } + + #[test] + fn hex_to_bytes_slice_drain_first_char_error() { + let hex = "geadbeef"; + let iter = HexToBytesIter::new_unchecked(hex); + let mut got = [0u8; 4]; + assert_eq!( + iter.drain_to_slice(&mut got).unwrap_err(), + InvalidCharError { invalid: b'g', pos: 0 } + ); + } + + #[test] + fn hex_to_bytes_slice_drain_middle_char_error() { + let hex = "deadgeef"; + let iter = HexToBytesIter::new_unchecked(hex); + let mut got = [0u8; 4]; + assert_eq!( + iter.drain_to_slice(&mut got).unwrap_err(), + InvalidCharError { invalid: b'g', pos: 4 } + ); + } + + #[test] + fn hex_to_bytes_slice_drain_end_char_error() { + let hex = "deadbeeg"; + let iter = HexToBytesIter::new_unchecked(hex); + let 
mut got = [0u8; 4]; + assert_eq!( + iter.drain_to_slice(&mut got).unwrap_err(), + InvalidCharError { invalid: b'g', pos: 7 } + ); + } + + #[cfg(feature = "alloc")] + #[test] + fn hex_to_bytes_vec_drain() { + let hex = "deadbeef"; + let want = [0xde, 0xad, 0xbe, 0xef]; + let iter = HexToBytesIter::new_unchecked(hex); + let got = iter.drain_to_vec().unwrap(); + assert_eq!(got, want); + + let hex = ""; + let iter = HexToBytesIter::new_unchecked(hex); + let got = iter.drain_to_vec().unwrap(); + assert!(got.is_empty()); + } + + #[cfg(feature = "alloc")] + #[test] + fn hex_to_bytes_vec_drain_first_char_error() { + let hex = "geadbeef"; + let iter = HexToBytesIter::new_unchecked(hex); + assert_eq!(iter.drain_to_vec().unwrap_err(), InvalidCharError { invalid: b'g', pos: 0 }); + } + + #[cfg(feature = "alloc")] + #[test] + fn hex_to_bytes_vec_drain_middle_char_error() { + let hex = "deadgeef"; + let iter = HexToBytesIter::new_unchecked(hex); + assert_eq!(iter.drain_to_vec().unwrap_err(), InvalidCharError { invalid: b'g', pos: 4 }); + } + + #[cfg(feature = "alloc")] + #[test] + fn hex_to_bytes_vec_drain_end_char_error() { + let hex = "deadbeeg"; + let iter = HexToBytesIter::new_unchecked(hex); + assert_eq!(iter.drain_to_vec().unwrap_err(), InvalidCharError { invalid: b'g', pos: 7 }); + } + + #[cfg(feature = "alloc")] + #[test] + fn hex_error_position() { + let badpos1 = "Z123456789abcdef"; + let badpos2 = "012Y456789abcdeb"; + let badpos3 = "0123456789abcdeZ"; + let badpos4 = "0123456789abYdef"; + + assert_eq!( + HexToBytesIter::new(badpos1).unwrap().next().unwrap().unwrap_err(), + InvalidCharError { pos: 0, invalid: b'Z' } + ); + assert_eq!( + HexToBytesIter::new(badpos2).unwrap().nth(1).unwrap().unwrap_err(), + InvalidCharError { pos: 3, invalid: b'Y' } + ); + assert_eq!( + HexToBytesIter::new(badpos3).unwrap().next_back().unwrap().unwrap_err(), + InvalidCharError { pos: 15, invalid: b'Z' } + ); + assert_eq!( + 
HexToBytesIter::new(badpos4).unwrap().nth_back(1).unwrap().unwrap_err(), + InvalidCharError { pos: 12, invalid: b'Y' } + ); + } +} diff --git a/src/lib.rs b/src/lib.rs index 7a0d25e..c5d9d8e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,20 +1,152 @@ // SPDX-License-Identifier: CC0-1.0 -//! Hex encoding and decoding. +//! General purpose hex decoding library with a conservative MSRV and dependency policy. //! -//! General purpose hex encoding/decoding library with a conservative MSRV and dependency policy. +//! You're currently looking at the stable crate which has advanced features removed to make +//! stabilization quicker and thus allowing downstream crates to stabilize quicker too. To get the +//! full feature set check the lower (0.x.y) versions. //! //! ## Stabilization strategy //! -//! In an effort to release stable 1.0 crates that are forward compatible we are striving -//! relentlessly to release the bare minimum amount of code. +//! Because downstream crates may need to return hex errors in their APIs and they need to be +//! stabilized soon, this crate only exposes the errors and two basic decoding functions. This +//! should already help with the vast majority of the cases and we're sufficiently confident that +//! these errors won't have a breaking change any time soon (possibly never). +//! +//! If you're writing a binary you don't need to worry about any of this and just use the unstable +//! version for now. If you're writing a library you should use these stable errors in the API but +//! you may internally depend on the unstable crate version to get the advanced features that won't +//! affect your API. This way your API can stabilize before all features in this crate are fully +//! stable and you still can use all of them. +//! +//! ## Crate features +//! +//! * `std` - enables the standard library, on by default. +//! * `alloc` - enables features that require allocation such as decoding into `Vec`, implied +//! by `std`. +//! 
* `newer-rust-version` - enables Rust version detection and thus newer features, may add +//! dependency on a feature detection crate to reduce compile times. This +//! feature is expected to do nothing once the native detection is in Rust +//! and our MSRV is at least that version. We may also remove the feature +//! gate in 2.0 with semver trick once that happens. +//! +//! ## MSRV policy +//! +//! The MSRV of the crate is currently 1.63.0 and we don't intend to bump it until the newer Rust +//! version is at least two years old and also included in Debian stable (1.63 is in Debian 12 at +//! the moment). +//! +//! Note though that the dependencies may have looser policy. This is not considered breaking/wrong +//! - you would just need to pin them in `Cargo.lock` (not `.toml`). -#![cfg_attr(all(not(test), not(feature = "std")), no_std)] +#![no_std] // Experimental features we need. #![cfg_attr(docsrs, feature(doc_cfg))] #![cfg_attr(docsrs, feature(doc_auto_cfg))] // Coding conventions #![warn(missing_docs)] +#[cfg(feature = "std")] +extern crate std; + #[cfg(feature = "alloc")] extern crate alloc; + +mod error; +mod iter; + +#[cfg(feature = "alloc")] +use alloc::vec::Vec; + +use crate::iter::HexToBytesIter; + +#[rustfmt::skip] // Keep public re-exports separate. +#[doc(inline)] +pub use self::error::{DecodeFixedSizedBytesError, InvalidCharError, InvalidLengthError}; +#[cfg(feature = "alloc")] +pub use self::error::{DecodeDynSizedBytesError, OddLengthStringError}; + +/// Decodes a hex string with variable length. +/// +/// The length of the returned `Vec` is determined by the length of the input, meaning all even +/// lengths of the input string are allowed. If you know the required length at compile time using +/// [`decode_fixed_sized`] is most likely a better choice. +/// +/// # Errors +/// +/// Errors if `hex` contains invalid characters or doesn't have even length. 
+#[cfg(feature = "alloc")] +pub fn decode_dyn_sized(hex: &str) -> Result, DecodeDynSizedBytesError> { + Ok(HexToBytesIter::new(hex)?.drain_to_vec()?) +} + +/// Decodes a hex string with expected length known at compile time. +/// +/// If you don't know the required length at compile time you need to use [`decode_dyn_sized`] +/// instead. +/// +/// # Errors +/// +/// Errors if `hex` contains invalid characters or has incorrect length. (Should be `N * 2`.) +pub fn decode_fixed_sized(hex: &str) -> Result<[u8; N], DecodeFixedSizedBytesError> { + if hex.len() == N * 2 { + let mut ret = [0u8; N]; + // checked above + HexToBytesIter::new_unchecked(hex).drain_to_slice(&mut ret)?; + Ok(ret) + } else { + Err(InvalidLengthError { invalid: hex.len(), expected: 2 * N }.into()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + #[cfg(feature = "alloc")] + fn hex_error() { + use crate::error::{InvalidCharError, OddLengthStringError}; + + let oddlen = "0123456789abcdef0"; + let badchar1 = "Z123456789abcdef"; + let badchar2 = "012Y456789abcdeb"; + let badchar3 = "«23456789abcdef"; + + assert_eq!( + decode_dyn_sized(oddlen).unwrap_err(), + OddLengthStringError { len: 17 }.into() + ); + assert_eq!( + decode_fixed_sized::<4>(oddlen).unwrap_err(), + InvalidLengthError { invalid: 17, expected: 8 }.into() + ); + assert_eq!( + decode_dyn_sized(badchar1).unwrap_err(), + InvalidCharError { pos: 0, invalid: b'Z' }.into() + ); + assert_eq!( + decode_dyn_sized(badchar2).unwrap_err(), + InvalidCharError { pos: 3, invalid: b'Y' }.into() + ); + assert_eq!( + decode_dyn_sized(badchar3).unwrap_err(), + InvalidCharError { pos: 0, invalid: 194 }.into() + ); + } + + #[test] + fn hex_to_array() { + let len_sixteen = "0123456789abcdef"; + assert!(decode_fixed_sized::<8>(len_sixteen).is_ok()); + } + + #[test] + fn hex_to_array_error() { + let len_sixteen = "0123456789abcdef"; + assert_eq!( + decode_fixed_sized::<4>(len_sixteen).unwrap_err(), + InvalidLengthError { invalid: 16, expected: 8 
}.into() + ) + } +}