diff --git a/sdk/src/asset_handlers/bmff_io.rs b/sdk/src/asset_handlers/bmff_io.rs index bce737ed4..69d522d14 100644 --- a/sdk/src/asset_handlers/bmff_io.rs +++ b/sdk/src/asset_handlers/bmff_io.rs @@ -71,7 +71,7 @@ const FULL_BOX_TYPES: &[&str; 80] = &[ "txtC", "mime", "uri ", "uriI", "hmhd", "sthd", "vvhd", "medc", ]; -static SUPPORTED_TYPES: [&str; 15] = [ +static SUPPORTED_TYPES: [&str; 19] = [ "avif", "heif", "heic", @@ -79,6 +79,8 @@ static SUPPORTED_TYPES: [&str; 15] = [ "m4a", "mov", "m4v", + "3gp", + "3g2", "application/mp4", "audio/mp4", "image/avif", @@ -87,6 +89,8 @@ static SUPPORTED_TYPES: [&str; 15] = [ "video/mp4", "video/quicktime", "video/x-m4v", + "video/3gpp", + "video/3g2", ]; macro_rules! boxtype { @@ -1718,6 +1722,27 @@ impl AssetIO for BmffIO { fn supported_types(&self) -> &[&str] { &SUPPORTED_TYPES } + + fn get_handler_type_from_bytes(&self, data: &[u8]) -> Option<&'static str> { + if data.len() < 12 || &data[4..8] != crate::utils::signatures::BMFF_FTYP { + return None; + } + let brand = &data[8..12]; + match brand { + b"mp41" | b"mp42" | b"isom" | b"iso2" | b"iso3" | b"iso4" | b"iso5" | b"iso6" + | b"avc1" | b"mp71" => Some("video/mp4"), + b"m4a " => Some("audio/mp4"), + b"m4v " => Some("video/x-m4v"), + b"heic" | b"heix" | b"mif1" | b"msf1" => Some("image/heic"), + b"hevc" | b"hevx" => Some("image/heif"), + b"avif" | b"avis" => Some("image/avif"), + b"qt " => Some("video/quicktime"), + b"3gp1" | b"3gp2" | b"3gp3" | b"3gp4" | b"3gp5" | b"3gp6" | b"3gr6" | b"3gs6" + | b"3ge6" => Some("video/3gpp"), + b"3g2a" | b"3g2b" | b"3g2c" => Some("video/3g2"), + _ => None, + } + } } impl CAIWriter for BmffIO { diff --git a/sdk/src/asset_handlers/c2pa_io.rs b/sdk/src/asset_handlers/c2pa_io.rs index 183d9b0bf..61ae30baa 100644 --- a/sdk/src/asset_handlers/c2pa_io.rs +++ b/sdk/src/asset_handlers/c2pa_io.rs @@ -133,6 +133,17 @@ impl AssetIO for C2paIO { &SUPPORTED_TYPES } + fn get_handler_type_from_bytes(&self, data: &[u8]) -> Option<&'static str> { + // JUMBF starts with a box size then 'jumb' + if data.len() < 8 { + return None; + } + if &data[4..8] == crate::utils::signatures::JUMBF_TYPE { + return Some("application/c2pa"); + } + None + } + fn composed_data_ref(&self) -> Option<&dyn ComposedManifestRef> { Some(self) } diff --git a/sdk/src/asset_handlers/gif_io.rs b/sdk/src/asset_handlers/gif_io.rs index f7ca2e128..42de187db 100644 --- a/sdk/src/asset_handlers/gif_io.rs +++ b/sdk/src/asset_handlers/gif_io.rs @@ -379,6 +379,15 @@ impl AssetIO for GifIO { fn supported_types(&self) -> &[&str] { &["gif", "image/gif"] } + + fn get_handler_type_from_bytes(&self, data: &[u8]) -> Option<&'static str> { + if data.starts_with(crate::utils::signatures::GIF87A) + || data.starts_with(crate::utils::signatures::GIF89A) + { + return Some("image/gif"); + } + None + } } impl GifIO { diff --git a/sdk/src/asset_handlers/jpeg_io.rs b/sdk/src/asset_handlers/jpeg_io.rs index d9c64030b..ecc9270ca 100644 --- a/sdk/src/asset_handlers/jpeg_io.rs +++ b/sdk/src/asset_handlers/jpeg_io.rs @@ -577,6 +577,16 @@ impl AssetIO for JpegIO { fn supported_types(&self) -> &[&str] { &SUPPORTED_TYPES } + + fn get_handler_type_from_bytes(&self, data: &[u8]) -> Option<&'static str> { + if data.len() < 3 { + return None; + } + if data.starts_with(crate::utils::signatures::JPEG) { + return Some("image/jpeg"); + } + None + } } impl RemoteRefEmbed for JpegIO { diff --git a/sdk/src/asset_handlers/mp3_io.rs b/sdk/src/asset_handlers/mp3_io.rs index 1bdba56e6..3b66c2f3d 100644 --- a/sdk/src/asset_handlers/mp3_io.rs +++ b/sdk/src/asset_handlers/mp3_io.rs @@ -349,6 +349,20 @@ impl AssetIO for Mp3IO { fn supported_types(&self) -> &[&str] { &SUPPORTED_TYPES } + + fn get_handler_type_from_bytes(&self, data: &[u8]) -> Option<&'static str> { + if data.len() < 3 { + return None; + } + if data.starts_with(crate::utils::signatures::MP3_ID3) { + return Some("audio/mpeg"); + } + // MP3 sync frame (simplified) + if data.len() >= 2 && data[0] == 0xff && (data[1] & 0xe0) == 0xe0 { + return Some("audio/mpeg"); + } + None + } } impl CAIWriter for Mp3IO { diff --git a/sdk/src/asset_handlers/pdf_io.rs b/sdk/src/asset_handlers/pdf_io.rs index 3ebb54d37..8aeb6673b 100644 --- a/sdk/src/asset_handlers/pdf_io.rs +++ b/sdk/src/asset_handlers/pdf_io.rs @@ -109,6 +109,13 @@ impl AssetIO for PdfIO { &SUPPORTED_TYPES } + fn get_handler_type_from_bytes(&self, data: &[u8]) -> Option<&'static str> { + if data.starts_with(crate::utils::signatures::PDF) { + return Some("application/pdf"); + } + None + } + fn composed_data_ref(&self) -> Option<&dyn ComposedManifestRef> { Some(self) } diff --git a/sdk/src/asset_handlers/png_io.rs b/sdk/src/asset_handlers/png_io.rs index b6385acdc..d50ae5e92 100644 --- a/sdk/src/asset_handlers/png_io.rs +++ b/sdk/src/asset_handlers/png_io.rs @@ -572,6 +572,16 @@ impl AssetIO for PngIO { fn supported_types(&self) -> &[&str] { &SUPPORTED_TYPES } + + fn get_handler_type_from_bytes(&self, data: &[u8]) -> Option<&'static str> { + if data.len() < 8 { + return None; + } + if data.starts_with(crate::utils::signatures::PNG) { + return Some("image/png"); + } + None + } } fn get_xmp_insertion_point(asset_reader: &mut dyn CAIRead) -> Option<(u64, u32)> { diff --git a/sdk/src/asset_handlers/riff_io.rs b/sdk/src/asset_handlers/riff_io.rs index ecd434c03..083c2f2bc 100644 --- a/sdk/src/asset_handlers/riff_io.rs +++ b/sdk/src/asset_handlers/riff_io.rs @@ -398,6 +398,23 @@ impl AssetIO for RiffIO { fn supported_types(&self) -> &[&str] { &SUPPORTED_TYPES } + + fn get_handler_type_from_bytes(&self, data: &[u8]) -> Option<&'static str> { + if data.len() < 12 || &data[0..4] != crate::utils::signatures::RIFF { + return None; + } + let form_type = &data[8..12]; + if form_type == crate::utils::signatures::AVI { + return Some("video/avi"); + } + if form_type == crate::utils::signatures::WEBP { + return Some("image/webp"); + } + if form_type == crate::utils::signatures::WAVE { + return Some("audio/wav"); + } + None + } } impl CAIWriter for RiffIO { diff --git a/sdk/src/asset_handlers/svg_io.rs b/sdk/src/asset_handlers/svg_io.rs index c41a8b709..629e919bf 100644 --- a/sdk/src/asset_handlers/svg_io.rs +++ b/sdk/src/asset_handlers/svg_io.rs @@ -159,6 +159,21 @@ impl AssetIO for SvgIO { fn supported_types(&self) -> &[&str] { &SUPPORTED_TYPES } + + fn get_handler_type_from_bytes(&self, data: &[u8]) -> Option<&'static str> { + // Check for " &[&str] { &SUPPORTED_TYPES } + + fn get_handler_type_from_bytes(&self, data: &[u8]) -> Option<&'static str> { + if data.len() < 4 { + return None; + } + if data.starts_with(crate::utils::signatures::TIFF_LE) + || data.starts_with(crate::utils::signatures::TIFF_BE) + { + return Some("image/tiff"); + } + None + } } impl CAIWriter for TiffIO { diff --git a/sdk/src/asset_io.rs b/sdk/src/asset_io.rs index 8d45f89e8..c550eb6c2 100644 --- a/sdk/src/asset_io.rs +++ b/sdk/src/asset_io.rs @@ -183,6 +183,11 @@ pub trait AssetIO: Sync + Send { // OPTIONAL INTERFACES + // Checks if the data matches the file signature for this handler and returns the mime type. + fn get_handler_type_from_bytes(&self, _data: &[u8]) -> Option<&'static str> { + None + } + // Returns [`AssetPatch`] trait if this I/O handler supports patching. #[allow(dead_code)] // this here for wasm builds to pass clippy (todo: remove) fn asset_patch_ref(&self) -> Option<&dyn AssetPatch> { diff --git a/sdk/src/ingredient.rs b/sdk/src/ingredient.rs index de019ba0d..6f9d67c3f 100644 --- a/sdk/src/ingredient.rs +++ b/sdk/src/ingredient.rs @@ -548,9 +548,10 @@ impl Ingredient { .unwrap_or_else(|| "".into()) .to_lowercase(); - let format = extension_to_mime(&extension) - .unwrap_or("application/octet-stream") - .to_owned(); + let format = crate::utils::mime::detect_format_from_path(path) + .or_else(|| extension_to_mime(&extension).map(|m| m.to_owned())) + .unwrap_or_else(|| "application/octet-stream".to_owned()); + (title, extension, format) } diff --git a/sdk/src/jumbf_io.rs b/sdk/src/jumbf_io.rs index 530b47a76..26256178b 100644 --- a/sdk/src/jumbf_io.rs +++ b/sdk/src/jumbf_io.rs @@ -37,26 +37,24 @@ use crate::{ // initialize asset handlers lazy_static! { - static ref CAI_READERS: HashMap> = { - let handlers: Vec> = vec![ - #[cfg(feature = "pdf")] - Box::new(PdfIO::new("")), - Box::new(BmffIO::new("")), - Box::new(C2paIO::new("")), - Box::new(JpegIO::new("")), - Box::new(PngIO::new("")), - Box::new(RiffIO::new("")), - Box::new(SvgIO::new("")), - Box::new(TiffIO::new("")), - Box::new(Mp3IO::new("")), - Box::new(GifIO::new("")), - ]; + static ref HANDLERS: Vec> = vec![ + #[cfg(feature = "pdf")] + Box::new(PdfIO::new("")), + Box::new(BmffIO::new("")), + Box::new(C2paIO::new("")), + Box::new(JpegIO::new("")), + Box::new(PngIO::new("")), + Box::new(RiffIO::new("")), + Box::new(SvgIO::new("")), + Box::new(TiffIO::new("")), + Box::new(Mp3IO::new("")), + Box::new(GifIO::new("")), + ]; + static ref CAI_READERS: HashMap> = { let mut handler_map = HashMap::new(); - // build handler map - for h in handlers { - // get the supported types add entry for each + for h in HANDLERS.iter() { for supported_type in h.supported_types() { handler_map.insert(supported_type.to_string(), h.get_handler(supported_type)); } @@ -64,27 +62,11 @@ lazy_static! { handler_map }; -} -// initialize streaming write handlers -lazy_static! { static ref CAI_WRITERS: HashMap> = { - let handlers: Vec> = vec![ - Box::new(BmffIO::new("")), - Box::new(C2paIO::new("")), - Box::new(JpegIO::new("")), - Box::new(PngIO::new("")), - Box::new(RiffIO::new("")), - Box::new(SvgIO::new("")), - Box::new(TiffIO::new("")), - Box::new(Mp3IO::new("")), - Box::new(GifIO::new("")), - ]; let mut handler_map = HashMap::new(); - // build handler map - for h in handlers { - // get the supported types add entry for each + for h in HANDLERS.iter() { for supported_type in h.supported_types() { if let Some(writer) = h.get_writer(supported_type) { // get streaming writer if supported handler_map.insert(supported_type.to_string(), writer); @@ -96,6 +78,16 @@ lazy_static! { }; } +/// Detects the format from the data by asking registered handlers. +pub fn format_from_bytes(data: &[u8]) -> Option { + for handler in HANDLERS.iter() { + if let Some(fmt) = handler.get_handler_type_from_bytes(data) { + return Some(fmt.to_string()); + } + } + None +} + pub(crate) fn is_bmff_format(asset_type: &str) -> bool { let bmff_io = BmffIO::new(""); bmff_io.supported_types().contains(&asset_type) @@ -111,7 +103,14 @@ pub fn load_jumbf_from_memory(asset_type: &str, data: &[u8]) -> Result> /// Return jumbf block from stream asset pub fn load_jumbf_from_stream(asset_type: &str, input_stream: &mut dyn CAIRead) -> Result> { - let cai_block = match get_cailoader_handler(asset_type) { + let mut asset_type = asset_type.to_lowercase(); + if asset_type.is_empty() || asset_type == "application/octet-stream" { + if let Some(detected) = crate::utils::mime::detect_format_from_stream(input_stream) { + asset_type = detected; + } + } + + let cai_block = match get_cailoader_handler(&asset_type) { Some(asset_handler) => asset_handler.read_cai(input_stream)?, None => return Err(Error::UnsupportedType), }; @@ -152,8 +151,15 @@ pub fn save_jumbf_to_memory(asset_type: &str, data: &[u8], store_bytes: &[u8]) - #[cfg(feature = "file_io")] pub(crate) fn get_assetio_handler_from_path(asset_path: &Path) -> Option<&dyn AssetIO> { - let ext = get_file_extension(asset_path)?; + // try to detect from content first + if let Some(format) = crate::utils::mime::detect_format_from_path(asset_path) { + if let Some(h) = CAI_READERS.get(&format) { + return Some(h.as_ref()); + } + } + // if not found by content, try to use extension + let ext = get_file_extension(asset_path).unwrap_or_default(); CAI_READERS.get(&ext).map(|h| h.as_ref()) } @@ -186,13 +192,21 @@ pub(crate) fn get_file_extension(path: &Path) -> Option { #[cfg(feature = "file_io")] pub(crate) fn get_supported_file_extension(path: &Path) -> Option { - let ext = get_file_extension(path)?; + // try to detect from content first + if let Some(format) = crate::utils::mime::detect_format_from_path(path) { + if CAI_READERS.get(&format).is_some() { + return Some(format); + } + } - if CAI_READERS.get(&ext).is_some() { - Some(ext) - } else { - None + // fallback to extension + if let Some(ext) = get_file_extension(path) { + if CAI_READERS.get(&ext).is_some() { + return Some(ext); + } } + + None } /// Returns a [Vec] of supported mime types for reading manifests. @@ -220,7 +234,7 @@ pub fn save_jumbf_to_file, P2: AsRef>( in_path: P1, out_path: Option, ) -> Result<()> { - let ext = get_file_extension(in_path.as_ref()).ok_or(Error::UnsupportedType)?; + let ext = get_supported_file_extension(in_path.as_ref()).ok_or(Error::UnsupportedType)?; // if no output path make a new file based off of source file name let asset_out_path: PathBuf = match out_path.as_ref() { @@ -286,7 +300,7 @@ pub(crate) fn update_file_jumbf( #[cfg(feature = "file_io")] /// load the JUMBF block from an asset if available pub fn load_jumbf_from_file>(in_path: P) -> Result> { - let ext = get_file_extension(in_path.as_ref()).ok_or(Error::UnsupportedType)?; + let ext = get_supported_file_extension(in_path.as_ref()).ok_or(Error::UnsupportedType)?; match get_assetio_handler(&ext) { Some(asset_handler) => asset_handler.read_cai_store(in_path.as_ref()), @@ -326,7 +340,7 @@ where let mut reader = CAIReadAdapter { reader: stream }; match get_caiwriter_handler(format) { Some(handler) => handler.get_object_locations_from_stream(&mut reader), - _ => Err(Error::UnsupportedType), + _ => Ok(Vec::new()), } } @@ -338,7 +352,7 @@ where /// returns Unsupported type or errors from remove_cai_store #[cfg(feature = "file_io")] pub fn remove_jumbf_from_file>(path: P) -> Result<()> { - let ext = get_file_extension(path.as_ref()).ok_or(Error::UnsupportedType)?; + let ext = get_supported_file_extension(path.as_ref()).ok_or(Error::UnsupportedType)?; match get_assetio_handler(&ext) { Some(asset_handler) => asset_handler.remove_cai_store(path.as_ref()), _ => Err(Error::UnsupportedType), diff --git a/sdk/src/reader.rs b/sdk/src/reader.rs index c3fe6c574..ba1cd1af5 100644 --- a/sdk/src/reader.rs +++ b/sdk/src/reader.rs @@ -273,10 +273,19 @@ impl Reader { #[async_generic] pub fn with_file>(mut self, path: P) -> Result { let path = path.as_ref(); - let format = crate::format_from_path(path).ok_or(crate::Error::UnsupportedType)?; let mut file = File::open(path)?; - // Try loading from stream first + // attempt to detect format from content first + let format = crate::utils::mime::detect_format_from_stream(&mut file) + .or_else(|| { + // fallback to extension + path.extension() + .and_then(|ext| crate::utils::mime::extension_to_mime(ext.to_string_lossy().to_lowercase().as_ref())) + .map(|m| m.to_owned()) + }) + .unwrap_or_else(|| "application/octet-stream".to_string()); + + // Try loading from stream let mut validation_log = StatusTracker::default(); let store = if _sync { Store::from_stream(&format, &mut file, &mut validation_log, &self.context) @@ -285,7 +294,7 @@ impl Reader { }; match store { - Err(Error::JumbfNotFound) => { + Err(Error::JumbfNotFound) | Err(Error::UnsupportedType) => { // if not embedded or cloud, check for sidecar first and load if it exists let potential_sidecar_path = path.with_extension("c2pa"); if potential_sidecar_path.exists() { @@ -316,7 +325,8 @@ impl Reader { }?; Ok(self) } else { - Err(Error::JumbfNotFound) + // return original error + store.map(|_| self) } } Ok(store) => { diff --git a/sdk/src/store.rs b/sdk/src/store.rs index 147802a23..85fc709af 100644 --- a/sdk/src/store.rs +++ b/sdk/src/store.rs @@ -2012,31 +2012,46 @@ impl Store { let locations = match asset_data { #[cfg(feature = "file_io")] ClaimAssetData::Path(path) => { - let format = get_supported_file_extension(path).ok_or(Error::UnsupportedType)?; + let format = get_supported_file_extension(path).unwrap_or_else(|| "application/octet-stream".to_string()); let mut reader = std::fs::File::open(path)?; - object_locations_from_stream(&format, &mut reader) + object_locations_from_stream(&format, &mut reader).or_else(|e| match e { + Error::UnsupportedType => Ok(Vec::new()), + _ => Err(e), + }) } ClaimAssetData::Bytes(items, typ) => { let format = typ.to_owned(); let mut reader = Cursor::new(items); - object_locations_from_stream(&format, &mut reader) + object_locations_from_stream(&format, &mut reader).or_else(|e| match e { + Error::UnsupportedType => Ok(Vec::new()), + _ => Err(e), + }) } ClaimAssetData::Stream(reader, typ) => { let format = typ.to_owned(); - let positions = object_locations_from_stream(&format, reader); + let positions = object_locations_from_stream(&format, reader).or_else(|e| match e { + Error::UnsupportedType => Ok(Vec::new()), + _ => Err(e), + }); reader.rewind()?; positions } ClaimAssetData::StreamFragment(reader, _read1, typ) => { let format = typ.to_owned(); - object_locations_from_stream(&format, reader) + object_locations_from_stream(&format, reader).or_else(|e| match e { + Error::UnsupportedType => Ok(Vec::new()), + _ => Err(e), + }) } #[cfg(feature = "file_io")] ClaimAssetData::StreamFragments(reader, _path_bufs, typ) => { let format = typ.to_owned(); - object_locations_from_stream(&format, reader) + object_locations_from_stream(&format, reader).or_else(|e| match e { + Error::UnsupportedType => Ok(Vec::new()), + _ => Err(e), + }) } }; @@ -3106,47 +3121,55 @@ impl Store { RemoteManifest::EmbedWithRemote(url) => (Some(url), false), }; - let io_handler = get_assetio_handler(format).ok_or(Error::UnsupportedType)?; + let io_handler = get_assetio_handler(format); - // Do not assume the handler supports XMP or removing manifests unless we need it to - if let Some(url) = url { - let external_ref_writer = io_handler - .remote_ref_writer_ref() - .ok_or(Error::XmpNotSupported)?; + if let Some(io_handler) = io_handler { + // Do not assume the handler supports XMP or removing manifests unless we need it to + if let Some(url) = url { + let external_ref_writer = io_handler + .remote_ref_writer_ref() + .ok_or(Error::XmpNotSupported)?; - if remove_manifests { - let manifest_writer = io_handler - .get_writer(format) - .ok_or(Error::UnsupportedType)?; + if remove_manifests { + let manifest_writer = io_handler.get_writer(format).ok_or(Error::UnsupportedType)?; - let mut tmp_stream = io_utils::stream_with_fs_fallback(threshold); - manifest_writer.remove_cai_store_from_stream(input_stream, &mut tmp_stream)?; + let mut tmp_stream = io_utils::stream_with_fs_fallback(threshold); + manifest_writer.remove_cai_store_from_stream(input_stream, &mut tmp_stream)?; - // add external ref if possible - tmp_stream.rewind()?; - external_ref_writer.embed_reference_to_stream( - &mut tmp_stream, - &mut intermediate_stream, - RemoteRefEmbedType::Xmp(url), - )?; + // add external ref if possible + tmp_stream.rewind()?; + external_ref_writer.embed_reference_to_stream( + &mut tmp_stream, + &mut intermediate_stream, + RemoteRefEmbedType::Xmp(url), + )?; + } else { + // add external ref if possible + external_ref_writer.embed_reference_to_stream( + input_stream, + &mut intermediate_stream, + RemoteRefEmbedType::Xmp(url), + )?; + } + } else if remove_manifests { + let manifest_writer = io_handler.get_writer(format).ok_or(Error::UnsupportedType)?; + + manifest_writer + .remove_cai_store_from_stream(input_stream, &mut intermediate_stream)?; } else { - // add external ref if possible - external_ref_writer.embed_reference_to_stream( - input_stream, - &mut intermediate_stream, - RemoteRefEmbedType::Xmp(url), - )?; + // just clone stream + input_stream.rewind()?; + std::io::copy(input_stream, &mut intermediate_stream)?; } - } else if remove_manifests { - let manifest_writer = io_handler - .get_writer(format) - .ok_or(Error::UnsupportedType)?; - - manifest_writer.remove_cai_store_from_stream(input_stream, &mut intermediate_stream)?; } else { - // just clone stream - input_stream.rewind()?; - std::io::copy(input_stream, &mut intermediate_stream)?; + // no handler + if matches!(pc.remote_manifest(), RemoteManifest::SideCar) { + // just clone stream + input_stream.rewind()?; + std::io::copy(input_stream, &mut intermediate_stream)?; + } else { + return Err(Error::UnsupportedType); + } } let is_bmff = is_bmff_format(format); diff --git a/sdk/src/utils/mime.rs b/sdk/src/utils/mime.rs index 97a000ad6..b80627e46 100644 --- a/sdk/src/utils/mime.rs +++ b/sdk/src/utils/mime.rs @@ -41,6 +41,9 @@ pub fn extension_to_mime(extension: &str) -> Option<&'static str> { "ai" => "application/postscript", "arw" => "image/x-sony-arw", "nef" => "image/x-nikon-nef", + "m4v" => "video/x-m4v", + "3gp" => "video/3gpp", + "3g2" => "video/3g2", "c2pa" | "application/x-c2pa-manifest-store" | "application/c2pa" => "application/c2pa", _ => return None, }) @@ -88,6 +91,9 @@ pub fn format_to_extension(format: &str) -> Option<&'static str> { "ai" | "application/postscript" => "ai", "arw" | "image/x-sony-arw" => "arw", "nef" | "image/x-nikon-nef" => "nef", + "m4v" | "video/x-m4v" => "m4v", + "3gp" | "video/3gpp" => "3gp", + "3g2" | "video/3g2" => "3g2", "c2pa" | "application/x-c2pa-manifest-store" | "application/c2pa" => "c2pa", _ => return None, }) @@ -95,9 +101,80 @@ pub fn format_to_extension(format: &str) -> Option<&'static str> { /// Return a MIME type given a file path. /// -/// This function will use the file extension to determine the MIME type. +/// This function will use the file content (magic bytes) to determine the MIME type. +/// If the format cannot be determined from content, it will fall back to using the file extension. pub fn format_from_path>(path: P) -> Option { - path.as_ref().extension().map(|ext| { - crate::utils::mime::format_to_mime(ext.to_string_lossy().to_lowercase().as_ref()) + let path = path.as_ref(); + + // try to detect from content first if we have file_io + #[cfg(feature = "file_io")] + if let Some(format) = detect_format_from_path(path) { + return Some(format); + } + + // fallback to extension + path.extension() + .and_then(|ext| extension_to_mime(ext.to_string_lossy().to_lowercase().as_ref())) + .map(|m| m.to_owned()) +} + +/// Detect a MIME type from the content of a file. +#[cfg(feature = "file_io")] +pub fn detect_format_from_path>(path: P) -> Option { + std::fs::File::open(path).ok().and_then(|mut file| { + detect_format_from_stream(&mut file) }) } + +/// Detect a MIME type from a stream of bytes. +pub fn detect_format_from_stream(stream: &mut R) -> Option { + let _ = stream.rewind(); + let mut buffer = [0u8; 512]; + let n = stream.read(&mut buffer).ok()?; + let _ = stream.rewind(); // attempt to rewind, but don't fail if we can't + crate::jumbf_io::format_from_bytes(&buffer[..n]) +} + +/// Returns a MIME type given a stream of bytes. +#[allow(dead_code)] +pub fn get_mime_from_bytes(data: &[u8]) -> Option { + crate::jumbf_io::format_from_bytes(data) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_get_mime_from_bytes() { + assert_eq!(get_mime_from_bytes(&[0xff, 0xd8, 0xff, 0xe0]), Some("image/jpeg".to_string())); + assert_eq!(get_mime_from_bytes(&[0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]), Some("image/png".to_string())); + assert_eq!(get_mime_from_bytes(b"GIF87a"), Some("image/gif".to_string())); + assert_eq!(get_mime_from_bytes(b"GIF89a"), Some("image/gif".to_string())); + assert_eq!(get_mime_from_bytes(&[0x49, 0x49, 0x2a, 0x00]), Some("image/tiff".to_string())); + assert_eq!(get_mime_from_bytes(&[0x4d, 0x4d, 0x00, 0x2a]), Some("image/tiff".to_string())); + // assert_eq!(get_mime_from_bytes(b"BM"), Some("image/bmp".to_string())); // BMP not currently in handlers list + #[cfg(feature = "pdf")] + assert_eq!(get_mime_from_bytes(b"%PDF-1.4"), Some("application/pdf".to_string())); + assert_eq!(get_mime_from_bytes(b"RIFF\0\0\0\0WEBP"), Some("image/webp".to_string())); + assert_eq!(get_mime_from_bytes(b"RIFF\0\0\0\0WAVE"), Some("audio/wav".to_string())); + assert_eq!(get_mime_from_bytes(b"RIFF\0\0\0\0AVI "), Some("video/avi".to_string())); + assert_eq!(get_mime_from_bytes(b"ID3\x03\0\0\0\0\0\0"), Some("audio/mpeg".to_string())); + assert_eq!(get_mime_from_bytes(&[0x00, 0x00, 0x00, 0x18, b'f', b't', b'y', b'p', b'm', b'p', b'4', b'2']), Some("video/mp4".to_string())); + assert_eq!(get_mime_from_bytes(&[0x00, 0x00, 0x00, 0x18, b'f', b't', b'y', b'p', b'h', b'e', b'i', b'c']), Some("image/heic".to_string())); + assert_eq!(get_mime_from_bytes(b""), Some("image/svg+xml".to_string())); + assert_eq!(get_mime_from_bytes(b""), Some("image/svg+xml".to_string())); + assert_eq!(get_mime_from_bytes(&[0x00, 0x00, 0x00, 0x0c, b'j', b'u', b'm', b'b']), Some("application/c2pa".to_string())); + } + + #[test] + fn test_format_from_path() { + use std::io::Write; + let mut temp = tempfile::NamedTempFile::new().unwrap(); + temp.write_all(&[0xff, 0xd8, 0xff, 0xe0]).unwrap(); + let path = temp.path(); + + // No extension, should detect from content + assert_eq!(format_from_path(path), Some("image/jpeg".to_string())); + } +} diff --git a/sdk/src/utils/mod.rs b/sdk/src/utils/mod.rs index eb96e3bb1..feba47753 100644 --- a/sdk/src/utils/mod.rs +++ b/sdk/src/utils/mod.rs @@ -23,6 +23,7 @@ pub(crate) mod merkle; pub(crate) mod mime; #[allow(dead_code)] // for wasm build pub(crate) mod patch; +pub(crate) mod signatures; #[cfg(feature = "add_thumbnails")] pub(crate) mod thumbnail; pub(crate) mod time_it; diff --git a/sdk/src/utils/signatures.rs b/sdk/src/utils/signatures.rs new file mode 100644 index 000000000..7047b0df7 --- /dev/null +++ b/sdk/src/utils/signatures.rs @@ -0,0 +1,37 @@ +//! Centralized location for file signatures (magic bytes). + +/// JPEG start of image marker +pub const JPEG: &[u8] = b"\xff\xd8\xff"; + +/// PNG file signature +pub const PNG: &[u8] = b"\x89PNG\r\n\x1a\n"; + +/// GIF signatures +pub const GIF87A: &[u8] = b"GIF87a"; +pub const GIF89A: &[u8] = b"GIF89a"; + +/// TIFF signatures (Little Endian and Big Endian) +pub const TIFF_LE: &[u8] = b"II\x2a\x00"; +pub const TIFF_BE: &[u8] = b"MM\x00\x2a"; + +/// BMFF (ISO Base Media File Format) "ftyp" marker +pub const BMFF_FTYP: &[u8] = b"ftyp"; + +/// RIFF container signature (used by WEBP, WAV, AVI) +pub const RIFF: &[u8] = b"RIFF"; +pub const WEBP: &[u8] = b"WEBP"; +pub const WAVE: &[u8] = b"WAVE"; +pub const AVI: &[u8] = b"AVI "; + +/// MP3 signatures +pub const MP3_ID3: &[u8] = b"ID3"; + +/// PDF signature +pub const PDF: &[u8] = b"%PDF-"; + +/// SVG text patterns +pub const SVG_TAG: &str = " (), // Success + Err(c2pa::Error::JumbfNotFound) => (), // Success - found it's a JPEG but no JUMBF + Err(e) => panic!("Should have detected as JPEG, but got error: {:?}", e), + } +} + +#[test] +fn test_ingredient_from_file_no_extension() { + let dir = tempdir().unwrap(); + let image_path = dir.path().join("image_no_ext"); + let jpeg_data = fs::read("tests/fixtures/IMG_0003.jpg").expect("Failed to read fixture"); + fs::write(&image_path, jpeg_data).expect("Failed to write image"); + + let ingredient = Ingredient::from_file(&image_path).expect("Failed to create ingredient"); + assert_eq!(ingredient.format(), Some("image/jpeg")); +} + +#[test] +fn test_sidecar_with_unsupported_asset() { + let dir = tempdir().unwrap(); + let asset_path = dir.path().join("unsupported.xyz"); + let asset_data = b"unsupported file content"; + fs::write(&asset_path, asset_data).expect("Failed to write asset"); + + let sidecar_path = asset_path.with_extension("c2pa"); + let manifest_data = fs::read("tests/fixtures/cloud_manifest.c2pa").expect("Failed to read fixture"); + fs::write(&sidecar_path, manifest_data).expect("Failed to write sidecar"); + + // Should detect the sidecar even though .xyz is unsupported + let reader = Reader::from_file(&asset_path); + + match reader { + Ok(r) => { + assert!(r.active_manifest().is_some()); + }, + Err(e) => panic!("Should have detected sidecar for unsupported asset, but got error: {:?}", e), + } +} + +#[test] +fn test_sign_unsupported_with_sidecar() { + use c2pa::{Builder, Context}; + use std::io::Cursor; + + let dir = tempdir().unwrap(); + let asset_path = dir.path().join("unsupported.txt"); + let asset_data = b"This is a text file that c2pa-rs doesn't normally support."; + fs::write(&asset_path, asset_data).expect("Failed to write asset"); + + // Create context with a test signer from standard test settings + let settings_str = include_str!("../tests/fixtures/test_settings.toml"); + let context = Context::new().with_settings(settings_str).unwrap(); + + let mut builder = Builder::from_context(context) + .with_definition(serde_json::json!({"title": "Unsupported Format Test"})).unwrap(); + // Request a sidecar manifest + builder.set_no_embed(true); + + let mut source = fs::File::open(&asset_path).unwrap(); + let mut dest = Cursor::new(Vec::new()); + + // Should work now! + // We use "application/octet-stream" as the format for the stream + let result = builder.save_to_stream("application/octet-stream", &mut source, &mut dest); + + match result { + Ok(manifest_bytes) => { + assert!(!manifest_bytes.is_empty()); + // The destination should contain the original asset (since it's a sidecar) + assert_eq!(dest.get_ref().as_slice(), asset_data); + + // Now verify we can read it back + let sidecar_path = asset_path.with_extension("c2pa"); + fs::write(&sidecar_path, manifest_bytes).unwrap(); + + let reader = Reader::from_file(&asset_path).expect("Failed to read back"); + assert_eq!(reader.active_manifest().unwrap().title().unwrap(), "Unsupported Format Test"); + }, + Err(e) => panic!("Should have allowed signing unsupported format with sidecar, but got error: {:?}", e), + } +} \ No newline at end of file