diff --git a/benches/decompress.rs b/benches/decompress.rs index 07676fa..b1100fc 100644 --- a/benches/decompress.rs +++ b/benches/decompress.rs @@ -1,5 +1,5 @@ use criterion::{criterion_group, criterion_main, Criterion}; -use fitsrs::Pixels; +use fitsrs::hdu::data::bintable::{self, data::BinaryTableData}; fn criterion_benchmark_decompression(c: &mut Criterion) { let mut group = c.benchmark_group("decompression"); @@ -34,9 +34,22 @@ fn decompress(filename: &str) { if let HDU::XBinaryTable(hdu) = hdu { let width = hdu.get_header().get_parsed::("ZNAXIS1").unwrap(); let height = hdu.get_header().get_parsed::("ZNAXIS2").unwrap(); - let pixels = hdu_list.get_data(&hdu).collect::>(); - assert!(width * height == pixels.len()); + match hdu_list.get_data(&hdu) { + BinaryTableData::TileCompressed(bintable::tile_compressed::pixels::Pixels::U8( + pixels, + )) => assert!(width * height == pixels.count()), + BinaryTableData::TileCompressed( + bintable::tile_compressed::pixels::Pixels::I16(pixels), + ) => assert!(width * height == pixels.count()), + BinaryTableData::TileCompressed( + bintable::tile_compressed::pixels::Pixels::I32(pixels), + ) => assert!(width * height == pixels.count()), + BinaryTableData::TileCompressed( + bintable::tile_compressed::pixels::Pixels::F32(pixels), + ) => assert!(width * height == pixels.count()), + _ => unreachable!(), + } } } } @@ -57,7 +70,7 @@ fn read_image() { let width = hdu.get_header().get_parsed::("NAXIS1").unwrap(); let height = hdu.get_header().get_parsed::("NAXIS2").unwrap(); let pixels = match hdu_list.get_data(&hdu).pixels() { - Pixels::I16(it) => it.count(), + fitsrs::hdu::data::image::Pixels::I16(it) => it.count(), _ => unreachable!(), }; diff --git a/src/hdu/data/bintable/data.rs b/src/hdu/data/bintable/data.rs index 14eaf73..ebbb48b 100644 --- a/src/hdu/data/bintable/data.rs +++ b/src/hdu/data/bintable/data.rs @@ -27,13 +27,13 @@ where } } -use super::tile_compressed::TileCompressedData; #[derive(Debug)] pub enum BinaryTableData { Table(TableData), - TileCompressed(TileCompressedData), + TileCompressed(Pixels), } +/* impl Iterator for BinaryTableData where R: Debug + Seek + Read, @@ -47,18 +47,19 @@ where } } } +*/ +use super::tile_compressed::pixels::Pixels; impl BinaryTableData where R: Debug + Read, { fn new(reader: R, header: &Header, start_pos: u64) -> Self { let ctx = header.get_xtension(); - let data = TableData::new(reader, header, start_pos); if let Some(tile_compressed) = &ctx.z_image { - BinaryTableData::TileCompressed(TileCompressedData::new(header, data, tile_compressed)) + BinaryTableData::TileCompressed(Pixels::new(data, header, tile_compressed)) } else { BinaryTableData::Table(data) } @@ -66,7 +67,12 @@ where pub fn table_data(self) -> TableData { match self { - BinaryTableData::TileCompressed(tile) => tile.table_data(), + BinaryTableData::TileCompressed(pixels) => match pixels { + Pixels::U8(pixels) => pixels.row_it.table_data(), + Pixels::I16(pixels) => pixels.row_it.table_data(), + Pixels::I32(pixels) => pixels.row_it.table_data(), + Pixels::F32(pixels) => pixels.row_it.table_data(), + }, BinaryTableData::Table(table) => table, } } @@ -75,8 +81,13 @@ where impl BinaryTableData { pub fn row_iter(self) -> TableRowData { match self { - Self::Table(table) => TableRowData::new(table), - Self::TileCompressed(TileCompressedData { row_it, .. }) => row_it, + BinaryTableData::TileCompressed(pixels) => match pixels { + Pixels::U8(pixels) => pixels.row_it, + Pixels::I16(pixels) => pixels.row_it, + Pixels::I32(pixels) => pixels.row_it, + Pixels::F32(pixels) => pixels.row_it, + }, + BinaryTableData::Table(table) => table.row_iter(), } } } diff --git a/src/hdu/data/bintable/mod.rs b/src/hdu/data/bintable/mod.rs index 6b2df8e..67de5fe 100644 --- a/src/hdu/data/bintable/mod.rs +++ b/src/hdu/data/bintable/mod.rs @@ -1,12 +1,9 @@ #![allow(clippy::upper_case_acronyms)] pub mod data; -pub mod dithering; -pub mod rice; pub mod row; pub mod tile_compressed; -pub use data::BinaryTableData; pub use data::TableData; pub use row::TableRowData; diff --git a/src/hdu/data/bintable/tile_compressed.rs b/src/hdu/data/bintable/tile_compressed.rs deleted file mode 100644 index c44103c..0000000 --- a/src/hdu/data/bintable/tile_compressed.rs +++ /dev/null @@ -1,722 +0,0 @@ -use flate2::read::GzDecoder; - -use super::data::TableData; -use super::dithering::{N_RANDOM, RAND_VALUES}; -use super::rice::RICEDecoder; -use crate::error::Error; -use crate::hdu::header::extension::bintable::{BinTable, TileCompressedImage, ZCmpType, ZQuantiz}; -use crate::hdu::header::{Bitpix, Header}; - -use super::row::TableRowData; - -#[derive(Debug)] -pub struct TileCompressedData { - /// An iterator over the row of a binary table - pub(crate) row_it: TableRowData, - /// A buffer for storing uncompressed data from GZIP1, GZIP2 or RICE - buf: Vec, - - // Mandatory data compressed column - data_compressed_idx: usize, - // Optional floating point columns - z_scale_idx: Option, - z_zero_idx: Option, - // Optional column or keyword - z_blank: Option, - /// The type of compression algorithm - z_cmp_type: ZCmpType, - /// Bitpix of the uncompressed values - z_bitpix: Bitpix, - /// Tile size - z_tile: Box<[usize]>, - /// Total image size - z_naxis: Box<[usize]>, - /// optional z_dither0 - z_dither_0: Option, - /// optional z_quantiz, - z_quantiz: Option, - - /// Current tile pointer - tile: TileCompressed, -} - -/// Proper data relative to the tile currently being parsed -#[derive(Debug)] -struct TileCompressed { - /// Current value of ZSCALE field (floating point case) - z_scale: f32, - /// Current value of ZZERO field (floating point case) - z_zero: f32, - /// Current value of ZBLANK field (float and integer case) - z_blank: Option, - /// Total number of pixels in the tile - n_pixels: u64, - /// Number of pixels remaining to return - remaining_pixels: u64, - /// Quantiz parameters - quantiz: Quantiz, -} - -impl TileCompressed { - /// Unquantize the integer decoded value to the real floating point value - fn unquantize(&mut self, value: i32) -> f32 { - match &mut self.quantiz { - Quantiz::NoDither => (value as f32) * self.z_scale + self.z_zero, - Quantiz::SubtractiveDither1 { i1 } => { - let ri = RAND_VALUES[*i1]; - // increment i1 for the next pixel - *i1 = (*i1 + 1) % N_RANDOM; - - ((value as f32) - ri + 0.5) * self.z_scale + self.z_zero - } - Quantiz::SubtractiveDither2 { i1 } => { - // FIXME: i32::MIN is -2147483648 ! - if value == -2147483647 { - *i1 = (*i1 + 1) % N_RANDOM; - - 0.0 - } else { - let ri = RAND_VALUES[*i1]; - // increment i1 for the next pixel - *i1 = (*i1 + 1) % N_RANDOM; - - ((value as f32) - ri + 0.5) * self.z_scale + self.z_zero - } - } - } - } -} - -#[derive(Debug)] -enum ZBLANK { - ColumnIdx(usize), - Value(f64), -} - -#[derive(Debug)] -enum Quantiz { - NoDither, - SubtractiveDither1 { i1: usize }, - SubtractiveDither2 { i1: usize }, -} - -use std::fmt::Debug; -use std::io::{Read, Seek, SeekFrom}; - -/// Compute the size of the tile from its row position inside the compressed data column -/// FIXME: optimize this computation by using memoization -fn tile_size_from_row_idx(z_tile: &[usize], z_naxis: &[usize], n: usize) -> Box<[usize]> { - let d = z_tile.len(); - - if d == 0 { - // There must be at least one dimension - unreachable!(); - } else { - let mut u = vec![0_usize; z_tile.len()]; - - let s = z_naxis - .iter() - .zip(z_tile.iter()) - .map(|(naxisi, tilei)| naxisi.div_ceil(*tilei)) - .collect::>(); - - // Compute the position inside the first dimension - u[0] = n % s[0]; - - for i in 1..d { - u[i] = n - - u[0] - - (1..i) - .map(|k| { - let prod_sk = s.iter().take(k).product::(); - u[k] * prod_sk - }) - .sum::(); - - let prod_si = s.iter().take(i).product::(); - - u[i] = (u[i] / prod_si) % s[i]; - } - - u.iter() - .zip(z_naxis.iter().zip(z_tile.iter())) - .map(|(&u_i, (&naxis, &tilez))| tilez.min(naxis - u_i * tilez)) - .collect() - } -} - -impl TileCompressedData { - fn get_reader(&mut self) -> &mut R { - self.row_it.get_reader() - } -} - -impl TileCompressedData { - pub(crate) fn new( - header: &Header, - mut data: TableData, - config: &TileCompressedImage, - ) -> Self { - // This buffer is only used if tile compressed image in the gzip compression is to be found - let mut buf = vec![]; - - let TileCompressedImage { - z_tilen, - z_naxisn, - z_bitpix, - z_cmp_type, - data_compressed_idx, - z_quantiz, - z_dither_0, - .. - } = config; - // Disable reading the heap, we will handle that manually after reading the column - data.read_the_heap(false); - let row_it = data.row_iter(); - - let ctx = header.get_xtension(); - - let z_scale_idx = ctx.find_field_by_ttype("ZSCALE"); - let z_zero_idx = ctx.find_field_by_ttype("ZZERO"); - - // Allocation of a buffer at init of the iterator that is the size of the biggest tiles we can found - // on the tile compressed image. This is simply given by the ZTILEi keyword. - // Some tiles found on the border of the image can be smaller - let n_elems_max = z_tilen.iter().product::(); - - // FIXME - // A little precision. The gzdecoder from flate2 seems to unzip data in a stream of u32 i.e. even if the type of data - // to uncompress is byte or short, the result will be cast in a u32. I thus allocate for gzip compression, a buffer of - // n_elems_max * size_in_bytes of u32. - // It seems to be the same for RICE, so the latter rice decompression code is called on i32 - let num_bytes_max_tile = n_elems_max * std::mem::size_of::(); - - buf.resize(num_bytes_max_tile, 0); - - let z_blank = header - .get_xtension() - // Check for a field named ZBLANK - .find_field_by_ttype("ZBLANK") - // If no ZBLANK colum has been found then check the header keywords (ZBLANK for float, BLANK for integer) - .map_or_else( - || { - header - .get_parsed(if (*z_bitpix as i32) < 0 { - "ZBLANK" - } else { - "BLANK" - }) - // TODO: we should probably propagate errors from here if ZBLANK/BLANK exist but are of a wrong type. - .ok() - .map(ZBLANK::Value) - }, - |field_idx| Some(ZBLANK::ColumnIdx(field_idx)), - ); - let tile = TileCompressed { - z_scale: 1.0, - z_zero: 0.0, - z_blank: None, - n_pixels: 0, - remaining_pixels: 0, - quantiz: Quantiz::NoDither, - }; - - Self { - buf, - row_it, - tile, - data_compressed_idx: *data_compressed_idx, - z_scale_idx, - z_zero_idx, - z_blank, - z_naxis: z_naxisn.clone(), - z_tile: z_tilen.clone(), - z_cmp_type: *z_cmp_type, - z_bitpix: *z_bitpix, - z_dither_0: *z_dither_0, - z_quantiz: z_quantiz.clone(), - } - } - - /// Get an iterator over the binary table without interpreting its content as - /// a compressed tile. - /// - /// This can be useful if you want to have access to the raw data because [TableData] has a method - /// to get its raw_bytes - pub fn table_data(self) -> TableData { - self.row_it.table_data() - } -} - -use super::{ColumnId, DataValue}; -impl Iterator for TileCompressedData -where - R: Read + Seek + Debug, -{ - // Return a vec of fields because to take into account the repeat count value for that field - type Item = DataValue; - - fn next(&mut self) -> Option { - // We first retrieve the whole row from the row data iterator - if self.tile.remaining_pixels == 0 { - let row_data = self.row_it.next()?; - - let (_, byte_offset) = match row_data[self.data_compressed_idx] { - DataValue::VariableLengthArray32 { - num_elems, - offset_byte, - } => (num_elems as u64, offset_byte as u64), - DataValue::VariableLengthArray64 { - num_elems, - offset_byte, - } => (num_elems, offset_byte), - _ => unreachable!(), - }; - - let ctx = self.row_it.get_ctx(); - let row_idx = self.row_it.get_row_idx(); - - // Update the tile compressed currently decompressed - let num_pixels = tile_size_from_row_idx(&self.z_tile[..], &self.z_naxis[..], row_idx) - .iter() - .product::() as u64; - self.tile.n_pixels = num_pixels; - self.tile.remaining_pixels = num_pixels; - self.tile.quantiz = match (&self.z_quantiz, self.z_dither_0) { - (Some(ZQuantiz::SubtractiveDither1), Some(zdither0)) => { - let i0 = (row_idx - 1 + (zdither0 as usize)) % 10000; - let i1 = (RAND_VALUES[i0] * 500.0).floor() as usize; - Quantiz::SubtractiveDither1 { i1 } - } - (Some(ZQuantiz::SubtractiveDither2), Some(zdither0)) => { - let i0 = (row_idx - 1 + (zdither0 as usize)) % 10000; - let i1 = (RAND_VALUES[i0] * 500.0).floor() as usize; - Quantiz::SubtractiveDither2 { i1 } - } - _ => Quantiz::NoDither, - }; - - self.tile.z_scale = if let Some(idx) = self.z_scale_idx { - match row_data[idx] { - DataValue::Float { value, .. } => value, - DataValue::Double { value, .. } => value as f32, - _ => unreachable!(), - } - } else { - 1.0 - }; - - self.tile.z_zero = if let Some(idx) = self.z_zero_idx { - match row_data[idx] { - DataValue::Float { value, .. } => value, - DataValue::Double { value, .. } => value as f32, - _ => unreachable!(), - } - } else { - 0.0 - }; - - self.tile.z_blank = match self.z_blank { - Some(ZBLANK::ColumnIdx(idx)) => match row_data[idx] { - DataValue::Float { value, .. } => Some(value), - DataValue::Double { value, .. } => Some(value as f32), - _ => unreachable!(), - }, - Some(ZBLANK::Value(value)) => Some(value as f32), - _ => None, - }; - - // We jump to the heap at the position of the tile - // Then we decomp the tile and store it into out internal buf - // Finally we go back to the main data table location before jumping to the heap - let main_data_table_offset = row_idx * (ctx.naxis1 as usize); - let off = - // go back to the beginning of the main table data block - - (main_data_table_offset as i64) - // from the beginning of the main table go to the beginning of the heap - + ctx.theap as i64 - // from the beginning of the heap go to the start of the array - + byte_offset as i64; - self.jump_to_location( - |s| { - let TileCompressedData { buf, row_it, .. } = s; - - let reader = row_it.get_reader(); - - match s.z_cmp_type { - // For GZIP2, the byte shuffling is done in the next method - ZCmpType::Gzip1 | ZCmpType::Gzip2 => { - let mut gz = GzDecoder::new(reader); - gz.read_exact(&mut buf[..])?; - } - // FIXME support bytepix - ZCmpType::Rice { blocksize, .. } => { - let mut rice = RICEDecoder::<_, i32>::new( - reader, - blocksize as i32, - num_pixels as i32, - ); - rice.read_exact(&mut buf[..])?; - } - // Other compression not supported, when parsing the bintable extension keywords - // we ensured that z_image is `None` for other compressions than GZIP or RICE - _ => unreachable!(), - } - - Ok(()) - }, - SeekFrom::Current(off), - ) - .ok()?; - } - - // There is remaining pixels inside our buffer, we simply return the current one - let idx = (self.tile.n_pixels - self.tile.remaining_pixels) as usize; - let column = ColumnId::Index(self.data_compressed_idx); - - let value = match (self.z_cmp_type, self.z_bitpix) { - // GZIP compression - // Unsigned byte - (ZCmpType::Gzip1, Bitpix::U8) | (ZCmpType::Gzip2, Bitpix::U8) => { - // We need to get the byte index in the buffer storing u32, i.e. 4 bytes per elements - let off = 4 * idx; - // read from BigEndian, i.e. the most significant byte is at first and the least one is at last position - let value = self.buf[off + 3]; - DataValue::UnsignedByte { value, column, idx } - } - // 16-bit integer - (ZCmpType::Gzip1, Bitpix::I16) => { - // We need to get the byte index in the buffer storing u32, i.e. 4 bytes per elements - // read from BigEndian, i.e. the most significant byte is at first and the least one is at last position - let off = 4 * idx; - let value = (self.buf[off + 3] as i16) | ((self.buf[off + 2] as i16) << 8); - DataValue::Short { value, column, idx } - } - // 32-bit integer - (ZCmpType::Gzip1, Bitpix::I32) => { - // We need to get the byte index in the buffer storing u32, i.e. 4 bytes per elements - let off = 4 * idx; - let value = i32::from_be_bytes([ - self.buf[off], - self.buf[off + 1], - self.buf[off + 2], - self.buf[off + 3], - ]); - DataValue::Integer { value, column, idx } - } - // 32-bit floating point - (ZCmpType::Gzip1, Bitpix::F32) => { - // We need to get the byte index in the buffer storing u32, i.e. 4 bytes per elements - let off = 4 * idx; - let value = i32::from_be_bytes([ - self.buf[off], - self.buf[off + 1], - self.buf[off + 2], - self.buf[off + 3], - ]); - let value = self.tile.unquantize(value); - - DataValue::Float { value, column, idx } - } - // GZIP2 (when uncompressed, the bytes are all in most signicant byte order) - // FIXME: not tested - // 16-bit integer - (ZCmpType::Gzip2, Bitpix::I16) => { - // We need to get the byte index in the buffer storing u32, i.e. 4 bytes per elements - // read from BigEndian, i.e. the most significant byte is at first and the least one is at last position - let num_bytes = self.buf.len(); - let step_msb = num_bytes / 4; - let value = (self.buf[3 * step_msb + idx] as i16) - | ((self.buf[2 * step_msb + idx] as i16) << 8); - DataValue::Short { value, column, idx } - } - // 32-bit integer - // FIXME: not tested - (ZCmpType::Gzip2, Bitpix::I32) => { - // We need to get the byte index in the buffer storing u32, i.e. 4 bytes per elements - // read from BigEndian, i.e. the most significant byte is at first and the least one is at last position - let num_bytes = self.buf.len(); - let step_msb = num_bytes / 4; - let value = ((self.buf[idx] as i32) << 24) - | ((self.buf[idx + step_msb] as i32) << 16) - | ((self.buf[idx + 2 * step_msb] as i32) << 8) - | (self.buf[idx + 3 * step_msb] as i32); - - DataValue::Integer { value, column, idx } - } - // 32-bit floating point - (ZCmpType::Gzip2, Bitpix::F32) => { - // We need to get the byte index in the buffer storing u32, i.e. 4 bytes per elements - // read from BigEndian, i.e. the most significant byte is at first and the least one is at last position - let num_bytes = self.buf.len(); - let step_msb = num_bytes / 4; - let value = ((self.buf[idx] as i32) << 24) - | ((self.buf[idx + step_msb] as i32) << 16) - | ((self.buf[idx + 2 * step_msb] as i32) << 8) - | (self.buf[idx + 3 * step_msb] as i32); - - let value = self.tile.unquantize(value); - - DataValue::Float { value, column, idx } - } - // RICE compression - // Unsigned byte - (ZCmpType::Rice { .. }, Bitpix::U8) => { - // We need to get the byte index in the buffer storing u32, i.e. 4 bytes per elements - let off = 4 * idx; - let value = self.buf[off]; - DataValue::UnsignedByte { value, column, idx } - } - // 16-bit integer - (ZCmpType::Rice { .. }, Bitpix::I16) => { - // We need to get the byte index in the buffer storing u32, i.e. 4 bytes per elements - let off = 4 * idx; - let value = (self.buf[off] as i16) | ((self.buf[off + 1] as i16) << 8); - DataValue::Short { value, column, idx } - } - // 32-bit integer - (ZCmpType::Rice { .. }, Bitpix::I32) => { - // We need to get the byte index in the buffer storing u32, i.e. 4 bytes per elements - let off = 4 * idx; - let value = i32::from_ne_bytes([ - self.buf[off], - self.buf[off + 1], - self.buf[off + 2], - self.buf[off + 3], - ]); - DataValue::Integer { value, column, idx } - } - // 32-bit floating point - (ZCmpType::Rice { .. }, Bitpix::F32) => { - // We need to get the byte index in the buffer storing u32, i.e. 4 bytes per elements - let off = 4 * idx; - let value = i32::from_ne_bytes([ - self.buf[off], - self.buf[off + 1], - self.buf[off + 2], - self.buf[off + 3], - ]); - - let value = self.tile.unquantize(value); - - DataValue::Float { value, column, idx } - } - // Not supported compression/bitpix results in parsing the binary table as normal and thus this part is not reachable - _ => unreachable!(), - }; - self.tile.remaining_pixels -= 1; - - Some(value) - } -} - -impl TileCompressedData -where - R: Seek, -{ - /// Jump to a specific location of the reader, perform an operation and jumps back to the original position - pub(crate) fn jump_to_location(&mut self, f: F, pos: SeekFrom) -> Result<(), Error> - where - F: FnOnce(&mut Self) -> Result<(), Error>, - { - let old_pos = SeekFrom::Start(self.get_reader().stream_position()?); - - self.get_reader().seek(pos)?; - f(self)?; - let _ = self.get_reader().seek(old_pos)?; - - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use std::io::{Cursor, Read}; - - use image::DynamicImage; - use test_case::test_case; - - use crate::{Fits, HDU}; - - #[test] - fn test_tile_size_from_row_idx() { - let ground_truth = [ - [300, 200, 150], - [300, 200, 150], - [300, 200, 150], - [100, 200, 150], - [300, 200, 150], - [300, 200, 150], - [300, 200, 150], - [100, 200, 150], - [300, 100, 150], - [300, 100, 150], - [300, 100, 150], - [100, 100, 150], - [300, 200, 150], - [300, 200, 150], - [300, 200, 150], - [100, 200, 150], - [300, 200, 150], - [300, 200, 150], - [300, 200, 150], - [100, 200, 150], - [300, 100, 150], - [300, 100, 150], - [300, 100, 150], - [100, 100, 150], - [300, 200, 50], - [300, 200, 50], - [300, 200, 50], - [100, 200, 50], - [300, 200, 50], - [300, 200, 50], - [300, 200, 50], - [100, 200, 50], - [300, 100, 50], - [300, 100, 50], - [300, 100, 50], - [100, 100, 50], - ]; - use super::tile_size_from_row_idx; - for (i, &ground_truth) in ground_truth.iter().enumerate() { - let tile_s = tile_size_from_row_idx(&[300, 200, 150], &[1000, 500, 350], i); - assert_eq!([tile_s[0], tile_s[1], tile_s[2]], ground_truth); - } - } - - #[test_case("samples/fits.gsfc.nasa.gov/m13real_rice.fits", 1000.0)] - #[test_case("samples/fits.gsfc.nasa.gov/m13_rice.fits", 1000.0)] - #[test_case("samples/fits.gsfc.nasa.gov/m13_gzip.fits", 1000.0)] - fn test_fits_without_dithering(filename: &str, vmax: f32) { - use std::fs::File; - - use crate::hdu::data::bintable::DataValue; - - let mut f = File::open(filename).unwrap(); - let mut buf = Vec::new(); - f.read_to_end(&mut buf).unwrap(); - let reader = Cursor::new(&buf[..]); - - let mut hdu_list = Fits::from_reader(reader); - - while let Some(Ok(hdu)) = hdu_list.next() { - if let HDU::XBinaryTable(hdu) = hdu { - let width = hdu.get_header().get_parsed::("ZNAXIS1").unwrap(); - let height = hdu.get_header().get_parsed::("ZNAXIS2").unwrap(); - let pixels = hdu_list - .get_data(&hdu) - .map(|value| match value { - DataValue::Short { value, .. } => value as f32, - DataValue::Integer { value, .. } => value as f32, - DataValue::Float { value, .. } => value, - _ => unimplemented!(), - }) - .map(|v| ((v / vmax) * 255.0) as u8) - .collect::>(); - - let imgbuf = DynamicImage::ImageLuma8( - image::ImageBuffer::from_raw(width, height, pixels).unwrap(), - ); - imgbuf.save(format!("{filename}.jpg")).unwrap(); - } - } - } - - #[test_case("samples/fits.gsfc.nasa.gov/FITS RICE integer.fz")] - fn test_fits_rice_integer(filename: &str) { - use std::fs::File; - - use crate::hdu::data::bintable::DataValue; - - let mut f = File::open(filename).unwrap(); - let mut buf = Vec::new(); - f.read_to_end(&mut buf).unwrap(); - let reader = Cursor::new(&buf[..]); - - let mut hdu_list = Fits::from_reader(reader); - - while let Some(Ok(hdu)) = hdu_list.next() { - if let HDU::XBinaryTable(hdu) = hdu { - let width = hdu.get_header().get_parsed::("ZNAXIS1").unwrap(); - let height = hdu.get_header().get_parsed::("ZNAXIS2").unwrap(); - let bscale = hdu.get_header().get_parsed::("BSCALE").unwrap(); - let bzero = hdu.get_header().get_parsed::("BZERO").unwrap(); - - let pixels = hdu_list - .get_data(&hdu) - .map(|value| match value { - DataValue::Short { value, .. } => value as f32, - DataValue::Integer { value, .. } => value as f32, - DataValue::Float { value, .. } => value, - _ => unimplemented!(), - }) - .map(|v| (((v * bscale + bzero) / 100.0) * 255.0) as u8) - .collect::>(); - - let imgbuf = DynamicImage::ImageLuma8( - image::ImageBuffer::from_raw(width, height, pixels).unwrap(), - ); - imgbuf.save(format!("{filename}.jpg")).unwrap(); - } - } - } - - #[test_case("samples/fits.gsfc.nasa.gov/FITS RICE_ONE.fits")] - #[test_case("samples/fits.gsfc.nasa.gov/FITS RICE DITHER2 method.fz")] - fn test_fits_f32_with_dithering(filename: &str) { - use std::fs::File; - - use crate::hdu::data::bintable::DataValue; - - let mut f = File::open(filename).unwrap(); - let mut buf = Vec::new(); - f.read_to_end(&mut buf).unwrap(); - let reader = Cursor::new(&buf[..]); - - let mut hdu_list = Fits::from_reader(reader); - - while let Some(Ok(hdu)) = hdu_list.next() { - if let HDU::XBinaryTable(hdu) = hdu { - let width = hdu.get_header().get_parsed::("ZNAXIS1").unwrap(); - let height = hdu.get_header().get_parsed::("ZNAXIS2").unwrap(); - - let mut buf = vec![0_u8; (width as usize) * (height as usize)]; - - let tile_w = 100; - let tile_h = 100; - let num_tile_per_w = width / tile_w; - - for (i, pixel) in hdu_list - .get_data(&hdu) - .map(|value| match value { - DataValue::Short { value, .. } => value as f32, - DataValue::Integer { value, .. } => value as f32, - DataValue::Float { value, .. } => value, - _ => unimplemented!(), - }) - .map(|v| (v * 255.0) as u8) - .enumerate() - { - let tile_idx = (i as u64) / ((tile_w * tile_h) as u64); - let x_tile_idx = tile_idx % (num_tile_per_w as u64); - let y_tile_idx = tile_idx / (num_tile_per_w as u64); - - let pixel_inside_tile_idx = (i as u64) % ((tile_w * tile_h) as u64); - let x_pixel_inside_tile_idx = pixel_inside_tile_idx % (tile_w as u64); - let y_pixel_inside_tile_idx = pixel_inside_tile_idx / (tile_w as u64); - - let x = x_tile_idx * (tile_w as u64) + x_pixel_inside_tile_idx; - let y = y_tile_idx * (tile_h as u64) + y_pixel_inside_tile_idx; - - buf[(y * (width as u64) + x) as usize] = pixel; - } - - let imgbuf = DynamicImage::ImageLuma8( - image::ImageBuffer::from_raw(width, height, buf).unwrap(), - ); - imgbuf.save(format!("{filename}.jpg")).unwrap(); - } - } - } -} diff --git a/src/hdu/data/bintable/dithering.rs b/src/hdu/data/bintable/tile_compressed/dithering.rs similarity index 100% rename from src/hdu/data/bintable/dithering.rs rename to src/hdu/data/bintable/tile_compressed/dithering.rs diff --git a/src/hdu/data/bintable/tile_compressed/mod.rs b/src/hdu/data/bintable/tile_compressed/mod.rs new file mode 100644 index 0000000..0015ac8 --- /dev/null +++ b/src/hdu/data/bintable/tile_compressed/mod.rs @@ -0,0 +1,378 @@ +mod dithering; +pub mod pixels; +mod rice; + +use dithering::{N_RANDOM, RAND_VALUES}; + +use crate::hdu::header::extension::bintable::{BinTable, TileCompressedImage, ZQuantiz}; +use crate::hdu::header::Header; + +pub trait Keywords { + type T; + + fn new(header: &Header, config: &TileCompressedImage) -> Self; +} + +#[derive(Debug)] +struct TileDesc { + /// Total number of pixels in the tile + pub n_pixels: u64, + /// Number of pixels remaining to return + pub remaining_pixels: u64, + /// Optional keywords relative to the parsing of floats + pub keywords: K, +} + +impl TileDesc +where + K: Keywords, +{ + fn new(header: &Header, config: &TileCompressedImage) -> Self { + let keywords = K::new(header, config); + + Self { + keywords, + n_pixels: 0, + remaining_pixels: 0, + } + } +} + +#[derive(Debug)] +pub struct F32Keywords { + /// Idx column storing z_scale values for each tile + z_scale_idx: usize, + /// Idx column storing z_zero values for each tile + z_zero_idx: usize, + /// Idx column storing z_blank values for each tile + z_blank_idx: Option, + + /// Dither + z_dither_0: i64, + /// quantiz option + z_quantiz: ZQuantiz, + + /// Current value of ZSCALE field (floating point case) + scale: f32, + /// Current value of ZZERO field (floating point case) + zero: f32, + /// Current value of ZBLANK (floating point case) + /// Stores the integer value that evaluates to a floating point NAN + z_blank: Option, + /// Current value of ZBLANK field (float and integer case) + /// Current quantiz state + quantiz: Quantiz, +} + +impl F32Keywords { + /// Unquantize the integer decoded value to the real floating point value + fn unquantize(&mut self, value: i32) -> f32 { + // map the NaN if value corresponds to BLANK + if let Some(z_blank) = self.z_blank { + if z_blank == value { + return f32::NAN; + } + } + + match &mut self.quantiz { + Quantiz::NoDither => (value as f32) * self.scale + self.zero, + Quantiz::SubtractiveDither1 { i1 } => { + let ri = RAND_VALUES[*i1]; + // increment i1 for the next pixel + *i1 = (*i1 + 1) % N_RANDOM; + + ((value as f32) - ri + 0.5) * self.scale + self.zero + } + Quantiz::SubtractiveDither2 { i1 } => { + // FIXME: i32::MIN is -2147483648 ! + if value == -2147483647 { + *i1 = (*i1 + 1) % N_RANDOM; + + 0.0 + } else { + let ri = RAND_VALUES[*i1]; + // increment i1 for the next pixel + *i1 = (*i1 + 1) % N_RANDOM; + + ((value as f32) - ri + 0.5) * self.scale + self.zero + } + } + } + } +} + +impl Keywords for F32Keywords { + type T = f32; + + fn new(header: &Header, config: &TileCompressedImage) -> Self { + let TileCompressedImage { + z_dither_0, + z_quantiz, + .. + } = config; + + let ctx = header.get_xtension(); + let z_scale_idx = ctx.find_field_by_ttype("ZSCALE").unwrap(); + let z_zero_idx = ctx.find_field_by_ttype("ZZERO").unwrap(); + + let mut z_blank = None; + let z_blank_idx = ctx + // Check for a field named ZBLANK + .find_field_by_ttype("ZBLANK") + .or_else(|| { + z_blank = header + .get_parsed("ZBLANK") + // TODO: we should probably propagate errors from here if ZBLANK/BLANK exist but are of a wrong type. + .ok(); + + None + }); + // If no ZBLANK colum has been found then check the header keywords (ZBLANK for float, BLANK for integer) + + Self { + scale: 1.0, + zero: 0.0, + z_scale_idx, + z_zero_idx, + z_blank_idx, + z_blank, + quantiz: Quantiz::NoDither, + z_quantiz: z_quantiz.clone().unwrap_or(ZQuantiz::NoDither), + z_dither_0: z_dither_0.unwrap_or(0), + } + } +} + +#[derive(Debug)] +pub struct U8Keywords { + _blank: Option, +} + +impl Keywords for U8Keywords { + type T = u8; + + fn new(header: &Header, _: &TileCompressedImage) -> Self { + let _blank = header.get_parsed::("BLANK").ok(); + + Self { _blank } + } +} + +#[derive(Debug)] +pub struct I16Keywords { + _blank: Option, +} + +impl Keywords for I16Keywords { + type T = i16; + + fn new(header: &Header, _: &TileCompressedImage) -> Self { + let _blank = header.get_parsed::("BLANK").ok(); + + Self { _blank } + } +} + +#[derive(Debug)] +pub struct I32Keywords { + _blank: Option, +} + +impl Keywords for I32Keywords { + type T = i32; + + fn new(header: &Header, _: &TileCompressedImage) -> Self { + let _blank = header.get_parsed::("BLANK").ok(); + + Self { _blank } + } +} + +#[derive(Debug)] +enum Quantiz { + NoDither, + SubtractiveDither1 { i1: usize }, + SubtractiveDither2 { i1: usize }, +} + +#[cfg(test)] +mod tests { + use crate::hdu::data::bintable::data::BinaryTableData; + use image::DynamicImage; + use std::io::{Cursor, Read}; + use test_case::test_case; + + use crate::{hdu::data::bintable::tile_compressed::pixels::Pixels, Fits, HDU}; + + #[test] + fn test_tile_size_from_row_idx() { + let ground_truth = [ + [300, 200, 150], + [300, 200, 150], + [300, 200, 150], + [100, 200, 150], + [300, 200, 150], + [300, 200, 150], + [300, 200, 150], + [100, 200, 150], + [300, 100, 150], + [300, 100, 150], + [300, 100, 150], + [100, 100, 150], + [300, 200, 150], + [300, 200, 150], + [300, 200, 150], + [100, 200, 150], + [300, 200, 150], + [300, 200, 150], + [300, 200, 150], + [100, 200, 150], + [300, 100, 150], + [300, 100, 150], + [300, 100, 150], + [100, 100, 150], + [300, 200, 50], + [300, 200, 50], + [300, 200, 50], + [100, 200, 50], + [300, 200, 50], + [300, 200, 50], + [300, 200, 50], + [100, 200, 50], + [300, 100, 50], + [300, 100, 50], + [300, 100, 50], + [100, 100, 50], + ]; + use super::pixels::tile_size_from_row_idx; + for (i, &ground_truth) in ground_truth.iter().enumerate() { + let tile_s = tile_size_from_row_idx(&[300, 200, 150], &[1000, 500, 350], i); + assert_eq!([tile_s[0], tile_s[1], tile_s[2]], ground_truth); + } + } + + #[test_case("samples/fits.gsfc.nasa.gov/m13real_rice.fits", 1000.0)] + #[test_case("samples/fits.gsfc.nasa.gov/m13_rice.fits", 1000.0)] + #[test_case("samples/fits.gsfc.nasa.gov/m13_gzip.fits", 1000.0)] + fn test_fits_without_dithering(filename: &str, vmax: f32) { + use std::fs::File; + + let mut f = File::open(filename).unwrap(); + let mut buf = Vec::new(); + f.read_to_end(&mut buf).unwrap(); + let reader = Cursor::new(&buf[..]); + + let mut hdu_list = Fits::from_reader(reader); + + while let Some(Ok(hdu)) = hdu_list.next() { + if let HDU::XBinaryTable(hdu) = hdu { + let width = hdu.get_header().get_parsed::("ZNAXIS1").unwrap(); + let height = hdu.get_header().get_parsed::("ZNAXIS2").unwrap(); + + let img_bytes = match hdu_list.get_data(&hdu) { + BinaryTableData::TileCompressed(Pixels::U8(pixels)) => { + pixels.collect::>() + } + BinaryTableData::TileCompressed(Pixels::I16(pixels)) => pixels + .map(|v| (((v as f32) / vmax) * 255.0) as u8) + .collect::>(), + BinaryTableData::TileCompressed(Pixels::I32(pixels)) => pixels + .map(|v| (((v as f32) / vmax) * 255.0) as u8) + .collect::>(), + BinaryTableData::TileCompressed(Pixels::F32(pixels)) => pixels + .map(|v| ((v / vmax) * 255.0) as u8) + .collect::>(), + _ => unreachable!(), + }; + + let imgbuf = DynamicImage::ImageLuma8( + image::ImageBuffer::from_raw(width, height, img_bytes).unwrap(), + ); + imgbuf.save(format!("{filename}.jpg")).unwrap(); + } + } + } + #[test_case("samples/fits.gsfc.nasa.gov/FITS RICE integer.fz")] + fn test_fits_rice_integer(filename: &str) { + use std::fs::File; + + let mut f = File::open(filename).unwrap(); + let mut buf = Vec::new(); + f.read_to_end(&mut buf).unwrap(); + let reader = Cursor::new(&buf[..]); + + let mut hdu_list = Fits::from_reader(reader); + + while let Some(Ok(hdu)) = hdu_list.next() { + if let HDU::XBinaryTable(hdu) = hdu { + let width = hdu.get_header().get_parsed::("ZNAXIS1").unwrap(); + let height = hdu.get_header().get_parsed::("ZNAXIS2").unwrap(); + let bscale = hdu.get_header().get_parsed::("BSCALE").unwrap(); + let bzero = hdu.get_header().get_parsed::("BZERO").unwrap(); + + if let BinaryTableData::TileCompressed(Pixels::I32(pixels)) = + hdu_list.get_data(&hdu) + { + let pixels = pixels + .map(|v| ((((v as f32) * bscale + bzero) / 100.0) * 255.0) as u8) + .collect::>(); + + let imgbuf = DynamicImage::ImageLuma8( + image::ImageBuffer::from_raw(width, height, pixels).unwrap(), + ); + imgbuf.save(format!("{filename}.jpg")).unwrap(); + } + } + } + } + + #[test_case("samples/fits.gsfc.nasa.gov/FITS RICE_ONE.fits")] + #[test_case("samples/fits.gsfc.nasa.gov/FITS RICE DITHER2 method.fz")] + fn test_fits_f32_with_dithering(filename: &str) { + use std::fs::File; + + let mut f = File::open(filename).unwrap(); + let mut buf = Vec::new(); + f.read_to_end(&mut buf).unwrap(); + let reader = Cursor::new(&buf[..]); + + let mut hdu_list = Fits::from_reader(reader); + + while let Some(Ok(hdu)) = hdu_list.next() { + if let HDU::XBinaryTable(hdu) = hdu { + let width = hdu.get_header().get_parsed::("ZNAXIS1").unwrap(); + let height = hdu.get_header().get_parsed::("ZNAXIS2").unwrap(); + + let mut buf = vec![0_u8; (width as usize) * (height as usize)]; + + let tile_w = 100; + let tile_h = 100; + let num_tile_per_w = width / tile_w; + + if let BinaryTableData::TileCompressed(Pixels::F32(pixels)) = + hdu_list.get_data(&hdu) + { + for (i, pixel) in pixels.map(|v| (v * 255.0) as u8).enumerate() { + let tile_idx = (i as u64) / ((tile_w * tile_h) as u64); + let x_tile_idx = tile_idx % (num_tile_per_w as u64); + let y_tile_idx = tile_idx / (num_tile_per_w as u64); + + let pixel_inside_tile_idx = (i as u64) % ((tile_w * tile_h) as u64); + let x_pixel_inside_tile_idx = pixel_inside_tile_idx % (tile_w as u64); + let y_pixel_inside_tile_idx = pixel_inside_tile_idx / (tile_w as u64); + + let x = x_tile_idx * (tile_w as u64) + x_pixel_inside_tile_idx; + let y = y_tile_idx * (tile_h as u64) + y_pixel_inside_tile_idx; + + buf[(y * (width as u64) + x) as usize] = pixel; + } + } + + let imgbuf = DynamicImage::ImageLuma8( + image::ImageBuffer::from_raw(width, height, buf).unwrap(), + ); + imgbuf.save(format!("{filename}.jpg")).unwrap(); + } + } + } +} diff --git a/src/hdu/data/bintable/tile_compressed/pixels.rs b/src/hdu/data/bintable/tile_compressed/pixels.rs new file mode 100644 index 0000000..f2f7db1 --- /dev/null +++ b/src/hdu/data/bintable/tile_compressed/pixels.rs @@ -0,0 +1,689 @@ +use flate2::read::GzDecoder; + +use super::super::DataValue; +use super::dithering::RAND_VALUES; +use super::rice::RICEDecoder; +use super::{F32Keywords, I16Keywords, I32Keywords, Keywords, Quantiz, TileDesc, U8Keywords}; +use crate::error::Error; +use crate::hdu::header::extension::bintable::{BinTable, TileCompressedImage, ZCmpType, ZQuantiz}; +use crate::hdu::header::{Bitpix, Header}; +use crate::{TableData, TableRowData}; +use std::io::{Read, Seek, SeekFrom}; + +#[derive(Debug)] +pub enum Pixels { + U8(It), + I16(It), + I32(It), + F32(It), +} + +impl Pixels { + pub fn new( + data: TableData, + header: &Header, + config: &TileCompressedImage, + ) -> Self { + match config.z_bitpix { + Bitpix::U8 => Self::U8(It::new(header, data, config)), + Bitpix::I16 => Self::I16(It::new(header, data, config)), + Bitpix::I32 => Self::I32(It::new(header, data, config)), + Bitpix::F32 => Self::F32(It::new(header, data, config)), + _ => unreachable!(), + } + } +} + +#[derive(Debug)] +pub struct It +where + K: Keywords, +{ + /// An iterator over the row of a binary table + pub row_it: TableRowData, + /// A buffer for storing uncompressed data from GZIP1, GZIP2 or RICE + buf: Vec, + + /// Current tile pointer + desc: TileDesc, + + z_tile: Box<[usize]>, + z_naxis: Box<[usize]>, + z_cmp_type: ZCmpType, + data_compressed_idx: usize, +} + +impl It +where + K: Keywords, +{ + fn get_reader(&mut self) -> &mut R { + self.row_it.get_reader() + } +} + +impl It +where + K: Keywords, +{ + pub(crate) fn new( + header: &Header, + mut data: TableData, + config: &TileCompressedImage, + ) -> Self { + // This buffer is only used if tile compressed image in the gzip compression is to be found + let TileCompressedImage { + z_tilen, + z_naxisn, + z_cmp_type, + data_compressed_idx, + .. + } = config; + // Allocation of a buffer at init of the iterator that is the size of the biggest tiles we can found + // on the tile compressed image. This is simply given by the ZTILEi keyword. + // Some tiles found on the border of the image can be smaller + let n_elems_max = z_tilen.iter().product::(); + + // FIXME + // A little precision. The gzdecoder from flate2 seems to unzip data in a stream of u32 i.e. even if the type of data + // to uncompress is byte or short, the result will be cast in a u32. I thus allocate for gzip compression, a buffer of + // n_elems_max * size_in_bytes of u32. + // It seems to be the same for RICE, so the latter rice decompression code is called on i32 + let num_bytes_max_tile = n_elems_max * std::mem::size_of::(); + + let buf = vec![0_u8; num_bytes_max_tile]; + + let desc = TileDesc::new(header, config); + + // do not read the heap, we will manage our way decompressing the tiles + data.read_the_heap(false); + Self { + buf, + row_it: data.row_iter(), + + desc, + + data_compressed_idx: *data_compressed_idx, + z_naxis: z_naxisn.clone(), + z_tile: z_tilen.clone(), + z_cmp_type: *z_cmp_type, + } + } +} + +/// Compute the size of the tile from its row position inside the compressed data column +/// FIXME: optimize this computation by using memoization +pub(crate) fn tile_size_from_row_idx( + z_tile: &[usize], + z_naxis: &[usize], + n: usize, +) -> Box<[usize]> { + let d = z_tile.len(); + + if d == 0 { + // There must be at least one dimension + unreachable!(); + } else { + let mut u = vec![0_usize; z_tile.len()]; + + let s = z_naxis + .iter() + .zip(z_tile.iter()) + .map(|(naxisi, tilei)| naxisi.div_ceil(*tilei)) + .collect::>(); + + // Compute the position inside the first dimension + u[0] = n % s[0]; + + for i in 1..d { + u[i] = n + - u[0] + - (1..i) + .map(|k| { + let prod_sk = s.iter().take(k).product::(); + u[k] * prod_sk + }) + .sum::(); + + let prod_si = s.iter().take(i).product::(); + + u[i] = (u[i] / prod_si) % s[i]; + } + + u.iter() + .zip(z_naxis.iter().zip(z_tile.iter())) + .map(|(&u_i, (&naxis, &tilez))| tilez.min(naxis - u_i * tilez)) + .collect() + } +} + +use std::fmt::Debug; +impl Iterator for It +where + R: Read + Seek + Debug, +{ + // Return a vec of fields because to take into account the repeat count value for that field + type Item = u8; + + fn next(&mut self) -> Option { + // We first retrieve the whole row from the row data iterator + if self.desc.remaining_pixels == 0 { + let row_data = self.row_it.next()?; + + let (_, byte_offset) = match row_data[self.data_compressed_idx] { + DataValue::VariableLengthArray32 { + num_elems, + offset_byte, + } => (num_elems as u64, offset_byte as u64), + DataValue::VariableLengthArray64 { + num_elems, + offset_byte, + } => (num_elems, offset_byte), + _ => unreachable!(), + }; + + let ctx = self.row_it.get_ctx(); + let row_idx = self.row_it.get_row_idx(); + + // Update the tile compressed currently decompressed + let num_pixels = tile_size_from_row_idx(&self.z_tile[..], &self.z_naxis[..], row_idx) + .iter() + .product::() as u64; + self.desc.n_pixels = num_pixels; + self.desc.remaining_pixels = num_pixels; + + // We jump to the heap at the position of the tile + // Then we decomp the tile and store it into out internal buf + // Finally we go back to the main data table location before jumping to the heap + let main_data_table_offset = row_idx * (ctx.naxis1 as usize); + let off = + // go back to the beginning of the main table data block + - (main_data_table_offset as i64) + // from the beginning of the main table go to the beginning of the heap + + ctx.theap as i64 + // from the beginning of the heap go to the start of the array + + byte_offset as i64; + self.jump_to_location( + |s| { + let It { buf, row_it, .. } = s; + + let reader = row_it.get_reader(); + + match s.z_cmp_type { + // For GZIP2, the byte shuffling is done in the next method + ZCmpType::Gzip1 | ZCmpType::Gzip2 => { + let mut gz = GzDecoder::new(reader); + gz.read_exact(&mut buf[..])?; + } + // FIXME support bytepix + ZCmpType::Rice { blocksize, .. } => { + let mut rice = RICEDecoder::<_, i32>::new( + reader, + blocksize as i32, + num_pixels as i32, + ); + rice.read_exact(&mut buf[..])?; + } + // Other compression not supported, when parsing the bintable extension keywords + // we ensured that z_image is `None` for other compressions than GZIP or RICE + _ => unreachable!(), + } + + Ok(()) + }, + SeekFrom::Current(off), + ) + .ok()?; + } + + // There is remaining pixels inside our buffer, we simply return the current one + let idx = (self.desc.n_pixels - self.desc.remaining_pixels) as usize; + + let value = match self.z_cmp_type { + ZCmpType::Gzip1 | ZCmpType::Gzip2 => { + // We need to get the byte index in the buffer storing u32, i.e. 4 bytes per elements + let off = 4 * idx; + // read from BigEndian, i.e. the most significant byte is at first and the least one is at last position + self.buf[off + 3] + } + ZCmpType::Rice { .. } => { + // We need to get the byte index in the buffer storing u32, i.e. 4 bytes per elements + let off = 4 * idx; + self.buf[off] + } + // Not supported compression/bitpix results in parsing the binary table as normal and thus this part is not reachable + _ => unreachable!(), + }; + self.desc.remaining_pixels -= 1; + + Some(value) + } +} + +impl Iterator for It +where + R: Read + Seek + Debug, +{ + // Return a vec of fields because to take into account the repeat count value for that field + type Item = i16; + + fn next(&mut self) -> Option { + // We first retrieve the whole row from the row data iterator + if self.desc.remaining_pixels == 0 { + let row_data = self.row_it.next()?; + + let (_, byte_offset) = match row_data[self.data_compressed_idx] { + DataValue::VariableLengthArray32 { + num_elems, + offset_byte, + } => (num_elems as u64, offset_byte as u64), + DataValue::VariableLengthArray64 { + num_elems, + offset_byte, + } => (num_elems, offset_byte), + _ => unreachable!(), + }; + + let ctx = self.row_it.get_ctx(); + let row_idx = self.row_it.get_row_idx(); + + // Update the tile compressed currently decompressed + let num_pixels = tile_size_from_row_idx(&self.z_tile[..], &self.z_naxis[..], row_idx) + .iter() + .product::() as u64; + self.desc.n_pixels = num_pixels; + self.desc.remaining_pixels = num_pixels; + // We jump to the heap at the position of the tile + // Then we decomp the tile and store it into out internal buf + // Finally we go back to the main data table location before jumping to the heap + let main_data_table_offset = row_idx * (ctx.naxis1 as usize); + let off = + // go back to the beginning of the main table data block + - (main_data_table_offset as i64) + // from the beginning of the main table go to the beginning of the heap + + ctx.theap as i64 + // from the beginning of the heap go to the start of the array + + byte_offset as i64; + self.jump_to_location( + |s| { + let It { buf, row_it, .. } = s; + + let reader = row_it.get_reader(); + + match s.z_cmp_type { + // For GZIP2, the byte shuffling is done in the next method + ZCmpType::Gzip1 | ZCmpType::Gzip2 => { + let mut gz = GzDecoder::new(reader); + gz.read_exact(&mut buf[..])?; + } + // FIXME support bytepix + ZCmpType::Rice { blocksize, .. } => { + let mut rice = RICEDecoder::<_, i32>::new( + reader, + blocksize as i32, + num_pixels as i32, + ); + rice.read_exact(&mut buf[..])?; + } + // Other compression not supported, when parsing the bintable extension keywords + // we ensured that z_image is `None` for other compressions than GZIP or RICE + _ => unreachable!(), + } + + Ok(()) + }, + SeekFrom::Current(off), + ) + .ok()?; + } + + // There is remaining pixels inside our buffer, we simply return the current one + let idx = (self.desc.n_pixels - self.desc.remaining_pixels) as usize; + + let value = match self.z_cmp_type { + ZCmpType::Gzip1 => { + // We need to get the byte index in the buffer storing u32, i.e. 4 bytes per elements + // read from BigEndian, i.e. the most significant byte is at first and the least one is at last position + let off = 4 * idx; + (self.buf[off + 3] as i16) | ((self.buf[off + 2] as i16) << 8) + } + ZCmpType::Gzip2 => { + // We need to get the byte index in the buffer storing u32, i.e. 4 bytes per elements + // read from BigEndian, i.e. the most significant byte is at first and the least one is at last position + let num_bytes = self.buf.len(); + let step_msb = num_bytes / 4; + (self.buf[3 * step_msb + idx] as i16) | ((self.buf[2 * step_msb + idx] as i16) << 8) + } + ZCmpType::Rice { .. } => { + // We need to get the byte index in the buffer storing u32, i.e. 4 bytes per elements + let off = 4 * idx; + (self.buf[off] as i16) | ((self.buf[off + 1] as i16) << 8) + } + // Not supported compression/bitpix results in parsing the binary table as normal and thus this part is not reachable + _ => unreachable!(), + }; + self.desc.remaining_pixels -= 1; + + Some(value) + } +} + +impl Iterator for It +where + R: Read + Seek + Debug, +{ + // Return a vec of fields because to take into account the repeat count value for that field + type Item = i32; + + fn next(&mut self) -> Option { + // We first retrieve the whole row from the row data iterator + if self.desc.remaining_pixels == 0 { + let row_data = self.row_it.next()?; + + let (_, byte_offset) = match row_data[self.data_compressed_idx] { + DataValue::VariableLengthArray32 { + num_elems, + offset_byte, + } => (num_elems as u64, offset_byte as u64), + DataValue::VariableLengthArray64 { + num_elems, + offset_byte, + } => (num_elems, offset_byte), + _ => unreachable!(), + }; + + let ctx = self.row_it.get_ctx(); + let row_idx = self.row_it.get_row_idx(); + + // Update the tile compressed currently decompressed + let num_pixels = tile_size_from_row_idx(&self.z_tile[..], &self.z_naxis[..], row_idx) + .iter() + .product::() as u64; + self.desc.n_pixels = num_pixels; + self.desc.remaining_pixels = num_pixels; + // We jump to the heap at the position of the tile + // Then we decomp the tile and store it into out internal buf + // Finally we go back to the main data table location before jumping to the heap + let main_data_table_offset = row_idx * (ctx.naxis1 as usize); + let off = + // go back to the beginning of the main table data block + - (main_data_table_offset as i64) + // from the beginning of the main table go to the beginning of the heap + + ctx.theap as i64 + // from the beginning of the heap go to the start of the array + + byte_offset as i64; + self.jump_to_location( + |s| { + let It { buf, row_it, .. } = s; + + let reader = row_it.get_reader(); + + match s.z_cmp_type { + // For GZIP2, the byte shuffling is done in the next method + ZCmpType::Gzip1 | ZCmpType::Gzip2 => { + let mut gz = GzDecoder::new(reader); + gz.read_exact(&mut buf[..])?; + } + // FIXME support bytepix + ZCmpType::Rice { blocksize, .. } => { + let mut rice = RICEDecoder::<_, i32>::new( + reader, + blocksize as i32, + num_pixels as i32, + ); + rice.read_exact(&mut buf[..])?; + } + // Other compression not supported, when parsing the bintable extension keywords + // we ensured that z_image is `None` for other compressions than GZIP or RICE + _ => unreachable!(), + } + + Ok(()) + }, + SeekFrom::Current(off), + ) + .ok()?; + } + + // There is remaining pixels inside our buffer, we simply return the current one + let idx = (self.desc.n_pixels - self.desc.remaining_pixels) as usize; + + let value = match self.z_cmp_type { + ZCmpType::Gzip1 => { + // We need to get the byte index in the buffer storing u32, i.e. 4 bytes per elements + let off = 4 * idx; + i32::from_be_bytes([ + self.buf[off], + self.buf[off + 1], + self.buf[off + 2], + self.buf[off + 3], + ]) + } + ZCmpType::Gzip2 => { + // We need to get the byte index in the buffer storing u32, i.e. 4 bytes per elements + // read from BigEndian, i.e. the most significant byte is at first and the least one is at last position + let num_bytes = self.buf.len(); + let step_msb = num_bytes / 4; + ((self.buf[idx] as i32) << 24) + | ((self.buf[idx + step_msb] as i32) << 16) + | ((self.buf[idx + 2 * step_msb] as i32) << 8) + | (self.buf[idx + 3 * step_msb] as i32) + } + ZCmpType::Rice { .. } => { + // We need to get the byte index in the buffer storing u32, i.e. 4 bytes per elements + let off = 4 * idx; + i32::from_ne_bytes([ + self.buf[off], + self.buf[off + 1], + self.buf[off + 2], + self.buf[off + 3], + ]) + } + // Not supported compression/bitpix results in parsing the binary table as normal and thus this part is not reachable + _ => unreachable!(), + }; + self.desc.remaining_pixels -= 1; + + Some(value) + } +} + +impl Iterator for It +where + R: Read + Seek + Debug, +{ + // Return a vec of fields because to take into account the repeat count value for that field + type Item = f32; + + fn next(&mut self) -> Option { + // We first retrieve the whole row from the row data iterator + if self.desc.remaining_pixels == 0 { + let row_data = self.row_it.next()?; + + let (_, byte_offset) = match row_data[self.data_compressed_idx] { + DataValue::VariableLengthArray32 { + num_elems, + offset_byte, + } => (num_elems as u64, offset_byte as u64), + DataValue::VariableLengthArray64 { + num_elems, + offset_byte, + } => (num_elems, offset_byte), + _ => unreachable!(), + }; + + let ctx = self.row_it.get_ctx(); + let row_idx = self.row_it.get_row_idx(); + + // Update the tile compressed currently decompressed + let num_pixels = tile_size_from_row_idx(&self.z_tile[..], &self.z_naxis[..], row_idx) + .iter() + .product::() as u64; + self.desc.n_pixels = num_pixels; + self.desc.remaining_pixels = num_pixels; + + let TileDesc { + keywords: + F32Keywords { + z_scale_idx, + z_zero_idx, + scale, + zero, + z_dither_0, + z_quantiz, + quantiz, + z_blank_idx, + z_blank, + }, + .. + } = &mut self.desc; + + *quantiz = match z_quantiz { + ZQuantiz::SubtractiveDither1 => { + let i0 = (row_idx - 1 + ((*z_dither_0) as usize)) % 10000; + let i1 = (RAND_VALUES[i0] * 500.0).floor() as usize; + Quantiz::SubtractiveDither1 { i1 } + } + ZQuantiz::SubtractiveDither2 => { + let i0 = (row_idx - 1 + (*z_dither_0 as usize)) % 10000; + let i1 = (RAND_VALUES[i0] * 500.0).floor() as usize; + Quantiz::SubtractiveDither2 { i1 } + } + _ => Quantiz::NoDither, + }; + + *scale = match row_data[*z_scale_idx] { + DataValue::Float { value, .. } => value, + DataValue::Double { value, .. } => value as f32, + _ => unreachable!(), + }; + + *zero = match row_data[*z_zero_idx] { + DataValue::Float { value, .. } => value, + DataValue::Double { value, .. } => value as f32, + _ => unreachable!(), + }; + + if let Some(idx) = z_blank_idx { + *z_blank = match row_data[*idx] { + DataValue::UnsignedByte { value, .. } => Some(value as i32), + DataValue::Short { value, .. } => Some(value as i32), + DataValue::Integer { value, .. } => Some(value), + DataValue::Long { value, .. } => Some(value as i32), + _ => unreachable!(), + } + } + + // We jump to the heap at the position of the tile + // Then we decomp the tile and store it into out internal buf + // Finally we go back to the main data table location before jumping to the heap + let main_data_table_offset = row_idx * (ctx.naxis1 as usize); + let off = + // go back to the beginning of the main table data block + - (main_data_table_offset as i64) + // from the beginning of the main table go to the beginning of the heap + + ctx.theap as i64 + // from the beginning of the heap go to the start of the array + + byte_offset as i64; + self.jump_to_location( + |s| { + let It { buf, row_it, .. } = s; + + let reader = row_it.get_reader(); + + match s.z_cmp_type { + // For GZIP2, the byte shuffling is done in the next method + ZCmpType::Gzip1 | ZCmpType::Gzip2 => { + let mut gz = GzDecoder::new(reader); + gz.read_exact(&mut buf[..])?; + } + // FIXME support bytepix + ZCmpType::Rice { blocksize, .. } => { + let mut rice = RICEDecoder::<_, i32>::new( + reader, + blocksize as i32, + num_pixels as i32, + ); + rice.read_exact(&mut buf[..])?; + } + // Other compression not supported, when parsing the bintable extension keywords + // we ensured that z_image is `None` for other compressions than GZIP or RICE + _ => unreachable!(), + } + + Ok(()) + }, + SeekFrom::Current(off), + ) + .ok()?; + } + + // There is remaining pixels inside our buffer, we simply return the current one + let idx = (self.desc.n_pixels - self.desc.remaining_pixels) as usize; + + let value = match self.z_cmp_type { + // 32-bit floating point + ZCmpType::Gzip1 => { + // We need to get the byte index in the buffer storing u32, i.e. 4 bytes per elements + let off = 4 * idx; + let value = i32::from_be_bytes([ + self.buf[off], + self.buf[off + 1], + self.buf[off + 2], + self.buf[off + 3], + ]); + self.desc.keywords.unquantize(value) + } + ZCmpType::Gzip2 => { + // We need to get the byte index in the buffer storing u32, i.e. 4 bytes per elements + // read from BigEndian, i.e. the most significant byte is at first and the least one is at last position + let num_bytes = self.buf.len(); + let step_msb = num_bytes / 4; + let value = ((self.buf[idx] as i32) << 24) + | ((self.buf[idx + step_msb] as i32) << 16) + | ((self.buf[idx + 2 * step_msb] as i32) << 8) + | (self.buf[idx + 3 * step_msb] as i32); + + self.desc.keywords.unquantize(value) + } + // 32-bit floating point + ZCmpType::Rice { .. } => { + // We need to get the byte index in the buffer storing u32, i.e. 4 bytes per elements + let off = 4 * idx; + let value = i32::from_ne_bytes([ + self.buf[off], + self.buf[off + 1], + self.buf[off + 2], + self.buf[off + 3], + ]); + + self.desc.keywords.unquantize(value) + } + // Not supported compression/bitpix results in parsing the binary table as normal and thus this part is not reachable + _ => unreachable!(), + }; + self.desc.remaining_pixels -= 1; + + Some(value) + } +} + +impl It +where + R: Seek, + K: Keywords, +{ + /// Jump to a specific location of the reader, perform an operation and jumps back to the original position + pub(crate) fn jump_to_location(&mut self, f: F, pos: SeekFrom) -> Result<(), Error> + where + F: FnOnce(&mut Self) -> Result<(), Error>, + { + let old_pos = SeekFrom::Start(self.get_reader().stream_position()?); + + self.get_reader().seek(pos)?; + f(self)?; + let _ = self.get_reader().seek(old_pos)?; + + Ok(()) + } +} diff --git a/src/hdu/data/bintable/rice.rs b/src/hdu/data/bintable/tile_compressed/rice.rs similarity index 98% rename from src/hdu/data/bintable/rice.rs rename to src/hdu/data/bintable/tile_compressed/rice.rs index ffcfce6..b9a9fac 100644 --- a/src/hdu/data/bintable/rice.rs +++ b/src/hdu/data/bintable/tile_compressed/rice.rs @@ -93,51 +93,58 @@ impl RICEDecoder { } use std::io::Error; +use std::io::Read; +trait FromBytes { + fn from_be_bytes(reader: &mut R) -> Result + where + Self: Sized; +} + +impl FromBytes for u8 { + fn from_be_bytes(reader: &mut R) -> Result { + reader.read_u8() + } +} + +impl FromBytes for i16 { + fn from_be_bytes(reader: &mut R) -> Result { + reader.read_i16::() + } +} + +impl FromBytes for i32 { + fn from_be_bytes(reader: &mut R) -> Result { + reader.read_i32::() + } +} + /// A trait to define constant for possible output types from a /// RICE decoder/encoder -trait RICE: Sized + Into { +trait RICE: Sized + Into + FromBytes { const FSBITS: i32; const FSMAX: i32; const BBITS: i32 = 1 << Self::FSBITS; - /// Just a wrapping around std::mem::size_of fn size_of() -> usize { std::mem::size_of::() } - - /// Utilitary method for reading the first T elem that is not encoded - fn from_be_bytes(reader: &mut R) -> Result; } impl RICE for u8 { const FSBITS: i32 = 3; const FSMAX: i32 = 6; - - fn from_be_bytes(reader: &mut R) -> Result { - reader.read_u8() - } } impl RICE for i16 { const FSBITS: i32 = 4; const FSMAX: i32 = 14; - - fn from_be_bytes(reader: &mut R) -> Result { - reader.read_i16::() - } } impl RICE for i32 { const FSBITS: i32 = 5; const FSMAX: i32 = 25; - - fn from_be_bytes(reader: &mut R) -> Result { - reader.read_i32::() - } } -use std::io::Read; - use byteorder::BigEndian; use byteorder::ReadBytesExt; impl Read for RICEDecoder diff --git a/src/hdu/data/mod.rs b/src/hdu/data/mod.rs index 3f7fcd6..620637f 100644 --- a/src/hdu/data/mod.rs +++ b/src/hdu/data/mod.rs @@ -4,7 +4,6 @@ pub mod image; pub mod iter; pub mod stream; -pub use bintable::BinaryTableData; pub use bintable::TableData; pub use image::{ImageData, Pixels}; diff --git a/src/hdu/header/extension/bintable.rs b/src/hdu/header/extension/bintable.rs index 0f7e757..06c16a1 100644 --- a/src/hdu/header/extension/bintable.rs +++ b/src/hdu/header/extension/bintable.rs @@ -91,7 +91,7 @@ impl BinTable { } #[derive(Debug, PartialEq, Serialize, Clone)] -pub(crate) struct TileCompressedImage { +pub struct TileCompressedImage { /// ZCMPTYPE (required keyword) The value field of this keyword shall contain a character string /// giving the name of the algorithm that must be used to decompress the image. Currently, values of GZIP 1, GZIP 2, RICE 1, PLIO 1, and HCOMPRESS 1 are reserved, and the corresponding /// algorithms are described in a later section of this document. The value RICE ONE is also diff --git a/src/lib.rs b/src/lib.rs index 4e54398..053fe7c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -55,7 +55,7 @@ pub mod hdu; pub use async_fits::AsyncFits; pub use file::FITSFile; pub use fits::Fits; -pub use hdu::data::bintable::{BinaryTableData, DataValue, TableData, TableRowData}; +pub use hdu::data::bintable::{DataValue, TableData, TableRowData}; pub use hdu::data::image::{ImageData, Pixels}; pub use hdu::data::iter::It; pub use hdu::{AsyncHDU, HDU}; @@ -315,7 +315,7 @@ mod tests { while let Some(Ok(hdu)) = hdu_list.next() { if let HDU::XBinaryTable(hdu) = hdu { let _ = hdu.get_header().get_xtension(); - data_len = hdu_list.get_data(&hdu).count(); + data_len = hdu_list.get_data(&hdu).table_data().count(); } }