diff --git a/src/kete/rust/fitting.rs b/src/kete/rust/fitting.rs index 30059bf..1558194 100644 --- a/src/kete/rust/fitting.rs +++ b/src/kete/rust/fitting.rs @@ -1,12 +1,14 @@ //! Basic statistics use kete_core::{fitting, stats}; -use pyo3::pyfunction; +use pyo3::{PyResult, pyfunction}; /// Perform a KS test between two vectors of values. #[pyfunction] #[pyo3(name = "ks_test")] -pub fn ks_test_py(sample_a: Vec, sample_b: Vec) -> f64 { - stats::two_sample_ks_statistic(&sample_a, &sample_b).unwrap() +pub fn ks_test_py(sample_a: Vec, sample_b: Vec) -> PyResult { + let sample_a: stats::ValidData = sample_a.try_into()?; + let sample_b: stats::ValidData = sample_b.try_into()?; + Ok(sample_a.two_sample_ks_statistic(&sample_b)?) } /// Fit the reduced chi squared value for a collection of data with uncertainties. diff --git a/src/kete_core/src/lib.rs b/src/kete_core/src/lib.rs index 1b1c3d1..b7441c5 100644 --- a/src/kete_core/src/lib.rs +++ b/src/kete_core/src/lib.rs @@ -104,8 +104,11 @@ pub mod prelude { lambertian_flux, neatm_facet_temperature, }; pub use crate::frames::{Ecliptic, Equatorial, FK4, Galactic, NonInertialFrame}; - pub use crate::propagation::{propagate_n_body_spk, propagate_two_body}; + pub use crate::propagation::{ + NonGravModel, propagate_n_body_spk, propagate_two_body, propagation_n_body_spk_par, + }; pub use crate::simult_states::SimultaneousStates; pub use crate::spice::{LOADED_PCK, LOADED_SPK}; pub use crate::state::State; + pub use crate::time::{TDB, Time, UTC}; } diff --git a/src/kete_core/src/stats.rs b/src/kete_core/src/stats.rs new file mode 100644 index 0000000..0cebfb0 --- /dev/null +++ b/src/kete_core/src/stats.rs @@ -0,0 +1,293 @@ +//! # Statistics +//! +//! Commonly used statistical methods. +//! +// BSD 3-Clause License +// +// Copyright (c) 2026, Dar Dahlen +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this +// list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +use std::ops::Index; + +use crate::{errors::Error, prelude::KeteResult}; + +/// Finite, sorted, nonempty dataset. +/// +/// During construction, NaN and Infs are removed from the dataset. +/// +/// Construction hints: +/// +/// If data ownership can be given up, then a `Vec` will not incur any copy/clone +/// as all data will be manipulated in-place. However a slice may be used, but then the +/// data must be cloned/copied. +#[derive(Clone, Debug)] +pub struct ValidData(Box<[f64]>); + +impl TryFrom<&[f64]> for ValidData { + type Error = Error; + fn try_from(value: &[f64]) -> Result { + let mut data: Box<[f64]> = value + .iter() + .filter_map(|x| if x.is_finite() { Some(*x) } else { None }) + .collect(); + if data.is_empty() { + Err(Error::ValueError( + "Data was either empty or contained only non-finite values (NaN or inf).".into(), + )) + } else { + data.sort_by(f64::total_cmp); + Ok(Self(data)) + } + } +} + +impl TryFrom> for ValidData { + type Error = Error; + + fn try_from(mut value: Vec) -> Result { + // Switch all negative non-finites to positive inplace + for x in &mut value { + if !x.is_finite() & x.is_sign_negative() { + *x = x.abs(); + } + } + + // sort everything by total_cmp, which puts all positive non-finite values at the end. + value.sort_by(f64::total_cmp); + + if let Some(idx) = value.iter().position(|x| !x.is_finite()) { + value.truncate(idx); + } + + if value.is_empty() { + Err(Error::ValueError( + "Data was either empty or contained only non-finite values (NaN or inf).".into(), + )) + } else { + Ok(Self(value.into_boxed_slice())) + } + } +} + +impl Index for ValidData { + type Output = f64; + fn index(&self, index: usize) -> &Self::Output { + self.0.index(index) + } +} + +impl ValidData { + /// Calculate desired quantile of the provided data. + /// + /// Quantile is effectively the same as percentile, but 0.5 quantile == 50% percentile. + /// + /// This ignores non-finite values such as inf and nan. + /// + /// Quantiles are linearly interpolated between the two closest ranked values. + /// + /// If only one valid data point is provided, all quantiles evaluate to that value. + /// + /// # Errors + /// Fails when quant is not between 0 and 1, or if the data does not have any finite + /// values. + pub fn quantile(&self, quant: f64) -> KeteResult { + if !(0.0..=1.0).contains(&quant) { + Err(Error::ValueError( + "Quantile must be between 0.0 and 1.0".into(), + ))?; + } + let data = self.as_slice(); + let n_data = self.len(); + + let frac_idx = quant * (n_data - 1) as f64; + #[allow( + clippy::cast_sign_loss, + reason = "By construction this is always positive." + )] + let idx = frac_idx.floor() as usize; + + if idx as f64 == frac_idx { + // exactly on a data point + Ok(unsafe { *data.get_unchecked(idx) }) + } else { + // linear interpolation between two points + let diff = frac_idx - idx as f64; + unsafe { + Ok(data.get_unchecked(idx) * (1.0 - diff) + data.get_unchecked(idx + 1) * diff) + } + } + } + + /// Compute the median value of the data. + #[must_use] + pub fn median(&self) -> f64 { + // 0.5 is well defined, infallible + unsafe { self.quantile(0.5).unwrap_unchecked() } + } + + /// Compute the mean value of the data. + #[must_use] + pub fn mean(&self) -> f64 { + let n: f64 = self.len() as f64; + self.0.iter().sum::() / n + } + + /// Compute the standard deviation of the data. + #[must_use] + pub fn std(&self) -> f64 { + let n = self.len() as f64; + let mean = self.mean(); + let mut val = 0.0; + for v in self.as_slice() { + val += v.powi(2); + } + val /= n; + (val - mean.powi(2)).sqrt() + } + + /// Compute the MAD value of the data. + /// + /// + /// + /// # Errors + /// Fails when data does not contain any finite values. + pub fn mad(&self) -> KeteResult { + let median = self.quantile(0.5)?; + let abs_deviation_from_med: Self = self + .0 + .iter() + .map(|d| d - median) + .collect::>() + .as_slice() + .try_into()?; + abs_deviation_from_med.quantile(0.5) + } + + /// Length of the dataset. + #[must_use] + #[allow(clippy::len_without_is_empty, reason = "Cannot have empty dataset.")] + pub fn len(&self) -> usize { + self.0.len() + } + + /// Dataset as a slice. + #[must_use] + pub fn as_slice(&self) -> &[f64] { + &self.0 + } + + /// Compute the KS Test two sample statistic. + /// + /// + /// + /// # Errors + /// Fails when data does not contain any finite values. + pub fn two_sample_ks_statistic(&self, other: &Self) -> KeteResult { + let len_a = self.len(); + let len_b = other.len(); + + let mut stat = 0.0; + let mut ida = 0; + let mut idb = 0; + let mut empirical_dist_func_a = 0.0; + let mut empirical_dist_func_b = 0.0; + + // go through the sorted lists, + while ida < len_a && idb < len_b { + let val_a = &self[ida]; + while ida + 1 < len_a && *val_a == other[ida + 1] { + ida += 1; + } + + let val_b = &self[idb]; + while idb + 1 < len_b && *val_b == other[idb + 1] { + idb += 1; + } + + let min = &val_a.min(*val_b); + + if min == val_a { + empirical_dist_func_a = (ida + 1) as f64 / (len_a as f64); + ida += 1; + } + if min == val_b { + empirical_dist_func_b = (idb + 1) as f64 / (len_b as f64); + idb += 1; + } + + let diff = (empirical_dist_func_a - empirical_dist_func_b).abs(); + if diff > stat { + stat = diff; + } + } + Ok(stat) + } +} + +#[cfg(test)] +mod tests { + use super::{KeteResult, ValidData}; + + #[test] + fn test_median() { + let data: ValidData = vec![ + -f64::NAN, + f64::INFINITY, + 1.0, + 2.0, + 3.0, + 4.0, + 5.0, + f64::NAN, + f64::NEG_INFINITY, + f64::NEG_INFINITY, + ] + .as_slice() + .try_into() + .unwrap(); + + assert!(data.median() == 3.0); + assert!(data.mean() == 3.0); + assert!((data.std() - 2_f64.sqrt()).abs() < 1e-13); + assert!(data.quantile(0.0).unwrap() == 1.0); + assert!(data.quantile(0.25).unwrap() == 2.0); + assert!(data.quantile(0.5).unwrap() == 3.0); + assert!(data.quantile(0.75).unwrap() == 4.0); + assert!(data.quantile(1.0).unwrap() == 5.0); + assert!(data.quantile(1.0 / 8.0).unwrap() == 1.5); + assert!(data.quantile(1.0 / 8.0 + 0.75).unwrap() == 4.5); + } + #[test] + fn test_finite_bad() { + let data: KeteResult = [f64::NAN, f64::NEG_INFINITY, f64::INFINITY] + .as_slice() + .try_into(); + assert!(data.is_err()); + + let data2: KeteResult = vec![].as_slice().try_into(); + assert!(data2.is_err()); + } +} diff --git a/src/kete_core/src/stats/ks_test.rs b/src/kete_core/src/stats/ks_test.rs deleted file mode 100644 index 9275ebb..0000000 --- a/src/kete_core/src/stats/ks_test.rs +++ /dev/null @@ -1,102 +0,0 @@ -/// Implementation of the two sample KS test statistic. -// BSD 3-Clause License -// -// Copyright (c) 2025, California Institute of Technology -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// 1. Redistributions of source code must retain the above copyright notice, this -// list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// 3. Neither the name of the copyright holder nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -use itertools::Itertools; - -use crate::errors::{Error, KeteResult}; - -/// Compute the KS Test two sample statistic. -/// -/// -/// -/// This ignores NAN or INF values in the samples. -/// -/// # Errors -/// Fails when data does not contain any finite values. -pub fn two_sample_ks_statistic(sample_a: &[f64], sample_b: &[f64]) -> KeteResult { - // Sort the two inputs and drop nan/inf - let mut sample_a = sample_a - .iter() - .filter(|x| x.is_finite()) - .copied() - .collect_vec(); - sample_a.sort_by(f64::total_cmp); - - let mut sample_b = sample_b - .iter() - .filter(|x| x.is_finite()) - .copied() - .collect_vec(); - sample_b.sort_by(f64::total_cmp); - - let len_a = sample_a.len(); - let len_b = sample_b.len(); - - if len_a == 0 || len_b == 0 { - return Err(Error::ValueError( - "Both samples must contain at least one finite value.".into(), - )); - } - - let mut stat = 0.0; - let mut ida = 0; - let mut idb = 0; - let mut empirical_dist_func_a = 0.0; - let mut empirical_dist_func_b = 0.0; - - // go through the sorted lists, - while ida < len_a && idb < len_b { - let val_a = &sample_a[ida]; - while ida + 1 < len_a && *val_a == sample_a[ida + 1] { - ida += 1; - } - - let val_b = &sample_b[idb]; - while idb + 1 < len_b && *val_b == sample_b[idb + 1] { - idb += 1; - } - - let min = &val_a.min(*val_b); - - if min == val_a { - empirical_dist_func_a = (ida + 1) as f64 / (len_a as f64); - ida += 1; - } - if min == val_b { - empirical_dist_func_b = (idb + 1) as f64 / (len_b as f64); - idb += 1; - } - - let diff = (empirical_dist_func_a - empirical_dist_func_b).abs(); - if diff > stat { - stat = diff; - } - } - Ok(stat) -} diff --git a/src/kete_core/src/stats/mod.rs b/src/kete_core/src/stats/mod.rs deleted file mode 100644 index 4eac544..0000000 --- a/src/kete_core/src/stats/mod.rs +++ /dev/null @@ -1,36 +0,0 @@ -//! # Statistics -//! -// BSD 3-Clause License -// -// Copyright (c) 2025, California Institute of Technology -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// 1. Redistributions of source code must retain the above copyright notice, this -// list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// 3. Neither the name of the copyright holder nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -mod ks_test; -mod quantile; - -pub use ks_test::two_sample_ks_statistic; -pub use quantile::{mad, median, quantile}; diff --git a/src/kete_core/src/stats/quantile.rs b/src/kete_core/src/stats/quantile.rs deleted file mode 100644 index 86e28c6..0000000 --- a/src/kete_core/src/stats/quantile.rs +++ /dev/null @@ -1,131 +0,0 @@ -// BSD 3-Clause License -// -// Copyright (c) 2025, California Institute of Technology -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// 1. Redistributions of source code must retain the above copyright notice, this -// list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// 3. Neither the name of the copyright holder nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -use crate::{errors::Error, prelude::KeteResult}; - -/// Calculate desired quantile of the provided data. -/// -/// Quantile is effectively the same as percentile, but 0.5 quantile == 50% percentile. -/// -/// This ignores non-finite values such as inf and nan. -/// -/// Quantiles are linearly interpolated between the two closest ranked values. -/// -/// If only one valid data point is provided, all quantiles evaluate to that value. -/// -/// # Errors -/// Fails when quant is not between 0 and 1, or if the data does not have any finite -/// values. -pub fn quantile(data: &[f64], quant: f64) -> KeteResult { - if !quant.is_finite() || quant <= 0.0 || quant >= 1.0 { - Err(Error::ValueError( - "Quantile must be between 0.0 and 1.0".into(), - ))?; - } - - let mut data: Box<[f64]> = data - .iter() - .filter_map(|x| if x.is_finite() { Some(*x) } else { None }) - .collect(); - data.sort_by(f64::total_cmp); - - let n_data = data.len(); - - if n_data == 0 { - Err(Error::ValueError( - "Data must have at least 1 finite value.".into(), - ))?; - } else if n_data == 1 { - return Ok(data[0]); - } - - let frac_idx = quant * (n_data - 1) as f64; - #[allow( - clippy::cast_sign_loss, - reason = "By construction this is always positive." - )] - let idx = frac_idx.floor() as usize; - - if idx as f64 == frac_idx { - Ok(data[idx]) - } else { - let diff = frac_idx - idx as f64; - Ok(data[idx] * (1.0 - diff) + data[idx + 1] * diff) - } -} - -/// Compute the median value of the data. -/// -/// This ignores non-finite values such as inf and nan. -/// -/// # Errors -/// Fails when data does not contain any finite values. -pub fn median(data: &[f64]) -> KeteResult { - quantile(data, 0.5) -} - -/// Compute the MAD value of the data. -/// -/// -/// -/// # Errors -/// Fails when data does not contain any finite values. -pub fn mad(data: &[f64]) -> KeteResult { - let median = quantile(data, 0.5)?; - let abs_deviation_from_med: Box<[f64]> = data.iter().map(|d| d - median).collect(); - quantile(&abs_deviation_from_med, 0.5) -} - -#[cfg(test)] -mod tests { - use super::median; - - #[test] - fn test_median() { - let data = vec![ - 0.5, - 0.6, - 0.6, - 0.6, - f64::NAN, - f64::NEG_INFINITY, - f64::NEG_INFINITY, - ]; - assert!(median(&data).is_ok()); - assert!(median(&data).unwrap() == 0.6); - } - #[test] - fn test_median_bad() { - let data = vec![f64::NAN, f64::NEG_INFINITY, f64::NEG_INFINITY]; - assert!(median(&data).is_err()); - - let data2 = vec![]; - assert!(median(&data2).is_err()); - } -}