Reconstruct weights in WeightedAliasIndex (#25)

bens-schreiber · Ben Schreiber · web-flow · commit 3a528141abb1 · 2025-07-10T09:28:43.000+02:00
Co-authored-by: Ben Schreiber <bschreiber@cloudflare.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,14 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.5.2]
+
+### API Changes
+- Add `WeightedAliasIndex::weights()` to reconstruct the original weights in O(n)
+
+### Testing
+- Added a test for `WeightedAliasIndex::weights()`
+
 ## [0.5.1]
 
 ### Testing
diff --git a/benches/benches/weighted.rs b/benches/benches/weighted.rs
@@ -49,7 +49,7 @@ pub fn bench(c: &mut Criterion) {
         (1000, 1_000_000, "1M"),
     ];
     for (amount, length, len_name) in lens {
-        let name = format!("weighted_sample_indices_{}_of_{}", amount, len_name);
+        let name = format!("weighted_sample_indices_{amount}_of_{len_name}");
         c.bench_function(name.as_str(), |b| {
             let length = black_box(length);
             let amount = black_box(amount);
diff --git a/src/weighted/weighted_alias.rs b/src/weighted/weighted_alias.rs
@@ -79,6 +79,7 @@ pub struct WeightedAliasIndex<W: AliasableWeight> {
     no_alias_odds: Box<[W]>,
     uniform_index: Uniform<u32>,
     uniform_within_weight_sum: Uniform<W>,
+    weight_sum: W,
 }
 
 impl<W: AliasableWeight> WeightedAliasIndex<W> {
@@ -231,8 +232,42 @@ impl<W: AliasableWeight> WeightedAliasIndex<W> {
             no_alias_odds,
             uniform_index,
             uniform_within_weight_sum,
+            weight_sum,
         })
     }
+
+    /// Recovers the weights that this distribution was originally built from.
+    ///
+    /// Runs in `O(n)` time for `n` weights.
+    ///
+    /// Note: when `W` is a float, the returned values may not match the originals exactly.
+    pub fn weights(&self) -> Vec<W> {
+        let len = self.aliases.len();
+        let total = self.weight_sum;
+
+        // `len` was validated in the constructor.
+        let scale = W::try_from_u32_lossy(len as u32).unwrap();
+
+        // First pass: whenever slot `i` keeps less than `total` for itself, the
+        // remainder is credited to the index named by `aliases[i]`. Slots that keep
+        // the full `total` contribute nothing and are skipped.
+        let mut credited = vec![W::ZERO; len];
+        for (i, &alias) in self.aliases.iter().enumerate() {
+            let kept = self.no_alias_odds[i];
+            if kept < total {
+                credited[alias as usize] += total - kept;
+            }
+        }
+
+        // Second pass: each weight is its slot's own odds plus everything credited
+        // to that slot, divided by `scale` (the number of weights, as a `W`).
+        let mut weights = Vec::with_capacity(len);
+        for (&kept, &extra) in self.no_alias_odds.iter().zip(&credited) {
+            weights.push((kept + extra) / scale);
+        }
+
+        weights
+    }
 }
 
 impl<W: AliasableWeight> Distribution<usize> for WeightedAliasIndex<W> {
@@ -271,6 +306,7 @@ where
             no_alias_odds: self.no_alias_odds.clone(),
             uniform_index: self.uniform_index,
             uniform_within_weight_sum: self.uniform_within_weight_sum.clone(),
+            weight_sum: self.weight_sum,
         }
     }
 }
@@ -503,6 +539,48 @@ mod test {
         );
     }
 
+    #[test]
+    fn test_weights_reconstruction() {
+        // Integer weights (unsuffixed literals default to `i32`), including a zero
+        // weight: integer reconstruction is expected to be exact.
+        {
+            let weights_i32 = vec![10, 2, 8, 0, 30, 5];
+            let dist_i32 = WeightedAliasIndex::new(weights_i32.clone()).unwrap();
+            assert_eq!(weights_i32, dist_i32.weights());
+        }
+
+        // Uniform `u64` weights. The suffix is required: without it the literals
+        // would fall back to `i32` and this case would not exercise `u64` at all.
+        {
+            let weights_u64 = vec![1_u64, 1, 1, 1, 1];
+            let dist_u64 = WeightedAliasIndex::new(weights_u64.clone()).unwrap();
+            assert_eq!(weights_u64, dist_u64.weights());
+        }
+
+        // Floating point: round-off means values are only compared within EPSILON.
+        {
+            const EPSILON: f64 = 1e-9;
+            let weights_f64 = vec![0.5, 0.2, 0.3, 0.0, 1.5, 0.88];
+            let dist_f64 = WeightedAliasIndex::new(weights_f64.clone()).unwrap();
+            let reconstructed_f64 = dist_f64.weights();
+
+            assert_eq!(weights_f64.len(), reconstructed_f64.len());
+            for (original, reconstructed) in weights_f64.iter().zip(reconstructed_f64.iter()) {
+                assert!(
+                    f64::abs(original - reconstructed) < EPSILON,
+                    "Weight reconstruction failed: original {original}, reconstructed {reconstructed}"
+                );
+            }
+        }
+
+        // Single item
+        {
+            let weights_single = vec![42_u32];
+            let dist_single = WeightedAliasIndex::new(weights_single.clone()).unwrap();
+            assert_eq!(weights_single, dist_single.weights());
+        }
+    }
+
     #[test]
     fn value_stability() {
         fn test_samples<W: AliasableWeight>(