
Remove INITIALIZE and MARK_AGGREGATED from ReportsProcessed #415

Merged: 2 commits, Oct 27, 2023

Changes from all commits
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
@@ -35,6 +35,7 @@
 prio = "0.15.3"
 prometheus = "0.13.3"
 rand = "0.8.5"
 reqwest = "0.11.22"
+replace_with = "0.1.7"
 ring = "0.16.20"
 serde = { version = "1.0.188", features = ["derive"] }
 serde_json = "1.0.107"
1 change: 1 addition & 0 deletions daphne/Cargo.toml
@@ -34,6 +34,7 @@
 hpke-rs-rust-crypto.workspace = true
 prio = { workspace = true, features = ["prio2"] }
 prometheus.workspace = true
 rand.workspace = true
+replace_with.workspace = true
 ring.workspace = true
 serde.workspace = true
 serde_json.workspace = true
67 changes: 48 additions & 19 deletions daphne/src/lib.rs
@@ -366,6 +366,32 @@ impl<T> Extend<(DapBatchBucket, (T, Vec<(ReportId, Time)>))> for DapAggregateSpan<T>
     }
 }
 
+impl FromIterator<(DapBatchBucket, (ReportId, Time))> for DapAggregateSpan<()> {
+    fn from_iter<I>(iter: I) -> Self
+    where
+        I: IntoIterator<Item = (DapBatchBucket, (ReportId, Time))>,
+    {
+        let mut this = Self::default();
+        this.extend(iter);
+        this
+    }
+}
+
+impl Extend<(DapBatchBucket, (ReportId, Time))> for DapAggregateSpan<()> {
+    fn extend<I>(&mut self, iter: I)
+    where
+        I: IntoIterator<Item = (DapBatchBucket, (ReportId, Time))>,
+    {
+        for (k, v) in iter {
+            self.span
+                .entry(k)
+                .or_insert_with(|| ((), Vec::new()))
+                .1
+                .push(v);
+        }
+    }
+}
+
 /// Per-task DAP parameters.
 #[derive(Clone, Deserialize, Serialize)]
 pub struct DapTaskConfig {
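Note: the new FromIterator impl simply delegates to Extend, so collect() and incremental extension share one code path, and each (ReportId, Time) pair is grouped under its bucket with a unit () aggregate. Below is a minimal, self-contained sketch of the same idiom; the Bucket, ReportId, and Span names here are illustrative stand-ins, not daphne's API.

use std::collections::HashMap;

// Hypothetical stand-ins for DapBatchBucket / ReportId / Time.
type Bucket = u64;
type ReportId = [u8; 16];
type Time = u64;

#[derive(Default, Debug)]
struct Span {
    span: HashMap<Bucket, ((), Vec<(ReportId, Time)>)>,
}

impl Extend<(Bucket, (ReportId, Time))> for Span {
    fn extend<I: IntoIterator<Item = (Bucket, (ReportId, Time))>>(&mut self, iter: I) {
        for (bucket, id_and_time) in iter {
            // Group each (id, time) pair under its bucket, creating the entry on first use.
            self.span
                .entry(bucket)
                .or_insert_with(|| ((), Vec::new()))
                .1
                .push(id_and_time);
        }
    }
}

impl FromIterator<(Bucket, (ReportId, Time))> for Span {
    fn from_iter<I: IntoIterator<Item = (Bucket, (ReportId, Time))>>(iter: I) -> Self {
        // Delegate to Extend so both construction paths stay in sync.
        let mut this = Self::default();
        this.extend(iter);
        this
    }
}

fn main() {
    let span: Span = [(0, ([0u8; 16], 10)), (0, ([1u8; 16], 12)), (1, ([2u8; 16], 95))]
        .into_iter()
        .collect();
    assert_eq!(span.span[&0].1.len(), 2);
    println!("{span:?}");
}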
@@ -484,32 +510,35 @@ impl DapTaskConfig {
         &self,
         part_batch_sel: &'sel PartialBatchSelector,
         consumed_reports: impl Iterator<Item = &'rep EarlyReportStateConsumed<'rep>>,
-    ) -> Result<HashMap<DapBatchBucket, Vec<&'rep EarlyReportStateConsumed<'rep>>>, DapError> {
+    ) -> Result<DapAggregateSpan<()>, DapError> {
         if !self.query.is_valid_part_batch_sel(part_batch_sel) {
             return Err(fatal_error!(
                 err = "partial batch selector not compatible with task",
             ));
         }
-
-        let mut span: HashMap<_, Vec<_>> = HashMap::new();
-        for consumed_report in consumed_reports.filter(|consumed_report| consumed_report.is_ready())
-        {
-            let bucket = match part_batch_sel {
-                PartialBatchSelector::TimeInterval => DapBatchBucket::TimeInterval {
-                    batch_window: self.quantized_time_lower_bound(consumed_report.metadata().time),
-                },
-                PartialBatchSelector::FixedSizeByBatchId { batch_id } => {
-                    DapBatchBucket::FixedSize {
-                        batch_id: batch_id.clone(),
-                    }
-                }
-            };
-
-            let consumed_reports_per_bucket = span.entry(bucket).or_default();
-            consumed_reports_per_bucket.push(consumed_report);
-        }
-
-        Ok(span)
+        Ok(consumed_reports
+            .filter(|consumed_report| consumed_report.is_ready())
+            .map(|consumed_report| {
+                let bucket = self.bucket_for(part_batch_sel, consumed_report);
+                let metadata = consumed_report.metadata();
+                (bucket, (metadata.id.clone(), metadata.time))
+            })
+            .collect())
+    }
+
+    pub fn bucket_for(
+        &self,
+        part_batch_sel: &PartialBatchSelector,
+        consumed_report: &EarlyReportStateConsumed<'_>,
+    ) -> DapBatchBucket {
+        match part_batch_sel {
+            PartialBatchSelector::TimeInterval => DapBatchBucket::TimeInterval {
+                batch_window: self.quantized_time_lower_bound(consumed_report.metadata().time),
+            },
+            PartialBatchSelector::FixedSizeByBatchId { batch_id } => DapBatchBucket::FixedSize {
+                batch_id: batch_id.clone(),
+            },
+        }
     }
 
     /// Check if the batch size is too small. Returns an error if the report count is too large.
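Note: for PartialBatchSelector::TimeInterval, bucket_for keys the bucket on quantized_time_lower_bound, which rounds the report's timestamp down so that all reports in the same window share a bucket. A rough sketch of that rounding, assuming truncation to a fixed time_precision (the exact behavior of daphne's quantized_time_lower_bound may differ):

// Hypothetical, simplified bucketing: round a timestamp down to the start of
// its batch window, assuming windows are `time_precision` seconds wide.
fn quantized_time_lower_bound(time: u64, time_precision: u64) -> u64 {
    time - (time % time_precision)
}

fn main() {
    let time_precision = 3600; // one-hour windows (illustrative)
    assert_eq!(quantized_time_lower_bound(7305, time_precision), 3600);
    assert_eq!(quantized_time_lower_bound(3599, time_precision), 0);
    // Reports at 7305 and 7200 share a bucket; the one at 3599 does not.
    assert_eq!(
        quantized_time_lower_bound(7305, time_precision),
        quantized_time_lower_bound(7200, time_precision)
    );
}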
60 changes: 30 additions & 30 deletions daphne/src/roles/mod.rs
@@ -9,7 +9,7 @@ mod leader;
 
 use crate::{
     constants::DapMediaType,
-    messages::{BatchSelector, ReportMetadata, TaskId, Time, TransitionFailure},
+    messages::{BatchSelector, ReportMetadata, TaskId, Time},
     taskprov::{self, TaskprovVersion},
     DapAbort, DapError, DapQueryConfig, DapRequest, DapTaskConfig,
 };

@@ -103,34 +103,6 @@ async fn check_batch<S>(
     Ok(())
 }
 
-/// Check for transition failures due to:
-///
-/// * the report having already been processed
-/// * the report having already been collected
-/// * the report not being within time bounds
-///
-/// Returns `Some(TransitionFailure)` if there is a problem,
-/// or `None` if no transition failure occurred.
-pub fn early_metadata_check(
-    metadata: &ReportMetadata,
-    processed: bool,
-    collected: bool,
-    min_time: u64,
-    max_time: u64,
-) -> Option<TransitionFailure> {
-    if processed {
-        Some(TransitionFailure::ReportReplayed)
-    } else if collected {
-        Some(TransitionFailure::BatchCollected)
-    } else if metadata.time < min_time {
-        Some(TransitionFailure::ReportDropped)
-    } else if metadata.time > max_time {
-        Some(TransitionFailure::ReportTooEarly)
-    } else {
-        None
-    }
-}
-
 fn check_request_content_type<S>(
     req: &DapRequest<S>,
     expected: DapMediaType,

@@ -195,7 +167,7 @@ async fn resolve_taskprov<S>(
 
 #[cfg(test)]
 mod test {
-    use super::{early_metadata_check, DapAggregator, DapAuthorizedSender, DapHelper, DapLeader};
+    use super::{DapAggregator, DapAuthorizedSender, DapHelper, DapLeader};
     use crate::{
         assert_metrics_include, async_test_version, async_test_versions,
         auth::BearerToken,

@@ -234,6 +206,34 @@ mod test {
         }};
     }
 
+    /// Check for transition failures due to:
+    ///
+    /// * the report having already been processed
+    /// * the report having already been collected
+    /// * the report not being within time bounds
+    ///
+    /// Returns `Some(TransitionFailure)` if there is a problem,
+    /// or `None` if no transition failure occurred.
+    pub fn early_metadata_check(
+        metadata: &ReportMetadata,
+        processed: bool,
+        collected: bool,
+        min_time: u64,
+        max_time: u64,
+    ) -> Option<TransitionFailure> {
+        if processed {
+            Some(TransitionFailure::ReportReplayed)
+        } else if collected {
+            Some(TransitionFailure::BatchCollected)
+        } else if metadata.time < min_time {
+            Some(TransitionFailure::ReportDropped)
+        } else if metadata.time > max_time {
+            Some(TransitionFailure::ReportTooEarly)
+        } else {
+            None
+        }
+    }
+
     pub(super) struct TestData {
         pub now: Time,
         global_config: DapGlobalConfig,
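Note: the checks in early_metadata_check are ordered, so a replayed report is reported as ReportReplayed even if it is also outside the time window. A small, self-contained usage sketch follows, with stripped-down stand-ins for ReportMetadata and TransitionFailure (the real daphne types carry more fields and variants):

// Stand-ins for daphne's types, reduced to what the check needs.
struct ReportMetadata {
    time: u64,
}

#[derive(Debug, PartialEq)]
enum TransitionFailure {
    ReportReplayed,
    BatchCollected,
    ReportDropped,
    ReportTooEarly,
}

fn early_metadata_check(
    metadata: &ReportMetadata,
    processed: bool,
    collected: bool,
    min_time: u64,
    max_time: u64,
) -> Option<TransitionFailure> {
    if processed {
        Some(TransitionFailure::ReportReplayed)
    } else if collected {
        Some(TransitionFailure::BatchCollected)
    } else if metadata.time < min_time {
        Some(TransitionFailure::ReportDropped)
    } else if metadata.time > max_time {
        Some(TransitionFailure::ReportTooEarly)
    } else {
        None
    }
}

fn main() {
    // Replay takes precedence even when the report is also outside the window.
    assert_eq!(
        early_metadata_check(&ReportMetadata { time: 50 }, true, true, 100, 200),
        Some(TransitionFailure::ReportReplayed)
    );
    // An in-window, unseen report passes.
    assert_eq!(
        early_metadata_check(&ReportMetadata { time: 150 }, false, false, 100, 200),
        None
    );
}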
22 changes: 9 additions & 13 deletions daphne/src/testing.rs
@@ -13,7 +13,7 @@ use crate::{
         AggregationJobContinueReq, AggregationJobId, AggregationJobInitReq, AggregationJobResp,
         BatchId, BatchSelector, Collection, CollectionJobId, CollectionReq,
         Draft02AggregationJobId, HpkeCiphertext, Interval, PartialBatchSelector, Report, ReportId,
-        ReportMetadata, TaskId, Time, TransitionFailure,
+        TaskId, Time, TransitionFailure,
     },
     metrics::DaphneMetrics,
     roles::{DapAggregator, DapAuthorizedSender, DapHelper, DapLeader, DapReportInitializer},

@@ -697,7 +697,7 @@ impl MockAggregator {
         &self,
         task_id: &TaskId,
         bucket: &DapBatchBucket,
-        metadata: &ReportMetadata,
+        id: &ReportId,
     ) -> Option<TransitionFailure> {
         // Check AggStateStore to see whether the report is part of a batch that has already
        // been collected.

@@ -713,7 +713,7 @@
             .lock()
             .expect("report_store: failed to lock");
         let report_store = guard.entry(task_id.clone()).or_default();
-        if report_store.processed.contains(&metadata.id) {
+        if report_store.processed.contains(id) {
             return Some(TransitionFailure::ReportReplayed);
         }

@@ -920,17 +920,13 @@ impl DapReportInitializer for MockAggregator {
         )?;
 
         let mut early_fails = HashMap::new();
-        for (bucket, reports_consumed_per_bucket) in span.iter() {
-            for metadata in reports_consumed_per_bucket
-                .iter()
-                .map(|report| report.metadata())
-            {
+        for (bucket, ((), report_ids_and_time)) in span.iter() {
+            for (id, _) in report_ids_and_time {
                 // Check whether Report has been collected or replayed.
-                if let Some(transition_failure) = self
-                    .check_report_early_fail(task_id, bucket, metadata)
-                    .await
+                if let Some(transition_failure) =
+                    self.check_report_early_fail(task_id, bucket, id).await
                 {
-                    early_fails.insert(metadata.id.clone(), transition_failure);
+                    early_fails.insert(id.clone(), transition_failure);
                 };
             }
         }

@@ -1233,7 +1229,7 @@ impl DapLeader<BearerToken> for MockAggregator {
 
         // Check whether Report has been collected or replayed.
         if let Some(transition_failure) = self
-            .check_report_early_fail(task_id, &bucket, &report.report_metadata)
+            .check_report_early_fail(task_id, &bucket, &report.report_metadata.id)
             .await
         {
             return Err(DapError::Transition(transition_failure));
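Note: narrowing check_report_early_fail to a &ReportId makes explicit that the replay check only ever consults the report's ID. A minimal sketch of such a replay check against a set of processed IDs (hypothetical stand-ins, not MockAggregator's full collected-batch logic):

use std::collections::HashSet;

// Hypothetical stand-in for daphne's ReportId.
type ReportId = [u8; 16];

#[derive(Debug, PartialEq)]
enum TransitionFailure {
    ReportReplayed,
}

#[derive(Default)]
struct ReportStore {
    processed: HashSet<ReportId>,
}

impl ReportStore {
    // Mirror of the replay check: only the report's ID is consulted.
    fn check_replay(&self, id: &ReportId) -> Option<TransitionFailure> {
        if self.processed.contains(id) {
            return Some(TransitionFailure::ReportReplayed);
        }
        None
    }
}

fn main() {
    let mut store = ReportStore::default();
    let id = [7u8; 16];
    assert_eq!(store.check_replay(&id), None);
    store.processed.insert(id);
    assert_eq!(store.check_replay(&id), Some(TransitionFailure::ReportReplayed));
}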
27 changes: 27 additions & 0 deletions daphne/src/vdaf/mod.rs
@@ -42,6 +42,7 @@ use prio::{
     },
 };
 use rand::prelude::*;
+use replace_with::replace_with_or_abort;
 use serde::{Deserialize, Serialize, Serializer};
 use std::{
     borrow::Cow,

@@ -185,6 +186,20 @@ impl<'req> EarlyReportStateConsumed<'req> {
             input_share: input_share.payload,
         })
     }
 
+    /// Convert this `EarlyReportStateConsumed` into a rejected
+    /// [`EarlyReportStateInitialized`] using `failure` as the reason. If this is
+    /// already a rejected report, the passed-in `failure` value overwrites the
+    /// previous one.
+    pub fn into_initialized_rejected_due_to(
+        self,
+        failure: TransitionFailure,
+    ) -> EarlyReportStateInitialized<'req> {
+        let metadata = match self {
+            Self::Ready { metadata, .. } => metadata,
+            Self::Rejected { metadata, .. } => metadata,
+        };
+        EarlyReportStateInitialized::Rejected { metadata, failure }
+    }
 }
 
 impl EarlyReportState for EarlyReportStateConsumed<'_> {

Review thread on into_initialized_rejected_due_to:

Contributor: Add doc comment.

Contributor: If self was Self::Rejected, then we'll end up overwriting the failure here. Is this intended behavior? If so, we should document why this is the behavior we want. My gut feeling is we probably don't want to override the failure reason, but I could be wrong.

Collaborator (author): I think we should override, principle of least surprise: if I pass a parameter, I expect it to be used.
@@ -307,6 +322,18 @@ impl<'req> EarlyReportStateInitialized<'req> {
         };
         Ok(early_report_state_initialized)
     }
 
+    /// Turn this report into a rejected report using `failure` as the reason for its
+    /// rejection.
+    pub fn reject_due_to(&mut self, failure: TransitionFailure) {
+        // This never aborts because the closure never panics.
+        replace_with_or_abort(self, |self_| {
+            let metadata = match self_ {
+                Self::Rejected { metadata, .. } => metadata,
+                Self::Ready { metadata, .. } => metadata,
+            };
+            Self::Rejected { metadata, failure }
+        })
+    }
 }
 
 impl EarlyReportState for EarlyReportStateInitialized<'_> {

Contributor: Here again, just want to double-check that overwriting the failure reason in case it was already rejected is intended.
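Note: reject_due_to must move metadata out of *self while holding only &mut self, which safe Rust ordinarily forbids; replace_with_or_abort bridges the gap by temporarily taking the value and aborting the process if the closure panics (hence the comment that the closure never panics). A standalone sketch of the same state-transition pattern on a toy enum, using the real replace_with crate (the enum and its fields here are illustrative):

use replace_with::replace_with_or_abort;

#[derive(Debug)]
enum State {
    Ready { payload: String },
    Rejected { payload: String, reason: u8 },
}

impl State {
    fn reject(&mut self, reason: u8) {
        // Take `self` by value inside the closure so `payload` can be moved,
        // not cloned, into the new variant. The closure must not panic.
        replace_with_or_abort(self, |this| {
            let payload = match this {
                State::Ready { payload } => payload,
                State::Rejected { payload, .. } => payload,
            };
            State::Rejected { payload, reason }
        });
    }
}

fn main() {
    let mut s = State::Ready { payload: "report".into() };
    s.reject(3);
    // A second rejection overwrites the reason, matching the override
    // semantics settled on in the review thread above.
    s.reject(7);
    println!("{s:?}"); // Rejected { payload: "report", reason: 7 }
}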