CDCgov · afmagee42 · Jan 23, 2025 · Jan 23, 2025 · Jan 23, 2025 · Jan 23, 2025
diff --git a/ringvax/__init__.py b/ringvax/__init__.py
@@ -6,12 +6,15 @@
 
 
 class Simulation:
-    PROPERTIES = {
+    INIT_PROPERTIES = {
         "id",
         "infector",
-        "infectees",
         "generation",
         "t_exposed",
+        "simulated",
+    }
+    SIM_PROPERTIES = {
+        "infectees",
         "t_infectious",
         "t_recovered",
         "infection_rate",
@@ -20,6 +23,7 @@ class Simulation:
         "t_detected",
         "infection_times",
     }
+    PROPERTIES = INIT_PROPERTIES | SIM_PROPERTIES
 
     def __init__(
         self, params: dict[str, Any], rng: Optional[numpy.random.Generator] = None
@@ -29,10 +33,18 @@ def __init__(
         self.infections = {}
         self.termination: Optional[str] = None
 
-    def create_person(self) -> str:
+    def create_person(
+        self, infector: Optional[str], t_exposed: float, generation: int
+    ) -> str:
         """Add a new person to the data"""
         id = str(len(self.infections))
-        self.infections[id] = {x: None for x in self.PROPERTIES}
+        self.infections[id] = {
+            "id": id,
+            "infector": infector,
+            "t_exposed": t_exposed,
+            "generation": generation,
+            "simulated": False,
+        } | {x: None for x in self.SIM_PROPERTIES}
         return id
 
     def update_person(self, id: str, content: dict[str, Any]) -> None:
@@ -67,18 +79,11 @@ def query_people(self, query: Optional[dict[str, Any]] = None) -> List[str]:
                 if all(person[k] == v for k, v in query.items())
             ]
 
-    def register_infectee(self, infector, infectee) -> None:
-        infectees = self.get_person_property(infector, "infectees")
-        if infectees is None:
-            self.update_person(infector, {"infectees": []})
-            infectees = self.get_person_property(infector, "infectees")
-        infectees.append(infectee)
-
     def run(self) -> None:
         """Run simulation"""
-        # queue is pairs (t_exposed, infector)
+        # queue is of infection ids
         # start with the index infection
-        infection_queue: List[tuple[float, Optional[str]]] = [(0.0, None)]
+        infection_queue: List[str] = [self.create_person(None, 0.0, 0)]
 
         passed_max_generations = False
 
@@ -98,24 +103,17 @@ def run(self) -> None:
                 )
                 # exit the loop
                 break
-            elif n_infections == self.params["max_infections"]:
+            elif n_infections >= self.params["max_infections"]:
                 # we are at maximum number of infections
-                self.termination = "max_infections"
-                # exit the loop
-                break
-            elif n_infections > self.params["max_infections"]:
-                # this loop instantiates infections one at a time. we should
-                # exactly hit the maximum and not exceed it.
                 raise RuntimeError("Maximum number of infections exceeded")
 
             # find the person who is infected next
             # (the queue is time-sorted, so this is the temporally next infection)
-            t_exposed, infector = infection_queue.pop(0)
+            id = infection_queue.pop(0)
 
-            # otherwise, instantiate this infection, draw who they in turn infect,
+            # draw who they in turn infect,
             # and add the infections they cause to the queue, in time order
-            id = self.create_person()
-            self.generate_infection(id=id, t_exposed=t_exposed, infector=infector)
+            offspring = self.generate_infection(id=id)
 
             # if the infector is in the final generation, do not add their
             # infectees to the queue
@@ -129,29 +127,26 @@ def run(self) -> None:
             else:
                 # only add infectees to the queue if we are not yet at maximum
                 # number of generations
-                for t in self.get_person_property(id, "infection_times"):
-                    bisect.insort_right(infection_queue, (t, id), key=lambda x: x[0])
+                for child in offspring:
+                    bisect.insort_right(
+                        infection_queue,
+                        child,
+                        key=lambda x: self.get_person_property(x, "t_exposed"),
+                    )
 
     def generate_infection(
-        self, id: str, t_exposed: float, infector: Optional[str]
-    ) -> None:
+        self,
+        id: str,
+    ) -> List[str]:
         """
         Generate a single infected person's biological disease history, detection
         history and transmission history
         """
-        # keep track of generations
-        if infector is None:
-            generation = 0
-        else:
-            generation = self.get_person_property(infector, "generation") + 1
-            self.register_infectee(infector, id)
-
-        self.update_person(
-            id, {"id": id, "infector": infector, "generation": generation}
-        )
 
         # disease state history in this individual
-        disease_history = self.generate_disease_history(t_exposed=t_exposed)
+        disease_history = self.generate_disease_history(
+            self.get_person_property(id, "t_exposed")
+        )
         self.update_person(id, disease_history)
 
         # whether this person was detected
@@ -182,7 +177,18 @@ def generate_infection(
             assert (infection_times >= disease_history["t_infectious"]).all()
             assert (infection_times <= t_end_infectious).all()
 
-        self.update_person(id, {"infection_times": infection_times})
+        infectees = [
+            self.create_person(id, time, self.get_person_property(id, "generation") + 1)
+            for time in infection_times
+        ]
+        self.update_person(
+            id, {"infection_times": infection_times, "infectees": infectees}
+        )
+
+        # mark this person as simulated
+        self.update_person(id, {"simulated": True})
+
+        return infectees
 
     def generate_disease_history(self, t_exposed: float) -> dict[str, Any]:
         """Generate infection history for a single infected person"""

diff --git a/ringvax/app.py b/ringvax/app.py
@@ -46,8 +46,15 @@ def run_simulations(n: int, params: dict, seed: int) -> List[Simulation]:
     for i in range(n):
         progress_bar.progress(i / n, text=progress_text)
         sim = Simulation(params=params, rng=rngs[i])
-        sim.run()
-        sims.append(sim)
+        try:
+            sim.run()
+            sims.append(sim)
+        except Exception as e:
+            if not (
+                isinstance(e, RuntimeError)
+                and str(e) == "Maximum number of infections exceeded"
+            ):
+                raise (e)
 
     progress_bar.empty()
     toc = time.perf_counter()
@@ -284,13 +291,6 @@ def infectiousness_callback():
                 max_value=n_generations + 1,
                 help="Successful control is defined as no infections in contacts at this degree. Set to 1 for contacts of the index case, 2 for contacts of contacts, etc. Equivalent to checking for extinction in the specified generation.",
             )
-            max_infections = st.number_input(
-                "Maximum number of infections",
-                value=1000,
-                step=10,
-                min_value=100,
-                help="",
-            )
             seed = st.number_input("Random seed", value=1234, step=1)
             nsim = st.number_input("Number of simulations", value=250, step=1)
             plot_gen = st.toggle("Show infection's generation", value=False)
@@ -313,6 +313,7 @@ def infectiousness_callback():
                 == "Cumulative"
             )
 
+    max_infections = 1_000_000
     params = {
         "n_generations": n_generations,
         "latent_duration": latent_duration,
@@ -329,13 +330,13 @@ def infectiousness_callback():
 
     sims = run_simulations(n=nsim, params=params, seed=seed)
 
-    n_at_max = sum(1 for sim in sims if sim.termination == "max_infections")
+    n_at_max = nsim - len(sims)
 
     show = True if n_at_max == 0 else False
     if not show:
         st.warning(
             body=(
-                f"{n_at_max} simulations hit the specified maximum number of infections ({max_infections})."
+                f"{n_at_max} simulations hit the maximum number of infections ({max_infections})."
             ),
             icon="🚨",
         )

diff --git a/ringvax/plot.py b/ringvax/plot.py
@@ -225,7 +225,11 @@ def get_infection_time_tuples(id: str, sim: Simulation):
     if infectees is None or len(infectees) == 0:
         return None
 
-    return [(sim.get_person_property(inf, "t_exposed"), inf) for inf in infectees]
+    return [
+        (sim.get_person_property(inf, "t_exposed"), inf)
+        for inf in infectees
+        if sim.get_person_property(inf, "simulated")
+    ]
 
 
 def order_descendants(sim: Simulation):
@@ -282,21 +286,21 @@ def make_plot_par(sim: Simulation, show_counterfactual=True):
             0.0,
             max(
                 sim.get_person_property(id, stage_map["infectious"]["end"])
-                for id in sim.infections.keys()
+                for id in sim.query_people({"simulated": True})
             ),
         ],
-        "y_range": [-1.0, len(sim.infections)],
+        "y_range": [-1.0, len(sim.query_people({"simulated": True}))],
     }
 
 
 def plot_simulation(sim: Simulation, par: dict[str, Any]):
-    n_inf = len(sim.query_people())
+    n_inf = len(sim.query_people({"simulated": True}))
 
     plot_par = make_plot_par(sim) | par
 
     fig, ax = plt.subplots()
 
-    for inf in sim.query_people():
+    for inf in sim.query_people({"simulated": True}):
         draw_stages(ax, inf, sim, plot_par)
 
         mark_infections(ax, inf, sim, plot_par)

diff --git a/ringvax/summary.py b/ringvax/summary.py
@@ -9,6 +9,7 @@
     {
         "id": pl.String,
         "infector": pl.String,
+        "simulated": pl.Boolean,
         "infectees": pl.List(pl.String),
         "generation": pl.Int64,
         "t_exposed": pl.Float64,
@@ -29,7 +30,8 @@
 
 
 def get_all_person_properties(
-    sims: Sequence[Simulation], exclude_termination_if: list[str] = ["max_infections"]
+    sims: Sequence[Simulation],
+    exclude_termination_if: list[str] = [],
 ) -> pl.DataFrame:
     """
     Get a dataframe of all properties of all infections
@@ -82,6 +84,8 @@ def empirical_detection_prob(
 
     Returns proportion, numerator count, and denominator count.
     """
+    df = df.filter(pl.col("simulated"))
+
     if conditional_column is not None:
         assert conditional_column in df.columns
         assert df.schema[conditional_column] == pl.Boolean
@@ -122,6 +126,7 @@ def summarize_detections(df: pl.DataFrame) -> pl.DataFrame:
     """
     Get marginal detection probabilities from simulations.
     """
+    df = df.filter(pl.col("simulated"))
     n_infections = df.shape[0]
 
     # Add in eligibility conditions
@@ -203,13 +208,17 @@ def summarize_infections(df: pl.DataFrame) -> pl.DataFrame:
     """
     Get summaries of infectiousness from simulations.
     """
-    df = df.with_columns(
-        n_infections=pl.col("infection_times").list.len(),
-        t_noninfectious=pl.min_horizontal(
-            [pl.col("t_detected"), pl.col("t_recovered")]
-        ),
-    ).with_columns(
-        duration_infectious=(pl.col("t_noninfectious") - pl.col("t_infectious"))
+    df = (
+        df.filter(pl.col("simulated"))
+        .with_columns(
+            n_infections=pl.col("infection_times").list.len(),
+            t_noninfectious=pl.min_horizontal(
+                [pl.col("t_detected"), pl.col("t_recovered")]
+            ),
+        )
+        .with_columns(
+            duration_infectious=(pl.col("t_noninfectious") - pl.col("t_infectious"))
+        )
     )
 
     return pl.DataFrame(
@@ -229,15 +238,11 @@ def prob_control_by_gen(df: pl.DataFrame, gen: int) -> float:
     """
     n_sim = df["simulation"].unique().len()
     size_at_gen = (
-        df.with_columns(
-            pl.col("generation") + 1,
-            n_infections=pl.col("infection_times").list.len(),
-        )
-        .with_columns(size=pl.sum("n_infections").over("simulation", "generation"))
-        .unique(subset=["simulation", "generation"])
+        df.group_by("simulation", "generation")
+        .agg(n_infections=pl.len())
         .filter(
             pl.col("generation") == gen,
-            pl.col("size") > 0,
+            pl.col("n_infections") > 0,
         )
     )
     return 1.0 - (size_at_gen.shape[0] / n_sim)

diff --git a/tests/data/snapshot.json b/tests/data/snapshot.json