Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 46 additions & 40 deletions ringvax/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@


class Simulation:
PROPERTIES = {
INIT_PROPERTIES = {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's docstring these? I wasn't immediately clear to me what "init" vs. "sim" meant here

"id",
"infector",
"infectees",
"generation",
"t_exposed",
"simulated",
}
SIM_PROPERTIES = {
"infectees",
"t_infectious",
"t_recovered",
"infection_rate",
Expand All @@ -20,6 +23,7 @@ class Simulation:
"t_detected",
"infection_times",
}
PROPERTIES = INIT_PROPERTIES | SIM_PROPERTIES

def __init__(
self, params: dict[str, Any], rng: Optional[numpy.random.Generator] = None
Expand All @@ -29,10 +33,18 @@ def __init__(
self.infections = {}
self.termination: Optional[str] = None

def create_person(self) -> str:
def create_person(
self, infector: Optional[str], t_exposed: float, generation: int
) -> str:
"""Add a new person to the data"""
id = str(len(self.infections))
self.infections[id] = {x: None for x in self.PROPERTIES}
self.infections[id] = {
"id": id,
"infector": infector,
"t_exposed": t_exposed,
"generation": generation,
"simulated": False,
} | {x: None for x in self.SIM_PROPERTIES}
return id
Comment on lines +36 to 48
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because this is a small, ad hoc simulation, this doesn't matter, but I liked having create_person return an id and do nothing else. Now create_person is a mix of creating a database-entry-person and updating them.

I guess we would have wanted to draw a distinction between a person-as-dictionary vs. a simulated-person, who has some properties.

But again this doesn't matter here

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we want to immediately instantiate people, it seemed to me like it no longer made sense to draw that distinction. Otherwise we'd just be adding functions to the pile that we have to call every time a new infection arises, no?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, there would be an extra function, because the "create empty database entry" and "fill in defaults for this person" would be separate

Again, that might be good if we had a bigger simulation where wanted more clear separation between data mgmt and simulation, but here it doesn't matter so much


def update_person(self, id: str, content: dict[str, Any]) -> None:
Expand Down Expand Up @@ -67,18 +79,11 @@ def query_people(self, query: Optional[dict[str, Any]] = None) -> List[str]:
if all(person[k] == v for k, v in query.items())
]

def register_infectee(self, infector, infectee) -> None:
infectees = self.get_person_property(infector, "infectees")
if infectees is None:
self.update_person(infector, {"infectees": []})
infectees = self.get_person_property(infector, "infectees")
infectees.append(infectee)

def run(self) -> None:
"""Run simulation"""
# queue is pairs (t_exposed, infector)
# queue is of infection ids
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a nice improvement

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Although now it means that methods like generate_infections now directly interact with the underlying data storage, rather than having run() do all that. Which violates the separation of powers a little, but again doesn't matter so much in this ad hoc simulation

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm sure I'm missing something, but that feels like a line we crossed somewhere between "we should instantiate infections earlier" and "this will always be a branching process"

Or, put another way, the reason I saw "this will always be a BP" as necessary scope for "infections should instantiate earlier" was because I don't see how to instantiate infections when their time of infection is drawn without doing it inside the infection simulation routine. Anything else would just land us back where we were, wouldn't it?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm just talking about class/static methods vs. instance methods, not necessarily about the ordering of anything

Like, the "resolve infection" function (i.e., figure out the natural history and no. of onward infections for some person) could be a class method that doesn't touch the database, but returns the natural history information back to run(), that in turn interacts with the database

I'm saying: I think there's a way to write code so that run() is the only instance method

# start with the index infection
infection_queue: List[tuple[float, Optional[str]]] = [(0.0, None)]
infection_queue: List[str] = [self.create_person(None, 0.0, 0)]

passed_max_generations = False

Expand All @@ -98,24 +103,17 @@ def run(self) -> None:
)
# exit the loop
break
elif n_infections == self.params["max_infections"]:
elif n_infections >= self.params["max_infections"]:
# we are at maximum number of infections
self.termination = "max_infections"
# exit the loop
break
elif n_infections > self.params["max_infections"]:
# this loop instantiates infections one at a time. we should
# exactly hit the maximum and not exceed it.
raise RuntimeError("Maximum number of infections exceeded")

# find the person who is infected next
# (the queue is time-sorted, so this is the temporally next infection)
t_exposed, infector = infection_queue.pop(0)
id = infection_queue.pop(0)

# otherwise, instantiate this infection, draw who they in turn infect,
# draw who they in turn infect,
# and add the infections they cause to the queue, in time order
id = self.create_person()
self.generate_infection(id=id, t_exposed=t_exposed, infector=infector)
offspring = self.generate_infection(id=id)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think "infection" is now used to refer to too many things

Like, maybe we want to rename infection_queue to infected_queue? Because now it's a queue of IDs of people who have been infected, and whose onward infections we need to process

And now generate_infection returns a list of infectees, that gets merged with infection_queue, so these are the same species

I guess generate_infection now processes/resolves an infection, and returns the list of downstream infectees

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the names are... not great at the moment. I feel like the less-descriptive queue might be best here.

The generate_ family also seems somewhat inadequate. I was tempted to rename them simulate_ since they're simulating parts of infection histories.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We've replaced the (time, infector) + dict paradigm with a dict + dict paradigm since they're all just infections now, except some have not been fleshed out to full histories.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cf. my note above. I think "generate" might have made more sense if/when that function was a static method that just returned information. But now that it's interacting with the database, I see that "simulate" or "process" or whatever makes more sense


# if the infector is in the final generation, do not add their
# infectees to the queue
Expand All @@ -129,29 +127,26 @@ def run(self) -> None:
else:
# only add infectees to the queue if we are not yet at maximum
# number of generations
for t in self.get_person_property(id, "infection_times"):
bisect.insort_right(infection_queue, (t, id), key=lambda x: x[0])
for child in offspring:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And now "child" is essentially the same species too

bisect.insort_right(
infection_queue,
child,
key=lambda x: self.get_person_property(x, "t_exposed"),
)

def generate_infection(
self, id: str, t_exposed: float, infector: Optional[str]
) -> None:
self,
id: str,
) -> List[str]:
"""
Generate a single infected person's biological disease history, detection
history and transmission history
"""
# keep track of generations
if infector is None:
generation = 0
else:
generation = self.get_person_property(infector, "generation") + 1
self.register_infectee(infector, id)

self.update_person(
id, {"id": id, "infector": infector, "generation": generation}
)

# disease state history in this individual
disease_history = self.generate_disease_history(t_exposed=t_exposed)
disease_history = self.generate_disease_history(
self.get_person_property(id, "t_exposed")
)
self.update_person(id, disease_history)

# whether this person was detected
Expand Down Expand Up @@ -182,7 +177,18 @@ def generate_infection(
assert (infection_times >= disease_history["t_infectious"]).all()
assert (infection_times <= t_end_infectious).all()

self.update_person(id, {"infection_times": infection_times})
infectees = [
self.create_person(id, time, self.get_person_property(id, "generation") + 1)
for time in infection_times
]
self.update_person(
id, {"infection_times": infection_times, "infectees": infectees}
)

# mark this person as simulated
self.update_person(id, {"simulated": True})

return infectees

def generate_disease_history(self, t_exposed: float) -> dict[str, Any]:
"""Generate infection history for a single infected person"""
Expand Down
23 changes: 12 additions & 11 deletions ringvax/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,15 @@ def run_simulations(n: int, params: dict, seed: int) -> List[Simulation]:
for i in range(n):
progress_bar.progress(i / n, text=progress_text)
sim = Simulation(params=params, rng=rngs[i])
sim.run()
sims.append(sim)
try:
sim.run()
sims.append(sim)
except Exception as e:
if not (
isinstance(e, RuntimeError)
and str(e) == "Maximum number of infections exceeded"
):
raise (e)

progress_bar.empty()
toc = time.perf_counter()
Expand Down Expand Up @@ -284,13 +291,6 @@ def infectiousness_callback():
max_value=n_generations + 1,
help="Successful control is defined as no infections in contacts at this degree. Set to 1 for contacts of the index case, 2 for contacts of contacts, etc. Equivalent to checking for extinction in the specified generation.",
)
max_infections = st.number_input(
"Maximum number of infections",
value=1000,
step=10,
min_value=100,
help="",
)
seed = st.number_input("Random seed", value=1234, step=1)
nsim = st.number_input("Number of simulations", value=250, step=1)
plot_gen = st.toggle("Show infection's generation", value=False)
Expand All @@ -313,6 +313,7 @@ def infectiousness_callback():
== "Cumulative"
)

max_infections = 1000000
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
max_infections = 1000000
max_infections = 1_000_000

or int(1e6)

params = {
"n_generations": n_generations,
"latent_duration": latent_duration,
Expand All @@ -329,13 +330,13 @@ def infectiousness_callback():

sims = run_simulations(n=nsim, params=params, seed=seed)

n_at_max = sum(1 for sim in sims if sim.termination == "max_infections")
n_at_max = nsim - len(sims)

show = True if n_at_max == 0 else False
if not show:
st.warning(
body=(
f"{n_at_max} simulations hit the specified maximum number of infections ({max_infections})."
f"{n_at_max} simulations hit the maximum number of infections ({max_infections})."
),
icon="🚨",
)
Expand Down
14 changes: 9 additions & 5 deletions ringvax/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,11 @@ def get_infection_time_tuples(id: str, sim: Simulation):
if infectees is None or len(infectees) == 0:
return None

return [(sim.get_person_property(inf, "t_exposed"), inf) for inf in infectees]
return [
(sim.get_person_property(inf, "t_exposed"), inf)
for inf in infectees
if sim.get_person_property(inf, "simulated")
]


def order_descendants(sim: Simulation):
Expand Down Expand Up @@ -282,21 +286,21 @@ def make_plot_par(sim: Simulation, show_counterfactual=True):
0.0,
max(
sim.get_person_property(id, stage_map["infectious"]["end"])
for id in sim.infections.keys()
for id in sim.query_people({"simulated": True})
),
],
"y_range": [-1.0, len(sim.infections)],
"y_range": [-1.0, len(sim.query_people({"simulated": True}))],
}


def plot_simulation(sim: Simulation, par: dict[str, Any]):
n_inf = len(sim.query_people())
n_inf = len(sim.query_people({"simulated": True}))

plot_par = make_plot_par(sim) | par

fig, ax = plt.subplots()

for inf in sim.query_people():
for inf in sim.query_people({"simulated": True}):
draw_stages(ax, inf, sim, plot_par)

mark_infections(ax, inf, sim, plot_par)
Expand Down
35 changes: 20 additions & 15 deletions ringvax/summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
{
"id": pl.String,
"infector": pl.String,
"simulated": pl.Boolean,
"infectees": pl.List(pl.String),
"generation": pl.Int64,
"t_exposed": pl.Float64,
Expand All @@ -29,7 +30,8 @@


def get_all_person_properties(
sims: Sequence[Simulation], exclude_termination_if: list[str] = ["max_infections"]
sims: Sequence[Simulation],
exclude_termination_if: list[str] = [],
) -> pl.DataFrame:
"""
Get a dataframe of all properties of all infections
Expand Down Expand Up @@ -82,6 +84,8 @@ def empirical_detection_prob(

Returns proportion, numerator count, and denominator count.
"""
df = df.filter(pl.col("simulated"))

if conditional_column is not None:
assert conditional_column in df.columns
assert df.schema[conditional_column] == pl.Boolean
Expand Down Expand Up @@ -122,6 +126,7 @@ def summarize_detections(df: pl.DataFrame) -> pl.DataFrame:
"""
Get marginal detection probabilities from simulations.
"""
df = df.filter(pl.col("simulated"))
n_infections = df.shape[0]

# Add in eligibility conditions
Expand Down Expand Up @@ -203,13 +208,17 @@ def summarize_infections(df: pl.DataFrame) -> pl.DataFrame:
"""
Get summaries of infectiousness from simulations.
"""
df = df.with_columns(
n_infections=pl.col("infection_times").list.len(),
t_noninfectious=pl.min_horizontal(
[pl.col("t_detected"), pl.col("t_recovered")]
),
).with_columns(
duration_infectious=(pl.col("t_noninfectious") - pl.col("t_infectious"))
df = (
df.filter(pl.col("simulated"))
.with_columns(
n_infections=pl.col("infection_times").list.len(),
t_noninfectious=pl.min_horizontal(
[pl.col("t_detected"), pl.col("t_recovered")]
),
)
.with_columns(
duration_infectious=(pl.col("t_noninfectious") - pl.col("t_infectious"))
)
)

return pl.DataFrame(
Expand All @@ -229,15 +238,11 @@ def prob_control_by_gen(df: pl.DataFrame, gen: int) -> float:
"""
n_sim = df["simulation"].unique().len()
size_at_gen = (
df.with_columns(
pl.col("generation") + 1,
n_infections=pl.col("infection_times").list.len(),
)
.with_columns(size=pl.sum("n_infections").over("simulation", "generation"))
.unique(subset=["simulation", "generation"])
df.group_by("simulation", "generation")
.agg(n_infections=pl.len())
.filter(
pl.col("generation") == gen,
pl.col("size") > 0,
pl.col("n_infections") > 0,
)
)
return 1.0 - (size_at_gen.shape[0] / n_sim)
Expand Down
2 changes: 1 addition & 1 deletion tests/data/snapshot.json

Large diffs are not rendered by default.

Loading
Loading