-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdes_page.py
450 lines (391 loc) · 26.6 KB
/
des_page.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
import simpy
import random
import pandas as pd
import numpy as np
import plotly.express as px
import re
import streamlit as st
from des_classes import g, Trial
# Page header and the button that triggers a simulation run.
st.title("DES example")
button_run_pressed = st.button("Run simulation")

# Collect the model parameters in the sidebar so the main area is left free
# for results.
with st.sidebar:
    # NOTE(review): both sliders allow 0. If des_classes treats these values
    # as mean inter-arrival times (e.g. uses 1 / value as a rate), 0 would
    # fail - confirm against the model and raise min_value to 1 if so.
    patient_inter_input = st.slider(
        "Select the time between patient arrivals",
        min_value=0,
        max_value=20,
        value=3,
    )
    call_inter_input = st.slider(
        "Select the time between calls",
        min_value=0,
        max_value=30,
        value=10,
    )
    # The label uses Streamlit markdown (:green[...] and **bold**); the
    # newline between the two parts is part of the label text.
    sim_duration_input = st.number_input(
        "Select the duration of the :green[simulation]\n"
        "(**Maximum value is 2000**)",
        min_value=60,
        max_value=2000,
        value=480,
    )

# Copy the chosen values onto the shared `g` class imported from des_classes
# so that the trial started later in this script picks them up.
g.patient_inter = patient_inter_input
g.call_inter = call_inter_input
g.sim_duration = sim_duration_input
# Split the main area into two tabs: tab1 currently only holds a placeholder,
# tab2 is filled with the plots once a simulation has been run.
tab1, tab2 = st.tabs(["summary results", "extra graphs"])

with tab1:
    st.write("hello")
###########################################################
# Run a trial using the parameters from the g class and   #
# display the results                                     #
###########################################################
# Everything below needs the output of the trial, so it only runs once the
# "Run simulation" button has been pressed.
# NOTE(review): the nesting used here (plots inside `if button_run_pressed:`
# and `with tab2:`) is reconstructed - confirm it matches the intended layout.
if button_run_pressed:
    with st.spinner("Running simulation"):
        df_trial_results, caller_results, patient_results = Trial().run_trial()

    # TODO: show headline figures in the summary tab. The console version of
    # this script printed the median across runs of each of these
    # df_trial_results columns:
    #   "Mean Queue Time Reg", "Mean Queue Time Book Test",
    #   "Mean Queue Time Call", "Mean Queue Time GP",
    #   "Receptionist Utilisation - Percentage", "GP Utilisation - Percentage"

    ###########################################################
    # Summaries and visualisations for averages across the    #
    # trial                                                   #
    ###########################################################
    with tab2:
        # Reusable colour sequence taken from the NHS identity guidelines
        # (https://www.england.nhs.uk/nhsidentity/identity-guidelines/colours/).
        # A plot with a single colour uses only the first entry (NHS Blue);
        # a plot with categories assigns colours in the order listed here.
        nhs_colour_sequence = [
            "#005EB8", "#FFB81C", "#00A499", "#41B6E6", "#AE2573", "#006747",
        ]

        # The utilisation columns are percentages, i.e. on a very different
        # scale to the wait times, so they are plotted separately.
        utilisation_columns = [
            "GP Utilisation - Percentage",
            "Receptionist Utilisation - Percentage",
        ]

        # Plotly picks its own tick spacing by default. With a small number
        # of runs it is nicer to label every run explicitly.
        label_every_run = g.number_of_runs < 20

        # Wait-time metrics only, with the run number moved out of the index
        # into a column (used by both wait-time bar plots below).
        wait_times_df = (
            df_trial_results
            .drop(columns=utilisation_columns)
            .reset_index(drop=False)
        )

        # Utilisation metrics in 'long' format: one row per run/metric pair,
        # with the original column name in 'variable' and the number in
        # 'value' (used by the utilisation box plot and bar plot below).
        utilisation_long_df = (
            df_trial_results[utilisation_columns]
            .reset_index(drop=False)
            .melt(id_vars="Run Number")
        )

        ##############################################
        # Bar plot - average waits per stage per run #
        ##############################################
        average_waits_fig = px.bar(
            # Wide -> long: after melting, a row might look like
            # "1, Mean Queue Time Reg, 87" for 'Run Number', 'variable'
            # and 'value' respectively.
            wait_times_df.melt(id_vars="Run Number"),
            x="value",             # minutes on the horizontal axis
            y="Run Number",
            facet_col="variable",  # one sub-plot per metric
            title="Average Waits (Minutes) For Each Stage of the Patient Journey - by Run",
            orientation='h',       # horizontal bars (default is vertical)
            labels={"value": "Average Wait (Mins)"},
            color_discrete_sequence=nhs_colour_sequence,
        )
        # Facet headers are generated as "variable=<metric>"; strip the
        # prefix so only the metric name is shown.
        average_waits_fig.for_each_annotation(
            lambda a: a.update(text=a.text.replace("variable=", ""))
        )
        if label_every_run:
            average_waits_fig.update_layout(yaxis={'dtick': 1})
        st.plotly_chart(average_waits_fig)

        ##############################################
        # Bar plot - waits per stage per run         #
        ##############################################
        # Same data as above, but faceted by run rather than by metric, so
        # the focus is on how a single run performed across all stages.
        performance_per_run_fig = px.bar(
            wait_times_df
            # The metric names become x axis labels, so drop the repeated
            # 'Mean Queue Time ' prefix to keep them readable.
            .rename(columns=lambda name: name.replace("Mean Queue Time ", ""))
            .melt(id_vars="Run Number"),
            facet_col="Run Number",
            facet_col_wrap=10,  # stop sub-plots getting too small with many runs
            x="variable",
            y="value",
            title="Average Waits (Minutes) For Each Stage of the Patient Journey - by Run",
            color_discrete_sequence=nhs_colour_sequence,
            labels={"variable": "",
                    "value": "Queue Time (minutes)"},
        )
        # Tidy the sub-plot headers: "Run Number=3" -> "Run 3".
        performance_per_run_fig.for_each_annotation(
            lambda a: a.update(text=a.text.replace("Run Number=", "Run "))
        )
        # There is one x axis per sub-plot, so update each of them to make
        # sure every label is displayed.
        performance_per_run_fig.for_each_xaxis(lambda xaxis: xaxis.update(dtick=1))
        st.plotly_chart(performance_per_run_fig)

        ###############################################
        # Box plot - resource utilisation by resource #
        ###############################################
        utilisation_boxplot_fig = px.box(
            utilisation_long_df,
            x="value",     # % utilisation of the resource in the run
            y="variable",  # the resource (original column name)
            points="all",  # show the individual runs as well as the summary
            title="Resource Utilisation",
            # Start at 0 regardless of the lowest value and finish just past
            # 100 so that the highest points can be seen.
            range_x=[0, 105],
            color_discrete_sequence=nhs_colour_sequence,
            labels={"variable": "",
                    "value": "Resource Utilisation Across Run (%)"},
        )
        st.plotly_chart(utilisation_boxplot_fig)

        ##############################################
        # Bar plot - resource utilisation per run    #
        ##############################################
        # Same data as the box plot, but looking at utilisation within each
        # run rather than the spread across runs.
        utilisation_bar_fig = px.bar(
            utilisation_long_df,
            x="Run Number",
            y="value",
            # 'variable' holds the original column names, so colouring by it
            # gives one bar (and one legend entry) per resource.
            color="variable",
            barmode="group",  # side-by-side rather than stacked
            color_discrete_sequence=nhs_colour_sequence,
            title="Resource Utilisation",
            labels={"variable": "",  # the legend is clear without a header
                    "value": "Resource Utilisation Across Run (%)"},
        )
        if label_every_run:
            utilisation_bar_fig.update_layout(xaxis={'dtick': 1})
        st.plotly_chart(utilisation_bar_fig)

        ###########################################################
        # Summaries and visualisations for call stats             #
        ###########################################################

        ##############################################
        # Dataframe - Call Answering Stats           #
        ##############################################
        # Showing whether callers had their call answered gives a quick view
        # of how overloaded the system was in each run.
        # A missing 'Call Answered At' value is taken to mean the call was
        # never answered.
        caller_results["Call Answered"] = np.where(
            caller_results["Call Answered At"].isna(),
            "Call Not Answered Before Closing Time",
            "Call Answered"
        )

        # Count calls per run in each category. The counts series is named
        # explicitly so the column is called "count" regardless of the pandas
        # version (older versions name it after the counted column, which
        # then clashes with the index level when the index is reset).
        calls_answered_df = (
            caller_results
            .groupby("Run")["Call Answered"]
            .value_counts()
            .rename("count")
            .reset_index(drop=False)
        )

        # Wide format for display: one row per run, one column per category.
        # A run with no calls in a category has no row to pivot from, so
        # fill the resulting gap with 0 instead of showing NaN.
        calls_answered_df_wide = (
            calls_answered_df
            .pivot(index="Run", columns="Call Answered", values="count")
            .fillna(0)
            .astype(int)
            .reset_index(drop=False)
        )

        # Show the table on the page (print() would only write to the
        # console of the process running the app).
        st.dataframe(calls_answered_df_wide)

        ##########################################################################
        # Stacked Bar Plot - Number of Calls Answered - by run                   #
        ##########################################################################
        calls_answered_fig = px.bar(
            calls_answered_df,  # the long version works directly for plotting
            x="Run",
            y="count",
            color="Call Answered",
            labels={"count": "Number of Calls"},
            color_discrete_sequence=nhs_colour_sequence,
            title="Number of Calls - How Many Were Answered in Opening Hours?"
        )
        if label_every_run:
            calls_answered_fig.update_layout(xaxis={'dtick': 1})
        st.plotly_chart(calls_answered_fig)

        ##############################################
        # Strip Plot - Arrival Patterns              #
        ##############################################
        # Mostly a sanity check: are callers and patients arriving in an
        # intermingled fashion, and do arrival times differ between runs?
        # Stack the patient and caller arrivals on top of each other; the
        # caller column is renamed so both frames share the same columns.
        calls_and_patients = pd.concat([
            patient_results[["Run", "Arrival Time", "What"]],
            caller_results[["Run", "Call Start Time", "What"]]
            .rename(columns={"Call Start Time": "Arrival Time"}),
        ])

        # A strip plot is a scatterplot with some randomness on one axis so
        # that points at exactly the same position don't fully overlap.
        arrival_fig = px.strip(
            calls_and_patients,
            x="Arrival Time",
            y="Run",       # one line of points per run
            color="What",  # the 'What' column separates the two sources
            color_discrete_sequence=nhs_colour_sequence,
            title="Patient Arrivals by Time",
            labels={"Arrival Time": "Arrival Time (Simulation Minute)"}
        )
        # Use the maximum amount of jitter (random offset) for the points.
        arrival_fig.update_traces(jitter=1.0)
        st.plotly_chart(arrival_fig)

        ############################################################
        # Strip Plot - Call Answering by Arrival Time              #
        ############################################################
        # A similar plot, using callers only, to indicate at what point the
        # system starts to overload during each run.
        call_answered_detailed_fig = px.strip(
            caller_results,
            x="Call Start Time",
            y="Run",
            # Colour separates answered from unanswered calls.
            color="Call Answered",
            # Map colours explicitly so 'not answered' gets a typically
            # 'bad' colour.
            color_discrete_map={"Call Answered": "#005EB8",  # NHS blue
                                "Call Not Answered Before Closing Time": "#DA291C"},  # NHS Red
            title="Patient Calls - Successful Answering over Time",
            labels={"Call Start Time": "Call Start Time (Simulation Minute)"},
        )
        st.plotly_chart(call_answered_detailed_fig)

        ##############################################################
        # Summaries and visualisations for patient stats             #
        ##############################################################
        # Not implemented - your code here!