WIP Issue #29 - Add batch capability for running analytics over historical results (e.g. after uploading data from SD cards) #30
The first file changes the analytics engine. `run_analytics` gains `overwrite` and `log_mins` parameters so that batch re-runs can replace existing results without flooding the logs:
```diff
@@ -507,7 +507,10 @@ def _calculate_TWA_and_gauge_for_all_firefighters(self, sensor_log_chunk_df, ff_
     # current_utc_timestamp : The UTC datetime for which to calculate sensor analytics. Defaults to 'now' (UTC).
     # commit : Utility flag for unit testing - defaults to committing analytic results to
     #          the database. Setting commit=False prevents unit tests from writing to the database.
-    def run_analytics (self, current_utc_timestamp=None, commit=True) :
+    # overwrite : Enables batch execution to overwrite any existing result in the time period being processed.
+    # log_mins : How often to log an informational message stating which minute is currently being processed. Defaults
+    #            to every '1' min. Can be set to (eg) every '15' mins when running batches, so the logs are readable.
+    def run_analytics (self, current_utc_timestamp=None, commit=True, overwrite=False, log_mins=1) :
 
         # Get the desired timeframe for the analytics run and standardise it to UTC.
         if current_utc_timestamp is None:
```
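For orientation, a hypothetical caller of the extended signature (the zero-argument constructor and the parameter names come from this PR; the timestamp is invented):

```python
import pandas as pd
from GasExposureAnalytics import GasExposureAnalytics

engine = GasExposureAnalytics()

# Live mode: analyse 'now' (UTC) and log every minute - the defaults.
engine.run_analytics()

# Batch replay of one historical minute: replace any stale result for that
# minute, and only log on 15-minute boundaries so the logs stay readable.
engine.run_analytics(current_utc_timestamp=pd.Timestamp('2021-06-01T10:15:00Z'),
                     overwrite=True, log_mins=15)
```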
```diff
@@ -528,9 +531,11 @@ def run_analytics (self, current_utc_timestamp=None, commit=True) :
         # buffer for the data.
         timestamp_key = current_utc_timestamp.floor(freq='min') - pd.Timedelta(minutes = 1)
 
-        message = ("Running Prometeo Analytics for minute key '%s'" % (timestamp_key.isoformat()))
-        if not self._from_db : message += " (local CSV file mode)"
-        self.logger.info(message)
+        # Log progress regularly (e.g. by default, log_mins is 'every 1 min', but could be set to 'every 15 mins').
+        if (timestamp_key == timestamp_key.floor(str(log_mins) + 'T')) :
+            message = ("Running Prometeo Analytics for minute key '%s'" % (timestamp_key.floor(str(log_mins) + 'T')))
+            if not self._from_db : message += " (local CSV file mode)"
+            self.logger.info(message)
 
         # Read a block of sensor logs from the DB, covering the longest window we're calculating over (usually 8hrs).
         # Note: This has the advantage of always including all known sensor data, even when that data was delayed due
```
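The guard above logs only when the minute key lands exactly on a `log_mins` boundary. A standalone illustration of that floor-based check (pandas only; the timestamps are invented):

```python
import pandas as pd

log_mins = 15
for timestamp_key in pd.date_range('2021-06-01 10:00', periods=4, freq='5min'):
    # floor() snaps the key back to the nearest earlier 15-minute mark; only
    # keys already on the mark compare equal, so 10:05 and 10:10 stay quiet.
    if timestamp_key == timestamp_key.floor(str(log_mins) + 'T'):  # 'T' = minutes
        print('would log minute key', timestamp_key)  # fires at 10:00 and 10:15
```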
```diff
@@ -550,7 +555,77 @@ def run_analytics (self, current_utc_timestamp=None, commit=True) :
         # Work out all the time-weighted averages and corresponding limit gauges for all firefighters, all limits and all gases.
         analytics_df = self._calculate_TWA_and_gauge_for_all_firefighters(sensor_log_df, ff_time_spans_df, timestamp_key)
 
         # Write the analytic results to the DB
         if commit :
 
+            # Remove any pre-existing analytic results before writing new ones.
+            if overwrite :
+                with self._db_engine.connect() as connection: # 'with' auto-closes the connection
+                    connection.execute("DELETE FROM " + ANALYTICS_TABLE + " where " + TIMESTAMP_COL + " = '" + timestamp_key.isoformat() + "'")
+
             analytics_df.to_sql(ANALYTICS_TABLE, self._db_engine, if_exists='append', dtype={FIREFIGHTER_ID_COL:FIREFIGHTER_ID_COL_TYPE})
 
         return analytics_df
 
 
+    # This is the batched version of 'main' - given a start time and an end time, it generates a minute-by-minute
+    # playback schedule and runs all of the core analytics for Prometeo for each of those minutes.
+    # start_time : The date & time at which to start calculating sensor analytics (UTC datetime).
+    # end_time : The date & time at which to stop calculating sensor analytics (UTC datetime).
+    # commit : Utility flag for unit testing. Defaults to committing analytic results to the database for production.
+    #          Setting commit=False prevents unit tests from writing to the database.
+    def batch_run_analytics (self, start_time=None, end_time=None, commit=True) :
+
+        # Log that a batch procss is starting & what it will look at and what it will over-write.
```
**Copilot AI** (Oct 27, 2025) suggested a fix for the typo on the last line of the hunk:

```suggestion
        # Log that a batch process is starting & what it will look at and what it will over-write.
```

Copilot also flagged a SQL injection risk in the truncated body of `batch_run_analytics`:

> SQL injection vulnerability: The date parameter from user input is directly concatenated into the SQL query without parameterization. Use parameterized queries instead: `sql = text('SELECT MIN(:col) AS start_time, MAX(:col) AS end_time FROM :table WHERE DATE(:col) = :date')` and bind the parameters.
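The hunk header above promises 77 added lines, but the capture cuts off at the first comment of `batch_run_analytics`. A minimal sketch of the minute-by-minute playback loop that the signature and comments describe; the loop body, the `overwrite`/`log_mins` choices, and the return value are all assumptions, not the PR's actual code:

```python
# Sketch only - the real 77-line body is truncated in this diff view. Assumes a
# method on GasExposureAnalytics that replays run_analytics for every minute.
import pandas as pd

def batch_run_analytics(self, start_time=None, end_time=None, commit=True):
    # Build a minute-by-minute playback schedule over [start_time, end_time].
    schedule = pd.date_range(pd.Timestamp(start_time).floor('min'),
                             pd.Timestamp(end_time).floor('min'), freq='min')
    results = []
    for minute in schedule:
        # overwrite=True lets re-runs replace stale rows; a coarser log_mins
        # keeps batch logs readable (both parameters were added by this PR).
        results.append(self.run_analytics(current_utc_timestamp=minute,
                                          commit=commit, overwrite=True, log_mins=15))
    return pd.concat(results) if results else None
```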
The second file exposes the batch capability through the Flask service that hosts the per-minute analytics engine:
```diff
@@ -7,6 +7,7 @@
 from GasExposureAnalytics import GasExposureAnalytics
 from dotenv import load_dotenv
 import time
+from datetime import date
 import atexit
 from apscheduler.schedulers.background import BackgroundScheduler
 import logging
@@ -45,6 +46,7 @@
 FIREFIGHTER_ID_COL = 'firefighter_id'
 TIMESTAMP_COL = 'timestamp_mins'
 STATUS_LED_COL = 'analytics_status_LED'
+DATE_PARAMETER = 'date'
 
 # We initialize the prometeo Analytics engine.
 perMinuteAnalytics = GasExposureAnalytics()
```
```diff
@@ -167,6 +169,45 @@ def getStatusDetails():
         logger.error(f'Internal Server Error: {e}')
         abort(500)
 
 
+@app.route('/batch_run_analytics_by_date', methods=['GET'])
+def batch_run_analytics_by_date():
+
+    try:
+        date_str = request.args.get(DATE_PARAMETER)
+
+        # Return 400 (Bad Request) if the supplied date is invalid.
+        if (date_str is None) :
+            logger.error('Missing parameters : '+DATE_PARAMETER+' : '+date_str)
```
A review suggestion on the `logger.error` line above (as written, concatenating `date_str` when it is `None` would itself raise a `TypeError`):

```suggestion
            logger.error('Missing required parameter: ' + DATE_PARAMETER)
```
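This hunk is also truncated (45 added lines, roughly ten shown). A hypothetical completion of the endpoint: only `DATE_PARAMETER`, `perMinuteAnalytics`, and the `datetime.date` import are taken from the diff, while the day-window logic and the response shape are guesses:

```python
# Assumed continuation; nothing past the None-check appears in this capture.
from datetime import date, datetime, time, timedelta
from flask import abort, jsonify, request

@app.route('/batch_run_analytics_by_date', methods=['GET'])
def batch_run_analytics_by_date():
    date_str = request.args.get(DATE_PARAMETER)
    if date_str is None:
        logger.error('Missing required parameter: ' + DATE_PARAMETER)
        abort(400)
    try:
        day = date.fromisoformat(date_str)  # expects 'YYYY-MM-DD'
    except ValueError:
        logger.error('Invalid date: ' + date_str)
        abort(400)

    # Replay the whole requested day, minute by minute.
    start_time = datetime.combine(day, time.min)
    end_time = start_time + timedelta(days=1) - timedelta(minutes=1)
    perMinuteAnalytics.batch_run_analytics(start_time=start_time, end_time=end_time)
    return jsonify({DATE_PARAMETER: date_str, 'status': 'complete'})
```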
A second review comment flags the `DELETE` statement added to `run_analytics`:

> SQL injection vulnerability: The DELETE statement uses string concatenation with `timestamp_key.isoformat()` without parameterization. Although `timestamp_key` is generated internally, use parameterized queries for consistency and safety. Replace with: `connection.execute(text('DELETE FROM :table WHERE :col = :ts'), {'table': ANALYTICS_TABLE, 'col': TIMESTAMP_COL, 'ts': timestamp_key.isoformat()})`
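One caveat on both review comments: SQL placeholders can only bind values, not identifiers, so `:table` and `:col` cannot be parameterized as suggested; most drivers would reject them or quote them as string literals. A sketch of the workable middle ground, assuming SQLAlchemy 1.4+ and that `ANALYTICS_TABLE`/`TIMESTAMP_COL` remain trusted module constants:

```python
# Bind the value (the timestamp); interpolate only the trusted constants.
# Identifiers (table/column names) cannot be bound as SQL parameters.
from sqlalchemy import text

if overwrite:
    stmt = text(f"DELETE FROM {ANALYTICS_TABLE} WHERE {TIMESTAMP_COL} = :ts")
    with self._db_engine.begin() as connection:  # begin() commits on success
        connection.execute(stmt, {"ts": timestamp_key.isoformat()})
```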