diff --git a/.github/actions/latency_logging/action.yaml b/.github/actions/latency_logging/action.yaml
new file mode 100644
index 0000000..f0a46e9
--- /dev/null
+++ b/.github/actions/latency_logging/action.yaml
@@ -0,0 +1,25 @@
+# .github/actions/latency_logging/action.yml
+name: "Alert on Long Data Latency"
+description: "Checks API for most current data and calculates latency. Sends an alert if data latency is long."
+inputs:
+  email_pw_secret:
+    description: "email pw"
+    required: true
+outputs:
+  latency_l:
+    description: 'Logging statement for a long latency. A string.'
+    value: ${{ steps.latency_check.outputs.latency_l }}
+  alert_needed:
+    description: 'Whether an alert should be triggered'
+    value: ${{ steps.latency_check.outputs.alert_needed }}
+runs:
+  using: "composite"
+  steps:
+    # BUGFIX: "id", "shell" and "run" must belong to ONE step. The original had
+    # "- id: latency_check" and "- shell: bash" as two separate list items, so the
+    # run step carried no id and steps.latency_check.outputs.* could never resolve.
+    - id: latency_check
+      shell: bash
+      run: |
+        docker run \
+          --env EMAIL_PW=${{ inputs.email_pw_secret }} \
+          --pull=always -u ${UID}:${DOCKER_GID} -v ${PWD}:${PWD} -w ${PWD} \
+          --rm mas.maap-project.org/root/maap-workspaces/base_images/python:v4.3.0 \
+          /opt/conda/envs/python/bin/python maap_runtime/latency_logging.py >> $GITHUB_OUTPUT
diff --git a/.github/workflows/schedule-latency_logging.yaml b/.github/workflows/schedule-latency_logging.yaml
new file mode 100644
index 0000000..90ad4f3
--- /dev/null
+++ b/.github/workflows/schedule-latency_logging.yaml
@@ -0,0 +1,50 @@
+# Renamed from "alert-failed-dps-jobs" (copy-paste leftover from the DPS-jobs
+# workflow) to describe what this workflow actually monitors.
+name: alert-long-data-latency
+
+on:
+  workflow_dispatch:
+  schedule:
+    # every hour
+    - cron: '0 * * * *'
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    environment: production
+    steps:
+      - name: checkout
+        uses: actions/checkout@v2
+
+      - name: python setup
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.10'
+
+      - name: get docker GID and set DOCKER_GID environment variable
+        run: |
+          echo "DOCKER_GID=$(getent group docker | cut -d: -f3)" >> $GITHUB_ENV
+
+      - name: get current user UID and set UID environment variable
+        run: |
+          echo "UID=$(id -u)" >> $GITHUB_ENV
+
+      - name: print DOCKER_GID and UID
+        run: |
+          echo "Docker GID: $DOCKER_GID"
+          echo "User UID: $UID"
+
+      - name: check the time between current time and most recent api data
+        id: latency_action
+        # BUGFIX: this previously pointed at the unrelated
+        # "alert-on-failed-dps-jobs" action. The step id, the email_pw_secret
+        # input and this PR all belong to the latency_logging action added above.
+        uses: Earth-Information-System/fireatlas/.github/actions/latency_logging@conus-dps
+        with:
+          email_pw_secret: ${{ secrets.EMAIL_PW }}
+
+      - name: send alert on Slack if failure
+        if: steps.latency_action.outputs.alert_needed == 'True'
+        uses: ravsamhq/notify-slack-action@2.5.0
+        with:
+          status: 'failure'
+          notify_when: 'failure'
+          notification_title: "Data Latency Alert for CONUS - ${{ steps.latency_action.outputs.latency_l }}"
+          footer: "<${{github.server_url}}/${{github.repository}}/issues/|Open GitHub Issues>"
+        env:
+          SLACK_WEBHOOK_URL: ${{ secrets.ACTION_MONITORING_SLACK_WEBHOOK_URL }}
+
diff --git a/maap_runtime/latency_logging.py b/maap_runtime/latency_logging.py
new file mode 100644
index 0000000..8f9dc0b
--- /dev/null
+++ b/maap_runtime/latency_logging.py
@@ -0,0 +1,48 @@
+import sys
+import requests
+import pytz
+from datetime import datetime, timezone, timedelta
+
+
+def fire_api_query(base="https://openveda.cloud/api/features/collections/", collection="public.eis_fire_lf_fireline_nrt", region="&region=CONUS"):
+    """Query the FEDS features API for the newest items of *collection*.
+
+    Returns the parsed GeoJSON dict on HTTP 200, or None (after logging to
+    stderr) on any other status.
+
+    BUGFIX: the default region was the mojibake "®ion=CONUS" ("&reg" rendered
+    as the (R) character), which silently dropped the region query parameter.
+    """
+    response = requests.get(f"{base}{collection}/items?f=geojson{region}&sortby=-t")
+    if response.status_code == 200:
+        return response.json()
+    print(f"ERROR API call status: {response.status_code} and URL {response.url}", file=sys.stderr)
+    return None
+
+
+def get_time_difference_in_data(api_data, overpass_cadence=12, baseline_latency=12, some_time_buffer=0.20, eastern_timezone_region="US/Eastern"):
+    '''
+    api_data (dict): json output from an api call to the FEDS api.
+    overpass_cadence (float): expected number hours between data collections
+    baseline_latency (float): hours of expected latency from overpass to distribution of data through api.
+    some_time_buffer (float): hours added to overpass and latency as a buffer.
+    eastern_timezone_region (str): The eastern-most timezone of the region in question. This must be compatible with pytz.timezone().
+    '''
+    now_utc = datetime.now(timezone.utc)
+    api_raw = api_data['features'][0]['properties']['t']
+    api_t = datetime.strptime(api_raw, "%Y-%m-%dT%H:%M:%S")
+    tz = pytz.timezone(eastern_timezone_region)  # Converting from local solar to tz at the eastern most timezone of region, which would have the longest latency.
+    api_t = tz.localize(api_t)
+    tz_utc = pytz.timezone('UTC')  # Now, we can convert it to UTC, because we want the UTC time of an Eastern overpass.
+    api_t = api_t.astimezone(tz_utc)
+    time_diff = now_utc - api_t
+    # BUGFIX: use total_seconds(), not .seconds — .seconds discards whole days,
+    # so an outage longer than 24h would have looked short and never alerted.
+    hour_diff = time_diff.total_seconds() / 3600
+    # BUGFIX: original compared against the undefined name "some_time_buffer_thresh"
+    # (NameError at runtime); the parameter is some_time_buffer.
+    alert_needed = hour_diff > (overpass_cadence + baseline_latency + some_time_buffer)
+    # Single-quoted strftime format so the nested f-string parses on Python < 3.12;
+    # the message was also identical in both branches, so build it once.
+    latency_l = f"At {now_utc.strftime('%Y-%m-%d %H:%M:%S')} UTC, the API displayed {api_raw}. There were {round(hour_diff, 2)} hours between check time in UTC and the last API data time, or {round(hour_diff - overpass_cadence, 2)} hours since last {eastern_timezone_region} satellite overpass. "  ## What is returned to workflow
+    print(f"latency_l={latency_l}", file=sys.stdout)
+    print(f"alert_needed={alert_needed}", file=sys.stdout)
+
+
+if __name__ == '__main__':
+    most_recent_data = fire_api_query()
+    get_time_difference_in_data(api_data=most_recent_data)