From bff76fdcaff3cf935cf9b768542c9a54772303c1 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 11 May 2021 15:59:17 -0700 Subject: [PATCH] Deleted obsolote importers code, added deleted.md --- .github/workflows/test.yml | 3 - deleted.md | 17 ++++ docs/importers.md | 71 ---------------- importers/__init__.py | 0 importers/airtable_locations.py | 107 ----------------------- importers/import_utils.py | 10 --- importers/oregon_tableau_annotated.py | 92 -------------------- importers/test_vaccinefinder.py | 87 ------------------- importers/vaccinefinder.py | 117 -------------------------- 9 files changed, 17 insertions(+), 487 deletions(-) create mode 100644 deleted.md delete mode 100644 docs/importers.md delete mode 100644 importers/__init__.py delete mode 100644 importers/airtable_locations.py delete mode 100644 importers/import_utils.py delete mode 100644 importers/oregon_tableau_annotated.py delete mode 100644 importers/test_vaccinefinder.py delete mode 100755 importers/vaccinefinder.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 837797b..cd20a60 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -56,9 +56,6 @@ jobs: run: | cd vaccinate pytest --cov-report=xml --cov=./ - - name: Run importer script tests - run: | - pytest importers - name: Check migrations are backwards-compatible env: DJANGO_SECRET_KEY: secret for running tests diff --git a/deleted.md b/deleted.md new file mode 100644 index 0000000..393aec6 --- /dev/null +++ b/deleted.md @@ -0,0 +1,17 @@ +# Deleted code + +An index of code that we have deleted from this project, to make it easy to refer back to should we ever need to. + +## importers - deleted May 11th 2021 + +Prior to the creation of the [vaccine-feed-ingest](https://github.com/CAVaccineInventory/vaccine-feed-ingest) repo the `importers/` folder in VIAL included standalone Python scripts for importing data into VIAL by calling the `/api/importLocations` endpoint.
+ +This included the code for copying across data from our Airtable instance. + +Importer scripts: https://github.com/CAVaccineInventory/vial/tree/80ceac90a72e54731a169330a71c1f1bfa43e35a/importers + +Importer documentation: https://github.com/CAVaccineInventory/vial/blob/80ceac90a72e54731a169330a71c1f1bfa43e35a/docs/importers.md + +The `import_utils.py` module included a handy function for deriving the county for a location based on its latitude and longitude, using the https://us-counties.datasette.io/ API: + +https://github.com/CAVaccineInventory/vial/blob/80ceac90a72e54731a169330a71c1f1bfa43e35a/importers/import_utils.py diff --git a/docs/importers.md b/docs/importers.md deleted file mode 100644 index d7a47f0..0000000 --- a/docs/importers.md +++ /dev/null @@ -1,71 +0,0 @@ -# Importer scripts - -Importers are scripts that live in the `importers/` directory in this repository. They consume locations from a variety of sources and send them to the `/api/importLocations` API to import them into the database. - -You can run the tests for the importers by changing into the top level directory (`vial`) and running `pytest importers` - -## importers.airtable_locations - -Import location data from our Airtable backups. This command accepts a path to a `Locations.json` file or a URL - for the URL, visit https://github.com/CAVaccineInventory/airtable-data-backup/blob/main/backups/Locations.json and click on the "Raw" link to get a URL with a token in it, which should look something like `https://raw.githubusercontent.com/CAVaccineInventory/airtable-data-backup/main/backups/Locations.json?token=AAACK7...` - -Run this commands in the top-level `vail` folder: - - python -m importers.airtable_locations Locations.json --dry-run - -This will execute a "dry run" and show you the transformed data. 
- -To run the import, use: - - python -m importers.airtable_locations Locations.json \ - --url 'https://vial-staging.calltheshots.us/api/importLocations' \ - --token 3:xxxxx - -Pass a valid API key (created at https://vial-staging.calltheshots.us/admin/api/apikey/). - -In local development use `--url 'http://localhost:8000/api/importLocations`. - -## importers.vaccinefinder - -Run this importer from the top-level `vial` folder like so: - - python -m importers.vaccinefinder --help - -The script takes as arguments a list of JSON files or of directories containing JSON files. - -Here's how to run it against all locations in Rhode Island. - -First, checkout the `vaccine-feeds/raw-feed-data` directory. Here I'm checking it out to my `/tmp` directory: - - git clone git@github.com:vaccine-feeds/raw-feed-data /tmp/raw-feed-data - -You can run a dry run to see what will happen like so: - - python -m importers.vaccinefinder \ - ../raw-feed-data/vaccine-finder/RI/locations \ - --dry-run - -This will output a preview of the data, transformed into our API format. - -You can add `--derive-counties` to make an API call for each location to derive the county for it based on its latitude and longitude. - -To run the actual import, add the `--token` argument with as API Key (created at https://vial.calltheshots.us/admin/api/apikey/) and use the `--url` argument to specify the API endpoint to send the data to (it defaults to `http://localhost:3000/api/importLocations` for testing). 
- -Here's the comand-line recipe to import every Rhode Island location to our staging server: - - python -m importers.vaccinefinder \ - ../raw-feed-data/vaccine-finder/RI/locations \ - --url 'https://vial-staging.calltheshots.us/api/importLocations' \ - --token '1:e6c5e05637fdb6718d0c40efb3dfc98f' \ - --derive-counties - -## importers.oregon_tableau_annotated - -For this file: https://github.com/vaccine-feeds/raw-feed-data/blob/main/tableau/oregon.health.authority.covid.19/admin_site_and_county_map_site_no_info_with_places_data.json - -Run like so: - - python -i -m importers.oregon_tableau_annotated \ - admin_site_and_county_map_site_no_info_with_places_data.json \ - --token 'xxx' --url 'http://0.0.0.0:3000/api/importLocations' - -Leave off the `--url` argument to defalt to sending production. diff --git a/importers/__init__.py b/importers/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/importers/airtable_locations.py b/importers/airtable_locations.py deleted file mode 100644 index 030a5d9..0000000 --- a/importers/airtable_locations.py +++ /dev/null @@ -1,107 +0,0 @@ -import json - -import click -import httpx -from click.exceptions import ClickException - - -@click.command() -@click.argument( - "filepath_or_url", - type=str, -) -@click.option( - "--url", - default="https://vial.calltheshots.us/api/importLocations", - help="API URL to send locations to", -) -@click.option("--token", help="API token to use", envvar="IMPORT_API_TOKEN") -@click.option( - "--dry-run", - is_flag=True, - help="Display locations without sending them to the API", -) -def cli(filepath_or_url, url, token, dry_run): - "Import VaccinateCA Airtable locations" - if not dry_run and not token: - raise ClickException("--token is required unless running a --dry-run") - # Run the import, twenty at a time - locations = yield_locations(filepath_or_url) - batch = [] - for location in locations: - batch.append(location) - if len(batch) == 20: - import_batch(batch, url, token, 
dry_run) - batch = [] - if batch: - import_batch(batch, url, token, dry_run) - - -def yield_locations(filepath_or_url): - if filepath_or_url.startswith("https://") or filepath_or_url.startswith("http://"): - yield from httpx.get(filepath_or_url).json() - else: - yield from json.load(open(filepath_or_url)) - - -def import_batch(batch, url, token, dry_run): - fixed = [] - for item in batch: - try: - fixed.append(convert_airtable(item)) - except Exception as e: - print(e) - continue - if dry_run: - click.echo(json.dumps(fixed, indent=2)) - else: - response = httpx.post( - url, - json=fixed, - headers={"Authorization": "Bearer {}".format(token)}, - timeout=20, - ) - try: - response.raise_for_status() - except Exception as e: - print(response.text) - raise ClickException(e) - click.echo(response.status_code) - click.echo(json.dumps(response.json(), indent=2)) - - -def convert_airtable(location): - import_ref = "vca-airtable:{}".format(location["airtable_id"]) - address = location["Address"] - address_bits = [s.strip() for s in address.split(",")] - info = { - "name": location["Name"], - "full_address": ", ".join(address_bits), - "street_address": address_bits[0], - "city": None, - "phone_number": location.get("Phone number"), - "zip_code": None, - "website": location.get("Website"), - "preferred_contact_method": location.get("preferred_contact_method"), - "hours": location.get("Hours"), - "county": location["County"].replace(" County", "").strip(), - "state": "CA", - "location_type": location.get("Location Type"), - "latitude": location["Latitude"], - "longitude": location["Longitude"], - "import_ref": import_ref, - "airtable_id": location["airtable_id"], - "google_places_id": location.get("google_places_id"), - "vaccinespotter_location_id": location.get("vaccinespotter_location_id"), - "vaccinefinder_location_id": location.get("vaccinefinder_location_id"), - "import_json": location, - "soft_deleted": bool(location.get("is_soft_deleted")), - } - if 
location.get("duplicate_of"): - info["duplicate_of"] = location["duplicate_of"][0] - - return info - - -if __name__ == "__main__": - cli() diff --git a/importers/import_utils.py b/importers/import_utils.py deleted file mode 100644 index 2b85ce4..0000000 --- a/importers/import_utils.py +++ /dev/null @@ -1,10 +0,0 @@ -import httpx - - -def derive_county(latitude, longitude): - url = "https://us-counties.datasette.io/counties/county_for_latitude_longitude.json" - params = {"longitude": longitude, "latitude": latitude, "_shape": "array"} - results = httpx.get(url, params=params).json() - if len(results) != 1: - return None - return results[0] diff --git a/importers/oregon_tableau_annotated.py b/importers/oregon_tableau_annotated.py deleted file mode 100644 index e3c3841..0000000 --- a/importers/oregon_tableau_annotated.py +++ /dev/null @@ -1,92 +0,0 @@ -import json - -import click -import httpx -from click.exceptions import ClickException - - -@click.command() -@click.argument( - "filepath", - type=click.Path(dir_okay=False, file_okay=True, allow_dash=True), -) -@click.option( - "--url", - default="https://vial.calltheshots.us/api/importLocations", - help="API URL to send locations to", -) -@click.option("--token", help="API token to use", envvar="IMPORT_API_TOKEN") -@click.option( - "--dry-run", - is_flag=True, - help="Display locations without sending them to the API", -) -def cli(filepath, url, token, dry_run): - "Import locations from admin_site_and_county_map_site_no_info_with_places_data" - if not dry_run and not token: - raise ClickException("--token is required unless running a --dry-run") - # Run the import, twenty at a time - locations = json.load(open(filepath)) - batch = [] - for location in locations: - batch.append(location) - if len(batch) == 20: - import_batch(batch, url, token, dry_run) - batch = [] - if batch: - import_batch(batch, url, token, dry_run) - - -def import_batch(batch, url, token, dry_run): - fixed = [convert_location(item) for item in 
batch] - fixed = [f for f in fixed if f and f.get("phone_number")] - if dry_run: - click.echo(json.dumps(fixed, indent=2)) - else: - response = httpx.post( - url, - json=fixed, - headers={"Authorization": "Bearer {}".format(token)}, - timeout=20, - ) - try: - response.raise_for_status() - except Exception as e: - print(response.text) - raise ClickException(e) - click.echo(response.status_code) - click.echo(json.dumps(response.json(), indent=2)) - - -def convert_location(location): - import_ref = "oregon-tableau:{}".format(location["Location ID-value"]) - place_data = location.get("google_places_data") - if not place_data: - print("Skipped because no Google places data:") - print(json.dumps(location)) - return None - hours = None - weekday_text = place_data.get("opening_hours", {}).get("weekday_text") - if weekday_text: - hours = "\n".join(weekday_text) - return { - "name": place_data["name"], - "full_address": place_data["formatted_address"], - "street_address": location["Administration Address-alias"], - "city": location["City-alias"], - "phone_number": place_data.get("formatted_phone_number"), - "zip_code": location["Zip Code-alias"], - "website": place_data.get("website"), - "google_places_id": location["google_places_id"], - "state": location["GEOAdmin State-alias"], - "location_type": "Unknown", - "latitude": place_data["geometry"]["location"]["lat"], - "longitude": place_data["geometry"]["location"]["lng"], - "hours": hours, - "import_ref": import_ref, - "import_json": location, - } - - -if __name__ == "__main__": - cli() diff --git a/importers/test_vaccinefinder.py b/importers/test_vaccinefinder.py deleted file mode 100644 index c900d67..0000000 --- a/importers/test_vaccinefinder.py +++ /dev/null @@ -1,87 +0,0 @@ -import json - -from click.testing import CliRunner - -from .vaccinefinder import cli - -VACCINEFINDER_JSON = { - "address1": "63 Newport Ave", - "address2": "", - "city": "Rumford", - "distance": 1.03, - "guid": 
"91c4731f-11e4-4b04-80a7-f0ae39cd859b", - "in_stock": True, - "lat": 41.857945, - "long": -71.35557, - "name": "CVS Pharmacy, Inc. #07387", - "phone": "(555) 555-0461", - "state": "RI", - "zip": "02916", -} -EXPECTED = { - "name": "CVS Pharmacy, Inc. #07387", - "full_address": "63 Newport Ave, Rumford, RI, 02916", - "street_address": "63 Newport Ave", - "city": "Rumford", - "phone_number": "(555) 555-0461", - "zip_code": "02916", - "website": None, - "state": "RI", - "location_type": "Unknown", - "latitude": 41.857945, - "longitude": -71.35557, - "import_ref": "vf:91c4731f-11e4-4b04-80a7-f0ae39cd859b", -} - - -def test_help(): - runner = CliRunner() - with runner.isolated_filesystem(): - result = runner.invoke(cli, ["--help"]) - assert result.exit_code == 0 - assert result.output.startswith("Usage: cli [OPTIONS] [FILEPATHS]...") - - -def test_import_location_dry_run(httpx_mock): - runner = CliRunner() - with runner.isolated_filesystem(): - open("example.json", "w").write(json.dumps(VACCINEFINDER_JSON)) - result = runner.invoke(cli, ["example.json", "--dry-run"]) - assert result.exit_code == 0 - assert json.loads(result.output) == [EXPECTED] - - -def test_import_location_with_counties(httpx_mock): - runner = CliRunner() - httpx_mock.add_response( - url="https://us-counties.datasette.io/counties/county_for_latitude_longitude.json?longitude=-71.35557&latitude=41.857945&_shape=array", - json=[ - { - "state_fips": "44", - "state": "RI", - "county_fips": "44007", - "county_name": "Providence", - "COUNTYNS": "01219781", - "AFFGEOID": "0500000US44007", - "GEOID": "44007", - "LSAD": "06", - "ALAND": 1060563722, - "AWATER": 67858981, - } - ], - ) - httpx_mock.add_response( - method="POST", - url="https://vial.calltheshots.us/api/importLocations", - json={"ok": True}, - ) - with runner.isolated_filesystem(): - open("example.json", "w").write(json.dumps(VACCINEFINDER_JSON)) - result = runner.invoke( - cli, ["example.json", "--token", "x", "--derive-counties"] - ) - assert 
result.exit_code == 0 - assert result.output == '200\n{\n "ok": true\n}\n' - import_locations_request = httpx_mock.get_requests()[1] - expected_with_county = {**EXPECTED, **{"county": "Providence"}} - assert json.loads(import_locations_request.read()) == [expected_with_county] diff --git a/importers/vaccinefinder.py b/importers/vaccinefinder.py deleted file mode 100755 index b42b91c..0000000 --- a/importers/vaccinefinder.py +++ /dev/null @@ -1,117 +0,0 @@ -import json -import pathlib - -import click -import httpx -from click.exceptions import ClickException - -from .import_utils import derive_county - - -@click.command() -@click.argument( - "filepaths", - type=click.Path(dir_okay=True, file_okay=True, allow_dash=True), - nargs=-1, -) -@click.option( - "--url", - default="https://vial.calltheshots.us/api/importLocations", - help="API URL to send locations to", -) -@click.option("--token", help="API token to use", envvar="IMPORT_API_TOKEN") -@click.option( - "--dry-run", - is_flag=True, - help="Display locations without sending them to the API", -) -@click.option( - "--derive-counties", - is_flag=True, - help="Derive counties from latitude/longitude", -) -def cli(filepaths, url, token, dry_run, derive_counties): - "Import Vaccine Spotter locations - accepts multiple JSON files or directories containing JSON files" - if not dry_run and not token: - raise ClickException("--token is required unless running a --dry-run") - # Run the import, twenty at a time - locations = yield_locations(filepaths) - batch = [] - for location in locations: - batch.append(location) - if len(batch) == 20: - import_batch(batch, url, token, dry_run, derive_counties) - batch = [] - if batch: - import_batch(batch, url, token, dry_run, derive_counties) - - -def yield_locations(filepaths): - for filepath in filepaths: - path = pathlib.Path(filepath) - if path.is_file() and path.suffix == ".json": - data = json.loads(path.read_bytes()) - if data: - if isinstance(data, list): - yield from data - 
else: - yield data - elif path.is_dir(): - for file in path.glob("**/*.json"): - data = json.loads(file.read_bytes()) - if isinstance(data, list): - yield from data - else: - yield data - - -def import_batch(batch, url, token, dry_run, derive_counties): - fixed = [convert_vaccinefinder(item, derive_counties) for item in batch] - if dry_run: - click.echo(json.dumps(fixed, indent=2)) - else: - response = httpx.post( - url, - json=fixed, - headers={"Authorization": "Bearer {}".format(token)}, - timeout=20, - ) - try: - response.raise_for_status() - except Exception as e: - print(response.text) - raise ClickException(e) - click.echo(response.status_code) - click.echo(json.dumps(response.json(), indent=2)) - - -def convert_vaccinefinder(location, derive_counties): - import_ref = "vf:{}".format(location["guid"]) - address_bits = [ - location[key] - for key in ("address1", "address2", "city", "state", "zip") - if location[key] - ] - info = { - "name": location["name"], - "full_address": ", ".join(address_bits), - "street_address": address_bits[0], - "city": location["city"], - "phone_number": location["phone"], - "zip_code": location["zip"], - "website": location.get("website"), - "state": location["state"], - "location_type": "Unknown", - "latitude": location["lat"], - "longitude": location["long"], - "import_ref": import_ref, - } - if derive_counties: - county = derive_county(info["latitude"], info["longitude"]) - if county: - info["county"] = county["county_name"] - return info - - -if __name__ == "__main__": - cli()