diff --git a/.github/workflows/code-format-check.yml b/.github/workflows/code-format-check.yml index 55444e27..a28a2d60 100644 --- a/.github/workflows/code-format-check.yml +++ b/.github/workflows/code-format-check.yml @@ -2,30 +2,19 @@ name: Code format check on: push -jobs: - test: - - runs-on: ubuntu-22.04 +concurrency: + group: ${{ github.workflow }}-${{ github.event.number || github.ref }} + cancel-in-progress: true +jobs: + black: + runs-on: ubuntu-24.04 steps: - - name: Checkout - uses: actions/checkout@v2 - - - name: Set up Python - uses: actions/setup-python@v2 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 # Python ≥3.11 required for use_pyproject with: - python-version: 3.11 - - - name: Install Poetry - run: | - curl -sSL https://install.python-poetry.org | python3 - - - - name: Install project - run: | - # We only need Black, and we want to use the project-specific version. - # (Defaults in Black change from time to time and we want only to deal with - # the consequences, if any, on our own schedule.) - poetry install --only dev - - - name: Run check - run: poetry run black . 
--check + python-version: "3.11" + - uses: psf/black@stable + with: + use_pyproject: true + options: "--check" \ No newline at end of file diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index ada82f67..a5ff75a1 100644 --- a/.github/workflows/pypi-publish.yml +++ b/.github/workflows/pypi-publish.yml @@ -17,7 +17,7 @@ jobs: - uses: actions/checkout@v2 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: '3.12' diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml index 2c33ff4f..8aba7004 100644 --- a/.github/workflows/python-ci.yml +++ b/.github/workflows/python-ci.yml @@ -1,6 +1,12 @@ name: Python CI -on: push +on: + pull_request: + branches: ["master"] + +concurrency: + group: ${{ github.workflow }}-${{ github.event.number || github.ref }} + cancel-in-progress: true jobs: test: @@ -13,13 +19,15 @@ jobs: - "3.10" - "3.11" - "3.12" + - "3.13" + - "3.14" steps: - name: Checkout uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} @@ -31,7 +39,7 @@ jobs: - name: Install Poetry run: | - curl -sSL https://install.python-poetry.org | python3 - + pipx install poetry==2.1.3 - name: Install project run: | diff --git a/crmprtd/__init__.py b/crmprtd/__init__.py index a3b7122b..1bc0722d 100644 --- a/crmprtd/__init__.py +++ b/crmprtd/__init__.py @@ -104,7 +104,7 @@ Row = namedtuple( "Row", - "time val variable_name unit network_name station_id lat lon", + "time val variable_name unit network_key station_id lat lon", ) diff --git a/crmprtd/align.py b/crmprtd/align.py index c5747aaa..7f7366dd 100644 --- a/crmprtd/align.py +++ b/crmprtd/align.py @@ -80,14 +80,14 @@ def memoize(sesh, *args, **kwargs): return wrapper -def histories_within_threshold(sesh, network_name, lon, lat, threshold): +def histories_within_threshold(sesh, 
network_key, lon, lat, threshold): """ Find existing histories associated with the given network and within a threshold distance of the point specified by (lon, lat). Return the history id and distance for each such history, as a list in ascending order of distance. :param sesh: SQLAlchemy db session - :param network_name: Name of network associated to history. + :param network_key: Key of network associated to history. :param lat: Lat for History :param lon: Lon for History :param threshold: Include only histories within this distance (m) from (lat, lon) @@ -109,7 +109,7 @@ def histories_within_threshold(sesh, network_name, lon, lat, threshold): .select_from(History) .join(Station, History.station_id == Station.id) .join(Network, Station.network_id == Network.id) - .filter(Network.name == network_name) + .filter(Network.key == network_key) .filter( History.the_geom.intersects(ST_Buffer(p_ref, threshold)), ) @@ -178,20 +178,20 @@ def find_active_history(histories): "Multiple active stations in db", extra={ "num_active_stns": len(matching_histories), - "network_name": matching_histories[0].network_name, + "network_key": matching_histories[0].network_key, }, ) return None def find_nearest_history( - sesh, network_name, native_id, lat, lon, histories, diagnostic=False + sesh, network_key, native_id, lat, lon, histories, diagnostic=False ): - close_histories = histories_within_threshold(sesh, network_name, lon, lat, 800) + close_histories = histories_within_threshold(sesh, network_key, lon, lat, 800) if len(close_histories) == 0: return create_station_and_history_entry( - sesh, network_name, native_id, lat, lon, diagnostic=diagnostic + sesh, network_key, native_id, lat, lon, diagnostic=diagnostic ) for close_history in close_histories: @@ -203,37 +203,37 @@ def find_nearest_history( return history -def match_history(sesh, network_name, native_id, lat, lon, histories, diagnostic=False): +def match_history(sesh, network_key, native_id, lat, lon, histories, 
diagnostic=False): if lat and lon: return find_nearest_history( - sesh, network_name, native_id, lat, lon, histories, diagnostic=diagnostic + sesh, network_key, native_id, lat, lon, histories, diagnostic=diagnostic ) else: return find_active_history(histories) def create_station_and_history_entry( - sesh, network_name, native_id, lat, lon, diagnostic=False + sesh, network_key, native_id, lat, lon, diagnostic=False ): """ Create a Station and an associated History object according to the arguments. :param sesh: SQLAlchemy db session - :param network_name: Name of network associated to Station. + :param network_key: Key of network associated to Station. :param native_id: Native id of Station. :param lat: Lat for History :param lon: Lon for History :param diagnostic: Boolean. In diagnostic mode? Not used! :return: None """ - network = sesh.query(Network).filter(Network.name == network_name).first() + network = sesh.query(Network).filter(Network.key == network_key).first() action = "Requires" if diagnostic else "Created" station = Station(native_id=native_id, network_id=network.id) log.info( f"{action} new station entry", - extra={"native_id": station.native_id, "network_name": network.name}, + extra={"native_id": station.native_id, "network_key": network.key}, ) history = History(station=station, lat=lat, lon=lon) @@ -241,7 +241,7 @@ def create_station_and_history_entry( f"{action} new history entry", extra={ "history": history.id, - "network_name": network_name, + "network_key": network.key, "native_id": station.native_id, "lat": lat, "lon": lon, @@ -251,7 +251,7 @@ def create_station_and_history_entry( if diagnostic: log.info( f"In diagnostic mode. 
Skipping insertion of new history entry for: " - f"network_name={network_name}, native_id={native_id}, lat={lat}, lon={lon}" + f"network_key={network.key}, native_id={native_id}, lat={lat}, lon={lon}" ) return None @@ -271,12 +271,12 @@ def create_station_and_history_entry( @cached_function(["unit", "id"]) -def get_variable(sesh, network_name, variable_name): +def get_variable(sesh, network_key, variable_name): """ Find (but not create) a Variable matching the arguments, if possible. :param sesh: SQLAlchemy db session - :param network_name: Name of network that Variable must be in. + :param network_key: Key of network that Variable must be in. :param variable_name: Name of Variable :return: Variable or None """ @@ -284,7 +284,7 @@ def get_variable(sesh, network_name, variable_name): variable = ( sesh.query(Variable) .join(Network) - .filter(and_(Network.name == network_name, Variable.name == variable_name)) + .filter(and_(Network.key == network_key, Variable.name == variable_name)) .first() ) return variable @@ -292,7 +292,7 @@ def get_variable(sesh, network_name, variable_name): @cached_function(["id"]) def find_or_create_matching_history_and_station( - sesh, network_name, native_id, lat, lon, diagnostic=False + sesh, network_key, native_id, lat, lon, diagnostic=False ): """ Find or create a History and associated Station record matching the arguments, @@ -303,7 +303,7 @@ def find_or_create_matching_history_and_station( In diagnostic mode, do not create any new records (History or Station). :param sesh: SQLAlchemy db session - :param network_name: Name of network that History must be in. + :param network_key: Key of network that History must be in. :param native_id: Native id of station that history must be associated with. :param lat: Lat for history record; either for spatial matching or creation - see below @@ -312,8 +312,7 @@ def find_or_create_matching_history_and_station( :param diagnostic: Boolean. In diagnostic mode? 
:return: History object or None - Search db for existing history records exactly matching network_name and station_id. - + Search db for existing history records exactly matching network_key and station_id. If no such history is found, create one (along with the necessary station record) and return it. In diagnostic mode, do not create new records. @@ -328,13 +327,13 @@ def find_or_create_matching_history_and_station( sesh.query(History) .join(Station) .join(Network) - .filter(and_(Network.name == network_name, Station.native_id == native_id)) + .filter(and_(Network.key == network_key, Station.native_id == native_id)) ) if histories.count() == 0: log.debug("Cound not find native_id %s", native_id) return create_station_and_history_entry( - sesh, network_name, native_id, lat, lon, diagnostic=diagnostic + sesh, network_key, native_id, lat, lon, diagnostic=diagnostic ) elif histories.count() == 1: log.debug("Found exactly one matching history_id") @@ -342,18 +341,18 @@ def find_or_create_matching_history_and_station( elif histories.count() >= 2: log.debug("Found multiple history entries. 
Searching for match.") return match_history( - sesh, network_name, native_id, lat, lon, histories, diagnostic=diagnostic + sesh, network_key, native_id, lat, lon, histories, diagnostic=diagnostic ) -@cached_function(["name"]) -def get_network(sesh, network_name): - return sesh.query(Network).filter(Network.name == network_name).first() +@cached_function(["key"]) +def get_network(sesh, network_key): + return sesh.query(Network).filter(Network.key == network_key).first() def has_required_information(row): return ( - row.network_name is not None + row.network_key is not None and row.time is not None and row.val is not None and row.variable_name is not None @@ -382,7 +381,7 @@ def align(sesh, row, diagnostic=False): log.debug( "Observation missing critical information", extra={ - "network_name": row.network_name, + "network_key": row.network_key, "time": row.time, "val": row.val, "variable_name": row.variable_name, @@ -391,17 +390,17 @@ def align(sesh, row, diagnostic=False): return None # Sanity check: specified network exists - if not get_network(sesh, row.network_name): + if not get_network(sesh, row.network_key): log.error( "Network does not exist in db", - extra={"network_name": row.network_name}, + extra={"network_key": row.network_key}, ) return None # Find or create a matching History record, if possible. history = find_or_create_matching_history_and_station( sesh, - row.network_name, + row.network_key, row.station_id, row.lat, row.lon, @@ -411,19 +410,19 @@ def align(sesh, row, diagnostic=False): log.warning( "Could not find history match", extra={ - "network_name": row.network_name, + "network_key": row.network_key, "native_id": row.station_id, }, ) return None # Find a matching Variable object, if possible. 
- variable = get_variable(sesh, row.network_name, row.variable_name) + variable = get_variable(sesh, row.network_key, row.variable_name) if not variable: log.debug( 'Variable "%s" from network "%s" is not tracked by crmp', row.variable_name, - row.network_name, + row.network_key, ) return None else: @@ -439,7 +438,7 @@ def align(sesh, row, diagnostic=False): "unit_obs": row.unit, "unit_db": var_unit, "data": row.val, - "network_name": row.network_name, + "network_key": row.network_key, }, ) return None diff --git a/crmprtd/bulk_pipeline.py b/crmprtd/bulk_pipeline.py index 7d920fc9..88a78acc 100755 --- a/crmprtd/bulk_pipeline.py +++ b/crmprtd/bulk_pipeline.py @@ -36,7 +36,7 @@ def process(current_time, opts, args): cache_filename = default_cache_filename( timestamp=current_time, - network_name=opts.network, + network_key=opts.network, tag=opts.tag, frequency=opts.frequency if opts.network == "ec" else None, province=opts.province if opts.network == "ec" else None, diff --git a/crmprtd/download_cache_process.py b/crmprtd/download_cache_process.py index 8fada45e..3e80bb95 100644 --- a/crmprtd/download_cache_process.py +++ b/crmprtd/download_cache_process.py @@ -27,13 +27,13 @@ log = logging.getLogger(__name__) -def check_network_name(network_name): - if network_name not in NETWORKS: - raise ValueError(f"Network name '{network_name}' is not recognized.") +def check_network_key(network_key): + if network_key not in NETWORKS: + raise ValueError(f"Network key '{network_key}' is not recognized.") def default_log_filename( - network_name: Optional[str] = None, + network_key: Optional[str] = None, tag: Optional[str] = None, frequency: Optional[str] = None, province: Optional[str] = None, @@ -43,21 +43,21 @@ def default_log_filename( with the network. Its pattern is similar, but not identical, to that of the cache filename. - :param network_name: Network name to include in filename. + :param network_key: Network key to include in filename. 
:param tag: Tag to include in filename. :param frequency: Download frequency to include in filename (network ec only). :param province: Province code (2 letters) to include in filename (network ec only). :param _: Ignored additional kw args. :return: Filename. """ - check_network_name(network_name) - filepath = f"~/{network_name}/logs" + check_network_key(network_key) + filepath = f"~/{network_key}/logs" tag_prefix = f"{tag}_" if tag is not None else "" - if network_name in ("ec",): + if network_key in ("ec",): assert frequency is not None assert province is not None return f"{filepath}/{tag_prefix}{province.lower()}_{frequency}_json.log" - return f"{filepath}/{tag_prefix}{network_name}_json.log" + return f"{filepath}/{tag_prefix}{network_key}_json.log" def the_log_filename(log_filename: Optional[str] = None, **kwargs): @@ -67,7 +67,7 @@ def the_log_filename(log_filename: Optional[str] = None, **kwargs): def default_cache_filename( timestamp: datetime.datetime = datetime.datetime.now(), timestamp_format: str = "%Y-%m-%dT%H:%M:%S", - network_name: Optional[str] = None, + network_key: Optional[str] = None, tag: Optional[str] = None, frequency: Optional[str] = None, province: Optional[str] = None, @@ -79,22 +79,22 @@ def default_cache_filename( :param timestamp: Timestamp to include in filename. :param timestamp_format: Format for converting timestamp to string. - :param network_name: Network name to include in filename. + :param network_key: Network key to include in filename. :param tag: Tag to include in filename. :param frequency: Download frequency to include in filename (network ec only). :param province: Province code (2 letters) to include in filename (network ec only). :param _: Ignored additional kw args. :return: Filename. 
""" - check_network_name(network_name) + check_network_key(network_key) ts = timestamp.strftime(timestamp_format) - filepath = f"~/{network_name}/cache" + filepath = f"~/{network_key}/cache" tag_prefix = f"{tag}_" if tag is not None else "" - if network_name in ("ec",): + if network_key in ("ec",): assert frequency is not None assert province is not None return f"{filepath}/{tag_prefix}{frequency}_{province.lower()}_{ts}.xml" - if network_name in ( + if network_key in ( "bc_env_snow", "bc_forestry", "bc_riotinto", @@ -107,8 +107,8 @@ def default_cache_filename( "yt_avalanche", "yt_firewx", ): - return f"{filepath}/{tag_prefix}{network_name}_{ts}.xml" - return f"{filepath}/{tag_prefix}{network_name}_{ts}.txt" + return f"{filepath}/{tag_prefix}{network_key}_{ts}.xml" + return f"{filepath}/{tag_prefix}{network_key}_{ts}.txt" def the_cache_filename(cache_filename: Optional[str] = None, **kwargs) -> str: @@ -136,7 +136,7 @@ def to_utc(d: datetime.datetime, tz_string: str = "Canada/Pacific"): def download_args( - network_name: Optional[str] = None, + network_key: Optional[str] = None, frequency: Optional[str] = None, province: Optional[str] = None, time: Optional[datetime.datetime] = None, # TODO: use now()? @@ -148,27 +148,26 @@ def download_args( :param time: :param start_time: - :param network_name: Network name. + :param network_key: Network key. :param _: Remainder args. Passed through. :return: List of download args. """ - check_network_name(network_name) - - common_args = f"-N {network_name} ".split() + check_network_key(network_key) + common_args = f"-N {network_key} ".split() net_args = None # Set net_args per network. 
- if network_name == "_test": + if network_key == "_test": net_args = [] - if network_name == "bc_hydro": + if network_key == "bc_hydro": net_args = "-f sftp2.bchydro.com -F pcic -S ~/.ssh/id_rsa".split() - if network_name == "crd": - net_args = f"--auth_fname ~/.rtd_auth.yaml --auth_key={network_name}".split() - if network_name == "ec": + if network_key == "crd": + net_args = f"--auth_fname ~/.rtd_auth.yaml --auth_key={network_key}".split() + if network_key == "ec": if province is None or frequency is None: raise ValueError("EC network requires both province and frequency") net_args = f"-p {province.lower()} -F {frequency}".split() - if network_name in ( + if network_key in ( "bc_env_snow", "bc_forestry", "bc_riotinto", @@ -182,15 +181,15 @@ def download_args( "yt_firewx", ): if time is None: - raise ValueError(f"Network {network_name} requires a time parameter") + raise ValueError(f"Network {network_key} requires a time parameter") ts = to_utc(time).strftime("%Y/%m/%d %H:00:00") net_args = ["-d", f'"{ts}"'] - if network_name == "moti": - net_args = f"-u https://prdoas5.apps.th.gov.bc.ca/saw-data/sawr7110 --auth_fname ~/.rtd_auth.yaml --auth_key={network_name}".split() - if network_name == "wamr": + if network_key == "moti": + net_args = f"-u https://prdoas5.apps.th.gov.bc.ca/saw-data/sawr7110 --auth_fname ~/.rtd_auth.yaml --auth_key={network_key}".split() + if network_key == "wamr": net_args = [] - if network_name == "wmb": - net_args = f"--auth_fname ~/.rtd_auth.yaml --auth_key={network_name}".split() + if network_key == "wmb": + net_args = f"--auth_fname ~/.rtd_auth.yaml --auth_key={network_key}".split() assert net_args is not None return common_args + net_args @@ -218,13 +217,13 @@ def dispatch_network( :param dry_run: If true, print commands but don't run them. :return: None. Side effect: Download and process network specified in args. 
""" - network_name = kwargs["network_name"] - check_network_name(network_name) + network_key = kwargs["network_key"] + check_network_key(network_key) - if network_name == "_test": + if network_key == "_test": # Use custom time if provided, otherwise no time parameter download_and_process( - network_name=network_name, + network_key=network_key, log_args=log_args(**kwargs), download_args=download_args(time=time, **kwargs), cache_filename=the_cache_filename( @@ -235,7 +234,7 @@ def dispatch_network( connection_string=connection_string, dry_run=dry_run, ) - elif network_name == "ec": + elif network_key == "ec": provinces = kwargs.pop("province") if not provinces or len(provinces) == 0: @@ -244,7 +243,7 @@ def dispatch_network( # Use custom time if provided, otherwise let EC network use its defaults for province in provinces: download_and_process( - network_name=network_name, + network_key=network_key, log_args=log_args(**kwargs, province=province), download_args=download_args(time=time, province=province, **kwargs), cache_filename=the_cache_filename( @@ -256,7 +255,7 @@ def dispatch_network( connection_string=connection_string, dry_run=dry_run, ) - elif network_name in ( + elif network_key in ( "bc_env_snow", "bc_forestry", "bc_riotinto", @@ -274,7 +273,7 @@ def dispatch_network( time if time else (datetime.datetime.now() - datetime.timedelta(hours=1)) ) download_and_process( - network_name=network_name, + network_key=network_key, log_args=log_args(**kwargs), download_args=download_args(**kwargs, time=use_time), cache_filename=the_cache_filename( @@ -287,7 +286,7 @@ def dispatch_network( # Use custom time if provided, otherwise default to "now" use_time = time if time else datetime.datetime.now() download_and_process( - network_name=network_name, + network_key=network_key, log_args=log_args(**kwargs), download_args=download_args(**kwargs, time=use_time), cache_filename=the_cache_filename( @@ -307,8 +306,8 @@ def dispatch_network_alias(network_alias: Optional[str] = 
None, **kwargs) -> Non :return: None. """ assert network_alias in network_alias_names - for network_name in alias_to_networks(network_alias): - dispatch_network(network_name=network_name, **kwargs) + for network_key in alias_to_networks(network_alias): + dispatch_network(network_key=network_key, **kwargs) def dispatch(network: Optional[str] = None, **kwargs) -> None: @@ -323,7 +322,7 @@ def dispatch(network: Optional[str] = None, **kwargs) -> None: if network in network_alias_names: dispatch_network_alias(network_alias=network, **kwargs) elif network in NETWORKS: - dispatch_network(network_name=network, **kwargs) + dispatch_network(network_key=network, **kwargs) else: raise ValueError( f"Network argument '{network}' is not a valid network name or alias" diff --git a/crmprtd/execution/crmprtd_inserts.sh b/crmprtd/execution/crmprtd_inserts.sh index 54eb9e62..807823b2 100755 --- a/crmprtd/execution/crmprtd_inserts.sh +++ b/crmprtd/execution/crmprtd_inserts.sh @@ -2,5 +2,5 @@ for network in 'EC_raw' 'FLNRO-WMB' 'MoTIe' 'ENV-AQN' 'FLNRO-FERN' 'BCH'; do echo "Insertions into ${network} yesterday:"; - psql -h monsoon -U crmprtd -d crmp -c "select count(*) from obs_raw natural join meta_history natural join meta_station natural join meta_network where network_name = '${network}' and obs_time between DATE 'yesterday' and DATE 'today';"; + psql -h monsoon -U crmprtd -d crmp -c "select count(*) from obs_raw natural join meta_history natural join meta_station natural join meta_network where network_key = lower(replace('${network}', '-', '_')) and obs_time between DATE 'yesterday' and DATE 'today';"; done; diff --git a/crmprtd/infer.py b/crmprtd/infer.py index e17ea861..d9bf36d8 100644 --- a/crmprtd/infer.py +++ b/crmprtd/infer.py @@ -41,7 +41,7 @@ def create_variable( sesh, - network_name, + network_key, variable_name, unit, standard_name=None, @@ -49,7 +49,7 @@ def create_variable( cell_method=None, ): with sesh.no_autoflush: - network = sesh.query(Network).filter(Network.name == network_name).one() + 
network = sesh.query(Network).filter(Network.key == network_key).one() return Variable( network=network, @@ -80,7 +80,7 @@ def infer(sesh, rows, diagnostic=False): # Reduce observations to unique set of tuples describing required histories # and stations. hists_to_create = { - (row.network_name, row.station_id, row.lat, row.lon) for row in rows + (row.network_key, row.station_id, row.lat, row.lon) for row in rows } # Find or create matching histories and stations. @@ -94,22 +94,22 @@ def infer(sesh, rows, diagnostic=False): ] # Reduce observations to unique set of tuples describing required variables - vars_to_create = {(row.network_name, row.variable_name, row.unit) for row in rows} + vars_to_create = {(row.network_key, row.variable_name, row.unit) for row in rows} # Construct required variables. They are never committed to the database. with sesh.begin_nested() as nested: variables = [ create_variable( sesh, - network_name, + network_key, var_name, unit, standard_name="requires_human_intervention", display_name="requires_human_intervention", cell_method="requires_human_intervention", ) - for network_name, var_name, unit in vars_to_create - if not get_variable(sesh, network_name, var_name) + for network_key, var_name, unit in vars_to_create + if not get_variable(sesh, network_key, var_name) ] for var in variables: diff --git a/crmprtd/infill.py b/crmprtd/infill.py index a54d94db..f815c4bd 100644 --- a/crmprtd/infill.py +++ b/crmprtd/infill.py @@ -51,7 +51,7 @@ def chain_subprocesses( def download_and_process( - network_name: str, + network_key: str, log_args: List[str], download_args: List[str], cache_filename: Optional[str] = None, @@ -65,7 +65,7 @@ def download_and_process( processed if the process step is requested). The process step is optionally performed (using script `crmprtd_process`). - :param network_name: Name of network to download. + :param network_key: Key of network to download. :param log_args: Configuration for application logging. 
:param download_args: Args to be passed to crmprtd_download. :param cache_filename: Name of file in which to cache downloaded data. If this @@ -84,13 +84,13 @@ def download_and_process( if not do_download: logger.warning( - f"Network {network_name}: Data is to be neither cached nor processed. " + f"Network {network_key}: Data is to be neither cached nor processed. " f"Nothing to do." ) if not do_process: logger.info( - f"Network {network_name}: Data is to be downloaded but not processed." + f"Network {network_key}: Data is to be downloaded but not processed." ) # Build up commands to be chained in this list @@ -114,7 +114,7 @@ def add_command(command): [ "crmprtd_process", "-N", - network_name, + network_key, "-c", connection_string, ] @@ -183,7 +183,7 @@ def infill( "crd", ] download_and_process( - network_name="crd", + network_key="crd", log_args=log_args, download_args=dl_args, connection_string=connection_string, @@ -207,7 +207,7 @@ def infill( time.strftime(time_fmt), ] download_and_process( - network_name="ec", + network_key="ec", log_args=log_args, download_args=dl_args, connection_string=connection_string, @@ -245,7 +245,7 @@ def infill( station, ] download_and_process( - network_name="moti", + network_key="moti", log_args=log_args, download_args=dl_args, connection_string=connection_string, @@ -273,7 +273,7 @@ def infill( logger.warning(warning_msg["not_contained"].format("WAMR", "one_month")) dl_args = [] download_and_process( - network_name="wamr", + network_key="wamr", log_args=log_args, download_args=dl_args, connection_string=connection_string, @@ -293,7 +293,7 @@ def infill( # Run it dl_args = ["--auth_fname", auth_fname, "--auth_key", "wmb"] download_and_process( - network_name="wmb", + network_key="wmb", log_args=log_args, download_args=dl_args, connection_string=connection_string, @@ -307,7 +307,7 @@ def infill( hour = hour.astimezone(pytz.utc) # EC files are named in UTC dl_args = ["-d", hour.strftime(time_fmt)] download_and_process( - 
network_name=partner, + network_key=partner, log_args=log_args, download_args=dl_args, connection_string=connection_string, diff --git a/crmprtd/networks/bc_forestry/normalize.py b/crmprtd/networks/bc_forestry/normalize.py index 4a435be8..ffa67975 100644 --- a/crmprtd/networks/bc_forestry/normalize.py +++ b/crmprtd/networks/bc_forestry/normalize.py @@ -2,4 +2,4 @@ def normalize(file_stream): - yield from normalize_swob(file_stream, "FLNRO-WMB", station_id_attr="stn_id") + yield from normalize_swob(file_stream, "flnro_wmb", station_id_attr="stn_id") diff --git a/crmprtd/networks/bc_hydro/normalize.py b/crmprtd/networks/bc_hydro/normalize.py index f305c4fd..c8be6ff9 100644 --- a/crmprtd/networks/bc_hydro/normalize.py +++ b/crmprtd/networks/bc_hydro/normalize.py @@ -86,7 +86,7 @@ def normalize(file_stream): time=obs_time, val=value, variable_name=varname, - network_name="BCH", + network_key="bch", station_id=stn_id, unit=None, lat=None, @@ -105,7 +105,7 @@ def normalize(file_stream): stream = open(sys.argv[1], "rb") setup_logging( - "/home/james/code/git/crmprtd/logging.yaml", + "/home/james/code/git/crmprtd/logging.yaml", # TODO: remove references to specific installs "./bch_normalize.log", "nobody@example.com", logging.INFO, diff --git a/crmprtd/networks/crd/normalize.py b/crmprtd/networks/crd/normalize.py index 51cc09de..d17679e5 100644 --- a/crmprtd/networks/crd/normalize.py +++ b/crmprtd/networks/crd/normalize.py @@ -44,7 +44,7 @@ def normalize(stream): val=val, variable_name=var_name, unit=units[f"{var_name}Unit"], - network_name="CRD", + network_key="crd", station_id=record["StationName"], lat=None, lon=None, diff --git a/crmprtd/networks/ec/normalize.py b/crmprtd/networks/ec/normalize.py index d7a0e228..e46cece1 100644 --- a/crmprtd/networks/ec/normalize.py +++ b/crmprtd/networks/ec/normalize.py @@ -1,7 +1,10 @@ from crmprtd.swob_ml import normalize as swob_ml_normalize + +# TODO: Potentially check and store ECCC data flags for quality control of data + def 
normalize(file_stream): return swob_ml_normalize( - file_stream, "EC_raw", station_id_attr="climate_station_number" + file_stream, "ec_raw", station_id_attr="climate_station_number" ) diff --git a/crmprtd/networks/moti/normalize.py b/crmprtd/networks/moti/normalize.py index dd23a764..31d9a6ca 100644 --- a/crmprtd/networks/moti/normalize.py +++ b/crmprtd/networks/moti/normalize.py @@ -93,7 +93,7 @@ def normalize(file_stream): val=value, variable_name=variable_name, unit=value_element.get("units"), - network_name="MoTIe", + network_key="motie", station_id=stn_id, lat=None, lon=None, diff --git a/crmprtd/networks/wamr/normalize.py b/crmprtd/networks/wamr/normalize.py index 7d74be94..b89f2cbe 100644 --- a/crmprtd/networks/wamr/normalize.py +++ b/crmprtd/networks/wamr/normalize.py @@ -118,17 +118,17 @@ def normalize(file_stream): if reported_station_id in station_substitutions: station_id = station_substitutions[reported_station_id] - network_name = "MVan" + network_key = "mvan" else: station_id = reported_station_id - network_name = "ENV-AQN" + network_key = "env_aqn" yield Row( time=dt, val=value, variable_name=variable_name, unit=unit, - network_name=network_name, + network_key=network_key, station_id=station_id, lat=lat, lon=lon, diff --git a/crmprtd/networks/wmb/normalize.py b/crmprtd/networks/wmb/normalize.py index 6f15eb9a..a29eeed7 100644 --- a/crmprtd/networks/wmb/normalize.py +++ b/crmprtd/networks/wmb/normalize.py @@ -66,7 +66,7 @@ def clean_row(row): val=value, variable_name=var_name, unit=None, - network_name="FLNRO-WMB", + network_key="flnro_wmb", station_id=station_id, lat=None, lon=None, diff --git a/crmprtd/swob_ml.py b/crmprtd/swob_ml.py index c4551a0a..e5c829eb 100644 --- a/crmprtd/swob_ml.py +++ b/crmprtd/swob_ml.py @@ -32,26 +32,26 @@ def identity(x): def normalize( file_stream, - network_name, + network_key, station_id_attr="climate_station_number", station_id_xform=identity, ): for xml_file in split_multi_xml_stream(file_stream): yield from 
normalize_xml( - xml_file, network_name, station_id_attr, station_id_xform + xml_file, network_key, station_id_attr, station_id_xform ) def normalize_xml( file_stream, - network_name, + network_key, station_id_attr="climate_station_number", station_id_xform=identity, ): et = parse_xml(file_stream) members = et.xpath("//om:member", namespaces=ns) - log.info("Starting %s data normalization", network_name) + log.info("Starting %s data normalization", network_key) for member in members: om = OmMember(member) @@ -122,7 +122,7 @@ def normalize_xml( val=val, variable_name=var, unit=om.member_unit(var), - network_name=network_name, + network_key=network_key, station_id=station_id, lat=lat, lon=lon, diff --git a/crmprtd/tests/conftest.py b/crmprtd/tests/conftest.py index 061b565a..1e2dd18e 100644 --- a/crmprtd/tests/conftest.py +++ b/crmprtd/tests/conftest.py @@ -80,10 +80,10 @@ def test_session(crmp_session, caplog): """ caplog.set_level(logging.ERROR, logger="sqlalchemy.engine") - moti = Network(name="MoTIe") - ec = Network(name="EC_raw") - wmb = Network(name="FLNRO-WMB") - wamr = Network(name="ENV-AQN") + moti = Network(name="MoTIe", key=Network.gen_key_from_name("MoTIe")) + ec = Network(name="EC_raw", key=Network.gen_key_from_name("EC_raw")) + wmb = Network(name="FLNRO-WMB", key=Network.gen_key_from_name("FLNRO-WMB")) + wamr = Network(name="ENV-AQN", key=Network.gen_key_from_name("ENV-AQN")) crmp_session.add_all([moti, ec, wmb, wamr]) simon = Contact(name="Simon", networks=[moti]) @@ -222,7 +222,7 @@ def ec_session(crmp_session, caplog): """ caplog.set_level(logging.ERROR, logger="sqlalchemy.engine") - ec = Network(name="EC_raw") + ec = Network(name="EC_raw", key=Network.gen_key_from_name("EC_raw")) crmp_session.add(ec) pat = Contact(name="Pat", networks=[ec]) diff --git a/crmprtd/tests/test_align.py b/crmprtd/tests/test_align.py index 42a8cf70..8897520b 100644 --- a/crmprtd/tests/test_align.py +++ b/crmprtd/tests/test_align.py @@ -16,11 +16,10 @@ 
@pytest.mark.parametrize( - ("network_name", "expected"), [("FLNRO-WMB", True), ("WMB", False)] + ("network_key", "expected"), [("flnro_wmb", True), ("wmb", False)] ) -def test_is_network(test_session, network_name, expected): - assert bool(get_network(test_session, network_name)) == expected - +def test_is_network(test_session, network_key, expected): + assert bool(get_network(test_session, network_key)) == expected def test_get_history_with_no_matches(test_session): # this observation will not match any in test session @@ -29,7 +28,7 @@ def test_get_history_with_no_matches(test_session): val=123, variable_name="relative_humidity", unit="percent", - network_name="FLNRO-WMB", + network_key="flnro_wmb", station_id="666", lat=None, lon=None, @@ -37,7 +36,7 @@ def test_get_history_with_no_matches(test_session): history = find_or_create_matching_history_and_station( test_session, - obs_tuple.network_name, + obs_tuple.network_key, obs_tuple.station_id, obs_tuple.lat, obs_tuple.lon, @@ -57,7 +56,7 @@ def test_get_history_with_single_match(test_session): val=123, variable_name="relative_humidity", unit="percent", - network_name="MoTIe", + network_key="motie", station_id="11091", lat=None, lon=None, @@ -65,7 +64,7 @@ def test_get_history_with_single_match(test_session): history = find_or_create_matching_history_and_station( test_session, - obs_tuple.network_name, + obs_tuple.network_key, obs_tuple.station_id, obs_tuple.lat, obs_tuple.lon, @@ -85,7 +84,7 @@ def test_get_history_with_multiple_matches_and_location(test_session): val=123, variable_name="relative_humidity", unit="percent", - network_name="EC_raw", + network_key="ec_raw", station_id="1047172", lat=49.45, lon=-123.7, @@ -93,7 +92,7 @@ def test_get_history_with_multiple_matches_and_location(test_session): history = find_or_create_matching_history_and_station( test_session, - obs_tuple.network_name, + obs_tuple.network_key, obs_tuple.station_id, obs_tuple.lat, obs_tuple.lon, @@ -107,7 +106,7 @@ def 
test_get_history_with_multiple_matches_and_no_location(test_session): val=123, variable_name="relative_humidity", unit="percent", - network_name="EC_raw", + network_key="ec_raw", station_id="1047172", lat=None, lon=None, @@ -115,7 +114,7 @@ def test_get_history_with_multiple_matches_and_no_location(test_session): history = find_or_create_matching_history_and_station( test_session, - obs_tuple.network_name, + obs_tuple.network_key, obs_tuple.station_id, obs_tuple.lat, obs_tuple.lon, @@ -124,24 +123,24 @@ def test_get_history_with_multiple_matches_and_no_location(test_session): def test_get_variable(test_session): - variable = get_variable(test_session, "FLNRO-WMB", "relative_humidity") + variable = get_variable(test_session, "flnro_wmb", "relative_humidity") assert variable.id == 3 def test_get_variable_no_match(test_session): - variable = get_variable(test_session, "FLNRO-WMB", "humidity") + variable = get_variable(test_session, "flnro_wmb", "humidity") assert variable is None @pytest.mark.parametrize( - ("network_name", "variable_name", "unit", "val", "expected"), + ("network_key", "variable_name", "unit", "val", "expected"), [ - ("FLNRO-WMB", "relative_humidity", "percent", 10, 10), - ("EC_raw", "precipitation", "cm", 10, 100), + ("flnro_wmb", "relative_humidity", "percent", 10, 10), + ("ec_raw", "precipitation", "cm", 10, 100), ], ) -def test_unit_check(test_session, network_name, variable_name, unit, val, expected): - variable = get_variable(test_session, network_name, variable_name) +def test_unit_check(test_session, network_key, variable_name, unit, val, expected): + variable = get_variable(test_session, network_key, variable_name) check_val = convert_obs_value_to_db_units(val, unit, variable.unit) assert check_val == expected @@ -156,7 +155,7 @@ def test_unit_check(test_session, network_name, variable_name, unit, val, expect val=123, variable_name="precipitation", unit="mm", - network_name="EC_raw", + network_key="ec_raw", station_id="1047172", lat=None, 
lon=None, @@ -173,7 +172,7 @@ def test_unit_check(test_session, network_name, variable_name, unit, val, expect val=10, variable_name="precipitation", unit="cm", - network_name="EC_raw", + network_key="ec_raw", station_id="1047172", lat=49.45, lon=-123.7, @@ -190,7 +189,7 @@ def test_unit_check(test_session, network_name, variable_name, unit, val, expect val=10, variable_name="CURRENT_AIR_TEMPERATURE1", unit="celsius", - network_name="MoTIe", + network_key="motie", station_id="11091", lat=None, lon=None, @@ -207,7 +206,7 @@ def test_unit_check(test_session, network_name, variable_name, unit, val, expect val=10, variable_name="CURRENT_AIR_TEMPERATURE1", unit="celsius", - network_name="MoTIe", + network_key="motie", station_id="666", lat=49, lon=-121, @@ -224,7 +223,7 @@ def test_unit_check(test_session, network_name, variable_name, unit, val, expect val=123, variable_name="precipitation", unit="mm", - network_name="EC_raw", + network_key="ec_raw", station_id="1047172", lat=51, lon=-128, @@ -256,7 +255,7 @@ def test_align_successes( val=10, variable_name="CURRENT_AIR_TEMPERATURE1", unit="not_a_unit", - network_name="MoTIe", + network_key="motie", station_id="11091", lat=None, lon=None, @@ -269,7 +268,7 @@ def test_align_successes( val=10, variable_name="no_unit", unit=None, - network_name="ENV-AQN", + network_key="env_aqn", station_id="0260011", lat=None, lon=None, @@ -282,7 +281,7 @@ def test_align_successes( val=10, variable_name="not_a_var", unit="celsius", - network_name="MoTIe", + network_key="motie", station_id="11091", lat=None, lon=None, @@ -295,7 +294,7 @@ def test_align_successes( val=10, variable_name="CURRENT_AIR_TEMPERATURE1", unit="celsius", - network_name="not_a_network", + network_key="not_a_network", station_id="11091", lat=None, lon=None, @@ -308,7 +307,7 @@ def test_align_successes( val=None, variable_name=None, unit="celsius", - network_name="MoTIe", + network_key="motie", station_id="11091", lat=None, lon=None, @@ -321,7 +320,7 @@ def 
test_align_successes( val=15, variable_name="relative_humidity", unit="percent", - network_name="FLNRO-WMB", + network_key="flnro_wmb", station_id="1029", lat=None, lon=None, @@ -335,7 +334,7 @@ def test_align_failures(test_session, obs_tuple): def test_closest_stns_within_threshold(ec_session): - x = histories_within_threshold(ec_session, "EC_raw", -123.7, 49.45, 1000) + x = histories_within_threshold(ec_session, "ec_raw", -123.7, 49.45, 1000) assert len(x) > 0 @@ -356,7 +355,7 @@ def test_closest_stns_within_threshold_bad_data(ec_session): ec_session.commit() # Just search for the good station and ensure there are not errors - x = histories_within_threshold(ec_session, "EC_raw", x, y, 1) + x = histories_within_threshold(ec_session, "ec_raw", x, y, 1) assert len(x) > 0 diff --git a/crmprtd/tests/test_download_cache_process.py b/crmprtd/tests/test_download_cache_process.py index 081f31a5..84023846 100644 --- a/crmprtd/tests/test_download_cache_process.py +++ b/crmprtd/tests/test_download_cache_process.py @@ -10,7 +10,7 @@ @pytest.mark.parametrize( - "network_name, tag, frequency, province, expected", + "network_key, tag, frequency, province, expected", [ # A representative sample, copied from all present execution scripts. # Some changes due to simplification of filename pattern. 
@@ -38,10 +38,10 @@ ("nt_forestry", None, None, None, "~/nt_forestry/logs/nt_forestry_json.log"), ], ) -def test_default_log_filename(network_name, tag, frequency, province, expected): +def test_default_log_filename(network_key, tag, frequency, province, expected): assert ( default_log_filename( - network_name=network_name, + network_key=network_key, tag=tag, frequency=frequency, province=province, @@ -51,7 +51,7 @@ def test_default_log_filename(network_name, tag, frequency, province, expected): @pytest.mark.parametrize( - "network_name, tag, frequency, province, expected", + "network_key, tag, frequency, province, expected", [ # A representative sample, copied from all present execution scripts. # Some changes due to simplification of filename pattern. @@ -90,11 +90,11 @@ def test_default_log_filename(network_name, tag, frequency, province, expected): ), ], ) -def test_default_cache_filename(network_name, tag, frequency, province, expected): +def test_default_cache_filename(network_key, tag, frequency, province, expected): assert ( default_cache_filename( timestamp=datetime.datetime(2020, 1, 2, 3, 4, 5), - network_name=network_name, + network_key=network_key, tag=tag, frequency=frequency, province=province, @@ -104,7 +104,7 @@ def test_default_cache_filename(network_name, tag, frequency, province, expected @pytest.mark.parametrize( - "network_name, frequency, province, time, expected", + "network_key, frequency, province, time, expected", [ # A representative sample, covering all present execution scripts. 
( @@ -170,11 +170,11 @@ def test_default_cache_filename(network_name, tag, frequency, province, expected ), ], ) -def test_download_args(network_name, frequency, province, time, expected): +def test_download_args(network_key, frequency, province, time, expected): assert ( download_args( time=time, - network_name=network_name, + network_key=network_key, tag="tag", frequency=frequency, province=province, diff --git a/crmprtd/tests/test_ec_normalize.py b/crmprtd/tests/test_ec_normalize.py index 57e900e4..811192c2 100644 --- a/crmprtd/tests/test_ec_normalize.py +++ b/crmprtd/tests/test_ec_normalize.py @@ -77,7 +77,7 @@ def test_normalize_good_data(): assert row.time is not None assert row.variable_name is not None assert row.val is not None - assert row.network_name is not None + assert row.network_key is not None def test_normalize_no_station_id(): diff --git a/crmprtd/tests/test_fixture.py b/crmprtd/tests/test_fixture.py index 052d66f7..6f48e0f8 100644 --- a/crmprtd/tests/test_fixture.py +++ b/crmprtd/tests/test_fixture.py @@ -10,9 +10,9 @@ def test_can_instantiate(test_session): def test_db_has_data(test_session): q = test_session.query(Contact.name) assert set([rv[0] for rv in q.all()]) == set(["Simon", "Pat", "Eric"]) - q = test_session.query(Network.name) + q = test_session.query(Network.key) assert set([rv[0] for rv in q.all()]) == set( - ["MoTIe", "EC_raw", "FLNRO-WMB", "ENV-AQN"] + ["motie", "ec_raw", "flnro_wmb", "env_aqn"] ) diff --git a/crmprtd/tests/test_infer.py b/crmprtd/tests/test_infer.py index b233bce8..f8e20461 100644 --- a/crmprtd/tests/test_infer.py +++ b/crmprtd/tests/test_infer.py @@ -44,11 +44,11 @@ def make_test_rows( All items are associated to the same network. 
""" - session.add(Network(name=network_name)) + session.add(Network(name=network_name, key=Network.gen_key_from_name(network_name))) station_ids = tuple(f"{stn_id_prefix}_{i}" for i in range(stn_count)) variable_names = tuple(f"{var_name_prefix}_{i}" for i in range(var_count)) return tuple( - Row(time, val, variable_name, unit, network_name, station_id, 40, -120) + Row(time, val, variable_name, unit, Network.gen_key_from_name(network_name), station_id, 40, -120) for station_id in station_ids for variable_name in variable_names ) diff --git a/crmprtd/tests/test_infill.py b/crmprtd/tests/test_infill.py index bcef4f03..ceabcb4c 100644 --- a/crmprtd/tests/test_infill.py +++ b/crmprtd/tests/test_infill.py @@ -56,7 +56,7 @@ def test_chain_subprocesses(commands, final_destination, mocker): @pytest.mark.parametrize( - "network_name, cache_filename, connection_string, expected_commands", + "network_key, cache_filename, connection_string, expected_commands", [ # Do nothing ("netwerk", None, None, []), @@ -69,11 +69,11 @@ def test_chain_subprocesses(commands, final_destination, mocker): ], ) def test_download_and_process_choreography( - network_name, cache_filename, connection_string, expected_commands, mocker + network_key, cache_filename, connection_string, expected_commands, mocker ): mocker.patch("crmprtd.infill.chain_subprocesses", return_value=True) download_and_process( - network_name=network_name, + network_key=network_key, log_args=["--log_args"], download_args=["--download_args"], cache_filename=cache_filename, diff --git a/crmprtd/tests/test_moti_normalize.py b/crmprtd/tests/test_moti_normalize.py index 3e832138..5ef2c676 100644 --- a/crmprtd/tests/test_moti_normalize.py +++ b/crmprtd/tests/test_moti_normalize.py @@ -51,7 +51,7 @@ def test_normalize_good_data(): assert len(rows) == 4 for row in rows: assert row.station_id == "11091" - assert row.network_name == "MoTIe" + assert row.network_key == "motie" def test_normalize_missing_stn_indexerror(): diff --git 
a/crmprtd/tests/test_process.py b/crmprtd/tests/test_process.py index 0f07e5c5..0310eda3 100644 --- a/crmprtd/tests/test_process.py +++ b/crmprtd/tests/test_process.py @@ -101,7 +101,7 @@ def test_process_by_date( end_date = utc.localize(verify_date(end_date, datetime.max, "end date")) # Insert the network required in the test data - network = Network(name="FLNRO-WMB") + network = Network(name="FLNRO-WMB", key=Network.gen_key_from_name("FLNRO-WMB")) crmp_session.add(network) # Insert the variables required in the test data. diff --git a/crmprtd/tests/test_wamr_normalize.py b/crmprtd/tests/test_wamr_normalize.py index 9bf3aed8..5d4cf53c 100644 --- a/crmprtd/tests/test_wamr_normalize.py +++ b/crmprtd/tests/test_wamr_normalize.py @@ -70,4 +70,4 @@ def test_substitutions(): 2021-05-25 15:15:00,M110514,Does not matter,HUMIDITY,HUMIDITY,HUMIDITY,11.68,% RH,1,n/a,Data Ok,11.7 """ # noq row = next(normalize(BytesIO(lines))) - assert row.network_name == "MVan" + assert row.network_key == "mvan" diff --git a/crmprtd/tests/test_wmb_normalize.py b/crmprtd/tests/test_wmb_normalize.py index 5180d566..b45dcd0a 100644 --- a/crmprtd/tests/test_wmb_normalize.py +++ b/crmprtd/tests/test_wmb_normalize.py @@ -17,7 +17,7 @@ def test_normalize_good_data(): assert row.time == tz.localize(datetime.strptime("2018052710", "%Y%m%d%H")) assert row.variable_name is not None assert row.val is not None - assert row.network_name is not None + assert row.network_key is not None def test_normalize_bad_date(): @@ -40,4 +40,4 @@ def test_normalize_bad_value(): assert row.time == tz.localize(datetime.strptime("2018052710", "%Y%m%d%H")) assert row.variable_name is not None assert row.val is not None - assert row.network_name is not None + assert row.network_key is not None diff --git a/docs/usage.md b/docs/usage.md index 9cfa2780..d6b9e807 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -79,7 +79,7 @@ $ crmprtd_pipeline -N ytnt -T metnorth -c postgresql://user:password@db.uvic.ca/ ### Downloading data 
-Script `crmprtd_download -N [network_name]` downloads data for the named network. +Script `crmprtd_download -N [network_key]` downloads data for the named network. The standard output stream of this script can be redirected into a file or piped into `crmprtd_process`. Script `crmprtd_process` read data from the standard input stream runs it through a series of formatting changes and checks before inserting @@ -105,7 +105,7 @@ optional arguments: Number of samples to be taken from observations when searching for duplicates to determine which insertion strategy to use -N {bc_env_aq,bc_env_snow,bc_forestry,bc_tran,nt_forestry,nt_water,yt_gov,yt_water,yt_firewx,yt_avalanche,dfo_ccg_lighthouse,bc_hydro,crd,ec,moti,wamr,wmb,_test}, --network {bc_env_aq,bc_env_snow,bc_forestry,bc_tran,nt_forestry,nt_water,yt_gov,yt_water,yt_firewx,yt_avalanche,dfo_ccg_lighthouse,bc_hydro,crd,ec,moti,wamr,wmb,_test} - The network from which the data is coming from. The name will be used + The network from which the data is coming from. The key will be used for a dynamic import of the module's normalization function. -S START_DATE, --start_date START_DATE Optional start time to use for processing (interpreted with @@ -138,12 +138,12 @@ Notes: 3. Command `crmprtd_pipeline` wraps this pattern and supplies required arguments. 
```bash -crmprtd_download -N [network_name] > cache_filename -crmprtd_process -N [network_name] < cache_filename +crmprtd_download -N [network_key] > cache_filename +crmprtd_process -N [network_key] < cache_filename # Or -crmprtd_download -N [network_name] | crmprtd_process -N [network_name] +crmprtd_download -N [network_key] | crmprtd_process -N [network_key] # Or -crmprtd_download -N [network_name] | tee cache_filename | crmprtd_process -N [network_name] +crmprtd_download -N [network_key] | tee cache_filename | crmprtd_process -N [network_key] ``` ### Cron usage diff --git a/poetry.lock b/poetry.lock index 4b6918db..6a004f24 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1161,10 +1161,8 @@ description = "An ORM representation of the PCDS and CRMP database" optional = false python-versions = ">=3.9,<3.14" groups = ["main"] -files = [ - {file = "pycds-5.0.0-py3-none-any.whl", hash = "sha256:b9e6c1aacde84fcaf78b5c21acbf7dbf618e8c6fc543a5a7dcc2f9835a64c5f4"}, - {file = "pycds-5.0.0.tar.gz", hash = "sha256:439540ebfaedd4f9091bd2e4e41d44ebf2cf1b71ced28319f3c5f706b440d172"}, -] +files = [] +develop = false [package.dependencies] alembic = ">=1.16" @@ -1178,9 +1176,10 @@ SQLAlchemy = ">=2.0.0,<3.0" dev = ["alembic-verify (>=0.1.4,<0.2.0)", "black (>=24.3.0)", "pytest (>=8.4.0)", "pytest-describe (>=2.1.0)", "pytest-mock (>=3.11.1)", "setuptools (>=72.2.0)", "sqlalchemy-diff (>=0.1.5)", "testing-postgresql (>=1.3.0)"] [package.source] -type = "legacy" -url = "https://pypi.pacificclimate.org/simple" -reference = "pcic-pypi" +type = "git" +url = "https://github.com/pacificclimate/pycds" +reference = "network_keys" +resolved_reference = "8043590c7abd1e4d0b98405d0804896f900a1551" [[package]] name = "pycparser" @@ -1735,4 +1734,4 @@ jsonlogger = ["python-json-logger"] [metadata] lock-version = "2.1" python-versions = ">=3.9,<3.14" -content-hash = "2af8d31f9e6f619583c5be4ed808d498a62526ebc3c41c800c26ff227e81fc81" +content-hash = 
"f39365238e019b9275eee972daf77b17f6acf2abcf94f97aac7dffef33d39ad4" diff --git a/pyproject.toml b/pyproject.toml index c1acc0ad..8b3ab022 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,7 +60,7 @@ pyyaml = "*" python-dateutil = "*" pytz = "*" requests = { version = "*", extras = ["security"] } -pycds = "5.0.0" +pycds = { git = "https://github.com/pacificclimate/pycds", branch= "network_keys" } pint = "*" pysftp = "*" urllib3 = "<2" diff --git a/scripts/moti_infill_download.py b/scripts/moti_infill_download.py index 0900f3f2..7ea82bc8 100755 --- a/scripts/moti_infill_download.py +++ b/scripts/moti_infill_download.py @@ -98,7 +98,7 @@ def main(args): else: engine = create_engine(args.connection_string) sesh = sessionmaker(bind=engine)() - q = sesh.query(CNG.native_id).filter(CNG.network_name == "MoTIe") + q = sesh.query(CNG.native_id).filter(CNG.network_key == "motie") for (station_id,) in q.all(): for url in url_generator(station_id, start_time, end_time): to, _from = url_to_from(url) diff --git a/scripts/moti_infill_insert.py b/scripts/moti_infill_insert.py index 45d4a8ef..aac83181 100755 --- a/scripts/moti_infill_insert.py +++ b/scripts/moti_infill_insert.py @@ -55,7 +55,7 @@ def copy_stations_to_sqlite(src_dsn, dest_dsn): src_sesh = sessionmaker(bind=create_engine(src_dsn))() dest_sesh = sessionmaker(bind=create_engine(dest_dsn))() - net = Network(name="MoTIe") + net = Network(name="MoTIe", key=Network.gen_key_from_name("MoTIe")) dest_sesh.add(net) dest_sesh.flush()