def add_network_arg(parser):
    """Network is fundamental to how we process, so we always require it."""
    network_help = (
        "Network identifier (a network name or network alias) from which to "
        "download observations. A network alias can stand for one or more "
        "individual networks (e.g., 'ytnt' stands for many networks)."
    )
    parser.add_argument(
        "-N",
        "--network",
        dest="network_name",
        required=True,
        choices=NETWORKS + network_alias_names,
        help=network_help,
    )
- ), + "--force", + dest="force", + action="store_true", + help="Continue processing if individual operations fail", ) @@ -335,6 +348,7 @@ def add_time_range_args(parser): "-S", "--start_date", dest="stime", + type=dateutil.parser.parse, required=True, help=( "Start time (UTC) of range to process (format: '%%Y-%%m-%%d %%H:%%M:%%S'). " @@ -345,10 +359,11 @@ def add_time_range_args(parser): "-E", "--end_date", dest="etime", - default=datetime.now(timezone.utc), + type=dateutil.parser.parse, help=( "End time (UTC) of range to process (format: '%%Y-%%m-%%d %%H:%%M:%%S'). " - "Interpreted with strptime and rounded to the nearest hour. Defaults to the current UTC time." + "Interpreted with strptime and rounded to the nearest hour. " + "Defaults to start time if not provided." ), ) parser.add_argument( @@ -361,14 +376,17 @@ def add_time_range_args(parser): ) -def ensure_log_directory(log_filename): +def ensure_directory(filename: str): """ Ensure the log directory exists. Args: log_filename: The path to the log file. 
""" - if log_filename: - log_dir = os.path.dirname(log_filename) - if log_dir and not os.path.exists(log_dir): - os.makedirs(log_dir, exist_ok=True) + if filename: + dir_path = os.path.dirname(filename) + if dir_path and not os.path.exists(dir_path): + os.makedirs(dir_path, exist_ok=True) + +def get_defaults_module(network: str): + return import_module(f"crmprtd.networks.{network}.defaults") \ No newline at end of file diff --git a/crmprtd/bulk_pipeline.py b/crmprtd/bulk_pipeline.py index 7d920fc9..51463745 100755 --- a/crmprtd/bulk_pipeline.py +++ b/crmprtd/bulk_pipeline.py @@ -5,8 +5,8 @@ import os import sys import logging -import pytz -from datetime import datetime, timedelta +from zoneinfo import ZoneInfo +from datetime import datetime, timedelta, timezone from argparse import ArgumentParser from time import sleep from importlib.resources import files @@ -14,43 +14,41 @@ # Import from crmprtd from crmprtd import ( add_logging_args, + add_network_arg, add_province_args, + get_defaults_module, setup_logging, add_bulk_args, add_time_range_args, - ensure_log_directory, + ensure_directory, network_alias_names, network_aliases, ) from crmprtd.download_cache_process import ( main as download_cache_process_main, describe_network, - default_cache_filename, ) def process(current_time, opts, args): + network_defaults = get_defaults_module(opts.network_name) + # Generate cache filename if directory specified cache_filename = None if opts.directory: - cache_filename = default_cache_filename( + cache_filename = network_defaults.default_cache_filename( timestamp=current_time, - network_name=opts.network, - tag=opts.tag, - frequency=opts.frequency if opts.network == "ec" else None, - province=opts.province if opts.network == "ec" else None, + **opts ) # Replace the default cache directory with the user-specified directory cache_filename = cache_filename.replace( - f"~/{opts.network}/cache/", f"{opts.directory}/{opts.network}/cache/" + f"~/{opts.network_name}/cache/", 
f"{opts.directory}/{opts.network_name}/cache/" ) # ensure directory exists - cache_dir = os.path.dirname(cache_filename) - if not os.path.exists(cache_dir): - os.makedirs(cache_dir, exist_ok=True) + ensure_directory(os.path.dirname(cache_filename)) # Build argument list for download_cache_process main function # Start with base arguments from the original args list @@ -60,7 +58,7 @@ def process(current_time, opts, args): fun_args = [ *base_args, "--network", - opts.network, + opts.network_name, # "--log_conf", # opts.log_conf, "--log_filename", @@ -72,11 +70,11 @@ def process(current_time, opts, args): ] # Add frequency if provided (only for EC network, as other networks don't use it) - if opts.network == "ec" and opts.frequency: + if opts.network_name == "ec" and opts.frequency: fun_args.extend(["--frequency", opts.frequency]) # Add province if provided - if opts.network == "ec" and opts.province: + if opts.network_name == "ec" and opts.province: fun_args.extend(["--province", opts.province]) # Add tag if provided @@ -99,9 +97,18 @@ def run(opts, args): Main function to run bulk pipeline operations using download_cache_process for time ranges with specified frequency """ - # Create log directory if it doesn't exist - if opts.log_filename: - ensure_log_directory(opts.log_filename) + + network_defaults = get_defaults_module(opts.network_name) + + if not opts.log_filename: + opts.log_filename = network_defaults.default_log_filename( + network_name=opts.network_name, + tag=opts.tag, + frequency=opts.frequency, + province=opts.province if opts.network_name == "ec" else None, + ) + + ensure_directory(opts.log_filename) # Setup logging first setup_logging( @@ -115,7 +122,7 @@ def run(opts, args): log = logging.getLogger("crmprtd") log.info(f"Parsed opts: {opts}") - log.info(f"Network description: {describe_network(opts.network)}") + log.info(f"Network description: {describe_network(opts.network_name)}") try: stime = datetime.strptime(opts.stime, "%Y-%m-%d %H:%M:%S") @@ 
-145,13 +152,13 @@ def run(opts, args): log.info(f"Processing time: {iter_time_str}") try: - if opts.network == "ec" and not opts.province: + if opts.network_name == "ec" and not opts.province: log.error( "For network 'ec', province must be specified using --province option" ) raise ValueError("Province must be specified for EC network") # Process each province if specified - if opts.network == "ec": + if opts.network_name == "ec": for p in opts.province: log.info(f"Processing province: {p}") copts = copy.copy(opts) @@ -195,6 +202,12 @@ def main(): "downloads are desired, omit inclusion of connection string arguments." ) + add_network_arg(parser) + + opts, args = parser.parse_known_args(sysargs) + + network_defaults = get_defaults_module(opts.network_name) + add_logging_args(parser) add_bulk_args(parser) add_time_range_args(parser) @@ -205,13 +218,8 @@ def main(): dest="tag", help="Tag to include in cache and log filenames", ) + # Control options - parser.add_argument( - "--force", - dest="force", - action="store_true", - help="Continue processing if individual operations fail", - ) parser.add_argument( "--delay", dest="delay", @@ -229,10 +237,10 @@ def main(): # Set defaults parser.set_defaults( log_conf=default_log_conf, - log_filename="/tmp/crmp/bulk_pipeline.log", + log_filename=None, log_level="INFO", error_email="pcic.devops@uvic.ca", - etime=datetime.now(pytz.timezone("UTC")).strftime("%Y-%m-%d %H:%M:%S"), + etime=network_defaults.default_end_time(), dry_run=False, force=False, delay=3, @@ -240,20 +248,19 @@ def main(): opts, args = parser.parse_known_args(sysargs) - if opts.network == "ec": + # Additional arguments for specific networks, currently only EC so I've not migrated + # then to network defaults or similar yet. + if opts.network_name == "ec": add_province_args(parser) opts, args = parser.parse_known_args(sysargs) # Normalize to lowercase. 
opts.province = {p.lower() for p in opts.province} - # Validate arguments - if not opts.network: - parser.error("Network (-N/--network) is required") - - if opts.network in network_alias_names: - for alias in network_aliases[opts.network]: + # Network aliases can represent multiple networks, so we loop through them here + if opts.network_name in network_alias_names: + for alias in network_aliases[opts.network_name]: copts = copy.copy(opts) - copts.network = alias + copts.network_name = alias run(copts, args) else: run(opts, args) diff --git a/crmprtd/bulk_process.py b/crmprtd/bulk_process.py index f0731e27..0ef20e10 100755 --- a/crmprtd/bulk_process.py +++ b/crmprtd/bulk_process.py @@ -9,8 +9,9 @@ import sys # Import the process function directly instead of main +from crmprtd.download_cache_process import default_log_filename from crmprtd.process import main as process -from crmprtd import add_logging_args, setup_logging, add_bulk_args +from crmprtd import add_logging_args, ensure_directory, setup_logging, add_bulk_args def run(opts, args): @@ -18,11 +19,16 @@ def run(opts, args): Main function to process multiple files in a directory using crmprtd.process with optional pattern matching """ - # Create log directory if it doesn't exist - if opts.log_filename: - log_dir = os.path.dirname(opts.log_filename) - if log_dir and not os.path.exists(log_dir): - os.makedirs(log_dir, exist_ok=True) + + if not opts.log_filename: + opts.log_filename = default_log_filename( + network_name=opts.network, + tag=opts.tag, + frequency=opts.frequency, + province=opts.province, + ) + + ensure_directory(opts.log_filename) # Setup logging first setup_logging( @@ -170,13 +176,6 @@ def main(): default="*.xml", help="File pattern to match in directory (default: *.xml)", ) - # Processing options - parser.add_argument( - "-f", - "--force", - action="store_true", - help="Continue processing remaining files if one fails", - ) parser.add_argument( "-M", "--move_processed", @@ -194,7 +193,7 @@ def 
main(): parser.set_defaults( connection_string="dbname=crmprtd user=crmp", log_conf=default_log_conf, - log_filename="/tmp/crmp/bulk_process.log", + log_filename=None, log_level="INFO", error_email="pcic.devops@uvic.ca", force=False, diff --git a/crmprtd/download_cache_process.py b/crmprtd/download_cache_process.py index 8fada45e..ac9a2b1d 100644 --- a/crmprtd/download_cache_process.py +++ b/crmprtd/download_cache_process.py @@ -6,15 +6,16 @@ Factor out long lists of hard-coded network names """ + + import datetime from argparse import ArgumentParser from typing import List, Optional import logging -import pytz - from crmprtd import ( NETWORKS, + get_defaults_module, network_alias_names, network_aliases, add_province_args, @@ -32,127 +33,39 @@ def check_network_name(network_name): raise ValueError(f"Network name '{network_name}' is not recognized.") -def default_log_filename( - network_name: Optional[str] = None, - tag: Optional[str] = None, - frequency: Optional[str] = None, - province: Optional[str] = None, - **_, -): - """Return log filename (filepath). It depends on several parameters, starting - with the network. Its pattern is similar, but not identical, to that of the cache - filename. - - :param network_name: Network name to include in filename. - :param tag: Tag to include in filename. - :param frequency: Download frequency to include in filename (network ec only). - :param province: Province code (2 letters) to include in filename (network ec only). - :param _: Ignored additional kw args. - :return: Filename. 
- """ - check_network_name(network_name) - filepath = f"~/{network_name}/logs" - tag_prefix = f"{tag}_" if tag is not None else "" - if network_name in ("ec",): - assert frequency is not None - assert province is not None - return f"{filepath}/{tag_prefix}{province.lower()}_{frequency}_json.log" - return f"{filepath}/{tag_prefix}{network_name}_json.log" - - -def the_log_filename(log_filename: Optional[str] = None, **kwargs): - return log_filename or default_log_filename(**kwargs) - - -def default_cache_filename( - timestamp: datetime.datetime = datetime.datetime.now(), - timestamp_format: str = "%Y-%m-%dT%H:%M:%S", - network_name: Optional[str] = None, - tag: Optional[str] = None, - frequency: Optional[str] = None, - province: Optional[str] = None, - **_, -) -> str: - """Return cache filename (filepath). It depends on several parameters, starting - with the network. Its pattern is similar, but not identical, to that of the log - filename. - - :param timestamp: Timestamp to include in filename. - :param timestamp_format: Format for converting timestamp to string. - :param network_name: Network name to include in filename. - :param tag: Tag to include in filename. - :param frequency: Download frequency to include in filename (network ec only). - :param province: Province code (2 letters) to include in filename (network ec only). - :param _: Ignored additional kw args. - :return: Filename. 
- """ - check_network_name(network_name) - ts = timestamp.strftime(timestamp_format) - filepath = f"~/{network_name}/cache" - tag_prefix = f"{tag}_" if tag is not None else "" - if network_name in ("ec",): - assert frequency is not None - assert province is not None - return f"{filepath}/{tag_prefix}{frequency}_{province.lower()}_{ts}.xml" - if network_name in ( - "bc_env_snow", - "bc_forestry", - "bc_riotinto", - "bc_tran", - "dfo_ccg_lighthouse", - "nt_forestry", - "nt_water", - "yt_gov", - "yt_water", - "yt_avalanche", - "yt_firewx", - ): - return f"{filepath}/{tag_prefix}{network_name}_{ts}.xml" - return f"{filepath}/{tag_prefix}{network_name}_{ts}.txt" - - -def the_cache_filename(cache_filename: Optional[str] = None, **kwargs) -> str: - return cache_filename or default_cache_filename(**kwargs) - - # TODO: This is inconsistent with how all the other functions pull the log config # They use crmprtd/data/logging.yaml, but this uses a default of the home directory -def log_args(**kwargs) -> List[str]: +def log_args( + network_name: str, log_filename: Optional[str] = None, **kwargs +) -> List[str]: """Return logging args. Only the log filename depends on the arguments.""" + network_defaults = get_defaults_module(network_name) + return [ "-L", "~/logging.yaml", "--log_filename", - the_log_filename(**kwargs), + log_filename or network_defaults.default_log_filename(network_name = network_name, **kwargs), ] -def to_utc(d: datetime.datetime, tz_string: str = "Canada/Pacific"): - if d.tzinfo is not None and d.tzinfo.utcoffset(d) is not None: - # d is not localized. Make it so. - tz = pytz.timezone(tz_string) - d = tz.localize(d) - return d.astimezone(pytz.utc) - - def download_args( - network_name: Optional[str] = None, + network_name: str, frequency: Optional[str] = None, province: Optional[str] = None, time: Optional[datetime.datetime] = None, # TODO: use now()? 
- start_time: Optional[datetime.datetime] = None, **_, ) -> List[str]: """Return command-line args for download phase. They depend on the network and other arguments. :param time: - :param start_time: :param network_name: Network name. :param _: Remainder args. Passed through. :return: List of download args. """ check_network_name(network_name) + network_defaults = get_defaults_module(network_name) common_args = f"-N {network_name} ".split() net_args = None @@ -160,37 +73,10 @@ def download_args( # Set net_args per network. if network_name == "_test": net_args = [] - if network_name == "bc_hydro": - net_args = "-f sftp2.bchydro.com -F pcic -S ~/.ssh/id_rsa".split() - if network_name == "crd": - net_args = f"--auth_fname ~/.rtd_auth.yaml --auth_key={network_name}".split() - if network_name == "ec": - if province is None or frequency is None: - raise ValueError("EC network requires both province and frequency") - net_args = f"-p {province.lower()} -F {frequency}".split() - if network_name in ( - "bc_env_snow", - "bc_forestry", - "bc_riotinto", - "bc_tran", - "dfo_ccg_lighthouse", - "nt_forestry", - "nt_water", - "yt_gov", - "yt_water", - "yt_avalanche", - "yt_firewx", - ): - if time is None: - raise ValueError(f"Network {network_name} requires a time parameter") - ts = to_utc(time).strftime("%Y/%m/%d %H:00:00") - net_args = ["-d", f'"{ts}"'] - if network_name == "moti": - net_args = f"-u https://prdoas5.apps.th.gov.bc.ca/saw-data/sawr7110 --auth_fname ~/.rtd_auth.yaml --auth_key={network_name}".split() - if network_name == "wamr": - net_args = [] - if network_name == "wmb": - net_args = f"--auth_fname ~/.rtd_auth.yaml --auth_key={network_name}".split() + else: + net_args = network_defaults.default_download_args( + time=time, province=province, frequency=frequency + ) assert net_args is not None return common_args + net_args @@ -208,18 +94,31 @@ def dispatch_network( **kwargs, ) -> None: """ - Dispatch a single network to the download-and-process pipeline. 
- - :param cache_filename: Custom cache filename. - :param connection_string: Database connection string for "process" step. - :param time: Custom time to use for downloading. If None, use network defaults. - :param kwargs: Remaining args, passed through to various subfunctions. Note that - network name is one of these args. - :param dry_run: If true, print commands but don't run them. - :return: None. Side effect: Download and process network specified in args. + Dispatch a single network through the download and process pipeline. + + Handles network-specific logic, including handling of special networks like "_test" + and "ec", by appending appropriate arguments and invoking the download-and-process + sequence. + + :param cache_filename: Optional custom filename for cache storage. If None, defaults + are used based on network-specific logic and parameters. + :param connection_string: Optional database connection string for the processing + step. If not provided, no database operations occur. + :param dry_run: If True, commands are displayed without being executed. This is + useful for verifying the command flow without actual processing. + :param time: Optional datetime object to specify custom download time. If None, + network-specific default time logic is applied. + :param kwargs: Additional keyword arguments passed throughout the pipeline. Expected + to contain a valid 'network_name' key along with other customization. + :raises ValueError: If provided network name is unrecognized, or if specific + network requirements like province codes (e.g., for "ec") are + unmet. + :return: None. The function executes the download and processing pipeline with + side effects based on given parameters. 
""" network_name = kwargs["network_name"] check_network_name(network_name) + network_defaults = get_defaults_module(network_name) if network_name == "_test": # Use custom time if provided, otherwise no time parameter @@ -227,71 +126,51 @@ def dispatch_network( network_name=network_name, log_args=log_args(**kwargs), download_args=download_args(time=time, **kwargs), - cache_filename=the_cache_filename( - cache_filename=cache_filename, - timestamp=time if time else datetime.datetime.now(), + cache_filename=cache_filename + or network_defaults.default_cache_filename( + timestamp=time, **kwargs, ), connection_string=connection_string, - dry_run=dry_run, + dry_run=dry_run or False, ) + # Somewhere we have to break out the multiple provinces for EC into individual processing + # units. So far this is the most logical place, but we could have it be part of network + # defaults if we were smart about it. elif network_name == "ec": provinces = kwargs.pop("province") if not provinces or len(provinces) == 0: raise ValueError("EC network requires at least one province code") + use_time = (time if time else network_defaults.default_time()) + # Use custom time if provided, otherwise let EC network use its defaults for province in provinces: download_and_process( network_name=network_name, log_args=log_args(**kwargs, province=province), download_args=download_args(time=time, province=province, **kwargs), - cache_filename=the_cache_filename( + cache_filename=cache_filename + or network_defaults.default_cache_filename( cache_filename=cache_filename, province=province, - timestamp=time if time else datetime.datetime.now(), + timestamp=use_time, **kwargs, ), connection_string=connection_string, dry_run=dry_run, ) - elif network_name in ( - "bc_env_snow", - "bc_forestry", - "bc_riotinto", - "bc_tran", - "dfo_ccg_lighthouse", - "nt_forestry", - "nt_water", - "yt_gov", - "yt_water", - "yt_avalanche", - "yt_firewx", - ): - # Use custom time if provided, otherwise default to "an hour ago" 
- use_time = ( - time if time else (datetime.datetime.now() - datetime.timedelta(hours=1)) - ) - download_and_process( - network_name=network_name, - log_args=log_args(**kwargs), - download_args=download_args(**kwargs, time=use_time), - cache_filename=the_cache_filename( - cache_filename=cache_filename, timestamp=use_time, **kwargs - ), - connection_string=connection_string, - dry_run=dry_run, - ) else: - # Use custom time if provided, otherwise default to "now" - use_time = time if time else datetime.datetime.now() + # Use custom time if provided, otherwise let network use its defaults + use_time = (time if time else network_defaults.default_time()) download_and_process( network_name=network_name, log_args=log_args(**kwargs), download_args=download_args(**kwargs, time=use_time), - cache_filename=the_cache_filename( - cache_filename=cache_filename, timestamp=use_time, **kwargs + cache_filename=cache_filename + or network_defaults.default_cache_filename( + cache_filename=cache_filename, timestamp=use_time, **kwargs ), connection_string=connection_string, dry_run=dry_run, diff --git a/crmprtd/networks/__init__.py b/crmprtd/networks/__init__.py index e69de29b..1ea80523 100644 --- a/crmprtd/networks/__init__.py +++ b/crmprtd/networks/__init__.py @@ -0,0 +1,119 @@ +import datetime +from typing import Optional +from zoneinfo import ZoneInfo + +"""Common Default functions for networks. Many do not need bespoke versions, so +they can be defined here and reused. + +The interface for network_defaults should be a module with functions: + +- default_log_filename(**args) -> str +- default_cache_filename(**args) -> str +- default_time(**args) -> datetime.datetime +- default_end_time(**args) -> datetime.datetime +- default_download_args(**args) -> list[str] +""" + +default_timestamp_format = "%Y-%m-%dT%H:%M:%S" + + +# TODO: is tz_string a safe assumption to be our locality? shouldn't this really be based on OS locale? 
# TODO: is tz_string a safe assumption to be our locality? shouldn't this really be based on OS locale?
def to_utc(d: datetime.datetime) -> datetime.datetime:
    """Convert a datetime to UTC.

    A naive datetime is first attached to the system local timezone (via
    ``astimezone()`` with no argument), then converted to UTC. An aware
    datetime is converted directly.

    :param d: Datetime to convert (naive or aware).
    :return: Timezone-aware datetime in UTC.
    """
    if d.tzinfo is None:
        # d is not localized. Make it so (interpreted as system local time).
        d = d.astimezone()
    return d.astimezone(ZoneInfo("UTC"))


def default_cache_filename(
    network_name: str,
    tag: Optional[str] = None,
    timestamp: Optional[datetime.datetime] = None,
    timestamp_format: str = "%Y-%m-%dT%H:%M:%S",
    **_,
) -> str:
    """Return cache filename (filepath). It depends on several parameters, starting
    with the network. Its pattern is similar, but not identical, to that of the log
    filename.

    :param network_name: Network name to include in filename.
    :param tag: Tag to include in filename.
    :param timestamp: Timestamp to include in filename. Defaults to the current
        time *at call time*. (The previous ``timestamp=datetime.now()`` default
        was evaluated once at import and then reused forever, and also crashed
        when callers explicitly passed ``timestamp=None``.)
    :param timestamp_format: Format for converting timestamp to string.
    :param _: Ignored additional kw args.
    :return: Filename.
    """
    if timestamp is None:
        timestamp = datetime.datetime.now()
    ts = timestamp.strftime(timestamp_format)
    filepath = f"~/{network_name}/cache"
    tag_prefix = f"{tag}_" if tag is not None else ""
    return f"{filepath}/{tag_prefix}{network_name}_{ts}.txt"


def default_swob_cache_filename(
    timestamp: Optional[datetime.datetime] = None,
    timestamp_format: str = "%Y-%m-%dT%H:%M:%S",
    network_name: Optional[str] = None,
    tag: Optional[str] = None,
    **_,
) -> str:
    """Return cache filename (filepath) for SWOB-style networks (XML payloads).

    Same pattern as default_cache_filename but with an ``.xml`` suffix.

    :param timestamp: Timestamp to include in filename. Defaults to the current
        time at call time (not a def-time default, which would be evaluated
        only once at import).
    :param timestamp_format: Format for converting timestamp to string.
    :param network_name: Network name to include in filename.
    :param tag: Tag to include in filename.
    :param _: Ignored additional kw args.
    :return: Filename.
    """
    if timestamp is None:
        timestamp = datetime.datetime.now()
    ts = timestamp.strftime(timestamp_format)
    filepath = f"~/{network_name}/cache"
    tag_prefix = f"{tag}_" if tag is not None else ""
    return f"{filepath}/{tag_prefix}{network_name}_{ts}.xml"


def default_log_filename(
    network_name: str,
    tag: Optional[str] = None,
    **_,
):
    """Return log filename (filepath). It depends on several parameters, starting
    with the network. Its pattern is similar, but not identical, to that of the
    cache filename.

    :param network_name: Network name to include in filename.
    :param tag: Tag to include in filename.
    :param _: Ignored additional kw args.
    :return: Filename.
    """
    filepath = f"~/{network_name}/logs"
    tag_prefix = f"{tag}_" if tag is not None else ""
    return f"{filepath}/{tag_prefix}{network_name}_json.log"


def empty_default_download_args(**_):
    """Some networks do not require any special download arguments."""
    return []


def gen_default_swob_download_args(network_name: str):
    """SWOB networks follow a similar pattern; we can use a generator function to
    create a default_download_args function for each network.

    :param network_name: Network name, used only in the error message.
    :return: Function usable as that network's ``default_download_args``.
    """

    def swob_default_download_args(time: Optional[datetime.datetime], **_):
        if time is None:
            raise ValueError(f"Network {network_name} requires a time parameter")
        # Downloads are keyed to the top of the (UTC) hour.
        ts = to_utc(time).strftime("%Y/%m/%d %H:00:00")
        return ["-d", f'"{ts}"']

    return swob_default_download_args


def default_time(**_):
    """Default observation time: now, as an aware local-timezone datetime."""
    return datetime.datetime.now().astimezone()


def default_end_time(**_):
    """Default end of a processing range: now, as an aware local-timezone datetime."""
    return datetime.datetime.now().astimezone()


def default_swob_time(**_):
    # SWOB data is always for the previous hour
    return datetime.datetime.now(ZoneInfo("UTC")) - datetime.timedelta(hours=1)
def default_download_args(**_):
    """Return the fixed SFTP download arguments for the BC Hydro network.

    :param _: Ignored additional kw args (interface compatibility).
    :return: List of command-line args for the download phase.
    """
    # Was an f-string with no placeholders; a plain literal is equivalent.
    return "-f sftp2.bchydro.com -F pcic -S ~/.ssh/id_rsa".split()
def default_download_args(**_):
    """Return the authentication download arguments for the CRD network.

    :param _: Ignored additional kw args (interface compatibility).
    :return: List of command-line args for the download phase.
    """
    auth_args = "--auth_fname ~/.rtd_auth.yaml --auth_key=crd"
    return auth_args.split()
def default_cache_filename(
    timestamp: Optional[datetime.datetime] = None,
    timestamp_format: str = "%Y-%m-%dT%H:%M:%S",
    network_name: Optional[str] = None,
    tag: Optional[str] = None,
    frequency: Optional[str] = None,
    province: Optional[str] = None,
    **_,
) -> str:
    """Return cache filename (filepath). It depends on several parameters, starting
    with the network. Its pattern is similar, but not identical, to that of the log
    filename.

    EC network uniquely requires frequency and province to be part of the filename.

    :param timestamp: Timestamp to include in filename. Defaults to the current
        time at call time (a def-time ``datetime.now()`` default is evaluated
        only once, at import).
    :param timestamp_format: Format for converting timestamp to string.
    :param network_name: Network name to include in filename.
    :param tag: Tag to include in filename.
    :param frequency: Download frequency to include in filename (required).
    :param province: Province code (2 letters) to include in filename (required).
    :param _: Ignored additional kw args.
    :raises ValueError: If frequency or province is missing. (Asserts were used
        before, but asserts are stripped under ``python -O``; ValueError also
        matches ec.default_download_args.)
    :return: Filename.
    """
    if frequency is None or province is None:
        raise ValueError("EC network requires both province and frequency")
    if timestamp is None:
        timestamp = datetime.datetime.now()
    ts = timestamp.strftime(timestamp_format)
    filepath = f"~/{network_name}/cache"
    tag_prefix = f"{tag}_" if tag is not None else ""
    return f"{filepath}/{tag_prefix}{frequency}_{province.lower()}_{ts}.xml"
It depends on several parameters, starting + with the network. Its pattern is similar, but not identical, to that of the cache + filename. + + :param network_name: Network name to include in filename. + :param tag: Tag to include in filename. + :param frequency: Download frequency to include in filename (network ec only). + :param province: Province code (2 letters) to include in filename (network ec only). + :param _: Ignored additional kw args. + :return: Filename. + """ + filepath = f"~/{network_name}/logs" + tag_prefix = f"{tag}_" if tag is not None else "" + assert frequency is not None + assert province is not None + return f"{filepath}/{tag_prefix}{province.lower()}_{frequency}_json.log" + + +def default_download_args(province: Optional[str], frequency: Optional[str], **_): + if province is None or frequency is None: + raise ValueError("EC network requires both province and frequency") + return f"-p {province.lower()} -F {frequency}".split() + +def default_time(**_): + ## EC network uses UTC time + return datetime.datetime.now(datetime.timezone.utc) + +def default_end_time(**_): + ## EC network uses UTC time + return datetime.datetime.now(datetime.timezone.utc) \ No newline at end of file diff --git a/crmprtd/networks/ec_swob/defaults.py b/crmprtd/networks/ec_swob/defaults.py new file mode 100644 index 00000000..8c762ebe --- /dev/null +++ b/crmprtd/networks/ec_swob/defaults.py @@ -0,0 +1,10 @@ +from crmprtd.networks import ( + default_log_filename, + default_cache_filename, + gen_default_swob_download_args, + default_time, + default_end_time +) + + +default_download_args = gen_default_swob_download_args("ec_swob") diff --git a/crmprtd/networks/moti/defaults.py b/crmprtd/networks/moti/defaults.py new file mode 100644 index 00000000..987c3cbb --- /dev/null +++ b/crmprtd/networks/moti/defaults.py @@ -0,0 +1,10 @@ +from crmprtd.networks import ( + default_log_filename, + default_cache_filename, + default_time, + default_end_time +) + + +def 
default_download_args(**_): + return "-u https://prdoas5.apps.th.gov.bc.ca/saw-data/sawr7110 --auth_fname ~/.rtd_auth.yaml --auth_key=moti".split() diff --git a/crmprtd/networks/nt_forestry/defaults.py b/crmprtd/networks/nt_forestry/defaults.py new file mode 100644 index 00000000..5066c9cf --- /dev/null +++ b/crmprtd/networks/nt_forestry/defaults.py @@ -0,0 +1,9 @@ +from crmprtd.networks import ( + default_log_filename, + default_swob_cache_filename as default_cache_filename, + gen_default_swob_download_args, + default_swob_time as default_time, + default_swob_time as default_end_time +) + +default_download_args = gen_default_swob_download_args("nt_forestry") diff --git a/crmprtd/networks/nt_water/defaults.py b/crmprtd/networks/nt_water/defaults.py new file mode 100644 index 00000000..a01f2f1e --- /dev/null +++ b/crmprtd/networks/nt_water/defaults.py @@ -0,0 +1,9 @@ +from crmprtd.networks import ( + default_log_filename, + default_swob_cache_filename as default_cache_filename, + gen_default_swob_download_args, + default_swob_time as default_time, + default_swob_time as default_end_time +) + +default_download_args = gen_default_swob_download_args("nt_water") diff --git a/crmprtd/networks/wamr/defaults.py b/crmprtd/networks/wamr/defaults.py new file mode 100644 index 00000000..66901bc6 --- /dev/null +++ b/crmprtd/networks/wamr/defaults.py @@ -0,0 +1,7 @@ +from crmprtd.networks import ( + default_log_filename, + default_cache_filename, + empty_default_download_args as default_download_args, + default_time, + default_end_time +) diff --git a/crmprtd/networks/wmb/defaults.py b/crmprtd/networks/wmb/defaults.py new file mode 100644 index 00000000..3a409f97 --- /dev/null +++ b/crmprtd/networks/wmb/defaults.py @@ -0,0 +1,10 @@ +from crmprtd.networks import ( + default_log_filename, + default_cache_filename, + default_time, + default_end_time +) + + +def default_download_args(**_): + return "--auth_fname ~/.rtd_auth.yaml --auth_key=wmb".split() diff --git 
a/crmprtd/networks/yt_avalanche/defaults.py b/crmprtd/networks/yt_avalanche/defaults.py new file mode 100644 index 00000000..c806925d --- /dev/null +++ b/crmprtd/networks/yt_avalanche/defaults.py @@ -0,0 +1,9 @@ +from crmprtd.networks import ( + default_log_filename, + default_swob_cache_filename as default_cache_filename, + gen_default_swob_download_args, + default_swob_time as default_time, + default_swob_time as default_end_time +) + +default_download_args = gen_default_swob_download_args("yt_avalanche") diff --git a/crmprtd/networks/yt_firewx/defaults.py b/crmprtd/networks/yt_firewx/defaults.py new file mode 100644 index 00000000..c31370b9 --- /dev/null +++ b/crmprtd/networks/yt_firewx/defaults.py @@ -0,0 +1,9 @@ +from crmprtd.networks import ( + default_log_filename, + default_swob_cache_filename as default_cache_filename, + gen_default_swob_download_args, + default_swob_time as default_time, + default_swob_time as default_end_time +) + +default_download_args = gen_default_swob_download_args("yt_firewx") diff --git a/crmprtd/networks/yt_gov/defaults.py b/crmprtd/networks/yt_gov/defaults.py new file mode 100644 index 00000000..eb223e94 --- /dev/null +++ b/crmprtd/networks/yt_gov/defaults.py @@ -0,0 +1,9 @@ +from crmprtd.networks import ( + default_log_filename, + default_swob_cache_filename as default_cache_filename, + gen_default_swob_download_args, + default_swob_time as default_time, + default_swob_time as default_end_time +) + +default_download_args = gen_default_swob_download_args("yt_gov") diff --git a/crmprtd/networks/yt_water/defaults.py b/crmprtd/networks/yt_water/defaults.py new file mode 100644 index 00000000..eefdb8ea --- /dev/null +++ b/crmprtd/networks/yt_water/defaults.py @@ -0,0 +1,9 @@ +from crmprtd.networks import ( + default_log_filename, + default_swob_cache_filename as default_cache_filename, + gen_default_swob_download_args, + default_swob_time as default_time, + default_swob_time as default_end_time +) + +default_download_args = 
gen_default_swob_download_args("yt_water") diff --git a/crmprtd/tests/test_bulk.py b/crmprtd/tests/test_bulk.py new file mode 100644 index 00000000..32ab30b7 --- /dev/null +++ b/crmprtd/tests/test_bulk.py @@ -0,0 +1,373 @@ +import datetime +import sys +from unittest.mock import Mock, patch +import pytest +from crmprtd import bulk_pipeline +from crmprtd.bulk_pipeline import main, run, process + + +@pytest.mark.parametrize( + "network, frequency, stime, etime, tag, directory, expected_calls", + [ + # Test hourly frequency processing + ( + "bc_hydro", + "hourly", + "2020-01-01 00:00:00", + "2020-01-01 02:00:00", + "test_tag", + None, + 3, # Should call process 3 times (00:00, 01:00, 02:00) + ), + # Test daily frequency processing + ( + "crd", + "daily", + "2020-01-01 00:00:00", + "2020-01-03 00:00:00", + "daily_tag", + "/tmp/test", + 3, # Should call process 3 times (01-01, 01-02, 01-03) + ), + # Test EC network with province + ( + "ec", + "hourly", + "2020-01-01 00:00:00", + "2020-01-01 01:00:00", + "ec_tag", + None, + 2, # Should call process 2 times (00:00, 01:00) + ), + ], +) +def test_bulk_pipeline_run(network, frequency, stime, etime, tag, directory, expected_calls, mocker): + """Test the run function with various network and time configurations.""" + + # Mock the process function to track calls + mock_process = mocker.patch("crmprtd.bulk_pipeline.process") + + # Mock setup_logging to avoid actual logging setup + mock_setup_logging = mocker.patch("crmprtd.bulk_pipeline.setup_logging") + + # Mock ensure_directory to avoid filesystem operations + mock_ensure_directory = mocker.patch("crmprtd.bulk_pipeline.ensure_directory") + + # Mock get_defaults_module to return a mock with required methods + mock_defaults = Mock() + mock_defaults.default_log_filename.return_value = f"~/{network}/logs/{tag}_{network}_json.log" + mocker.patch("crmprtd.bulk_pipeline.get_defaults_module", return_value=mock_defaults) + + # Create mock options object + opts = Mock() + 
opts.network_name = network + opts.frequency = frequency + opts.stime = stime + opts.etime = etime + opts.tag = tag + opts.directory = directory + opts.province = ["bc"] if network == "ec" else None + opts.log_filename = None + opts.log_conf = "test_log.yaml" + opts.error_email = "test@test.com" + opts.log_level = "INFO" + opts.delay = 1 + + # Mock args list + args = ["--connection_string", "test_dsn"] + + # Run the function + run(opts, args) + + # Verify process was called the expected number of times + assert mock_process.call_count == expected_calls + + # Verify setup_logging was called + mock_setup_logging.assert_called_once() + + # Verify ensure_directory was called for log filename + mock_ensure_directory.assert_called() + + +@pytest.mark.parametrize( + "network, tag, frequency, province, directory, time_str, expected_download_cache_process_calls", + [ + # Test basic network without province + ( + "bc_hydro", + "test_tag", + None, + None, + None, + "2020-01-01 12:00:00", + 1, + ), + # Test EC network with province and frequency + ( + "ec", + "ec_tag", + "daily", + "bc", + None, + "2020-01-01 12:00:00", + 1, + ), + # Test with custom directory + ( + "crd", + "dir_tag", + None, + None, + "/custom/dir", + "2020-01-01 12:00:00", + 1, + ), + ], +) +def test_bulk_pipeline_process(network, tag, frequency, province, directory, time_str, expected_download_cache_process_calls, mocker): + """Test the process function with various configurations.""" + + # Mock download_cache_process_main to track calls + mock_download_cache_process_main = mocker.patch("crmprtd.bulk_pipeline.download_cache_process_main") + + # Mock ensure_directory to avoid filesystem operations + mock_ensure_directory = mocker.patch("crmprtd.bulk_pipeline.ensure_directory") + + + + # Create mock options object + opts = Mock() + opts.network_name = network + opts.tag = tag + opts.frequency = frequency + opts.province = province + opts.directory = directory + opts.log_filename = 
f"~/{network}/logs/{tag}_{network}_json.log" + + # Parse time + current_time = datetime.datetime.strptime(time_str, "%Y-%m-%d %H:%M:%S") + + # Mock args list + args = ["--connection_string", "test_dsn"] + + # Run the function + process(current_time, opts, args) + + # Verify download_cache_process_main was called the expected number of times + assert mock_download_cache_process_main.call_count == expected_download_cache_process_calls + + # Verify the arguments passed to download_cache_process_main + call_args = mock_download_cache_process_main.call_args[0][0] + assert "--network" in call_args + assert network in call_args + assert "--time" in call_args + assert time_str in call_args + + if tag: + assert "--tag" in call_args + assert tag in call_args + + if frequency and network == "ec": + assert "--frequency" in call_args + assert frequency in call_args + + if province and network == "ec": + assert "--province" in call_args + assert province in call_args + + if directory: + assert "--cache_filename" in call_args + mock_ensure_directory.assert_called() + + +@pytest.mark.parametrize( + "network, additional_args, expected_network_calls", + [ + # Test single network + ("bc_hydro", ["--tag", "test", "--frequency", "hourly"], 1), + # Test network alias - should process multiple networks + ("hourly_swobml2", ["--tag", "alias_test"], len(bulk_pipeline.network_aliases.get("hourly_swobml2", []))), + # Test EC network with province + ("ec", ["--tag", "ec_test", "--frequency", "daily", "--province", "bc"], 1), + ], +) +def test_bulk_pipeline_main(network, additional_args, expected_network_calls, mocker): + """Test the main function with various network configurations.""" + + # Mock the run function to track calls + mock_run = mocker.patch("crmprtd.bulk_pipeline.run") + + # Mock get_defaults_module to return a mock with required methods + mock_defaults = Mock() + mock_defaults.default_end_time.return_value = "2020-01-02 00:00:00" + 
mocker.patch("crmprtd.bulk_pipeline.get_defaults_module", return_value=mock_defaults) + + # Mock files function for default log config + mock_files = mocker.patch("crmprtd.bulk_pipeline.files") + mock_files.return_value.__truediv__.return_value = "default_log.yaml" + + # Mock add_province_args for EC network + mock_add_province_args = mocker.patch("crmprtd.bulk_pipeline.add_province_args") + + # Prepare test arguments + test_args = [ + "--network", network, + "--start_date", "2020-01-01 00:00:00", + "--end_date", "2020-01-01 23:00:00", + + ] + additional_args + + # Mock sys.argv + with patch.object(sys, 'argv', ['bulk_pipeline.py'] + test_args): + # Run main function + main() + + # Verify run was called the expected number of times + assert mock_run.call_count == expected_network_calls + + # If EC network, verify add_province_args was called + if network == "ec": + mock_add_province_args.assert_called() + + +@pytest.mark.parametrize( + "network, frequency, error_type", + [ + # Test invalid time format + ("bc_hydro", "hourly", "time_parse_error"), + # Test invalid frequency + ("crd", "invalid_freq", "frequency_error"), + ], +) +def test_bulk_pipeline_error_handling(network, frequency, error_type, mocker): + """Test error handling in bulk pipeline functions.""" + + # Mock logging + mock_logger = Mock() + mocker.patch("crmprtd.bulk_pipeline.logging.getLogger", return_value=mock_logger) + + # Mock setup_logging + mock_setup_logging = mocker.patch("crmprtd.bulk_pipeline.setup_logging") + + # Mock get_defaults_module + mock_defaults = Mock() + mock_defaults.default_log_filename.return_value = f"~/{network}/logs/test_{network}_json.log" + mocker.patch("crmprtd.bulk_pipeline.get_defaults_module", return_value=mock_defaults) + + # Mock ensure_directory + mocker.patch("crmprtd.bulk_pipeline.ensure_directory") + + # Create mock options with invalid data + opts = Mock() + opts.network_name = network + opts.frequency = frequency + opts.log_filename = None + opts.tag = "test" + 
opts.province = None + opts.log_conf = "test_log.yaml" + opts.error_email = "test@test.com" + opts.log_level = "INFO" + opts.delay = 1 + + if error_type == "time_parse_error": + opts.stime = "invalid-time-format" + opts.etime = "2020-01-01 23:00:00" + elif error_type == "frequency_error": + opts.stime = "2020-01-01 00:00:00" + opts.etime = "2020-01-01 23:00:00" + # frequency is already set to invalid value + + args = [] + + # Run function and expect it to handle the error gracefully + run(opts, args) + + # Verify that error was logged + mock_logger.error.assert_called() + + +def test_bulk_pipeline_time_range_processing(mocker): + """Test that time ranges are processed correctly with proper intervals.""" + + # Mock process function to track individual calls + mock_process = mocker.patch("crmprtd.bulk_pipeline.process") + + # Mock other dependencies + mocker.patch("crmprtd.bulk_pipeline.setup_logging") + mocker.patch("crmprtd.bulk_pipeline.ensure_directory") + + mock_defaults = Mock() + mock_defaults.default_log_filename.return_value = "~/test/logs/test_log.log" + mocker.patch("crmprtd.bulk_pipeline.get_defaults_module", return_value=mock_defaults) + + mock_logger = Mock() + mocker.patch("crmprtd.bulk_pipeline.logging.getLogger", return_value=mock_logger) + + # Test hourly processing + opts = Mock() + opts.network_name = "bc_hydro" + opts.frequency = "hourly" + opts.stime = "2020-01-01 00:00:00" + opts.etime = "2020-01-01 03:00:00" # 4 hours total + opts.tag = "hourly_test" + opts.directory = None + opts.province = None + opts.log_filename = None + opts.log_conf = "test.yaml" + opts.error_email = "test@test.com" + opts.log_level = "INFO" + opts.delay = 0 # No delay for testing + + args = [] + + # Run the function + run(opts, args) + + # Should be called 4 times (00:00, 01:00, 02:00, 03:00) + assert mock_process.call_count == 4 + + # Verify the timestamps passed to process function + call_times = [call[0][0] for call in mock_process.call_args_list] + expected_times = 
[ + datetime.datetime(2020, 1, 1, 0, 0, 0), + datetime.datetime(2020, 1, 1, 1, 0, 0), + datetime.datetime(2020, 1, 1, 2, 0, 0), + datetime.datetime(2020, 1, 1, 3, 0, 0), + ] + + assert call_times == expected_times + + +def test_bulk_pipeline_network_alias_handling(mocker): + """Test that network aliases are properly expanded to individual networks.""" + + # Mock the run function to track calls per network + mock_run = mocker.patch("crmprtd.bulk_pipeline.run") + + # Mock dependencies + mock_defaults = Mock() + mock_defaults.default_end_time.return_value = "2020-01-02 00:00:00" + mocker.patch("crmprtd.bulk_pipeline.get_defaults_module", return_value=mock_defaults) + + mocker.patch("crmprtd.bulk_pipeline.files") + + # Test with a network alias that maps to multiple networks + test_alias = "hourly_swobml2" + expected_networks = bulk_pipeline.network_aliases.get(test_alias, []) + + test_args = [ + "--network", test_alias, + "--stime", "2020-01-01 00:00:00", + "--etime", "2020-01-01 01:00:00", + "--frequency", "hourly", + ] + + with patch.object(sys, 'argv', ['bulk_pipeline.py'] + test_args): + main() + + # Should call run once for each network in the alias + assert mock_run.call_count == len(expected_networks) + + # Verify each network was processed + called_networks = [call[0][0].network_name for call in mock_run.call_args_list] + assert set(called_networks) == set(expected_networks) \ No newline at end of file diff --git a/crmprtd/tests/test_download_cache_process.py b/crmprtd/tests/test_download_cache_process.py index 081f31a5..906ef072 100644 --- a/crmprtd/tests/test_download_cache_process.py +++ b/crmprtd/tests/test_download_cache_process.py @@ -2,11 +2,7 @@ import pytest -from crmprtd.download_cache_process import ( - default_log_filename, - default_cache_filename, - download_args, -) +from crmprtd.download_cache_process import download_args, get_defaults_module @pytest.mark.parametrize( @@ -39,8 +35,9 @@ ], ) def test_default_log_filename(network_name, tag, 
frequency, province, expected): + network_defaults = get_defaults_module(network_name) # Just to cover that function assert ( - default_log_filename( + network_defaults.default_log_filename( network_name=network_name, tag=tag, frequency=frequency, @@ -91,8 +88,9 @@ def test_default_log_filename(network_name, tag, frequency, province, expected): ], ) def test_default_cache_filename(network_name, tag, frequency, province, expected): + network_defaults = get_defaults_module(network_name) assert ( - default_cache_filename( + network_defaults.default_cache_filename( timestamp=datetime.datetime(2020, 1, 2, 3, 4, 5), network_name=network_name, tag=tag, @@ -136,7 +134,7 @@ def test_default_cache_filename(network_name, tag, frequency, province, expected None, None, datetime.datetime(2020, 1, 2, 3, 4, 5), - ["-N", "bc_env_snow", "-d", '"2020/01/02 11:00:00"'], + ["-N", "bc_env_snow", "-d", '"2020/01/02 03:00:00"'], ), ( "moti", @@ -166,7 +164,7 @@ def test_default_cache_filename(network_name, tag, frequency, province, expected None, None, datetime.datetime(2020, 1, 2, 3, 4, 5), - ["-N", "nt_forestry", "-d", '"2020/01/02 11:00:00"'], + ["-N", "nt_forestry", "-d", '"2020/01/02 03:00:00"'], ), ], ) diff --git a/pyproject.toml b/pyproject.toml index 4bcdf63a..dd01779e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,7 +58,6 @@ lxml = "*" psycopg2 = "*" pyyaml = "*" python-dateutil = "*" -pytz = "*" requests = { version = "*", extras = ["security"] } pycds = "5.0.0" pint = "*" diff --git a/scripts/ec_recovery.py b/scripts/ec_recovery.py index b54e5ea9..6a8900b5 100755 --- a/scripts/ec_recovery.py +++ b/scripts/ec_recovery.py @@ -44,7 +44,7 @@ def main(opts, args): help="PostgreSQL connection string", ) parser.add_option( - "-y", + "-L", "--log_conf", dest="log_conf", help=("YAML file to use to override the default logging " " configuration"), @@ -75,7 +75,7 @@ def main(opts, args): "-p", "--province", dest="province", help="2 letter province code" ) parser.add_option( - 
"-L", "--language", dest="language", help="'e' (english) | 'f' (french)" + "--language", dest="language", help="'e' (english) | 'f' (french)" ) parser.add_option("-F", "--frequency", dest="frequency", help="daily|hourly") parser.add_option(