From 95aaba4ed5fe29ff48a30ac090715089df43c8a0 Mon Sep 17 00:00:00 2001 From: Hong-Kit Randy Yeung Date: Wed, 5 Mar 2025 15:31:59 -0600 Subject: [PATCH 1/2] fix issue wehre -gr and -temporal cannot be used together --- subscriber/podaac_data_downloader.py | 54 +++++++++++++--------------- tests/test_subscriber.py | 5 +-- 2 files changed, 27 insertions(+), 32 deletions(-) diff --git a/subscriber/podaac_data_downloader.py b/subscriber/podaac_data_downloader.py index aff5956..8e0307d 100755 --- a/subscriber/podaac_data_downloader.py +++ b/subscriber/podaac_data_downloader.py @@ -213,55 +213,49 @@ def cmr_downloader(args, token, data_path): if args.offset: ts_shift = timedelta(hours=int(args.offset)) + # Base param values + params = [ + ('page_size', page_size), + ('sort_key', "-start_date"), + ('provider', provider), + ('ShortName', short_name) + ] + if search_cycles is not None: cmr_cycles = search_cycles - params = [ - ('page_size', page_size), - ('provider', provider), - ('ShortName', short_name), - ('token', token), - ] for v in cmr_cycles: params.append(("cycle[]", v)) if args.verbose: logging.info("cycles: " + str(cmr_cycles)) - elif granule is not None: - #This line is added to strip out the extensions. Not sure if this works across the board for all collections but it seem to work on few collections that were tested. - cmr_granule = granule.rsplit( ".", 1 )[ 0 ] - params = [ - ('page_size', page_size), - ('sort_key', "-start_date"), - ('provider', provider), - ('ShortName', short_name), - ('GranuleUR[]', cmr_granule), - ('token', token), - ] - #jmcnelis, 2023/06/14 - provide for wildcards in granuleur-based search + if granule is not None: + # This line is added to strip out the extensions. Not sure if this works across the board for all collections, + # but it seems to work on few collections that were tested. + # This isn't perfect, since it cannot deal with compound extensions + cmr_granule = granule.rsplit(".", 1)[0] + params.append(('GranuleUR[]', cmr_granule)) + # jmcnelis, 2023/06/14 - provide for wildcards in granuleur-based search if '*' in cmr_granule or '?' in cmr_granule: params.append(('options[GranuleUR][pattern]', 'true')) if args.verbose: logging.info("Granule: " + str(cmr_granule)) - else: + if start_date_time is not None and end_date_time is not None: temporal_range = pa.get_temporal_range(start_date_time, end_date_time, datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")) # noqa E501 - params = [ - ('page_size', page_size), - ('sort_key', "-start_date"), - ('provider', provider), - ('ShortName', short_name), - ('temporal', temporal_range), - ('token', token), - ] + params.append(('temporal', temporal_range)) if args.verbose: logging.info("Temporal Range: " + temporal_range) - if args.verbose: - logging.info("Provider: " + provider) if args.bbox is not None: params.append(('bounding_box', args.bbox)) + if args.verbose: + logging.info("Provider: " + provider) + + # Final token appending; seems to bug urlencode(params) when it's not last + params.append(('token', token)) + # If 401 is raised, refresh token and try one more time try: results = pa.get_search_results(params, args.verbose) @@ -270,7 +264,7 @@ def cmr_downloader(args, token, data_path): token = pa.refresh_token(token) # Updated: This is not always a dictionary... # in fact, here it's always a list of tuples - for i, p in enumerate(params) : + for i, p in enumerate(params): if p[1] == "token": params[i] = ("token", token) results = pa.get_search_results(params, args.verbose) diff --git a/tests/test_subscriber.py b/tests/test_subscriber.py index 01e468b..6106723 100644 --- a/tests/test_subscriber.py +++ b/tests/test_subscriber.py @@ -85,8 +85,9 @@ def test_search_after(): 'bounding_box': "-180,-90,180,90", } results = pa.get_search_results(params, True) - assert results['hits'] == 3762 - assert len(results['items']) == 3762 + # hits and items should always be more than 2000, ignoring page_size set + assert results['hits'] > 2000 + assert len(results['items']) != 2000 def test_update_format_change(cleanup_update_test): print("Running Test") From cf1aac7efb87e7fee2c7095b4ff76b8a77581953 Mon Sep 17 00:00:00 2001 From: Hong-Kit Randy Yeung Date: Wed, 5 Mar 2025 15:35:04 -0600 Subject: [PATCH 2/2] update CHANGELOG.md and add in test --- CHANGELOG.md | 3 +++ tests/test_downloader_regression.py | 15 +++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c77c016..29a266c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) ## [Unreleased] ### Added - Added error messages to inform user if .harmony file is formatted incorrectly or missing a key +### Fixed +- **PODAAC-6303 (issues/167)** + - Fixed issue where -gr and -sd/-ed (temporal) cannot be used together as a query ## [1.15.2] ### Fixed diff --git a/tests/test_downloader_regression.py b/tests/test_downloader_regression.py index 027ea50..6369665 100644 --- a/tests/test_downloader_regression.py +++ b/tests/test_downloader_regression.py @@ -109,3 +109,18 @@ def test_downloader_GRACE_with_SHA_512(tmpdir): modified_time_2 = os.path.getmtime(filename) print( modified_time_2 ) assert modified_time_1 == modified_time_2 + +@pytest.mark.regression +def test_downloader_temporal_and_granule_together(): + # Command: podaac-data-downloader -c TRPSDL2ALLCRSMGLOS -d data -p GES_DISC -sd 2020-01-01T00:00:00Z -ed 2020-01-02T23:59:59Z -gr *NH3* + shutil.rmtree('./TMP', ignore_errors=True) + args2 = create_downloader_args( + '-c TRPSDL2ALLCRSMGLOS -d ./TMP -p GES_DISC -sd 2020-01-01T00:00:00Z -ed 2020-01-02T23:59:59Z -gr *NH3*' + .split()) + pdd.run(args2) + # So running the test in parallel, sometimes we get a 401 on the token... + # Let's ensure we're only looking for data files here + assert len([name for name in os.listdir('./TMP') if + os.path.isfile('./TMP/' + name) and "citation.txt" not in name]) == 2 + shutil.rmtree('./TMP') + assert True