diff --git a/CHANGELOG.md b/CHANGELOG.md
index c77c016..29a266c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 ## [Unreleased]
 ### Added
 - Added error messages to inform user if .harmony file is formatted incorrectly or missing a key
+### Fixed
+- **PODAAC-6303 (issues/167)**
+  - Fixed issue where `-gr` and `-sd`/`-ed` (temporal) could not be used together in a query
 
 ## [1.15.2]
 ### Fixed
diff --git a/subscriber/podaac_data_downloader.py b/subscriber/podaac_data_downloader.py
index aff5956..8e0307d 100755
--- a/subscriber/podaac_data_downloader.py
+++ b/subscriber/podaac_data_downloader.py
@@ -213,55 +213,49 @@ def cmr_downloader(args, token, data_path):
     if args.offset:
         ts_shift = timedelta(hours=int(args.offset))
 
+    # Base params shared by every query type
+    params = [
+        ('page_size', page_size),
+        ('sort_key', "-start_date"),
+        ('provider', provider),
+        ('ShortName', short_name)
+    ]
+
     if search_cycles is not None:
         cmr_cycles = search_cycles
-        params = [
-            ('page_size', page_size),
-            ('provider', provider),
-            ('ShortName', short_name),
-            ('token', token),
-        ]
         for v in cmr_cycles:
             params.append(("cycle[]", v))
         if args.verbose:
             logging.info("cycles: " + str(cmr_cycles))
 
-    elif granule is not None:
-        #This line is added to strip out the extensions. Not sure if this works across the board for all collections but it seem to work on few collections that were tested.
-        cmr_granule = granule.rsplit( ".", 1 )[ 0 ]
-        params = [
-            ('page_size', page_size),
-            ('sort_key', "-start_date"),
-            ('provider', provider),
-            ('ShortName', short_name),
-            ('GranuleUR[]', cmr_granule),
-            ('token', token),
-        ]
-        #jmcnelis, 2023/06/14 - provide for wildcards in granuleur-based search
+    if granule is not None:
+        # Strip out the file extension. It is not certain this works across the board for all collections,
+        # but it seems to work on the few collections that were tested.
+        # This isn't perfect, since it cannot handle compound extensions (e.g. ".nc.gz")
+        cmr_granule = granule.rsplit(".", 1)[0]
+        params.append(('GranuleUR[]', cmr_granule))
+        # jmcnelis, 2023/06/14 - provide for wildcards in granuleur-based search
         if '*' in cmr_granule or '?' in cmr_granule:
            params.append(('options[GranuleUR][pattern]', 'true'))
         if args.verbose:
             logging.info("Granule: " + str(cmr_granule))
 
-    else:
+    if start_date_time is not None and end_date_time is not None:
         temporal_range = pa.get_temporal_range(start_date_time, end_date_time, datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"))  # noqa E501
-        params = [
-            ('page_size', page_size),
-            ('sort_key', "-start_date"),
-            ('provider', provider),
-            ('ShortName', short_name),
-            ('temporal', temporal_range),
-            ('token', token),
-        ]
+        params.append(('temporal', temporal_range))
         if args.verbose:
             logging.info("Temporal Range: " + temporal_range)
 
-    if args.verbose:
-        logging.info("Provider: " + provider)
 
     if args.bbox is not None:
         params.append(('bounding_box', args.bbox))
 
+    if args.verbose:
+        logging.info("Provider: " + provider)
+
+    # Append the token last; urlencode(params) seems to misbehave when it is not the final entry
+    params.append(('token', token))
+
     # If 401 is raised, refresh token and try one more time
     try:
         results = pa.get_search_results(params, args.verbose)
@@ -270,7 +264,7 @@ def cmr_downloader(args, token, data_path):
             token = pa.refresh_token(token)
             # Updated: This is not always a dictionary...
             # in fact, here it's always a list of tuples
-            for i, p in enumerate(params) :
+            for i, p in enumerate(params):
                 if p[1] == "token":
                     params[i] = ("token", token)
             results = pa.get_search_results(params, args.verbose)
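Reviewer aside, not part of the patch: a minimal standalone sketch of what the refactor above achieves. Filters now accumulate onto one base `params` list instead of each branch rebuilding it from scratch, which is what lets `-gr` and `-sd`/`-ed` combine in a single CMR query, and the token is appended last before URL-encoding per the ordering note in the patch. All literal values below are placeholders.

```python
from urllib.parse import urlencode

# Base params common to every query, mirroring the refactor above
# (all values here are illustrative placeholders)
params = [
    ('page_size', 2000),
    ('sort_key', '-start_date'),
    ('provider', 'GES_DISC'),
    ('ShortName', 'TRPSDL2ALLCRSMGLOS'),
]

# Filters are now additive, so a granule pattern and a temporal range
# can be used together in a single query
params.append(('GranuleUR[]', '*NH3*'))
params.append(('options[GranuleUR][pattern]', 'true'))  # enable wildcard matching
params.append(('temporal', '2020-01-01T00:00:00Z,2020-01-02T23:59:59Z'))

# Token goes last, matching the ordering note in the patch
params.append(('token', 'EDL-TOKEN-PLACEHOLDER'))

print(urlencode(params))
```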
diff --git a/tests/test_downloader_regression.py b/tests/test_downloader_regression.py
index 027ea50..6369665 100644
--- a/tests/test_downloader_regression.py
+++ b/tests/test_downloader_regression.py
@@ -109,3 +109,17 @@ def test_downloader_GRACE_with_SHA_512(tmpdir):
     modified_time_2 = os.path.getmtime(filename)
     print( modified_time_2 )
     assert modified_time_1 == modified_time_2
+
+@pytest.mark.regression
+def test_downloader_temporal_and_granule_together():
+    # Equivalent CLI: podaac-data-downloader -c TRPSDL2ALLCRSMGLOS -d ./TMP -p GES_DISC -sd 2020-01-01T00:00:00Z -ed 2020-01-02T23:59:59Z -gr *NH3*
+    shutil.rmtree('./TMP', ignore_errors=True)
+    args2 = create_downloader_args(
+        '-c TRPSDL2ALLCRSMGLOS -d ./TMP -p GES_DISC -sd 2020-01-01T00:00:00Z -ed 2020-01-02T23:59:59Z -gr *NH3*'
+        .split())
+    pdd.run(args2)
+    # When running tests in parallel, we sometimes get a 401 on the token...
+    # Make sure we only count data files here (i.e., exclude citation.txt)
+    assert len([name for name in os.listdir('./TMP') if
+                os.path.isfile('./TMP/' + name) and "citation.txt" not in name]) == 2
+    shutil.rmtree('./TMP')
diff --git a/tests/test_subscriber.py b/tests/test_subscriber.py
index 01e468b..6106723 100644
--- a/tests/test_subscriber.py
+++ b/tests/test_subscriber.py
@@ -85,8 +85,9 @@ def test_search_after():
         'bounding_box': "-180,-90,180,90",
     }
     results = pa.get_search_results(params, True)
-    assert results['hits'] == 3762
-    assert len(results['items']) == 3762
+    # hits should exceed 2000; items stopping at exactly 2000 (one page) would mean search-after quit early
+    assert results['hits'] > 2000
+    assert len(results['items']) != 2000
 
 def test_update_format_change(cleanup_update_test):
     print("Running Test")
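A closing standalone note, also outside the patch: the compound-extension caveat called out in the downloader comment above is easy to demonstrate. A quick sketch with made-up filenames:

```python
def strip_extension(granule_ur: str) -> str:
    # Same approach as the patch: drop everything after the last dot
    return granule_ur.rsplit(".", 1)[0]

# Simple extensions come off cleanly
assert strip_extension("granule_20200101.nc") == "granule_20200101"
# Compound extensions lose only their last component, which is the
# imperfection the comment acknowledges
assert strip_extension("granule_20200101.nc.gz") == "granule_20200101.nc"
# Names without a dot pass through unchanged
assert strip_extension("granule_20200101") == "granule_20200101"
```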