Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature/PODAAC-6303][issue/167] Fixed issue where -gr and -sd/-ed (temporal) cannot be used together as a query #178

Merged
merged 2 commits into from
Mar 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
## [Unreleased]
### Added
- Added error messages to inform user if .harmony file is formatted incorrectly or missing a key
### Fixed
- **PODAAC-6303 (issues/167)**
- Fixed issue where -gr and -sd/-ed (temporal) could not be used together in a query

## [1.15.2]
### Fixed
Expand Down
54 changes: 24 additions & 30 deletions subscriber/podaac_data_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,55 +213,49 @@ def cmr_downloader(args, token, data_path):
if args.offset:
ts_shift = timedelta(hours=int(args.offset))

# Base param values
params = [
('page_size', page_size),
('sort_key', "-start_date"),
('provider', provider),
('ShortName', short_name)
]

if search_cycles is not None:
cmr_cycles = search_cycles
params = [
('page_size', page_size),
('provider', provider),
('ShortName', short_name),
('token', token),
]
for v in cmr_cycles:
params.append(("cycle[]", v))
if args.verbose:
logging.info("cycles: " + str(cmr_cycles))

elif granule is not None:
#This line is added to strip out the extensions. Not sure if this works across the board for all collections but it seem to work on few collections that were tested.
cmr_granule = granule.rsplit( ".", 1 )[ 0 ]
params = [
('page_size', page_size),
('sort_key', "-start_date"),
('provider', provider),
('ShortName', short_name),
('GranuleUR[]', cmr_granule),
('token', token),
]
#jmcnelis, 2023/06/14 - provide for wildcards in granuleur-based search
if granule is not None:
# This line is added to strip out the extensions. Not sure if this works across the board for all collections,
# but it seems to work on a few collections that were tested.
# This isn't perfect, since it cannot deal with compound extensions
cmr_granule = granule.rsplit(".", 1)[0]
params.append(('GranuleUR[]', cmr_granule))
# jmcnelis, 2023/06/14 - provide for wildcards in granuleur-based search
if '*' in cmr_granule or '?' in cmr_granule:
params.append(('options[GranuleUR][pattern]', 'true'))
if args.verbose:
logging.info("Granule: " + str(cmr_granule))

else:
if start_date_time is not None and end_date_time is not None:
temporal_range = pa.get_temporal_range(start_date_time, end_date_time,
datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")) # noqa E501
params = [
('page_size', page_size),
('sort_key', "-start_date"),
('provider', provider),
('ShortName', short_name),
('temporal', temporal_range),
('token', token),
]
params.append(('temporal', temporal_range))
if args.verbose:
logging.info("Temporal Range: " + temporal_range)

if args.verbose:
logging.info("Provider: " + provider)
if args.bbox is not None:
params.append(('bounding_box', args.bbox))

if args.verbose:
logging.info("Provider: " + provider)

# Final token appending; seems to bug urlencode(params) when it's not last
params.append(('token', token))

# If 401 is raised, refresh token and try one more time
try:
results = pa.get_search_results(params, args.verbose)
Expand All @@ -270,7 +264,7 @@ def cmr_downloader(args, token, data_path):
token = pa.refresh_token(token)
# Updated: This is not always a dictionary...
# in fact, here it's always a list of tuples
for i, p in enumerate(params) :
for i, p in enumerate(params):
if p[1] == "token":
params[i] = ("token", token)
results = pa.get_search_results(params, args.verbose)
Expand Down
15 changes: 15 additions & 0 deletions tests/test_downloader_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,3 +109,18 @@ def test_downloader_GRACE_with_SHA_512(tmpdir):
modified_time_2 = os.path.getmtime(filename)
print( modified_time_2 )
assert modified_time_1 == modified_time_2

@pytest.mark.regression
def test_downloader_temporal_and_granule_together():
    """Regression test for PODAAC-6303 (issue #167).

    Verifies that a granule-name wildcard (-gr) and a temporal range
    (-sd/-ed) can be combined in a single downloader query. Equivalent CLI:

        podaac-data-downloader -c TRPSDL2ALLCRSMGLOS -d ./TMP -p GES_DISC \
            -sd 2020-01-01T00:00:00Z -ed 2020-01-02T23:59:59Z -gr *NH3*
    """
    # Start from a clean download directory.
    shutil.rmtree('./TMP', ignore_errors=True)
    args2 = create_downloader_args(
        '-c TRPSDL2ALLCRSMGLOS -d ./TMP -p GES_DISC -sd 2020-01-01T00:00:00Z -ed 2020-01-02T23:59:59Z -gr *NH3*'
        .split())
    pdd.run(args2)
    # When tests run in parallel we sometimes get a 401 on the token, which can
    # affect the citation file -- so count only the downloaded data files.
    data_files = [name for name in os.listdir('./TMP')
                  if os.path.isfile('./TMP/' + name) and "citation.txt" not in name]
    # Exactly two granules match the wildcard within this temporal window.
    assert len(data_files) == 2
    shutil.rmtree('./TMP')
5 changes: 3 additions & 2 deletions tests/test_subscriber.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,9 @@ def test_search_after():
'bounding_box': "-180,-90,180,90",
}
results = pa.get_search_results(params, True)
assert results['hits'] == 3762
assert len(results['items']) == 3762
# hits and items should always be more than 2000, ignoring page_size set
assert results['hits'] > 2000
assert len(results['items']) != 2000

def test_update_format_change(cleanup_update_test):
print("Running Test")
Expand Down
Loading