From b76b11843cfe6bb14fdb2300ce5635fd10ee15ab Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Thu, 4 Sep 2025 08:17:54 -0400 Subject: [PATCH 01/13] create a new _query_execute that is used by query_region and query_parameters --- astroquery/heasarc/core.py | 202 +++++++++++++++++++---- astroquery/heasarc/tests/test_heasarc.py | 5 +- docs/heasarc/heasarc.rst | 8 +- 3 files changed, 182 insertions(+), 33 deletions(-) diff --git a/astroquery/heasarc/core.py b/astroquery/heasarc/core.py index 7489267e3e..8a0a14628e 100644 --- a/astroquery/heasarc/core.py +++ b/astroquery/heasarc/core.py @@ -1,6 +1,4 @@ - import os - import shutil import requests import tarfile @@ -288,6 +286,76 @@ def query_tap(self, query, *, maxrec=None): self._saved_query = query return self.tap.search(query, language='ADQL', maxrec=maxrec) + def _query_execute(self, catalog=None, where=None, *, + get_query_payload=False, columns=None, + verbose=False, maxrec=None): + """Queries some catalog using the HEASARC TAP server based on the + where condition and returns an `~astropy.table.Table`. + + Parameters + ---------- + catalog : str + The catalog to query. To list the available catalogs, use + :meth:`~astroquery.heasarc.HeasarcClass.list_catalogs`. + where : str + The WHERE condition to be used in the query. It must + include the 'WHERE' keyword or be empty. + get_query_payload : bool, optional + If `True` then returns the generated ADQL query as str. + Defaults to `False`. + columns : str, optional + Target column list with value separated by a comma(,). + Use * for all the columns. The default is to return a subset + of the columns that are generally the most useful. + verbose : bool, optional + If False, suppress vo warnings. + maxrec : int, optional + Maximum number of records + + + Returns + ------- + table : A `~astropy.table.Table` object. 
+ """ + # if verbose is False then suppress any VOTable related warnings + if not verbose: + commons.suppress_vo_warnings() + + if catalog is None: + raise InvalidQueryError("catalog name is required! Use 'xray' " + "to search the master X-ray catalog") + + # __row is needed for locate_data; we add it if not already present + # and remove it afterwards only if the user requested specific + # columns. keep_row tracks that. + keep_row = ( + columns in (None, '*') or + isinstance(columns, str) and '__row' in columns + ) + + if columns is None: + columns = ', '.join(self._get_default_columns(catalog)) + + if '__row' not in columns and columns != '*': + columns += ',__row' + + if where != '' and not where.startswith(' '): + where = ' ' + where.strip() + adql = f'SELECT {columns} FROM {catalog}{where}' + + if get_query_payload: + return adql + response = self.query_tap(query=adql, maxrec=maxrec) + + # save the response in case we want to use it later + self._last_result = response + self._last_catalog_name = catalog + + table = response.to_table() + if not keep_row and '__row' in table.colnames: + table.remove_column('__row') + return table + @deprecated_renamed_argument( ('mission', 'fields', 'resultmax', 'entry', 'coordsys', 'equinox', 'displaymode', 'action', 'sortvar', 'cache'), @@ -356,18 +424,6 @@ def query_region(self, position=None, catalog=None, radius=None, *, ------- table : A `~astropy.table.Table` object. """ - # if verbose is False then suppress any VOTable related warnings - if not verbose: - commons.suppress_vo_warnings() - - if catalog is None: - raise InvalidQueryError("catalog name is required! 
Use 'xray' " - "to search the master X-ray catalog") - - if columns is None: - columns = ', '.join(self._get_default_columns(catalog)) - if '__row' not in columns: - columns += ',__row' if spatial.lower() == 'all-sky': where = '' @@ -390,7 +446,7 @@ def query_region(self, position=None, catalog=None, radius=None, *, coords_str = [f'{coord.ra.deg},{coord.dec.deg}' for coord in coords_list] - where = (" WHERE CONTAINS(POINT('ICRS',ra,dec)," + where = ("WHERE CONTAINS(POINT('ICRS',ra,dec)," f"POLYGON('ICRS',{','.join(coords_str)}))=1") else: coords_icrs = parse_coordinates(position).icrs @@ -401,7 +457,7 @@ def query_region(self, position=None, catalog=None, radius=None, *, radius = self.get_default_radius(catalog) elif isinstance(radius, str): radius = coordinates.Angle(radius) - where = (" WHERE CONTAINS(POINT('ICRS',ra,dec),CIRCLE(" + where = ("WHERE CONTAINS(POINT('ICRS',ra,dec),CIRCLE(" f"'ICRS',{ra},{dec},{radius.to(u.deg).value}))=1") # add search_offset for the case of cone if add_offset: @@ -410,24 +466,23 @@ def query_region(self, position=None, catalog=None, radius=None, *, elif spatial.lower() == 'box': if isinstance(width, str): width = coordinates.Angle(width) - where = (" WHERE CONTAINS(POINT('ICRS',ra,dec)," + where = ("WHERE CONTAINS(POINT('ICRS',ra,dec)," f"BOX('ICRS',{ra},{dec},{width.to(u.deg).value}," f"{width.to(u.deg).value}))=1") else: raise ValueError("Unrecognized spatial query type. 
Must be one" " of 'cone', 'box', 'polygon', or 'all-sky'.") - - adql = f'SELECT {columns} FROM {catalog}{where}' - + + table_or_query = self._query_execute( + catalog=catalog, where=where, + get_query_payload=get_query_payload, + columns=columns, verbose=verbose, + maxrec=maxrec + ) if get_query_payload: - return adql - response = self.query_tap(query=adql, maxrec=maxrec) + return table_or_query + table = table_or_query - # save the response in case we want to use it later - self._last_result = response - self._last_catalog_name = catalog - - table = response.to_table() if add_offset: table['search_offset'].unit = u.arcmin if len(table) == 0: @@ -464,6 +519,96 @@ def query_object(self, object_name, mission, *, return self.query_region(pos, catalog=mission, spatial='cone', get_query_payload=get_query_payload) + + def query_parameters(self, catalog, params, *, + get_query_payload=False, columns=None, + verbose=False, maxrec=None): + """Query the HEASARC TAP server using a set of parameters. + + This is a simple wrapper around + `~astroquery.heasarc.HeasarcClass.query_tap` + that constructs an ADQL query from a dictionary of parameters. + + Parameters + ---------- + catalog : str + The catalog to query. To list the available catalogs, use + :meth:`~astroquery.heasarc.HeasarcClass.list_catalogs`. + params : dict + A dictionary of parameters to include in the query. + Each key-value pair will be translated into an ADQL condition. + - For a range query, use a tuple of two values (min, max). + e.g. `{'flux': (1e-12, 1e-10)}` translates to + `flux BETWEEN 1e-12 AND 1e-10`. + - For list values, use a list of values. + e.g. `{'object_type': ['QSO', 'GALAXY']}` translates to + `object_type IN ('QSO', 'GALAXY')`. + - For comparison queries, use a tuple of (operator, value), + where operator is one of '=', '!=', '<', '>', '<=', '>='. + e.g. `{'magnitude': ('<', 15)}` translates to `magnitude < 15`. + - For exact matches, use a single value (str, int, float). + e.g. 
`{'object_type': 'QSO'}` translates to + `object_type = 'QSO'`. + The keys should correspond to valid column names in the catalog. + Use `list_columns` to see available columns. + get_query_payload : bool, optional + If `True` then returns the generated ADQL query as str. + Defaults to `False`. + columns : str, optional + Target column list with value separated by a comma(,). + Use * for all the columns. The default is to return a subset + of the columns that are generally the most useful. + verbose : bool, optional + If False, suppress vo warnings. + maxrec : int, optional + Maximum number of records + + """ + + conditions = [] + for key, value in params.items(): + if isinstance(value, tuple): + if ( + len(value) == 2 and + all(isinstance(v, (int, float)) for v in value) + ): + conditions.append( + f"{key} BETWEEN {value[0]} AND {value[1]}" + ) + elif ( + len(value) == 2 and + value[0] in (">", "<", ">=", "<=") + ): + conditions.append(f"{key} {value[0]} {value[1]}") + elif isinstance(value, list): + # handle list values: key IN (...) + formatted = [] + for v in value: + if isinstance(v, str): + formatted.append(f"'{v}'") + else: + formatted.append(str(v)) + conditions.append(f"{key} IN ({', '.join(formatted)})") + else: + conditions.append( + f"{key} = '{value}'" + if isinstance(value, str) else f"{key} = {value}" + ) + if len(conditions) == 0: + where = "" + else: + where = "WHERE " + (" AND ".join(conditions)) + + + + table_or_query = self._query_execute( + catalog=catalog, where=where, + get_query_payload=get_query_payload, + columns=columns, verbose=verbose, + maxrec=maxrec + ) + return table_or_query + def locate_data(self, query_result=None, catalog_name=None): """Get links to data products Use vo/datalinks to query the data products for some query_results. @@ -505,7 +650,8 @@ def locate_data(self, query_result=None, catalog_name=None): if '__row' not in query_result.colnames: raise ValueError('No __row column found in query_result. 
' 'query_result needs to be the output of ' - 'query_region or a subset.') + 'query_region or a subset. try adding ' + '__row to the requested columns') if catalog_name is None: catalog_name = self._last_catalog_name diff --git a/astroquery/heasarc/tests/test_heasarc.py b/astroquery/heasarc/tests/test_heasarc.py index a2f51b5c67..091a33313b 100644 --- a/astroquery/heasarc/tests/test_heasarc.py +++ b/astroquery/heasarc/tests/test_heasarc.py @@ -195,7 +195,10 @@ def test_spatial_invalid(spatial): def test_no_catalog(): with pytest.raises(InvalidQueryError): - Heasarc.query_region("m31", spatial="cone", columns="*") + # OBJ_LIST[0] and radius added to avoid a remote call + Heasarc.query_region( + OBJ_LIST[0], spatial="cone", columns="*", radius="2arcmin" + ) def test_tap_def(): diff --git a/docs/heasarc/heasarc.rst b/docs/heasarc/heasarc.rst index 0812bf8477..22a888b24e 100644 --- a/docs/heasarc/heasarc.rst +++ b/docs/heasarc/heasarc.rst @@ -111,8 +111,8 @@ catalogs. name description ---------- ------------------------------------------------------------- ascamaster ASCA Master Catalog + burcbmastr BurstCube Master Observation Catalog chanmaster Chandra Observations - cmbmaster LAMBDA Cosmic Microwave Background Experiments Master Catalog ... If you do not know the name of the catalog you are looking for, you can use the ``keywords`` @@ -191,10 +191,10 @@ with those results. >>> tab = tab[tab['exposure'] > 0] >>> links = Heasarc.locate_data(tab[:2]) >>> links['access_url'].pprint() - access_url + access_url --------------------------------------------------------------------- - https://heasarc.gsfc.nasa.gov/FTP/nicer/data/obs/2018_08//1100120101/ - https://heasarc.gsfc.nasa.gov/FTP/nicer/data/obs/2018_08//1100120102/ + https://heasarc.gsfc.nasa.gov/FTP/nicer/data/obs/2025_01//7100120102/ + https://heasarc.gsfc.nasa.gov/FTP/nicer/data/obs/2025_01//7100120101/ The ``links`` table has three relevant columns: ``access_url``, ``sciserver`` and ``aws``. 
The first gives the url to the data from the main heasarc server. The second gives From 9bec59ec4c0d46cce279b0fa61ce4f2488d38614 Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Fri, 5 Sep 2025 08:56:24 -0400 Subject: [PATCH 02/13] add unit tests and increase coverage --- astroquery/heasarc/core.py | 10 +- astroquery/heasarc/tests/test_heasarc.py | 196 ++++++++++++++++++++++- 2 files changed, 200 insertions(+), 6 deletions(-) diff --git a/astroquery/heasarc/core.py b/astroquery/heasarc/core.py index 8a0a14628e..d986b9ca58 100644 --- a/astroquery/heasarc/core.py +++ b/astroquery/heasarc/core.py @@ -325,6 +325,9 @@ def _query_execute(self, catalog=None, where=None, *, raise InvalidQueryError("catalog name is required! Use 'xray' " "to search the master X-ray catalog") + if where is None: + where = '' + # __row is needed for locate_data; we add it if not already present # and remove it afterwards only if the user requested specific # columns. keep_row tracks that. @@ -337,7 +340,7 @@ def _query_execute(self, catalog=None, where=None, *, columns = ', '.join(self._get_default_columns(catalog)) if '__row' not in columns and columns != '*': - columns += ',__row' + columns += ', __row' if where != '' and not where.startswith(' '): where = ' ' + where.strip() @@ -520,7 +523,7 @@ def query_object(self, object_name, mission, *, get_query_payload=get_query_payload) - def query_parameters(self, catalog, params, *, + def query_by_parameters(self, catalog, params, *, get_query_payload=False, columns=None, verbose=False, maxrec=None): """Query the HEASARC TAP server using a set of parameters. 
@@ -565,6 +568,9 @@ def query_parameters(self, catalog, params, *, """ + if not isinstance(params, dict): + raise ValueError('params must be a dictionary of key-value pairs') + conditions = [] for key, value in params.items(): if isinstance(value, tuple): diff --git a/astroquery/heasarc/tests/test_heasarc.py b/astroquery/heasarc/tests/test_heasarc.py index 091a33313b..2f491c9c70 100644 --- a/astroquery/heasarc/tests/test_heasarc.py +++ b/astroquery/heasarc/tests/test_heasarc.py @@ -53,7 +53,13 @@ def __init__(self, desc, cols=[]): 'name-2': vTable('description-2 chandra', cols), 'TAPname': None } + + def search(self, query, language='ADQL', maxrec=1000): + return MockResult() +class MockResult: + def to_table(self): + return Table({'value': ['1.5', '1.2', '-0.3']}) @pytest.fixture def mock_tap(): @@ -92,7 +98,7 @@ def test_query_region_cone(coordinates, radius, offset): radius=radius, columns="*", get_query_payload=True, - add_offset=True, + add_offset=offset, ) # We don't fully float compare in this string, there are slight @@ -168,6 +174,27 @@ def test_query_region_polygon(polygon): "10.1,10.1,10.0,10.1,10.0,10.0))=1" ) +def test_query_region_polygon_no_unit(): + # position is not used for polygon + poly = [ + (10.1, 10.1), + (10.0, 10.1), + (10.0, 10.0), + ] + with pytest.warns(UserWarning, match="Polygon endpoints are being interpreted as"): + query = Heasarc.query_region( + catalog="suzamaster", + spatial="polygon", + polygon=poly, + columns="*", + get_query_payload=True, + ) + + assert query == ( + "SELECT * FROM suzamaster " + "WHERE CONTAINS(POINT('ICRS',ra,dec),POLYGON('ICRS'," + "10.1,10.1,10.0,10.1,10.0,10.0))=1" + ) def test_query_allsky(): query1 = Heasarc.query_region( @@ -200,6 +227,164 @@ def test_no_catalog(): OBJ_LIST[0], spatial="cone", columns="*", radius="2arcmin" ) +def test_by_params_no_catalog(): + with pytest.raises(InvalidQueryError): + # OBJ_LIST[0] and radius added to avoid a remote call + Heasarc.query_by_parameters( + None, 
params={"flux": (1e-12, 1e-10)} + ) + + +def test__query_execute_no_catalog(): + with pytest.raises(InvalidQueryError): + # OBJ_LIST[0] and radius added to avoid a remote call + Heasarc._query_execute(None) + + +def test_by_params_none_params(): + with pytest.raises(ValueError): + Heasarc.query_by_parameters('testcatalog', params=None) + + +def test_by_params_no_params(): + query = Heasarc.query_by_parameters( + catalog="suzamaster", + params={}, + columns="*", + get_query_payload=True, + ) + assert query == "SELECT * FROM suzamaster" + + +def test_by_params_range(): + query = Heasarc.query_by_parameters( + catalog="suzamaster", + params={"flux": (1e-12, 1e-10)}, + columns="*", + get_query_payload=True, + ) + assert query == "SELECT * FROM suzamaster WHERE flux BETWEEN 1e-12 AND 1e-10" + + +def test_by_params_eq_float(): + query = Heasarc.query_by_parameters( + catalog="suzamaster", + params={"flux": 1.2}, + columns="*", + get_query_payload=True, + ) + assert query == "SELECT * FROM suzamaster WHERE flux = 1.2" + + +def test_by_params_eq_str(): + query = Heasarc.query_by_parameters( + catalog="suzamaster", + params={"flux": "1.2"}, + columns="*", + get_query_payload=True, + ) + assert query == "SELECT * FROM suzamaster WHERE flux = '1.2'" + + +def test_by_params_cmp_float(): + query = Heasarc.query_by_parameters( + catalog="suzamaster", + params={"flux": ('>', 1.2)}, + columns="*", + get_query_payload=True, + ) + assert query == "SELECT * FROM suzamaster WHERE flux > 1.2" + + +def test_by_params_cmp_float_2(): + query = Heasarc.query_by_parameters( + catalog="suzamaster", + params={"flux": ('>', 1.2), "magnitude": ('<=', 15)}, + columns="*", + get_query_payload=True, + ) + assert query == ("SELECT * FROM suzamaster WHERE flux > 1.2 " + "AND magnitude <= 15") + + +def test_by_params_list(): + query = Heasarc.query_by_parameters( + catalog="suzamaster", + params={"flux": [1.2, 2.3, 3.4]}, + columns="*", + get_query_payload=True, + ) + assert query == "SELECT * FROM 
suzamaster WHERE flux IN (1.2, 2.3, 3.4)" + + +def test__query_execute_none_where(): + query = Heasarc._query_execute( + catalog="suzamaster", + columns="*", + get_query_payload=True, + ) + assert query == ("SELECT * FROM suzamaster") + + +def test__query_execute_none_where(): + query = Heasarc._query_execute( + catalog="suzamaster", + where=" EXTRA", + columns="*", + get_query_payload=True, + ) + assert query == ("SELECT * FROM suzamaster EXTRA") + + +def test__query_execute_add_row(): + query1 = Heasarc._query_execute( + catalog="suzamaster", + where="", + columns="col1, col2", + get_query_payload=True, + ) + query2 = Heasarc._query_execute( + catalog="suzamaster", + where=None, + columns="col1, col2", + get_query_payload=True, + ) + assert query1 == query2 == ("SELECT col1, col2, __row FROM suzamaster") + +def test__query_execute_extra_space(): + query1 = Heasarc._query_execute( + catalog="suzamaster", + where="WHERE EXTRA", + columns="*", + get_query_payload=True, + ) + + query2 = Heasarc._query_execute( + catalog="suzamaster", + where=" WHERE EXTRA", + columns="*", + get_query_payload=True, + ) + assert query1 == query2 == ("SELECT * FROM suzamaster WHERE EXTRA") + +def test_query_execute_columns(mock_tap, mock_default_cols): + query = Heasarc._query_execute( + catalog="suzamaster", + where="WHERE EXTRA", + columns=None, + get_query_payload=True, + ) + assert query == ("SELECT col-3, col-2, __row FROM suzamaster WHERE EXTRA") + +def test_query_execute_columns(mock_tap, mock_default_cols): + res = Heasarc._query_execute( + catalog="suzamaster", + where="WHERE EXTRA", + columns='*' + ) + assert Heasarc._last_catalog_name == "suzamaster" + # reset last result to avoid interference with other tests + Heasarc._last_result = None def test_tap_def(): # Use a new HeasarcClass object @@ -210,9 +395,6 @@ def test_tap_def(): def test_meta_def(): - class MockResult: - def to_table(self): - return Table({'value': ['1.5', '1.2', '-0.3']}) # Use a new HeasarcClass object 
Heasarc = HeasarcClass() assert Heasarc._meta_info is None @@ -261,6 +443,12 @@ def test_list_catalogs_keywords_list_non_str(): Heasarc.list_catalogs(keywords=['x-ray', 12]) +def test__list_catalogs_keywords(mock_tap): + catalogs = Heasarc.list_catalogs(keywords=['xmm']) + assert list(catalogs['name']) == [ + lab for lab, desc in MockTap().tables.items() if 'TAP' not in lab and 'xmm' in desc.description.lower() + ] + def test__list_columns__missing_table(mock_tap): with pytest.raises(ValueError, match="not available as a public catalog"): Heasarc.list_columns(catalog_name='missing-table') From 7b19e0ca67826b064159f693e15e240227133170 Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Fri, 5 Sep 2025 09:01:15 -0400 Subject: [PATCH 03/13] add TAP limit if maxrec is high --- astroquery/heasarc/core.py | 4 ++++ astroquery/heasarc/tests/test_heasarc.py | 11 +++++++++++ 2 files changed, 15 insertions(+) diff --git a/astroquery/heasarc/core.py b/astroquery/heasarc/core.py index d986b9ca58..5614099f50 100644 --- a/astroquery/heasarc/core.py +++ b/astroquery/heasarc/core.py @@ -346,6 +346,10 @@ def _query_execute(self, catalog=None, where=None, *, where = ' ' + where.strip() adql = f'SELECT {columns} FROM {catalog}{where}' + # if maxrec is more than the server limit, we set a higher limit + if maxrec is not None and maxrec > 100000: + adql += f' LIMIT {maxrec*4}' + if get_query_payload: return adql response = self.query_tap(query=adql, maxrec=maxrec) diff --git a/astroquery/heasarc/tests/test_heasarc.py b/astroquery/heasarc/tests/test_heasarc.py index 2f491c9c70..eaaa7f6ad0 100644 --- a/astroquery/heasarc/tests/test_heasarc.py +++ b/astroquery/heasarc/tests/test_heasarc.py @@ -255,6 +255,17 @@ def test_by_params_no_params(): ) assert query == "SELECT * FROM suzamaster" +def test_by_params_limit(): + query = Heasarc.query_by_parameters( + catalog="suzamaster", + params={}, + columns="*", + get_query_payload=True, + maxrec=500000, + ) + assert query == "SELECT * FROM suzamaster 
LIMIT 2000000" + + def test_by_params_range(): query = Heasarc.query_by_parameters( From 2ff05de27c33565a91f36c6b90d5de3c2658d371 Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Fri, 5 Sep 2025 12:16:47 -0400 Subject: [PATCH 04/13] add uploads to query_tap that gets passed to pyvo --- astroquery/heasarc/core.py | 9 +++++++-- astroquery/heasarc/tests/test_heasarc.py | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/astroquery/heasarc/core.py b/astroquery/heasarc/core.py index 5614099f50..ad8826c41b 100644 --- a/astroquery/heasarc/core.py +++ b/astroquery/heasarc/core.py @@ -259,7 +259,7 @@ def query_mission_cols(self, mission, *, cache=True, cols = [col.upper() for col in cols['name'] if '__' not in col] return cols - def query_tap(self, query, *, maxrec=None): + def query_tap(self, query, *, maxrec=None, uploads=None): """ Send query to HEASARC's Xamin TAP using ADQL. Results in `~pyvo.dal.TAPResults` format. @@ -271,6 +271,10 @@ def query_tap(self, query, *, maxrec=None): ADQL query to be executed maxrec : int maximum number of records to return + uploads : dict + a mapping from table names used in the query to file like + objects containing a votable + (e.g. a file path or `~astropy.table.Table`). 
Returns ------- @@ -284,7 +288,8 @@ def query_tap(self, query, *, maxrec=None): """ log.debug(f'TAP query: {query}') self._saved_query = query - return self.tap.search(query, language='ADQL', maxrec=maxrec) + return self.tap.search( + query, language='ADQL', maxrec=maxrec, uploads=uploads) def _query_execute(self, catalog=None, where=None, *, get_query_payload=False, columns=None, diff --git a/astroquery/heasarc/tests/test_heasarc.py b/astroquery/heasarc/tests/test_heasarc.py index eaaa7f6ad0..a7bf434122 100644 --- a/astroquery/heasarc/tests/test_heasarc.py +++ b/astroquery/heasarc/tests/test_heasarc.py @@ -54,7 +54,7 @@ def __init__(self, desc, cols=[]): 'TAPname': None } - def search(self, query, language='ADQL', maxrec=1000): + def search(self, query, language='ADQL', maxrec=1000, uploads=None): return MockResult() class MockResult: From 0d1661cead14062b370f4f438f2aa98b0bade4cc Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Mon, 8 Sep 2025 11:08:14 -0400 Subject: [PATCH 05/13] renamed query_by_parameters to query_by_column; add docs --- astroquery/heasarc/core.py | 8 +-- astroquery/heasarc/tests/test_heasarc.py | 40 +++++------ docs/heasarc/heasarc.rst | 90 ++++++++++++++++++++++-- 3 files changed, 110 insertions(+), 28 deletions(-) diff --git a/astroquery/heasarc/core.py b/astroquery/heasarc/core.py index ad8826c41b..a2106ea2ce 100644 --- a/astroquery/heasarc/core.py +++ b/astroquery/heasarc/core.py @@ -532,10 +532,10 @@ def query_object(self, object_name, mission, *, get_query_payload=get_query_payload) - def query_by_parameters(self, catalog, params, *, + def query_by_column(self, catalog, params, *, get_query_payload=False, columns=None, verbose=False, maxrec=None): - """Query the HEASARC TAP server using a set of parameters. + """Query the HEASARC TAP server using constraints on the columns. 
This is a simple wrapper around `~astroquery.heasarc.HeasarcClass.query_tap` @@ -547,7 +547,7 @@ def query_by_parameters(self, catalog, params, *, The catalog to query. To list the available catalogs, use :meth:`~astroquery.heasarc.HeasarcClass.list_catalogs`. params : dict - A dictionary of parameters to include in the query. + A dictionary of column constraint parameters to include in the query. Each key-value pair will be translated into an ADQL condition. - For a range query, use a tuple of two values (min, max). e.g. `{'flux': (1e-12, 1e-10)}` translates to @@ -562,7 +562,7 @@ def query_by_parameters(self, catalog, params, *, e.g. `{'object_type': 'QSO'}` translates to `object_type = 'QSO'`. The keys should correspond to valid column names in the catalog. - Use `list_columns` to see available columns. + Use `list_columns` to see the available columns. get_query_payload : bool, optional If `True` then returns the generated ADQL query as str. Defaults to `False`. diff --git a/astroquery/heasarc/tests/test_heasarc.py b/astroquery/heasarc/tests/test_heasarc.py index a7bf434122..c5ea3a8c8a 100644 --- a/astroquery/heasarc/tests/test_heasarc.py +++ b/astroquery/heasarc/tests/test_heasarc.py @@ -227,10 +227,10 @@ def test_no_catalog(): OBJ_LIST[0], spatial="cone", columns="*", radius="2arcmin" ) -def test_by_params_no_catalog(): +def test_by_columns_no_catalog(): with pytest.raises(InvalidQueryError): # OBJ_LIST[0] and radius added to avoid a remote call - Heasarc.query_by_parameters( + Heasarc.query_by_column( None, params={"flux": (1e-12, 1e-10)} ) @@ -241,13 +241,13 @@ def test__query_execute_no_catalog(): Heasarc._query_execute(None) -def test_by_params_none_params(): +def test_by_columns_none_params(): with pytest.raises(ValueError): - Heasarc.query_by_parameters('testcatalog', params=None) + Heasarc.query_by_column('testcatalog', params=None) -def test_by_params_no_params(): - query = Heasarc.query_by_parameters( +def test_by_columns_no_params(): + query = 
Heasarc.query_by_column( catalog="suzamaster", params={}, columns="*", @@ -255,8 +255,8 @@ def test_by_params_no_params(): ) assert query == "SELECT * FROM suzamaster" -def test_by_params_limit(): - query = Heasarc.query_by_parameters( +def test_by_columns_limit(): + query = Heasarc.query_by_column( catalog="suzamaster", params={}, columns="*", @@ -267,8 +267,8 @@ def test_by_params_limit(): -def test_by_params_range(): - query = Heasarc.query_by_parameters( +def test_by_columns_range(): + query = Heasarc.query_by_column( catalog="suzamaster", params={"flux": (1e-12, 1e-10)}, columns="*", @@ -277,8 +277,8 @@ def test_by_params_range(): assert query == "SELECT * FROM suzamaster WHERE flux BETWEEN 1e-12 AND 1e-10" -def test_by_params_eq_float(): - query = Heasarc.query_by_parameters( +def test_by_columns_eq_float(): + query = Heasarc.query_by_column( catalog="suzamaster", params={"flux": 1.2}, columns="*", @@ -287,8 +287,8 @@ def test_by_params_eq_float(): assert query == "SELECT * FROM suzamaster WHERE flux = 1.2" -def test_by_params_eq_str(): - query = Heasarc.query_by_parameters( +def test_by_columns_eq_str(): + query = Heasarc.query_by_column( catalog="suzamaster", params={"flux": "1.2"}, columns="*", @@ -297,8 +297,8 @@ def test_by_params_eq_str(): assert query == "SELECT * FROM suzamaster WHERE flux = '1.2'" -def test_by_params_cmp_float(): - query = Heasarc.query_by_parameters( +def test_by_columns_cmp_float(): + query = Heasarc.query_by_column( catalog="suzamaster", params={"flux": ('>', 1.2)}, columns="*", @@ -307,8 +307,8 @@ def test_by_params_cmp_float(): assert query == "SELECT * FROM suzamaster WHERE flux > 1.2" -def test_by_params_cmp_float_2(): - query = Heasarc.query_by_parameters( +def test_by_columns_cmp_float_2(): + query = Heasarc.query_by_column( catalog="suzamaster", params={"flux": ('>', 1.2), "magnitude": ('<=', 15)}, columns="*", @@ -318,8 +318,8 @@ def test_by_params_cmp_float_2(): "AND magnitude <= 15") -def test_by_params_list(): - query = 
Heasarc.query_by_parameters( +def test_by_columns_list(): + query = Heasarc.query_by_column( catalog="suzamaster", params={"flux": [1.2, 2.3, 3.4]}, columns="*", diff --git a/docs/heasarc/heasarc.rst b/docs/heasarc/heasarc.rst index 22a888b24e..b1b21bcd1e 100644 --- a/docs/heasarc/heasarc.rst +++ b/docs/heasarc/heasarc.rst @@ -26,7 +26,7 @@ Query a Catalog The basic use case is one where we want to query a catalog from some position in the sky. In this example, we query the NuSTAR master catalog ``numaster`` for all observations of the AGN ``NGC 3783``. We use `~astropy.coordinates.SkyCoord` to obtain the coordinates -and then pass them to `~astroquery.heasarc.HeasarcClass.query_region`. In following, we +and then pass them to `~astroquery.heasarc.HeasarcClass.query_region`. In the following, we also select only columns with ``time > 0``. Zero values are typically used for observations that have been approved but not observed. @@ -98,8 +98,11 @@ If you want all the columns returned, use ``columns='*'`` List Available Catalogs ----------------------- The collection of available catalogs can be obtained by calling the `~astroquery.heasarc.HeasarcClass.list_catalogs` -method. In this example, we query the master catalogs only by passing ``master=True``. -which is ``False`` by default (i.e. return all catalogs). `~astroquery.heasarc.HeasarcClass.list_catalogs` returns an +method. In this example, we request the master catalogs only by passing ``master=True``. +Master catalogs are catalogs that contain one entry per observation, as opposed to +other catalogs that may record other information. There is typically one master catalog +per mission. The ``master`` parameter is a boolean flag, which is ``False`` by default +(i.e. return all catalogs). `~astroquery.heasarc.HeasarcClass.list_catalogs` returns an `~astropy.table.Table` with two columns containing the names and description of the available catalogs. 
@@ -138,7 +141,7 @@ are related to Chandra, you can do: cargm31cxo Carina Nebula Gum 31 Chandra X-Ray Point Source Catalog carinaclas Carina Nebula Chandra X-Ray Point Source Classes -If you are interested only finding the master catalogs, you can also set ``master`` to ``True``. +If you are interested in finding the master catalogs only, you can set ``master`` to ``True``. .. doctest-remote-data:: @@ -177,6 +180,47 @@ following for instance will find master catalogs that have keywords 'nicer' or ' nicermastr NICER Master Catalog swiftmastr Swift Master Catalog + +Other non-region queries +---------------------------------------- +In addition to `~astroquery.heasarc.HeasarcClass.query_region`, `~astroquery.heasarc.HeasarcClass.query_by_column` +is also available. This method allows you to query a catalog by specifying +various column constraints. For example, the following query searches the ``chanmaster`` +catalog for all observations with exposure time greater than 190 ks. + +.. doctest-remote-data:: + + >>> from astroquery.heasarc import Heasarc + >>> tab = Heasarc.query_by_column( + ... catalog='chanmaster', params={'exposure': ('>', '190000')} + ... ) + >>> tab['name', 'obsid', 'ra', 'dec', 'exposure'][:3].pprint() + name obsid ra dec exposure + deg deg s + --------------- ----- --------- --------- -------- + Sgr A* 13842 266.41667 -29.00781 191760 + IGR J17480-2446 30481 267.02013 -24.78024 200000 + IGR J17480-2446 31425 267.02013 -24.78024 200000 + +Another example may be to search the ``xmmmaster`` for a observation in some time range: + +.. doctest-remote-data:: + + >>> from astroquery.heasarc import Heasarc + >>> tab = Heasarc.query_by_column( + ... catalog='xmmmaster', params={'time': (52300, 52310)} + ... 
) + >>> tab['name', 'obsid', 'ra', 'dec', 'time', 'duration'][:3].pprint() + name obsid ra dec time duration + deg deg d s + ------------- ---------- -------- --------- ---------------- -------- + NGC 1316 0091770101 50.95833 -37.28333 52308.6872337963 60362 + NGC 1316 0091770201 50.67296 -37.20928 52308.642974537 3462 + Fei 16 offset 0154150101 28.64374 -6.86667 52305.2210416667 24619 + +To see the available columns that can be queried for a given catalog and their units, +use `~astroquery.heasarc.HeasarcClass.list_columns` (see below). + Links to Data Products ---------------------- Once the query result is obtained, you can query any data products associated @@ -250,6 +294,44 @@ returns the constructed ADQL query. 121.92084 39.00417 UGC4229 0138951401 121.92099 39.00422 MRK 622 0852180501 +Table Uploads +----------------- +You can also upload a table of positions to be queried. The table can be an +`~astropy.table.Table` or a path to a file in VOtable format. The following example +shows how to use the upload feature to do a cross-match between the +``chanmaster`` catalog and a list of known source positions: + +.. doctest-remote-data:: + + >>> from astroquery.heasarc import Heasarc + >>> from astropy.table import Table + >>> sample = Table({ + ... 'ra': [1.58, 188.90], + ... 'dec': [20.20, -39.90] + ... }) + >>> query = """ + ... SELECT cat.name, cat.ra, cat.dec, cat.obsid + ... FROM chanmaster cat, tap_upload.mytable mt + ... WHERE 1=CONTAINS(POINT('ICRS', mt.ra, mt.dec), CIRCLE('ICRS',cat.ra, cat.dec, 0.1)) + ... 
""" + >>> result = Heasarc.query_tap(query, uploads={'mytable': sample}).to_table() + >>> result.pprint() + name ra dec obsid + deg deg + ----------- --------- --------- ----- + NGC 4507 188.90250 -39.90928 12292 + NGC 4507 188.90208 -39.90925 2150 + HR4796 189.00417 -39.86950 7414 + KUG0003+199 1.58134 20.20291 23709 + Mrk 335 1.58142 20.20295 23292 + Mrk 335 1.58142 20.20295 23297 + Mrk 335 1.58142 20.20295 23298 + Mrk 335 1.58142 20.20295 23299 + Mrk 335 1.58142 20.20295 23300 + Mrk 335 1.58142 20.20295 23301 + Mrk 335 1.58142 20.20295 23302 + + Complex Regions --------------- In addition to a cone search (some position and search radius), ```Heasarc.query_region``` accepts From 6a980cb5c31eb85b34690972cec9b47de5dc869b Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Mon, 8 Sep 2025 11:29:08 -0400 Subject: [PATCH 06/13] fix the maxrec fix --- astroquery/heasarc/core.py | 2 +- astroquery/heasarc/tests/test_heasarc.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/astroquery/heasarc/core.py b/astroquery/heasarc/core.py index a2106ea2ce..22891d827a 100644 --- a/astroquery/heasarc/core.py +++ b/astroquery/heasarc/core.py @@ -353,7 +353,7 @@ def _query_execute(self, catalog=None, where=None, *, # if maxrec is more than the server limit, we set a higher limit if maxrec is not None and maxrec > 100000: - adql += f' LIMIT {maxrec*4}' + adql = adql.replace('SELECT ', f'SELECT TOP {maxrec*4} ') if get_query_payload: return adql diff --git a/astroquery/heasarc/tests/test_heasarc.py b/astroquery/heasarc/tests/test_heasarc.py index c5ea3a8c8a..51714d6731 100644 --- a/astroquery/heasarc/tests/test_heasarc.py +++ b/astroquery/heasarc/tests/test_heasarc.py @@ -263,7 +263,7 @@ def test_by_columns_limit(): get_query_payload=True, maxrec=500000, ) - assert query == "SELECT * FROM suzamaster LIMIT 2000000" + assert query == "SELECT TOP 2000000 * FROM suzamaster" From 728bdaad8f50a5aaf29819572f23ffe0b6b35ab8 Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Mon, 
8 Sep 2025 11:35:11 -0400 Subject: [PATCH 07/13] update changelog --- CHANGES.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 0aac6fa3a8..d88c774519 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -36,6 +36,13 @@ mast Service fixes and enhancements ------------------------------ +heasarc +^^^^^^^ + +- Add ``query_by_column`` to allow querying of different catalog columns.The user passes a dict that is parsed into a TAP WHERE statements. +- Add support for uploading tables when using TAP directly through ``query_tap``. +- Improve how maxrec works. If it is bigger than the default server limit, add a TOP statement. + alma ^^^^ From e7d9cfd94b870403b5994674d4f30c271c856b6e Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Mon, 8 Sep 2025 11:44:22 -0400 Subject: [PATCH 08/13] fix styles --- astroquery/heasarc/core.py | 41 +++++++++++------------- astroquery/heasarc/tests/test_heasarc.py | 32 +++++++++++------- 2 files changed, 39 insertions(+), 34 deletions(-) diff --git a/astroquery/heasarc/core.py b/astroquery/heasarc/core.py index 22891d827a..f6d0e1b942 100644 --- a/astroquery/heasarc/core.py +++ b/astroquery/heasarc/core.py @@ -292,8 +292,8 @@ def query_tap(self, query, *, maxrec=None, uploads=None): query, language='ADQL', maxrec=maxrec, uploads=uploads) def _query_execute(self, catalog=None, where=None, *, - get_query_payload=False, columns=None, - verbose=False, maxrec=None): + get_query_payload=False, columns=None, + verbose=False, maxrec=None): """Queries some catalog using the HEASARC TAP server based on the where condition and returns an `~astropy.table.Table`. @@ -334,16 +334,16 @@ def _query_execute(self, catalog=None, where=None, *, where = '' # __row is needed for locate_data; we add it if not already present - # and remove it afterwards only if the user requested specific + # and remove it afterwards only if the user requested specific # columns. keep_row tracks that. 
keep_row = ( - columns in (None, '*') or - isinstance(columns, str) and '__row' in columns + columns in (None, '*') + or isinstance(columns, str) and '__row' in columns ) if columns is None: columns = ', '.join(self._get_default_columns(catalog)) - + if '__row' not in columns and columns != '*': columns += ', __row' @@ -484,7 +484,7 @@ def query_region(self, position=None, catalog=None, radius=None, *, else: raise ValueError("Unrecognized spatial query type. Must be one" " of 'cone', 'box', 'polygon', or 'all-sky'.") - + table_or_query = self._query_execute( catalog=catalog, where=where, get_query_payload=get_query_payload, @@ -531,16 +531,15 @@ def query_object(self, object_name, mission, *, return self.query_region(pos, catalog=mission, spatial='cone', get_query_payload=get_query_payload) - def query_by_column(self, catalog, params, *, - get_query_payload=False, columns=None, - verbose=False, maxrec=None): + get_query_payload=False, columns=None, + verbose=False, maxrec=None): """Query the HEASARC TAP server using a constraints on the columns. - - This is a simple wrapper around + + This is a simple wrapper around `~astroquery.heasarc.HeasarcClass.query_tap` that constructs an ADQL query from a dictionary of parameters. - + Parameters ---------- catalog : str @@ -550,16 +549,16 @@ def query_by_column(self, catalog, params, *, A dictionary of column constraint parameters to include in the query. Each key-value pair will be translated into an ADQL condition. - For a range query, use a tuple of two values (min, max). - e.g. `{'flux': (1e-12, 1e-10)}` translates to + e.g. `{'flux': (1e-12, 1e-10)}` translates to `flux BETWEEN 1e-12 AND 1e-10`. - For list values, use a list of values. - e.g. `{'object_type': ['QSO', 'GALAXY']}` translates to + e.g. `{'object_type': ['QSO', 'GALAXY']}` translates to `object_type IN ('QSO', 'GALAXY')`. - For comparison queries, use a tuple of (operator, value), where operator is one of '=', '!=', '<', '>', '<=', '>='. e.g. 
`{'magnitude': ('<', 15)}` translates to `magnitude < 15`. - For exact matches, use a single value (str, int, float). - e.g. `{'object_type': 'QSO'}` translates to + e.g. `{'object_type': 'QSO'}` translates to `object_type = 'QSO'`. The keys should correspond to valid column names in the catalog. Use `list_columns` to see the available columns. @@ -584,15 +583,15 @@ def query_by_column(self, catalog, params, *, for key, value in params.items(): if isinstance(value, tuple): if ( - len(value) == 2 and - all(isinstance(v, (int, float)) for v in value) + len(value) == 2 + and all(isinstance(v, (int, float)) for v in value) ): conditions.append( f"{key} BETWEEN {value[0]} AND {value[1]}" ) elif ( - len(value) == 2 and - value[0] in (">", "<", ">=", "<=") + len(value) == 2 + and value[0] in (">", "<", ">=", "<=") ): conditions.append(f"{key} {value[0]} {value[1]}") elif isinstance(value, list): @@ -613,8 +612,6 @@ def query_by_column(self, catalog, params, *, where = "" else: where = "WHERE " + (" AND ".join(conditions)) - - table_or_query = self._query_execute( catalog=catalog, where=where, diff --git a/astroquery/heasarc/tests/test_heasarc.py b/astroquery/heasarc/tests/test_heasarc.py index 51714d6731..aaec5ec390 100644 --- a/astroquery/heasarc/tests/test_heasarc.py +++ b/astroquery/heasarc/tests/test_heasarc.py @@ -53,14 +53,16 @@ def __init__(self, desc, cols=[]): 'name-2': vTable('description-2 chandra', cols), 'TAPname': None } - + def search(self, query, language='ADQL', maxrec=1000, uploads=None): return MockResult() + class MockResult: def to_table(self): return Table({'value': ['1.5', '1.2', '-0.3']}) + @pytest.fixture def mock_tap(): with patch('astroquery.heasarc.core.HeasarcClass.tap', new_callable=PropertyMock) as tap: @@ -174,6 +176,7 @@ def test_query_region_polygon(polygon): "10.1,10.1,10.0,10.1,10.0,10.0))=1" ) + def test_query_region_polygon_no_unit(): # position is not used for polygon poly = [ @@ -187,8 +190,8 @@ def 
test_query_region_polygon_no_unit(): spatial="polygon", polygon=poly, columns="*", - get_query_payload=True, - ) + get_query_payload=True, + ) assert query == ( "SELECT * FROM suzamaster " @@ -196,6 +199,7 @@ def test_query_region_polygon_no_unit(): "10.1,10.1,10.0,10.1,10.0,10.0))=1" ) + def test_query_allsky(): query1 = Heasarc.query_region( catalog="suzamaster", spatial="all-sky", columns="*", @@ -224,15 +228,14 @@ def test_no_catalog(): with pytest.raises(InvalidQueryError): # OBJ_LIST[0] and radius added to avoid a remote call Heasarc.query_region( - OBJ_LIST[0], spatial="cone", columns="*", radius="2arcmin" - ) + OBJ_LIST[0], spatial="cone", columns="*", radius="2arcmin") + def test_by_columns_no_catalog(): with pytest.raises(InvalidQueryError): # OBJ_LIST[0] and radius added to avoid a remote call Heasarc.query_by_column( - None, params={"flux": (1e-12, 1e-10)} - ) + None, params={"flux": (1e-12, 1e-10)}) def test__query_execute_no_catalog(): @@ -255,6 +258,7 @@ def test_by_columns_no_params(): ) assert query == "SELECT * FROM suzamaster" + def test_by_columns_limit(): query = Heasarc.query_by_column( catalog="suzamaster", @@ -266,7 +270,6 @@ def test_by_columns_limit(): assert query == "SELECT TOP 2000000 * FROM suzamaster" - def test_by_columns_range(): query = Heasarc.query_by_column( catalog="suzamaster", @@ -337,7 +340,7 @@ def test__query_execute_none_where(): assert query == ("SELECT * FROM suzamaster") -def test__query_execute_none_where(): +def test__query_execute_extra_where(): query = Heasarc._query_execute( catalog="suzamaster", where=" EXTRA", @@ -362,6 +365,7 @@ def test__query_execute_add_row(): ) assert query1 == query2 == ("SELECT col1, col2, __row FROM suzamaster") + def test__query_execute_extra_space(): query1 = Heasarc._query_execute( catalog="suzamaster", @@ -378,7 +382,8 @@ def test__query_execute_extra_space(): ) assert query1 == query2 == ("SELECT * FROM suzamaster WHERE EXTRA") -def test_query_execute_columns(mock_tap, 
mock_default_cols): + +def test_query_execute_columns1(mock_tap, mock_default_cols): query = Heasarc._query_execute( catalog="suzamaster", where="WHERE EXTRA", @@ -387,8 +392,9 @@ def test_query_execute_columns(mock_tap, mock_default_cols): ) assert query == ("SELECT col-3, col-2, __row FROM suzamaster WHERE EXTRA") -def test_query_execute_columns(mock_tap, mock_default_cols): - res = Heasarc._query_execute( + +def test_query_execute_columns2(mock_tap, mock_default_cols): + _ = Heasarc._query_execute( catalog="suzamaster", where="WHERE EXTRA", columns='*' @@ -397,6 +403,7 @@ def test_query_execute_columns(mock_tap, mock_default_cols): # reset last result to avoid interference with other tests Heasarc._last_result = None + def test_tap_def(): # Use a new HeasarcClass object Heasarc = HeasarcClass() @@ -460,6 +467,7 @@ def test__list_catalogs_keywords(mock_tap): lab for lab, desc in MockTap().tables.items() if 'TAP' not in lab and 'xmm' in desc.description.lower() ] + def test__list_columns__missing_table(mock_tap): with pytest.raises(ValueError, match="not available as a public catalog"): Heasarc.list_columns(catalog_name='missing-table') From 46371101de9df2fd2cc286786815ed66ac3f071f Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Mon, 8 Sep 2025 12:01:04 -0400 Subject: [PATCH 09/13] fix changelog --- CHANGES.rst | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index d88c774519..3328fdccf1 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -38,10 +38,9 @@ Service fixes and enhancements heasarc ^^^^^^^ - -- Add ``query_by_column`` to allow querying of different catalog columns.The user passes a dict that is parsed into a TAP WHERE statements. -- Add support for uploading tables when using TAP directly through ``query_tap``. -- Improve how maxrec works. If it is bigger than the default server limit, add a TOP statement. +- Add ``query_by_column`` to allow querying of different catalog columns. 
[#3403] +- Add support for uploading tables when using TAP directly through ``query_tap``. [#3403] +- Improve how maxrec works. If it is bigger than the default server limit, add a TOP statement. [#3403] alma ^^^^ From 36a7369cbb8da05b6320f5497387840f50bd63d2 Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Mon, 8 Sep 2025 12:08:03 -0400 Subject: [PATCH 10/13] fix docstring --- astroquery/heasarc/core.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/astroquery/heasarc/core.py b/astroquery/heasarc/core.py index f6d0e1b942..d049898954 100644 --- a/astroquery/heasarc/core.py +++ b/astroquery/heasarc/core.py @@ -549,17 +549,17 @@ def query_by_column(self, catalog, params, *, A dictionary of column constraint parameters to include in the query. Each key-value pair will be translated into an ADQL condition. - For a range query, use a tuple of two values (min, max). - e.g. `{'flux': (1e-12, 1e-10)}` translates to - `flux BETWEEN 1e-12 AND 1e-10`. + e.g. ``{'flux': (1e-12, 1e-10)}`` translates to + ``flux BETWEEN 1e-12 AND 1e-10``. - For list values, use a list of values. - e.g. `{'object_type': ['QSO', 'GALAXY']}` translates to - `object_type IN ('QSO', 'GALAXY')`. + e.g. ``{'object_type': ['QSO', 'GALAXY']}`` translates to + ``object_type IN ('QSO', 'GALAXY')``. - For comparison queries, use a tuple of (operator, value), where operator is one of '=', '!=', '<', '>', '<=', '>='. - e.g. `{'magnitude': ('<', 15)}` translates to `magnitude < 15`. + e.g. ``{'magnitude': ('<', 15)}`` translates to ``magnitude < 15``. - For exact matches, use a single value (str, int, float). - e.g. `{'object_type': 'QSO'}` translates to - `object_type = 'QSO'`. + e.g. ``{'object_type': 'QSO'}`` translates to + ``object_type = 'QSO'``. The keys should correspond to valid column names in the catalog. Use `list_columns` to see the available columns. 
get_query_payload : bool, optional From 61f78c2900727322d662b4e649320d8e6d2055b6 Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Mon, 8 Sep 2025 12:12:36 -0400 Subject: [PATCH 11/13] another attempt to fix docstring --- astroquery/heasarc/core.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/astroquery/heasarc/core.py b/astroquery/heasarc/core.py index d049898954..b691673d49 100644 --- a/astroquery/heasarc/core.py +++ b/astroquery/heasarc/core.py @@ -549,17 +549,17 @@ def query_by_column(self, catalog, params, *, A dictionary of column constraint parameters to include in the query. Each key-value pair will be translated into an ADQL condition. - For a range query, use a tuple of two values (min, max). - e.g. ``{'flux': (1e-12, 1e-10)}`` translates to - ``flux BETWEEN 1e-12 AND 1e-10``. + e.g. ``{'flux': (1e-12, 1e-10)}`` translates to + ``flux BETWEEN 1e-12 AND 1e-10``. - For list values, use a list of values. - e.g. ``{'object_type': ['QSO', 'GALAXY']}`` translates to - ``object_type IN ('QSO', 'GALAXY')``. + e.g. ``{'object_type': ['QSO', 'GALAXY']}`` translates to + ``object_type IN ('QSO', 'GALAXY')``. - For comparison queries, use a tuple of (operator, value), - where operator is one of '=', '!=', '<', '>', '<=', '>='. - e.g. ``{'magnitude': ('<', 15)}`` translates to ``magnitude < 15``. + where operator is one of '=', '!=', '<', '>', '<=', '>='. + e.g. ``{'magnitude': ('<', 15)}`` translates to ``magnitude < 15``. - For exact matches, use a single value (str, int, float). - e.g. ``{'object_type': 'QSO'}`` translates to - ``object_type = 'QSO'``. + e.g. ``{'object_type': 'QSO'}`` translates to + ``object_type = 'QSO'``. The keys should correspond to valid column names in the catalog. Use `list_columns` to see the available columns. 
get_query_payload : bool, optional From 660687379de9fbd993c2312527fed9d3d5e127dd Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Mon, 8 Sep 2025 17:48:31 -0400 Subject: [PATCH 12/13] add automatic host guess in download_data --- CHANGES.rst | 1 + astroquery/heasarc/core.py | 34 ++++++++++++++++++++++-- astroquery/heasarc/tests/test_heasarc.py | 23 ++++++++++++++++ docs/heasarc/heasarc.rst | 1 + 4 files changed, 57 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 3328fdccf1..3f9abcbd7f 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -41,6 +41,7 @@ heasarc - Add ``query_by_column`` to allow querying of different catalog columns. [#3403] - Add support for uploading tables when using TAP directly through ``query_tap``. [#3403] - Improve how maxrec works. If it is bigger than the default server limit, add a TOP statement. [#3403] +- Add automatic guessing for the data host in ``download_data``. [#3403] alma ^^^^ diff --git a/astroquery/heasarc/core.py b/astroquery/heasarc/core.py index b691673d49..82c1ca9d61 100644 --- a/astroquery/heasarc/core.py +++ b/astroquery/heasarc/core.py @@ -749,6 +749,36 @@ def enable_cloud(self, provider='aws', profile=None): self.s3_client = self.s3_resource.meta.client + def _guess_host(self, host): + """Guess the host to use for downloading data + + Parameters + ---------- + host : str + The host provided by the user + + Returns + ------- + host : str + The guessed host + + """ + if host in ['heasarc', 'sciserver', 'aws']: + return host + elif host is not None: + raise ValueError( + 'host has to be one of heasarc, sciserver, aws or None') + + # host is None, so we guess + if os.environ['HOME'] == '/home/idies' and os.path.exists('/FTP/'): + # we are on idies, so we can use sciserver + return 'sciserver' + + for var in ['AWS_REGION', 'AWS_DEFAULT_REGION', 'AWS_ROLE_ARN']: + if var in os.environ: + return 'aws' + return 'heasarc' + def download_data(self, links, host='heasarc', location='.'): """Download data 
products in links with a choice of getting the data from either the heasarc server, sciserver, or the cloud in AWS. @@ -780,8 +810,8 @@ def download_data(self, links, host='heasarc', location='.'): if isinstance(links, Row): links = links.table[[links.index]] - if host not in ['heasarc', 'sciserver', 'aws']: - raise ValueError('host has to be one of heasarc, sciserver, aws') + # guess the host if not provided + host = self._guess_host(host) host_column = 'access_url' if host == 'heasarc' else host if host_column not in links.colnames: diff --git a/astroquery/heasarc/tests/test_heasarc.py b/astroquery/heasarc/tests/test_heasarc.py index aaec5ec390..e0eba65438 100644 --- a/astroquery/heasarc/tests/test_heasarc.py +++ b/astroquery/heasarc/tests/test_heasarc.py @@ -506,6 +506,29 @@ def test_locate_data_row(): Heasarc.locate_data(table[0:2], catalog_name="xray") +def test__guess_host_default(): + # Use a new HeasarcClass object + assert Heasarc._guess_host(host=None) == 'heasarc' + + +@pytest.mark.parametrize("host", ["heasarc", "sciserver", "aws"]) +def test__guess_host_know(host): + # Use a new HeasarcClass object + assert Heasarc._guess_host(host=host) == host + + +def test__guess_host_sciserver(monkeypatch): + monkeypatch.setenv("HOME", "/home/idies") + monkeypatch.setattr("os.path.exists", lambda path: path.startswith('/FTP')) + assert Heasarc._guess_host(host=None) == 'sciserver' + + +@pytest.mark.parametrize("var", ["AWS_REGION", "AWS_REGION_DEFAULT", "AWS_ROLE_ARN"]) +def test__guess_host_aws(monkeypatch, var): + monkeypatch.setenv("AWS_REGION", var) + assert Heasarc._guess_host(host=None) == 'aws' + + def test_download_data__empty(): with pytest.raises(ValueError, match="Input links table is empty"): Heasarc.download_data(Table()) diff --git a/docs/heasarc/heasarc.rst b/docs/heasarc/heasarc.rst index b1b21bcd1e..773cf1d411 100644 --- a/docs/heasarc/heasarc.rst +++ b/docs/heasarc/heasarc.rst @@ -247,6 +247,7 @@ You can specify where the data are to be downloaded 
using the ``location`` param To download the data, you can pass ``links`` table (or row) to `~astroquery.heasarc.HeasarcClass.download_data`, specifying from where you want the data to be fetched by specifying the ``host`` parameter. By default, +the function will try to guess the best host based on your environment. If it cannot guess, then the data is fetched from the main HEASARC servers. The recommendation is to use different hosts depending on where your code is running: * ``host='sciserver'``: Use this option if you running you analysis on Sciserver. Because From a3b86e877d6586ededf10669eb8eb6c29ebca260 Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Tue, 9 Sep 2025 10:18:35 -0400 Subject: [PATCH 13/13] fix _guess_host in windows --- astroquery/heasarc/core.py | 6 +++++- docs/heasarc/heasarc.rst | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/astroquery/heasarc/core.py b/astroquery/heasarc/core.py index 82c1ca9d61..4583918341 100644 --- a/astroquery/heasarc/core.py +++ b/astroquery/heasarc/core.py @@ -770,7 +770,11 @@ def _guess_host(self, host): 'host has to be one of heasarc, sciserver, aws or None') # host is None, so we guess - if os.environ['HOME'] == '/home/idies' and os.path.exists('/FTP/'): + if ( + 'HOME' in os.environ + and os.environ['HOME'] == '/home/idies' + and os.path.exists('/FTP/') + ): # we are on idies, so we can use sciserver return 'sciserver' diff --git a/docs/heasarc/heasarc.rst b/docs/heasarc/heasarc.rst index 773cf1d411..c0eb069bcc 100644 --- a/docs/heasarc/heasarc.rst +++ b/docs/heasarc/heasarc.rst @@ -198,9 +198,9 @@ catalog for all observations with exposure time greater than 190 ks. 
name obsid ra dec exposure deg deg s --------------- ----- --------- --------- -------- - Sgr A* 13842 266.41667 -29.00781 191760 + GW Transient 29852 -- -- 300000 + Sgr A* 13842 266.41667 -29.00781 191760 IGR J17480-2446 30481 267.02013 -24.78024 200000 - IGR J17480-2446 31425 267.02013 -24.78024 200000 Another example may be to search the ``xmmmaster`` for a observation in some time range: