Skip to content
Open
7 changes: 7 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,13 @@ mast
Service fixes and enhancements
------------------------------

heasarc
^^^^^^^
- Add ``query_by_column`` to allow querying of different catalog columns. [#3403]
- Add support for uploading tables when using TAP directly through ``query_tap``. [#3403]
- Improve ``maxrec`` handling: when it exceeds the default server limit, a ``TOP`` clause is added to the query. [#3403]
- Add automatic guessing for the data host in ``download_data``. [#3403]

alma
^^^^

Expand Down
254 changes: 223 additions & 31 deletions astroquery/heasarc/core.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@

import os

import shutil
import requests
import tarfile
Expand Down Expand Up @@ -261,7 +259,7 @@ def query_mission_cols(self, mission, *, cache=True,
cols = [col.upper() for col in cols['name'] if '__' not in col]
return cols

def query_tap(self, query, *, maxrec=None):
def query_tap(self, query, *, maxrec=None, uploads=None):
"""
Send query to HEASARC's Xamin TAP using ADQL.
Results in `~pyvo.dal.TAPResults` format.
Expand All @@ -273,6 +271,10 @@ def query_tap(self, query, *, maxrec=None):
ADQL query to be executed
maxrec : int
maximum number of records to return
uploads : dict
a mapping from table names used in the query to file like
objects containing a votable
(e.g. a file path or `~astropy.table.Table`).

Returns
-------
Expand All @@ -286,7 +288,85 @@ def query_tap(self, query, *, maxrec=None):
"""
log.debug(f'TAP query: {query}')
self._saved_query = query
return self.tap.search(query, language='ADQL', maxrec=maxrec)
return self.tap.search(
query, language='ADQL', maxrec=maxrec, uploads=uploads)

def _query_execute(self, catalog=None, where=None, *,
                   get_query_payload=False, columns=None,
                   verbose=False, maxrec=None):
    """Build an ADQL ``SELECT`` for a catalog and run it through TAP.

    The statement has the form ``SELECT <columns> FROM <catalog> <where>``
    and is sent with `query_tap`; the response is converted to an
    `~astropy.table.Table`.

    Parameters
    ----------
    catalog : str
        The catalog to query. To list the available catalogs, use
        :meth:`~astroquery.heasarc.HeasarcClass.list_catalogs`.
    where : str
        The WHERE condition to be used in the query. It must
        include the 'WHERE' keyword or be empty. `None` is treated as
        an empty condition.
    get_query_payload : bool, optional
        If `True` then returns the generated ADQL query as str
        without contacting the server. Defaults to `False`.
    columns : str, optional
        Target column list with value separated by a comma(,).
        Use * for all the columns. The default is to return a subset
        of the columns that are generally the most useful.
    verbose : bool, optional
        If False, suppress vo warnings.
    maxrec : int, optional
        Maximum number of records

    Returns
    -------
    table : A `~astropy.table.Table` object, or the ADQL query string
        when ``get_query_payload`` is `True`.

    Raises
    ------
    InvalidQueryError
        If ``catalog`` is `None`.
    """
    # if verbose is False then suppress any VOTable related warnings
    if not verbose:
        commons.suppress_vo_warnings()

    if catalog is None:
        raise InvalidQueryError("catalog name is required! Use 'xray' "
                                "to search the master X-ray catalog")

    if where is None:
        where = ''

    # __row is needed for locate_data; we add it if not already present
    # and remove it afterwards only if the user requested specific
    # columns. keep_row tracks that.
    keep_row = (
        columns in (None, '*')
        or isinstance(columns, str) and '__row' in columns
    )

    if columns is None:
        columns = ', '.join(self._get_default_columns(catalog))

    if '__row' not in columns and columns != '*':
        columns += ', __row'

    # make sure exactly one separator precedes a non-empty condition
    if where != '' and not where.startswith(' '):
        where = ' ' + where.strip()

    # if maxrec is more than the server limit, we set a higher limit.
    # TOP is attached to the leading SELECT only, so that a sub-query
    # inside the caller-supplied WHERE clause is left untouched.
    # NOTE(review): the factor of 4 is carried over unchanged; its
    # rationale is not visible in this block.
    top = ''
    if maxrec is not None and maxrec > 100000:
        top = f'TOP {maxrec*4} '
    adql = f'SELECT {top}{columns} FROM {catalog}{where}'

    if get_query_payload:
        return adql
    response = self.query_tap(query=adql, maxrec=maxrec)

    # save the response in case we want to use it later
    self._last_result = response
    self._last_catalog_name = catalog

    table = response.to_table()
    if not keep_row and '__row' in table.colnames:
        table.remove_column('__row')
    return table

@deprecated_renamed_argument(
('mission', 'fields', 'resultmax', 'entry', 'coordsys', 'equinox',
Expand Down Expand Up @@ -356,18 +436,6 @@ def query_region(self, position=None, catalog=None, radius=None, *,
-------
table : A `~astropy.table.Table` object.
"""
# if verbose is False then suppress any VOTable related warnings
if not verbose:
commons.suppress_vo_warnings()

if catalog is None:
raise InvalidQueryError("catalog name is required! Use 'xray' "
"to search the master X-ray catalog")

if columns is None:
columns = ', '.join(self._get_default_columns(catalog))
if '__row' not in columns:
columns += ',__row'

if spatial.lower() == 'all-sky':
where = ''
Expand All @@ -390,7 +458,7 @@ def query_region(self, position=None, catalog=None, radius=None, *,

coords_str = [f'{coord.ra.deg},{coord.dec.deg}'
for coord in coords_list]
where = (" WHERE CONTAINS(POINT('ICRS',ra,dec),"
where = ("WHERE CONTAINS(POINT('ICRS',ra,dec),"
f"POLYGON('ICRS',{','.join(coords_str)}))=1")
else:
coords_icrs = parse_coordinates(position).icrs
Expand All @@ -401,7 +469,7 @@ def query_region(self, position=None, catalog=None, radius=None, *,
radius = self.get_default_radius(catalog)
elif isinstance(radius, str):
radius = coordinates.Angle(radius)
where = (" WHERE CONTAINS(POINT('ICRS',ra,dec),CIRCLE("
where = ("WHERE CONTAINS(POINT('ICRS',ra,dec),CIRCLE("
f"'ICRS',{ra},{dec},{radius.to(u.deg).value}))=1")
# add search_offset for the case of cone
if add_offset:
Expand All @@ -410,24 +478,23 @@ def query_region(self, position=None, catalog=None, radius=None, *,
elif spatial.lower() == 'box':
if isinstance(width, str):
width = coordinates.Angle(width)
where = (" WHERE CONTAINS(POINT('ICRS',ra,dec),"
where = ("WHERE CONTAINS(POINT('ICRS',ra,dec),"
f"BOX('ICRS',{ra},{dec},{width.to(u.deg).value},"
f"{width.to(u.deg).value}))=1")
else:
raise ValueError("Unrecognized spatial query type. Must be one"
" of 'cone', 'box', 'polygon', or 'all-sky'.")

adql = f'SELECT {columns} FROM {catalog}{where}'

table_or_query = self._query_execute(
catalog=catalog, where=where,
get_query_payload=get_query_payload,
columns=columns, verbose=verbose,
maxrec=maxrec
)
if get_query_payload:
return adql
response = self.query_tap(query=adql, maxrec=maxrec)

# save the response in case we want to use it later
self._last_result = response
self._last_catalog_name = catalog
return table_or_query
table = table_or_query

table = response.to_table()
if add_offset:
table['search_offset'].unit = u.arcmin
if len(table) == 0:
Expand Down Expand Up @@ -464,6 +531,96 @@ def query_object(self, object_name, mission, *,
return self.query_region(pos, catalog=mission, spatial='cone',
get_query_payload=get_query_payload)

def query_by_column(self, catalog, params, *,
                    get_query_payload=False, columns=None,
                    verbose=False, maxrec=None):
    """Query the HEASARC TAP server using constraints on the columns.

    This is a simple wrapper around
    `~astroquery.heasarc.HeasarcClass.query_tap`
    that constructs an ADQL query from a dictionary of parameters.
    The individual conditions are combined with ``AND``.

    Parameters
    ----------
    catalog : str
        The catalog to query. To list the available catalogs, use
        :meth:`~astroquery.heasarc.HeasarcClass.list_catalogs`.
    params : dict
        A dictionary of column constraint parameters to include in the
        query. Each key-value pair will be translated into an ADQL
        condition.

        - For a range query, use a tuple of two numbers (min, max).
          e.g. ``{'flux': (1e-12, 1e-10)}`` translates to
          ``flux BETWEEN 1e-12 AND 1e-10``.
        - For list values, use a non-empty list of values.
          e.g. ``{'object_type': ['QSO', 'GALAXY']}`` translates to
          ``object_type IN ('QSO', 'GALAXY')``.
        - For comparison queries, use a tuple of (operator, value),
          where operator is one of '=', '!=', '<', '>', '<=', '>='.
          e.g. ``{'magnitude': ('<', 15)}`` translates to
          ``magnitude < 15``.
        - For exact matches, use a single value (str, int, float).
          e.g. ``{'object_type': 'QSO'}`` translates to
          ``object_type = 'QSO'``.

        String values are written as single-quoted ADQL literals, with
        embedded single quotes doubled. An empty dictionary produces a
        query without a WHERE clause.
        The keys should correspond to valid column names in the catalog.
        Use `list_columns` to see the available columns.
    get_query_payload : bool, optional
        If `True` then returns the generated ADQL query as str.
        Defaults to `False`.
    columns : str, optional
        Target column list with value separated by a comma(,).
        Use * for all the columns. The default is to return a subset
        of the columns that are generally the most useful.
    verbose : bool, optional
        If False, suppress vo warnings.
    maxrec : int, optional
        Maximum number of records

    Returns
    -------
    table : A `~astropy.table.Table` object, or the ADQL query string
        when ``get_query_payload`` is `True`.

    Raises
    ------
    ValueError
        If ``params`` is not a dictionary, if a tuple constraint is
        neither a numeric (min, max) pair nor an (operator, value)
        pair with a supported operator, or if a list constraint is
        empty.
    """

    if not isinstance(params, dict):
        raise ValueError('params must be a dictionary of key-value pairs')

    def _literal(val):
        # strings become quoted literals; a quote inside the string is
        # escaped by doubling it so the value cannot end the literal
        if isinstance(val, str):
            return "'" + val.replace("'", "''") + "'"
        return str(val)

    operators = ('=', '!=', '<', '>', '<=', '>=')

    conditions = []
    for key, value in params.items():
        if isinstance(value, tuple):
            is_pair = len(value) == 2
            if is_pair and all(isinstance(v, (int, float)) for v in value):
                conditions.append(
                    f"{key} BETWEEN {value[0]} AND {value[1]}"
                )
            elif (
                is_pair
                and isinstance(value[0], str)
                and value[0] in operators
            ):
                conditions.append(
                    f"{key} {value[0]} {_literal(value[1])}"
                )
            else:
                # refuse rather than silently dropping the constraint,
                # which would return rows the caller asked to exclude
                raise ValueError(
                    f"Unsupported constraint for column '{key}': "
                    f"{value!r}. Use (min, max) with numeric values or "
                    f"(operator, value) with operator one of "
                    f"{', '.join(operators)}"
                )
        elif isinstance(value, list):
            # handle list values: key IN (...)
            if not value:
                raise ValueError(
                    f"Empty list of values for column '{key}'"
                )
            formatted = ', '.join(_literal(v) for v in value)
            conditions.append(f"{key} IN ({formatted})")
        else:
            conditions.append(f"{key} = {_literal(value)}")

    where = "WHERE " + " AND ".join(conditions) if conditions else ""

    return self._query_execute(
        catalog=catalog, where=where,
        get_query_payload=get_query_payload,
        columns=columns, verbose=verbose,
        maxrec=maxrec
    )

def locate_data(self, query_result=None, catalog_name=None):
"""Get links to data products
Use vo/datalinks to query the data products for some query_results.
Expand Down Expand Up @@ -505,7 +662,8 @@ def locate_data(self, query_result=None, catalog_name=None):
if '__row' not in query_result.colnames:
raise ValueError('No __row column found in query_result. '
'query_result needs to be the output of '
'query_region or a subset.')
'query_region or a subset. try adding '
'__row to the requested columns')

if catalog_name is None:
catalog_name = self._last_catalog_name
Expand Down Expand Up @@ -591,6 +749,40 @@ def enable_cloud(self, provider='aws', profile=None):

self.s3_client = self.s3_resource.meta.client

def _guess_host(self, host):
    """Resolve which data host to download from.

    An explicitly named host is validated and returned unchanged. When
    no host is given, the process environment is inspected to pick one.

    Parameters
    ----------
    host : str or None
        The host requested by the user: 'heasarc', 'sciserver', 'aws',
        or `None` to have it guessed.

    Returns
    -------
    host : str
        One of 'heasarc', 'sciserver' or 'aws'.

    Raises
    ------
    ValueError
        If ``host`` is neither `None` nor one of the known hosts.
    """
    known_hosts = ['heasarc', 'sciserver', 'aws']
    if host in known_hosts:
        return host
    if host is not None:
        raise ValueError(
            'host has to be one of heasarc, sciserver, aws or None')

    # Nothing requested: look for an idies home directory together with
    # the /FTP/ path, which is taken to mean we are running on sciserver.
    on_idies = (
        os.environ.get('HOME') == '/home/idies'
        and os.path.exists('/FTP/')
    )
    if on_idies:
        return 'sciserver'

    # Any of these variables being set is taken as a sign of running on AWS.
    aws_markers = ('AWS_REGION', 'AWS_DEFAULT_REGION', 'AWS_ROLE_ARN')
    if any(name in os.environ for name in aws_markers):
        return 'aws'

    # Otherwise fall back to the heasarc server.
    return 'heasarc'

def download_data(self, links, host='heasarc', location='.'):
"""Download data products in links with a choice of getting the
data from either the heasarc server, sciserver, or the cloud in AWS.
Expand Down Expand Up @@ -622,8 +814,8 @@ def download_data(self, links, host='heasarc', location='.'):
if isinstance(links, Row):
links = links.table[[links.index]]

if host not in ['heasarc', 'sciserver', 'aws']:
raise ValueError('host has to be one of heasarc, sciserver, aws')
# guess the host if not provided
host = self._guess_host(host)

host_column = 'access_url' if host == 'heasarc' else host
if host_column not in links.colnames:
Expand Down
Loading
Loading