astropy / astroquery
1
# Licensed under a 3-clause BSD style license - see LICENSE.rst
2 1
from __future__ import print_function
3

4 1
import copy
5 1
import io
6 1
import re
7 1
import warnings
8

9 1
import astropy.coordinates as coord
10 1
import astropy.units as u
11 1
import astropy.units.cds as cds
12 1
import numpy as np
13 1
from astropy.coordinates import SkyCoord
14 1
from astropy.io import ascii
15 1
from astropy.io.votable import parse_single_table
16 1
from astropy.table import QTable
17 1
from astropy.utils import deprecated, deprecated_renamed_argument
18 1
from astropy.utils.exceptions import AstropyWarning
19

20 1
from ..exceptions import (InputWarning, InvalidQueryError, NoResultsWarning,
21
                          RemoteServiceError)
22 1
from ..query import BaseQuery
23 1
from ..utils import async_to_sync, commons
24 1
from ..utils.class_or_instance import class_or_instance
25 1
from . import conf
26

27 1
__all__ = ["NasaExoplanetArchive", "NasaExoplanetArchiveClass"]
28

29

30 1
# Translation table from the unit labels found in archive responses to astropy units. It is
# consulted by ``_fix_units`` via ``UNIT_MAPPER.get(str(unit), unit)``; a value of ``None`` means
# that the column's unit is set to ``None``.
UNIT_MAPPER = {
    "--": None,
    "BJD": None,  # TODO: optionally support mapping columns to Time objects
    "BKJD": None,  # TODO: optionally support mapping columns to Time objects
    "D_L": u.pc,
    "D_S": u.pc,
    "Earth flux": None,  # TODO: Include Earth insolation units
    "Fearth": None,  # TODO: Include Earth insolation units
    "M_E": u.M_earth,
    "M_J": u.M_jupiter,
    "R_Earth": u.R_earth,
    "R_Sun": u.R_sun,
    "Rstar": u.R_sun,
    "a_perp": u.au,
    "arc-sec/year": u.arcsec / u.yr,
    # The label is in centimetres, so the logarithmic unit must be built on ``u.cm`` (the
    # same unit used for "dexincgs" below), not on decimetres.
    "cm/s**2": u.dex(u.cm / u.s ** 2),
    "days": u.day,
    "degrees": u.deg,
    "dexincgs": u.dex(u.cm / u.s ** 2),
    "hours": u.hr,
    "hrs": u.hr,
    "kelvin": u.K,
    "logLsun": u.dex(u.L_sun),
    "mags": u.mag,
    "microas": u.uas,
    "perc": u.percent,
    "pi_E": None,
    "pi_EE": None,
    "pi_EN": None,
    "pi_rel": None,
    "ppm": cds.ppm,
    "seconds": u.s,
    "solarradius": u.R_sun,
}
64 1
# Converters handed to ``astropy.io.ascii.read`` so that ``koi_quarters`` is always read as a
# string column. The builtin ``str`` is used because ``np.str`` was only an alias for it and
# has been removed from recent NumPy releases.
CONVERTERS = dict(koi_quarters=[ascii.convert_numpy(str)])

# Tables that ``query_object`` can search, mapped to the column-name prefix that is put in
# front of ``hostname`` / ``name`` when building the ``where`` clause.
OBJECT_TABLES = {"exoplanets": "pl_", "compositepars": "fpl_", "exomultpars": "mpl_"}
66

67

68 1
class InvalidTableError(InvalidQueryError):
    """Raised when the Exoplanet Archive servers do not recognize the requested table."""
72

73

74 1
@async_to_sync
class NasaExoplanetArchiveClass(BaseQuery):
    """
    Query interface for the NASA Exoplanet Archive API

    The tables that can be queried and the query syntax understood by the service are
    described in full on `the documentation page
    <https://exoplanetarchive.ipac.caltech.edu/docs/program_interfaces.html>`_.
    """

    # Defaults taken from the package configuration
    URL = conf.url  # endpoint that requests are sent to
    TIMEOUT = conf.timeout  # timeout passed along with every request
    CACHE = conf.cache  # used whenever a query is called with ``cache=None``
86

87 1
    @class_or_instance
88 1
    def query_criteria_async(self, table, get_query_payload=False, cache=None, **criteria):
        """
        Query a table, optionally restricted by a set of criteria

        The query syntax is described on the Exoplanet Archive API documentation page [1]_.
        The criteria used most often are ``select`` and ``where``.

        Parameters
        ----------
        table : str
            Name of the table to query; it is lower-cased before being sent. The available
            tables are listed on the documentation page [1]_.
        get_query_payload : bool, optional
            If ``True``, return the dict of HTTP request parameters instead of sending the
            request. Defaults to ``False``.
        cache : bool, optional
            Whether the request result should be cached. When ``None`` (the default) the
            class-level ``CACHE`` setting is used.
        **criteria
            The filtering criteria to apply, as described in the archive documentation [1]_.
            Examples are ``select="*"`` to return every column or ``where=pl_name='K2-18 b'``
            to filter on a column. ``select`` may also be given as an iterable of column names.
            ``format`` defaults to ``"ipac"``; any format containing ``"json"`` is rejected.

        Returns
        -------
        response : `requests.Response` or dict
            The HTTP response returned from the service, or the request parameters if
            ``get_query_payload`` is ``True``.

        Raises
        ------
        InvalidQueryError
            If a JSON format was requested.

        References
        ----------

        .. [1] `NASA Exoplanet Archive API Documentation
           <https://exoplanetarchive.ipac.caltech.edu/docs/program_interfaces.html>`_
        """
        table_name = table.lower()

        # Work on a private copy of the keyword arguments
        options = dict(criteria)

        # The service wants a comma separated string, so flatten any iterable of columns
        if "select" in options and not isinstance(options["select"], str):
            options["select"] = ",".join(options["select"])

        # IPAC is the preferred format because it carries units, but the other formats are
        # expected to work as well
        requested_format = options.setdefault("format", "ipac")
        if "json" in requested_format.lower():
            raise InvalidQueryError("The 'json' format is not supported")

        payload = dict(table=table_name, **options)
        if get_query_payload:
            return payload

        # Fall back on the class-wide cache setting when none was given
        use_cache = self.CACHE if cache is None else cache

        response = self._request(
            "GET", self.URL, params=payload, timeout=self.TIMEOUT, cache=use_cache,
        )

        # Remember what was asked for so that ``_parse_result`` can pick the right reader
        response.requested_format = requested_format
        return response
153

154 1
    @class_or_instance
155 1
    def query_region_async(self, table, coordinates, radius, *, get_query_payload=False, cache=None,
                           **criteria):
        """
        Run a cone search on a table around the given coordinates

        Parameters
        ----------
        table : str
            Name of the table to query. The available tables are listed on the documentation
            page [1]_.
        coordinates : str or `~astropy.coordinates`
            The centre of the cone search.
        radius : str or `~astropy.units.Quantity`
            The radius of the cone search. A plain ``int`` or ``float`` is interpreted as
            degrees.
        get_query_payload : bool, optional
            If ``True``, return the dict of HTTP request parameters instead of sending the
            request. Defaults to ``False``.
        cache : bool, optional
            Whether the request result should be cached; forwarded unchanged to
            ``query_criteria_async``.
        **criteria
            Any further filtering criteria, as described in the archive documentation [1]_.
            The ``ra``, ``dec`` and ``radius`` entries are overwritten by this method.

        Returns
        -------
        response : `requests.Response`
            The HTTP response returned from the service.

        References
        ----------

        .. [1] `NASA Exoplanet Archive API Documentation
           <https://exoplanetarchive.ipac.caltech.edu/docs/program_interfaces.html>`_
        """
        center = commons.parse_coordinates(coordinates)

        # Bare numbers carry no unit, so they are taken to be degrees
        cone = coord.Angle(radius * u.deg if isinstance(radius, (int, float)) else radius)

        criteria.update(
            ra=center.ra.deg,
            dec=center.dec.deg,
            radius="{0} degree".format(cone.deg),
        )

        return self.query_criteria_async(
            table, get_query_payload=get_query_payload, cache=cache, **criteria,
        )
205

206 1
    @class_or_instance
207 1
    def query_object_async(self, object_name, *, table="exoplanets", get_query_payload=False,
                           cache=None, regularize=True, **criteria):
        """
        Search the global tables for information about a confirmed planet or planet host

        The tables available to this query are the following (more information can be found on
        the archive's documentation page [1]_):

        - ``exoplanets``: This table contains parameters derived from a single, published
          reference that are designated as the archive's default parameter set.
        - ``compositepars``: This table contains a full set of parameters compiled from multiple,
          published references.
        - ``exomultpars``: This table includes all sets of planet and stellar parameters for
          confirmed planets and hosts in the archive.

        Parameters
        ----------
        object_name : str
            The name of the planet or star.  If ``regularize`` is ``True``, an attempt will be made
            to regularize this name using the ``aliastable`` table.
        table : [``"exoplanets"``, ``"compositepars"``, or ``"exomultpars"``], optional
            The table to query, must be one of the supported tables: ``"exoplanets"``,
            ``"compositepars"``, or ``"exomultpars"``. The name is matched case-insensitively.
            Defaults to ``"exoplanets"``.
        get_query_payload : bool, optional
            Just return the dict of HTTP request parameters. Defaults to ``False``.
        cache : bool, optional
            Should the request result be cached? Forwarded unchanged to
            ``query_criteria_async``.
        regularize : bool, optional
            If ``True``, the ``aliastable`` will be used to regularize the target name.
        **criteria
            Any other filtering criteria to apply. Values provided using the ``where`` keyword will
            be ignored (a warning is issued).

        Returns
        -------
        response : `requests.Response`
            The HTTP response returned from the service.

        Raises
        ------
        InvalidQueryError
            If ``table`` is not one of the supported tables.

        References
        ----------

        .. [1] `NASA Exoplanet Archive API Documentation
           <https://exoplanetarchive.ipac.caltech.edu/docs/program_interfaces.html>`_
        """
        # ``query_criteria_async`` lower-cases the table name before sending it, so the lookup
        # of the column prefix is made case-insensitive as well.
        prefix = OBJECT_TABLES.get(str(table).lower())
        if prefix is None:
            raise InvalidQueryError(
                "Invalid table '{0}'. The allowed options are: {1}".format(
                    table, ", ".join(OBJECT_TABLES)
                )
            )

        if regularize:
            object_name = self._regularize_object_name(object_name)

        # The ``where`` clause is owned by this method, so a user supplied one is discarded
        if "where" in criteria:
            warnings.warn(
                "Any filters using the 'where' argument are ignored in ``query_object``",
                InputWarning,
            )
        criteria["where"] = "{0}hostname='{1}' OR {0}name='{1}'".format(prefix, object_name.strip())

        return self.query_criteria_async(
            table, get_query_payload=get_query_payload, cache=cache, **criteria,
        )
273

274 1
    @class_or_instance
275 1
    def query_aliases(self, object_name, *, cache=None):
        """
        Look up the aliases of a confirmed planet or planet host

        Parameters
        ----------
        object_name : str
            The name of a planet or star to look up in the ``aliastable`` table. Surrounding
            whitespace is stripped before the query is made.
        cache : bool, optional
            Whether the request result should be cached; forwarded unchanged to
            ``query_criteria``.

        Returns
        -------
        response : list
            The contents of the ``aliasdis`` column of the result. ``_regularize_object_name``
            treats the first entry as the default name.
        """
        alias_table = self.query_criteria(
            "aliastable", objname=object_name.strip(), cache=cache, format="csv"
        )
        return list(alias_table["aliasdis"])
297

298 1
    @class_or_instance
299
    def _regularize_object_name(self, object_name):
        """
        Regularize the name of a planet or planet host using the ``aliastable`` table

        Returns the first alias reported for ``object_name``. If the lookup fails with a
        ``RemoteServiceError`` or yields nothing, a ``NoResultsWarning`` is issued and the
        name is returned unchanged.
        """
        try:
            known_names = self.query_aliases(object_name, cache=False)
        except RemoteServiceError:
            # A failed lookup is handled exactly like an empty one
            known_names = []

        if not known_names:
            warnings.warn("No aliases found for name: '{0}'".format(object_name), NoResultsWarning)
            return object_name

        return known_names[0]
309

310 1
    def _handle_error(self, text):
311
        """
312
        Parse the response from a request to see if it failed
313

314
        Parameters
315
        ----------
316
        text : str
317
            The decoded body of the response.
318

319
        Raises
320
        ------
321
        InvalidColumnError :
322
            If ``select`` included an invalid column.
323
        InvalidTableError :
324
            If the queried ``table`` does not exist.
325
        RemoteServiceError :
326
            If anything else went wrong.
327
        """
328
        # Error messages will always be formatted starting with the word "ERROR"
329 1
        if not text.startswith("ERROR"):
330 1
            return
331

332
        # Some errors have the form:
333
        #   Error type: ...
334
        #   Message: ...
335
        # so we'll parse those to try to provide some reasonable feedback to the user
336 1
        error_type = None
337 1
        error_message = None
338 1
        for line in text.replace("<br>", "").splitlines():
339 1
            match = re.search(r"Error Type:\s(.+)$", line)
340 1
            if match:
341 1
                error_type = match.group(1).strip()
342 1
                continue
343

344 1
            match = re.search(r"Message:\s(.+)$", line)
345 1
            if match:
346 1
                error_message = match.group(1).strip()
347 1
                continue
348

349
        # If we hit this condition, that means that we weren't able to parse the error so we'll
350
        # just throw the full response
351 1
        if error_type is None or error_message is None:
352 0
            raise RemoteServiceError(text)
353

354
        # A useful special is if a column name is unrecognized. This has the format
355
        #   Error type: SystemError
356
        #   Message: ... "NAME_OF_COLUMN": invalid identifier ...
357 1
        if error_type.startswith("SystemError"):
358 0
            match = re.search(r'"(.*)": invalid identifier', error_message)
359 0
            if match:
360 0
                raise InvalidQueryError(
361
                    (
362
                        "'{0}' is an invalid identifier. This error can be caused by invalid "
363
                        "column names, missing quotes, or other syntax errors"
364
                    ).format(match.group(1).lower())
365
                )
366

367 1
        elif error_type.startswith("UserError"):
368
            # Another important one is when the table is not recognized. This has the format:
369
            #   Error type: UserError - "table" parameter
370
            #   Message: ... "NAME_OF_TABLE" is not a valid table.
371 0
            match = re.search(r'"(.*)" is not a valid table', error_message)
372 0
            if match:
373 0
                raise InvalidTableError("'{0}' is not a valid table".format(match.group(1).lower()))
374

375 0
            raise InvalidQueryError("{0}\n{1}".format(error_type, error_message))
376

377
        # Finally just return the full error message if we got here
378 1
        message = "\n".join(line for line in (error_type, error_message) if line is not None)
379 1
        raise RemoteServiceError(message)
380

381 1
    def _fix_units(self, data):
        """
        Fix any undefined units using a set of hacks

        Each column's unit is first translated through ``UNIT_MAPPER``. Units that astropy
        still does not recognize are then guessed from the unit label together with the
        column name. Note that the columns of ``data`` are modified in place (unit, mask and,
        for object columns, dtype) before being assembled into the returned table.

        Parameters
        ----------
        data : `~astropy.table.Table`
            The original data table without units.

        Returns
        -------
        new_data : `~astropy.table.QTable` or `~astropy.table.Table`
            The original ``data`` table with units applied where possible.
        """

        # To deal with masked data and quantities properly, we need to construct the QTable
        # manually so we'll loop over the columns and process each one independently
        column_names = list(data.columns)
        column_data = []
        column_masks = dict()
        for col in column_names:
            unit = data[col].unit
            unit = UNIT_MAPPER.get(str(unit), unit)
            if isinstance(unit, u.UnrecognizedUnit):
                unit_str = str(unit).lower()
                if unit_str == "earth" and "prad" in col:
                    unit = u.R_earth
                elif unit_str == "solar" and "radius" in col.lower():
                    unit = u.R_sun
                elif unit_str == "solar" and "mass" in col.lower():
                    unit = u.M_sun
                elif col.startswith(("mlmag", "mlext", "mlcol", "mlred")):
                    unit = u.mag

                else:  # pragma: nocover
                    warnings.warn("Unrecognized unit: '{0}'".format(unit), AstropyWarning)

            # Here we're figuring out if the column is masked because this doesn't
            # play nice with quantities so we need to keep track of the mask separately.
            # NOTE(review): if ``.mask`` hands back a view rather than a copy, clearing it
            # below would also clear the mask saved here - confirm against astropy/numpy.
            try:
                column_masks[col] = data[col].mask
            except AttributeError:
                pass
            else:
                data[col].mask[:] = False

            # Deal with strings consistently. The builtin ``object`` is compared against
            # because the ``np.object`` alias no longer exists in recent NumPy releases.
            if data[col].dtype == object:
                data[col] = data[col].astype(str)

            data[col].unit = unit
            column_data.append(data[col])

        # Build the new `QTable` and copy over the data masks if there are any
        result = QTable(column_data, names=column_names, masked=len(column_masks) > 0)
        for key, mask in column_masks.items():
            result[key].mask = mask

        return result
445

446 1
    def _parse_result(self, response, verbose=False):
        """
        Turn a `~requests.Response` into an `~astropy.table.Table`

        Parameters
        ----------
        response : `~requests.Response`
            The response from the server. It must carry the ``requested_format`` attribute
            that ``query_criteria_async`` attaches.
        verbose : bool
            Currently has no effect.

        Returns
        -------
        data : `~astropy.table.Table` or `~astropy.table.QTable`
            The parsed table after ``_fix_units`` has been applied. A ``sky_coord`` column is
            added when both ``ra`` and ``dec`` columns are present.
        """
        body = response.text

        # Raises if the body is an error report from the service
        self._handle_error(body)

        requested = response.requested_format.lower()

        def requested_any(*tokens):
            return any(token in requested for token in tokens)

        # Work out the reader options for the requested format; ``None`` selects the VOTable
        # parser instead of the ascii reader. The order of the checks matters.
        if requested_any("ascii", "ipac"):
            reader_options = {"format": "ipac"}
        elif requested_any("csv"):
            reader_options = {"format": "csv"}
        elif requested_any("bar", "pipe"):
            reader_options = {"delimiter": "|"}
        elif requested_any("xml", "table"):
            reader_options = None
        else:
            reader_options = {}

        if reader_options is None:
            data = parse_single_table(io.BytesIO(response.content)).to_table()
        else:
            data = ascii.read(body, fast_reader=False, converters=CONVERTERS, **reader_options)

        # Apply units wherever they can be worked out
        data = self._fix_units(data)

        # Kept for backwards compatibility: expose the object coordinates as a `SkyCoord`
        has_position = "ra" in data.columns and "dec" in data.columns
        if has_position:
            data["sky_coord"] = SkyCoord(ra=data["ra"], dec=data["dec"], unit=u.deg)

        if not data:
            warnings.warn("Query returned no results.", NoResultsWarning)

        return data
493

494 1
    def _handle_all_columns_argument(self, **kwargs):
495
        """
496
        Deal with the ``all_columns`` argument that was exposed by earlier versions
497

498
        This method will warn users about this deprecated argument and update the query syntax
499
        to use ``select='*'``.
500
        """
501
        # We also have to manually pop these arguments from the dict because
502
        # `deprecated_renamed_argument` doesn't do that for some reason for all supported astropy
503
        # versions (v3.1 was beheaving as expected)
504 1
        kwargs.pop("show_progress", None)
505 1
        kwargs.pop("table_path", None)
506

507
        # Deal with `all_columns` properly
508 1
        if kwargs.pop("all_columns", None):
509 1
            kwargs["select"] = kwargs.get("select", "*")
510

511 1
        return kwargs
512

513 1
    @deprecated(since="v0.4.1", alternative="query_object")
514 1
    @deprecated_renamed_argument(["show_progress", "table_path"],
515
                                 [None, None], "v0.4.1", arg_in_kwargs=True)
516 1
    def query_planet(self, planet_name, cache=None, regularize=True, **criteria):
        """
        Look up a confirmed planet in the ``exoplanets`` table

        Parameters
        ----------
        planet_name : str
            The name of a confirmed planet. If ``regularize`` is ``True``, an attempt will be
            made to regularize this name using the ``aliastable`` table.
        cache : bool, optional
            Whether the request result should be cached; forwarded unchanged to
            ``query_criteria``.
        regularize : bool, optional
            If ``True``, the ``aliastable`` will be used to regularize the target name.
        **criteria
            Any other filtering criteria to apply. Values provided using the ``where`` keyword
            will be ignored.
        """
        target = self._regularize_object_name(planet_name) if regularize else planet_name

        options = self._handle_all_columns_argument(**criteria)
        # Any user supplied ``where`` is replaced by the filter on the planet name
        options["where"] = "pl_name='{0}'".format(target.strip())

        return self.query_criteria("exoplanets", cache=cache, **options)
539

540 1
    @deprecated(since="v0.4.1", alternative="query_object")
541 1
    @deprecated_renamed_argument(["show_progress", "table_path"],
542
                                 [None, None], "v0.4.1", arg_in_kwargs=True)
543 1
    def query_star(self, host_name, cache=None, regularize=True, **criteria):
        """
        Look up a confirmed planet host in the ``exoplanets`` table

        Parameters
        ----------
        host_name : str
            The name of a confirmed planet host. If ``regularize`` is ``True``, an attempt will
            be made to regularize this name using the ``aliastable`` table.
        cache : bool, optional
            Whether the request result should be cached; forwarded unchanged to
            ``query_criteria``.
        regularize : bool, optional
            If ``True``, the ``aliastable`` will be used to regularize the target name.
        **criteria
            Any other filtering criteria to apply. Values provided using the ``where`` keyword
            will be ignored.
        """
        target = self._regularize_object_name(host_name) if regularize else host_name

        options = self._handle_all_columns_argument(**criteria)
        # Any user supplied ``where`` is replaced by the filter on the host name
        options["where"] = "pl_hostname='{0}'".format(target.strip())

        return self.query_criteria("exoplanets", cache=cache, **options)
566

567

568 1
# Module-level instance of the query class, exported through ``__all__``
NasaExoplanetArchive = NasaExoplanetArchiveClass()

Read our documentation on viewing source code .

Loading