astropy / astroquery
1
# Licensed under a 3-clause BSD style license - see LICENSE.rst
2
# -*- coding: utf-8 -*-
3 1
"""
4
Module to search Splatalogue.net via splat, modeled loosely on
5
ftp://ftp.cv.nrao.edu/NRAO-staff/bkent/slap/idl/
6

7
:author: Adam Ginsburg <adam.g.ginsburg@gmail.com>
8
"""
9 1
import warnings
10 1
import sys
11 1
from astropy.io import ascii
12 1
from astropy import units as u
13 1
from astropy import log
14 1
from ..query import BaseQuery
15 1
from ..utils import async_to_sync, prepend_docstr_nosections
16 1
from . import conf
17 1
from . import load_species_table
18 1
from .utils import clean_column_headings
19

20 1
__all__ = ['Splatalogue', 'SplatalogueClass']
21

22
# example query of SPLATALOGUE directly:
23
# https://www.cv.nrao.edu/php/splat/c.php?sid%5B%5D=64&sid%5B%5D=108&calcIn=&data_version=v3.0&from=&to=&frequency_units=MHz&energy_range_from=&energy_range_to=&lill=on&tran=&submit=Search&no_atmospheric=no_atmospheric&no_potential=no_potential&no_probable=no_probable&include_only_nrao=include_only_nrao&displayLovas=displayLovas&displaySLAIM=displaySLAIM&displayJPL=displayJPL&displayCDMS=displayCDMS&displayToyaMA=displayToyaMA&displayOSU=displayOSU&displayRecomb=displayRecomb&displayLisa=displayLisa&displayRFI=displayRFI&ls1=ls1&ls5=ls5&el1=el1
24

25 1
if sys.version_info.major == 2:
26
    # can't do unicode doctests in py2
27 0
    __doctest_skip__ = ['SplatalogueClass.get_species_ids']
28

29

30 1
@async_to_sync
31 1
class SplatalogueClass(BaseQuery):
32

33 1
    SLAP_URL = conf.slap_url
34 1
    QUERY_URL = conf.query_url
35 1
    TIMEOUT = conf.timeout
36 1
    LINES_LIMIT = conf.lines_limit
37 1
    versions = ('v1.0', 'v2.0', 'v3.0', 'vall')
38
    # global constant, not user-configurable
39 1
    ALL_LINE_LISTS = ('Lovas', 'SLAIM', 'JPL', 'CDMS', 'ToyoMA', 'OSU',
40
                      'Recomb', 'Lisa', 'RFI')
41 1
    TOP20_LIST = ('comet', 'planet', 'top20', 'ism_hotcore', 'ism_darkcloud',
42
                  'ism_diffusecloud')
43 1
    FREQUENCY_BANDS = {"any": "Any",
44
                       "alma3": "ALMA Band 3 (84-116 GHz)",
45
                       "alma4": " ALMA Band 4 (125-163 GHz)",
46
                       "alma5": " ALMA Band 5 (163-211 GHz)",
47
                       "alma6": "ALMA Band 6 (211-275 GHz)",
48
                       "alma7": "ALMA Band 7 (275-373 GHz)",
49
                       "alma8": "ALMA Band 8 (385-500 GHz)",
50
                       "alma9": "ALMA Band 9 (602-720 GHz)",
51
                       "alma10": "ALMA Band 10 (787-950 GHz)",
52
                       "pf1": "GBT PF1 (0.29-0.92 GHz)",
53
                       "pf2": "GBT PF2 (0.91-1.23 GHz)",
54
                       "l": "GBT/VLA L (1-2 GHz)",
55
                       "s": "GBT/VLA S (1.7-4 GHz)",
56
                       "c": "GBT/VLA C (3.9-8 GHz)",
57
                       "x": "GBT/VLA X (8-12 GHz)",
58
                       "ku": " GBT/VLA Ku (12-18 GHz)",
59
                       "kfpa": "GBT KFPA (18-27.5 GHz)",
60
                       "k": "VLA K (18-26.5 GHz)",
61
                       "ka": " GBT/VLA Ka (26-40 GHz)",
62
                       "q": "GBT/VLA Q (38-50 GHz)",
63
                       "w": "GBT W (67-93.3 GHz)",
64
                       "mustang": "GBT Mustang (80-100 GHz)", }
65

66 1
    def __init__(self, **kwargs):
67
        """
68
        Initialize a Splatalogue query class with default arguments set.
69
        Frequency specification is required for *every* query, but any
70
        default keyword arguments (see `query_lines`) can be overridden
71
        here.
72
        """
73 1
        super(SplatalogueClass, self).__init__()
74 1
        self.data = self._default_kwargs()
75 1
        self.set_default_options(**kwargs)
76

77 1
    def set_default_options(self, **kwargs):
78
        """
79
        Modify the default options.
80
        See `query_lines`
81
        """
82 1
        self.data.update(self._parse_kwargs(**kwargs))
83

84 1
    def get_species_ids(self, restr=None, reflags=0, recache=False):
85
        """
86
        Get a dictionary of "species" IDs, where species refers to the molecule
87
        name, mass, and chemical composition.
88

89
        Parameters
90
        ----------
91
        restr : str
92
            String to compile into an re, if specified.   Searches table for
93
            species whose names match
94
        reflags : int
95
            Flags to pass to `re`.
96
        recache : bool
97
            Flag whether to refresh the local cache of species IDs
98

99
        Examples
100
        --------
101
        >>> import re
102
        >>> import pprint # unfortunate hack required for documentation testing
103
        >>> rslt = Splatalogue.get_species_ids('Formaldehyde')
104
        >>> pprint.pprint(rslt)
105
        {'03023 H2CO - Formaldehyde': '194',
106
         '03106 H213CO - Formaldehyde': '324',
107
         '03107 HDCO - Formaldehyde': '109',
108
         '03108 H2C17O - Formaldehyde': '982',
109
         '03202 H2C18O - Formaldehyde': '155',
110
         '03203 D2CO - Formaldehyde': '94',
111
         '03204 HD13CO - Formaldehyde': '1219',
112
         '03301 D213CO - Formaldehyde': '1220',
113
         '03315 HDC18O - Formaldehyde': '21141',
114
         '0348 D2C18O - Formaldehyde': '21140'}
115
        >>> rslt = Splatalogue.get_species_ids('H2CO')
116
        >>> pprint.pprint(rslt)
117
        {'03023 H2CO - Formaldehyde': '194',
118
         '03109 H2COH+ - Hydroxymethylium ion': '224',
119
         '04406 c-H2COCH2 - Ethylene Oxide': '21',
120
         '06029 NH2CONH2 - Urea': '21166',
121
         '07510 H2NCH2COOH - I v=0 - Glycine': '389',
122
         '07511 H2NCH2COOH - I v=1 - Glycine': '1312',
123
         '07512 H2NCH2COOH - I v=2 - Glycine': '1313',
124
         '07513 H2NCH2COOH - II v=0 - Glycine': '262',
125
         '07514 H2NCH2COOH - II v=1 - Glycine': '1314',
126
         '07515 H2NCH2COOH - II v=2 - Glycine': '1315',
127
         '07517 NH2CO2CH3 v=0 - Methyl Carbamate': '1334',
128
         '07518 NH2CO2CH3 v=1 - Methyl Carbamate': '1335',
129
         '08902 CH3CHNH2COOH - I - α-Alanine': '1321',
130
         '08903 CH3CHNH2COOH - II - α-Alanine': '1322'}
131
        >>> # note the whitespace, preventing H2CO within other
132
        >>> # more complex molecules
133
        >>> Splatalogue.get_species_ids(' H2CO ')
134
        {'03023 H2CO - Formaldehyde': '194'}
135
        >>> Splatalogue.get_species_ids(' h2co ', re.IGNORECASE)
136
        {'03023 H2CO - Formaldehyde': '194'}
137

138
        """
139
        # loading can be an expensive operation and should not change at
140
        # runtime: do it lazily
141 1
        if not hasattr(self, '_species_ids'):
142 1
            self._species_ids = load_species_table.species_lookuptable(recache=recache)
143

144 1
        if restr is not None:
145 1
            return self._species_ids.find(restr, reflags)
146
        else:
147 0
            return self._species_ids
148

149 1
    def _default_kwargs(self):
150 1
        kwargs = dict(min_frequency=0 * u.GHz,
151
                      max_frequency=100 * u.THz,
152
                      chemical_name='',
153
                      line_lists=self.ALL_LINE_LISTS,
154
                      line_strengths=('ls1', 'ls2', 'ls3', 'ls4', 'ls5'),
155
                      energy_levels=('el1', 'el2', 'el3', 'el4'),
156
                      exclude=('potential', 'atmospheric', 'probable'),
157
                      version='v3.0',
158
                      only_NRAO_recommended=None,
159
                      export=True,
160
                      export_limit=self.LINES_LIMIT,
161
                      noHFS=False, displayHFS=False, show_unres_qn=False,
162
                      show_upper_degeneracy=False, show_molecule_tag=False,
163
                      show_qn_code=False, show_lovas_labref=False,
164
                      show_lovas_obsref=False, show_orderedfreq_only=False,
165
                      show_nrao_recommended=False,)
166 1
        return self._parse_kwargs(**kwargs)
167

168 1
    def _parse_kwargs(self, min_frequency=None, max_frequency=None,
169
                      band='any', top20=None, chemical_name=None,
170
                      chem_re_flags=0, energy_min=None, energy_max=None,
171
                      energy_type=None, intensity_lower_limit=None,
172
                      intensity_type=None, transition=None, version=None,
173
                      exclude=None,
174
                      only_astronomically_observed=None,
175
                      only_NRAO_recommended=None,
176
                      line_lists=None, line_strengths=None, energy_levels=None,
177
                      export=None, export_limit=None, noHFS=None,
178
                      displayHFS=None, show_unres_qn=None,
179
                      show_upper_degeneracy=None, show_molecule_tag=None,
180
                      show_qn_code=None, show_lovas_labref=None,
181
                      show_lovas_obsref=None, show_orderedfreq_only=None,
182
                      show_nrao_recommended=None,
183
                      parse_chemistry_locally=True):
184
        """
185
        The Splatalogue service returns lines with rest frequencies in the
186
        range [min_frequency, max_frequency].
187

188
        Parameters
189
        ----------
190
        min_frequency : `astropy.units`
191
            Minimum frequency (or any spectral() equivalent)
192
        max_frequency : `astropy.units`
193
            Maximum frequency (or any spectral() equivalent)
194
        band : str
195
            The observing band.  If it is not 'any', it overrides
196
            minfreq/maxfreq.
197
        top20: str
198
            One of ``'comet'``, ``'planet'``, ``'top20'``, ``'ism_hotcore'``,
199
            ``'ism_darkcloud'``, ``'ism_diffusecloud'``.
200
            Overrides chemical_name
201
        chemical_name : str
202
            Name of the chemical to search for. Treated as a regular
203
            expression.  An empty set ('', (), [], {}) will match *any*
204
            species. Examples:
205

206
            ``'H2CO'`` - 13 species have H2CO somewhere in their formula.
207

208
            ``'Formaldehyde'`` - There are 8 isotopologues of Formaldehyde
209
                                 (e.g., H213CO).
210

211
            ``'formaldehyde'`` - Thioformaldehyde,Cyanoformaldehyde.
212

213
            ``'formaldehyde',chem_re_flags=re.I`` - Formaldehyde,thioformaldehyde,
214
                                                    and Cyanoformaldehyde.
215

216
            ``' H2CO '`` - Just 1 species, H2CO. The spaces prevent including
217
                           others.
218
        parse_chemistry_locally : bool
219
            Attempt to determine the species ID #'s locally before sending the
220
            query?  This will prevent queries that have no matching species.
221
            It also performs a more flexible regular expression match to the
222
            species IDs.  See the examples in `get_species_ids`
223
        chem_re_flags : int
224
            See the `re` module
225
        energy_min : `None` or float
226
            Energy range to include.  See energy_type
227
        energy_max : `None` or float
228
            Energy range to include.  See energy_type
229
        energy_type : ``'el_cm1'``, ``'eu_cm1'``, ``'eu_k'``, ``'el_k'``
230
            Type of energy to restrict.  L/U for lower/upper state energy,
231
            cm/K for *inverse* cm, i.e. wavenumber, or K for Kelvin
232
        intensity_lower_limit : `None` or float
233
            Lower limit on the intensity.  See intensity_type
234
        intensity_type : `None` or ``'sij'``, ``'cdms_jpl'``, ``'aij'``
235
            The type of intensity on which to place a lower limit
236
        transition : str
237
            e.g. 1-0
238
        version : ``'v1.0'``, ``'v2.0'``, ``'v3.0'`` or ``'vall'``
239
            Data version
240
        exclude : list
241
            Types of lines to exclude.  Default is:
242
            (``'potential'``, ``'atmospheric'``, ``'probable'``)
243
            Can also exclude ``'known'``.
244
            To exclude nothing, use 'none', not the python object None, since
245
            the latter is meant to indicate 'leave as default'
246
        only_astronomically_observed : bool
247
            Show only astronomically observed species?
248
        only_NRAO_recommended : bool
249
            Show only NRAO recommended species?
250
        line_lists : list
251
            Options:
252
            Lovas, SLAIM, JPL, CDMS, ToyoMA, OSU, Recomb, Lisa, RFI
253
        line_strengths : list
254
            * CDMS/JPL Intensity : ls1
255
            * Sij : ls3
256
            * Aij : ls4
257
            * Lovas/AST : ls5
258
        energy_levels : list
259
            * E_lower (cm^-1) : el1
260
            * E_lower (K) : el2
261
            * E_upper (cm^-1) : el3
262
            * E_upper (K) : el4
263
        export : bool
264
            Set up arguments for the export server (as opposed to the HTML
265
            server)?
266
        export_limit : int
267
            Maximum number of lines in output file
268
        noHFS : bool
269
            No HFS Display
270
        displayHFS : bool
271
            Display HFS Intensity
272
        show_unres_qn : bool
273
            Display Unresolved Quantum Numbers
274
        show_upper_degeneracy : bool
275
            Display Upper State Degeneracy
276
        show_molecule_tag : bool
277
            Display Molecule Tag
278
        show_qn_code : bool
279
            Display Quantum Number Code
280
        show_lovas_labref : bool
281
            Display Lab Ref
282
        show_lovas_obsref : bool
283
            Display Obs Ref
284
        show_orderedfreq_only : bool
285
            Display Ordered Frequency ONLY
286
        show_nrao_recommended : bool
287
            Display NRAO Recommended Frequencies
288

289
        Returns
290
        -------
291
        payload : dict
292
            Dictionary of the parameters to send to the SPLAT page
293

294
        """
295

296 1
        payload = {'submit': 'Search',
297
                   'frequency_units': 'GHz',
298
                   }
299

300 1
        if band != 'any':
301 1
            if band not in self.FREQUENCY_BANDS:
302 1
                raise ValueError("Invalid frequency band.")
303 1
            if min_frequency is not None or max_frequency is not None:
304 0
                warnings.warn("Band was specified, so the frequency "
305
                              "specification is overridden")
306 1
            payload['band'] = band
307 1
        elif min_frequency is not None and max_frequency is not None:
308
            # allow setting payload without having *ANY* valid frequencies set
309 1
            min_frequency = min_frequency.to(u.GHz, u.spectral())
310 1
            max_frequency = max_frequency.to(u.GHz, u.spectral())
311 1
            if min_frequency > max_frequency:
312 0
                min_frequency, max_frequency = max_frequency, min_frequency
313

314 1
            payload['from'] = min_frequency.value
315 1
            payload['to'] = max_frequency.value
316

317 1
        if top20 is not None:
318 1
            if top20 in self.TOP20_LIST:
319 1
                payload['top20'] = top20
320
            else:
321 1
                raise ValueError("Top20 is not one of the allowed values")
322 1
        elif chemical_name in ('', {}, (), [], set()):
323
            # include all
324 1
            payload['sid[]'] = []
325 1
        elif chemical_name is not None:
326 1
            if parse_chemistry_locally:
327 1
                species_ids = self.get_species_ids(chemical_name, chem_re_flags)
328 1
                if len(species_ids) == 0:
329 0
                    raise ValueError("No matching chemical species found.")
330 1
                payload['sid[]'] = list(species_ids.values())
331
            else:
332 0
                payload['chemical_name'] = chemical_name
333

334 1
        if energy_min is not None:
335 0
            payload['energy_range_from'] = float(energy_min)
336 1
        if energy_max is not None:
337 1
            payload['energy_range_to'] = float(energy_max)
338 1
        if energy_type is not None:
339 1
            validate_energy_type(energy_type)
340 1
            payload['energy_range_type'] = energy_type
341

342 1
        if intensity_type is not None:
343 0
            payload['lill'] = 'lill_' + intensity_type
344 0
            if intensity_lower_limit is not None:
345 0
                payload[payload['lill']] = intensity_lower_limit
346

347 1
        if transition is not None:
348 0
            payload['tran'] = transition
349

350 1
        if version in self.versions:
351 1
            payload['data_version'] = version
352 1
        elif version is not None:
353 0
            raise ValueError("Invalid version specified.  Allowed versions "
354
                             "are {vers}".format(vers=str(self.versions)))
355

356 1
        if exclude == 'none':
357 1
            for e in ('potential', 'atmospheric', 'probable', 'known'):
358
                # Setting a keyword value to 'None' removes it (see query_lines_async)
359 1
                log.debug("Setting no_{0} to None".format(e))
360 1
                payload['no_' + e] = None
361 1
        elif exclude is not None:
362 1
            for e in exclude:
363 1
                payload['no_' + e] = 'no_' + e
364

365 1
        if only_astronomically_observed:
366 0
            payload['include_only_observed'] = 'include_only_observed'
367 1
        if only_NRAO_recommended:
368 1
            payload['include_only_nrao'] = 'include_only_nrao'
369

370 1
        if line_lists is not None:
371 1
            if type(line_lists) not in (tuple, list):
372 1
                raise TypeError("Line lists should be a list of linelist "
373
                                "names.  See Splatalogue.ALL_LINE_LISTS")
374 1
            for L in self.ALL_LINE_LISTS:
375 1
                kwd = 'display' + L
376 1
                if L in line_lists:
377 1
                    payload[kwd] = kwd
378
                else:
379 1
                    payload[kwd] = ''
380

381 1
        if line_strengths is not None:
382 1
            for LS in line_strengths:
383 1
                payload[LS] = LS
384

385 1
        if energy_levels is not None:
386 1
            for EL in energy_levels:
387 1
                payload[EL] = EL
388

389 1
        for b in ("noHFS", "displayHFS", "show_unres_qn",
390
                  "show_upper_degeneracy", "show_molecule_tag",
391
                  "show_qn_code", "show_lovas_labref",
392
                  "show_orderedfreq_only", "show_lovas_obsref",
393
                  "show_nrao_recommended"):
394 1
            if locals()[b]:
395 1
                payload[b] = b
396

397
        # default arg, unmodifiable...
398 1
        payload['jsMath'] = 'font:symbol,warn:0'
399 1
        payload['__utma'] = ''
400 1
        payload['__utmc'] = ''
401

402 1
        if export:
403 1
            payload['submit'] = 'Export'
404 1
            payload['export_delimiter'] = 'colon'  # or tab or comma
405 1
            payload['export_type'] = 'current'
406 1
            payload['offset'] = 0
407 1
            payload['range'] = 'on'
408

409 1
        if export_limit is not None:
410 1
            payload['limit'] = export_limit
411
        else:
412 1
            payload['limit'] = self.LINES_LIMIT
413

414 1
        return payload
415

416 1
    def _validate_kwargs(self, min_frequency=None, max_frequency=None,
417
                         band='any', **kwargs):
418
        """
419
        Check that either min_frequency + max_frequency or band are specified
420
        """
421 1
        if band == 'any':
422 1
            if min_frequency is None or max_frequency is None:
423 0
                raise ValueError("Must specify either min/max frequency or "
424
                                 "a valid Band.")
425

426 1
    @prepend_docstr_nosections("\n" + _parse_kwargs.__doc__)
427 1
    def query_lines_async(self, min_frequency=None, max_frequency=None,
428
                          cache=True, **kwargs):
429
        """
430

431
        Returns
432
        -------
433
        response : `requests.Response`
434
            The response of the HTTP request.
435

436
        """
437
        # have to chomp this kwd here...
438 1
        get_query_payload = kwargs.pop('get_query_payload', False)
439

440 1
        self._validate_kwargs(min_frequency=min_frequency,
441
                              max_frequency=max_frequency, **kwargs)
442

443 1
        if hasattr(self, 'data'):
444 1
            data_payload = self.data.copy()
445 1
            data_payload.update(self._parse_kwargs(min_frequency=min_frequency,
446
                                                   max_frequency=max_frequency,
447
                                                   **kwargs))
448
        else:
449 0
            data_payload = self._default_kwargs()
450 0
            data_payload.update(self._parse_kwargs(min_frequency=min_frequency,
451
                                                   max_frequency=max_frequency,
452
                                                   **kwargs))
453

454
        # Add an extra step: sometimes, need to REMOVE keywords
455 1
        data_payload = {k: v for k, v in data_payload.items() if v is not None}
456

457 1
        if get_query_payload:
458 1
            return data_payload
459

460 1
        response = self._request(method='POST',
461
                                 url=self.QUERY_URL,
462
                                 data=data_payload,
463
                                 timeout=self.TIMEOUT,
464
                                 cache=cache)
465

466 1
        self.response = response
467

468 1
        return response
469

470 1
    def _parse_result(self, response, verbose=False):
471
        """
472
        Parse a response into an `~astropy.table.Table`
473

474
        Parameters
475
        ----------
476
        clean_headers : bool
477
            Attempt to simplify / clean up the column headers returned by
478
            splatalogue to make them more terminal-friendly
479
        """
480

481 1
        result = ascii.read(response.text.split('\n'), delimiter=':',
482
                            format='basic', fast_reader=False)
483

484 1
        return result
485

486 1
    def get_fixed_table(self, columns=None):
487
        """
488
        Convenience function to get the table with html column names made human
489
        readable.  It returns only the columns identified with the ``columns``
490
        keyword.  See the source for the defaults.
491
        """
492 0
        if columns is None:
493 0
            columns = ('Species', 'Chemical Name', 'Resolved QNs',
494
                       'Freq-GHz(rest frame,redshifted)',
495
                       'Meas Freq-GHz(rest frame,redshifted)',
496
                       'Log<sub>10</sub> (A<sub>ij</sub>)',
497
                       'E_U (K)')
498 0
        table = clean_column_headings(self.table[columns])
499 0
        return table
500

501

502 1
def validate_energy_type(etype):
503 1
    valid_energy_types = ('el_cm1', 'eu_cm1', 'eu_k', 'el_k')
504 1
    if etype not in valid_energy_types:
505 0
        raise ValueError("Energy type must be one of {0}"
506
                         .format(valid_energy_types))
507

508

509 1
Splatalogue = SplatalogueClass()

Read our documentation on viewing source code .

Loading