astropy / astroquery
1
# Licensed under a 3-clause BSD style license - see LICENSE.rst
2 1
"""
3
Support module for splatalogue.  Requires bs4, and is therefore not intended
for users / not part of the core package.

:author: Adam Ginsburg <adam.g.ginsburg@gmail.com>
7
"""
8 1
import json
9 1
import os
10 1
import requests
11

12 1
from astropy.config import paths
13

14 1
from . import conf
15

16

17 1
def data_path(filename: str) -> str:
    """
    Build the path to a data file stored next to this module.

    Note that this path points into the ``data`` directory beside this source
    file (i.e. it is part of the astroquery source tree), not the astropy
    cache directory, as the existence of the file is a prerequisite for
    performing queries.

    Parameters
    ----------
    filename : str
        Name of the file (generally should be splat-species.json)

    Returns
    -------
    str
        Full path to ``filename`` inside the module's ``data`` directory
    """
    # Anchor on this file's own location so the result does not depend on
    # the current working directory.
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    return os.path.join(data_dir, filename)
35

36

37 1
def get_json_species_ids(outfile='splat-species.json', base_url=conf.base_url,
                         timeout=None):
    """
    Uses BeautifulSoup to scrape the NRAO Splatalogue species
    selector form, and stores the result as JSON. The file is
    written to the path returned by ``data_path(outfile)``, i.e. the
    ``data`` directory next to this module, not the ``astropy`` cache.

    Parameters
    ----------
    outfile : str, optional
        Name of the output JSON, by default 'splat-species.json'
    base_url : str, optional
        Root URL of the Splatalogue site; ``/b.php`` is appended to it to
        reach the species selector form.  The default is the value of
        ``conf.base_url`` at the time this function was defined.
    timeout : float or tuple, optional
        Passed straight to ``requests.get``.  The default ``None`` waits
        indefinitely, matching the previous behavior.

    Returns
    -------
    str
        JSON-encoded string of the scraped mapping, shaped as
        ``{species class: {option value: option text}}``

    Raises
    ------
    requests.HTTPError
        If the server responds with an error status code.
    IndexError
        If the page contains no ``<select>`` element with the expected ID.
    """
    # bs4 is an optional dependency, so it is imported only when needed.
    import bs4

    result = requests.get(f'{base_url}/b.php', timeout=timeout)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    result.raise_for_status()
    page = bs4.BeautifulSoup(result.content, 'html5lib')
    # The ID needs to be checked periodically if Splatalogue is updated
    selectors = page.find_all('select', attrs={'id': 'speciesselectbox'})
    if not selectors:
        raise IndexError("no <select id='speciesselectbox'> element found "
                         f"at {base_url}/b.php; the page layout may have "
                         "changed")
    sid = selectors[0]

    # Group the options by their first CSS class in a single pass.  Using
    # setdefault keeps the groups in document order, so the JSON written
    # below is reproducible from run to run.
    species = {}
    for kid in sid.children:
        # Skip children without tag attributes or without a class.
        if hasattr(kid, 'attrs') and 'class' in kid.attrs:
            species.setdefault(kid['class'][0], {})[kid['value']] = kid.text

    with open(data_path(outfile), 'w', encoding='utf-8') as f:
        json.dump(species, f)

    return json.dumps(species)
75

76

77 1
if __name__ == "__main__":
    # Running this module as a script scrapes the species list and rewrites
    # the default JSON file.
    get_json_species_ids()

Read our documentation on viewing source code.

Loading