Skip to content

Commit d0e29ba

Browse files
authored
Ingest parallax function and ingest data from Ultracool sheet (#528)
* updated ingest_parallax to return flags * formatting fixes and made logger stream to stdout * improved tests * refactored keyword arguments
1 parent 4a95a00 commit d0e29ba

File tree

5 files changed

+386
-155
lines changed

5 files changed

+386
-155
lines changed
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
from astrodb_utils import load_astrodb, find_source_in_db, AstroDBError
2+
import sys
3+
4+
sys.path.append(".")
5+
import logging
6+
from astropy.io import ascii
7+
from simple.schema import Photometry
8+
from simple.schema import REFERENCE_TABLES
9+
from math import isnan
10+
from simple.utils.astrometry import ingest_parallax
11+
from scripts.ingests.ultracool_sheet.references import uc_ref_to_simple_ref
12+
13+
logger = logging.getLogger(__name__)

# Logger setup
# Stream every message from this script's logger to standard output and
# apply a timestamped format to it.
logger.propagate = False  # prevents duplicated logging messages
LOGFORMAT = logging.Formatter(
    "%(asctime)s %(levelname)s: %(message)s", datefmt="%m/%d/%Y %I:%M:%S%p"
)
ch = logging.StreamHandler(stream=sys.stdout)
ch.setFormatter(LOGFORMAT)
# To prevent duplicate handlers, only add one if none has been set previously
if not logger.handlers:
    logger.addHandler(ch)
logger.setLevel(logging.INFO)

# DB_SAVE gates the final save_database() call; RECREATE_DB is forwarded to
# load_astrodb() as its `recreatedb` argument.
DB_SAVE = False
RECREATE_DB = True
db = load_astrodb(
    "SIMPLE.sqlite", recreatedb=RECREATE_DB, reference_tables=REFERENCE_TABLES
)


# Load Ultracool sheet
doc_id = "1i98ft8g5mzPp2DNno0kcz4B9nzMxdpyz5UquAVhz-U8"
sheet_id = "361525788"
link = (
    f"https://docs.google.com/spreadsheets/d/{doc_id}/export?format=csv&gid={sheet_id}"
)

# read the csv data into an astropy table
uc_sheet_table = ascii.read(
    link,
    format="csv",
    data_start=1,
    header_start=0,
    guess=False,
    fast_reader=False,
    delimiter=",",
)

# Outcome counters: every row of the sheet increments exactly one of these.
no_sources = 0
multiple_sources = 0
ingested = 0
already_exists = 0
no_data = 0

# Ingest loop
for source in uc_sheet_table:
    if isnan(source["plx_lit"]):  # skip if no data
        no_data += 1
        continue

    uc_sheet_name = source["name"]
    match = find_source_in_db(
        db,
        uc_sheet_name,
        ra=source["ra_j2000_formula"],
        dec=source["dec_j2000_formula"],
    )

    # Only an unambiguous single match is ingested; zero or several matches
    # are counted and skipped. len() is never negative, so these two guards
    # plus the code below cover every case.
    if len(match) == 0:
        no_sources += 1
        continue
    if len(match) > 1:
        multiple_sources += 1
        continue

    # 1 Match found. INGEST!
    simple_source = match[0]
    logger.debug(f"Match found for {uc_sheet_name}: {simple_source}")

    try:
        # The sheet may list several references separated by ";". The first
        # one becomes the measurement's reference.
        references = source["ref_plx_lit"].split(";")
        reference = uc_ref_to_simple_ref(db, references[0])

        comment = None
        if len(references) > 1:
            # NOTE(review): only the second reference is recorded here; any
            # further references in the sheet are not carried over.
            comment = f"other references: {uc_ref_to_simple_ref(db, references[1])}"
        ingest_parallax(
            db,
            source=simple_source,
            parallax_mas=source["plx_lit"],
            parallax_err_mas=source["plxerr_lit"],
            reference=reference,
            comment=comment,
        )
    except AstroDBError as e:
        error_text = str(e)
        if "Duplicate measurement exists" in error_text:
            # A duplicate is an expected outcome, not a failure of the run.
            already_exists += 1
        else:
            msg = "ingest failed with error: " + error_text
            logger.warning(msg)
            raise AstroDBError(msg) from e
    else:
        ingested += 1


# 1108 data points in UC sheet in total
logger.info(f"ingested:{ingested}")  # 1014 ingested
logger.info(f"already exists:{already_exists}")  # skipped 6 due to preexisting data
logger.info(f"no sources:{no_sources}")  # skipped 86 due to 0 matches
logger.info(f"multiple sources:{multiple_sources}")  # skipped 2 due to multiple matches
logger.info(f"no data: {no_data}")  # 2782
tracked = ingested + already_exists + no_sources + multiple_sources
logger.info(f"data points tracked:{tracked}")  # 1108
total = tracked + no_data
logger.info(f"total: {total}")  # 3890

# Sanity check: the counters must account for every row of the sheet before
# anything is written to disk.
if total != len(uc_sheet_table):
    msg = "data points tracked inconsistent with UC sheet"
    logger.error(msg)
    raise AstroDBError(msg)
elif DB_SAVE:
    db.save_database(directory="data/")

scripts/ingests/ultracool_sheet/Ingest_spitzer_photometry.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
logger = logging.getLogger("AstroDB")
1818
logger.setLevel(logging.INFO)
1919

20-
DB_SAVE = True
20+
DB_SAVE = False
2121
RECREATE_DB = True
2222
db = load_astrodb(
2323
"SIMPLE.sqlite", recreatedb=RECREATE_DB, reference_tables=REFERENCE_TABLES

scripts/ingests/ultracool_sheet/references.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727

2828

2929
def uc_ref_to_simple_ref(db, ref):
30+
if ref == "Harr15": # Reference with no ADS.
31+
return ref
3032
t = (
3133
db.query(db.Publications)
3234
.filter(db.Publications.c.bibcode == uc_ref_to_ADS[ref])

0 commit comments

Comments
 (0)