-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathgenerate_references.py
101 lines (85 loc) · 2.85 KB
/
generate_references.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# Script to read References.dat and generate a json document for the database
# Read References.dat
# This has references across multiple rows and in multiple formats.
# Will need to see if ADS can be used to get the unique reference automatically
# Should each reference be a separate JSON? They will be separate entries in the database, but it may be
# easier as a single file on disk
# Example JSON
"""
[
{
"key": "Bellazzini_2006_1",
"id": 1,
"year": 2006,
"doi": "",
"bibcode": "",
"authors": [
"Bellazzini, M.",
"Ibata, R.",
"Martin, N.",
"Lewis, G. F.",
"Conn, B.",
"Irwin, M. J."
],
"journal": "MNRAS",
"title": "The core of the Canis Major galaxy as traced by red clump stars"
},
{
"key": "Martin_2005_1",
"id": 2,
"year": 2005,
"doi": "",
"bibcode": "",
"authors": [
"Martin, N. F.",
"Ibata, R. A.",
"Conn, B. C.",
"Lewis, G. F.",
"Bellazzini, M.",
"Irwin, M. J."
],
"journal": "MNRAS",
"title": "A radial velocity survey of low Galactic latitude structures - I. Kinematics of the Canis Major dwarf galaxy"
}
]
The absolute minimum will probably just be the key (as author_year_integer), though year and authors may be useful
"""
# I can imagine there being functions to populate the database from calls to ADS given the doi/bibcode
# However, I also think that queries to the reference table will not be required
import json
import requests
import urllib.parse
import ads
from dsii_secrets import ads_api_key
# Example query
# Hand the imported key to the ads client explicitly; without this the client
# only looks at its own environment variable / key file and the import above
# is unused.
ads.config.token = ads_api_key

# Other ways of phrasing the same search (kept for reference only; each call
# costs a request against the ADS rate limit, so they are not executed):
#   ads.SearchQuery(q="The core of the Canis Major galaxy as traced by red clump stars")
#   ads.SearchQuery(author="Bellazzini", year=2006)
#
# Ask for every field printed below up front. 'doi' is not among the fields
# the ads client fetches by default, so leaving it out would cause one extra
# lazy-load request per paper when p.doi is accessed.
papers = list(ads.SearchQuery(
    first_author="Bellazzini",
    year=2006,
    q='The core of the Canis Major galaxy as traced by red clump stars',
    fl=['year', 'author', 'title', 'bibcode', 'doi'],
))
for p in papers:
    print(p.year, p.author[0], p.title, p.bibcode, p.doi)

# Expected match for the query above:
# Bellazzini_2006_1
# 10.1111/j.1365-2966.2005.09973.x
# 2006MNRAS.366..865B
# Get references data
with open('references.json', 'r') as f:
    refs = json.load(f)

# Fill in the missing bibcode/doi of each reference by searching ADS on
# first author, year and title. Records are updated in place in `refs`.
for ref in refs:
    # Nothing to do when both identifiers are already present. Truthiness is
    # used so that a JSON null counts as missing, just like an empty string.
    if ref.get('bibcode') and ref.get('doi'):
        continue

    # A record without authors cannot be searched by first author; report it
    # and move on instead of failing with an IndexError.
    authors = ref.get('authors') or []
    if not authors:
        print('Warning: no authors listed, skipping', ref.get('key'))
        continue

    # Only pass the search terms that are actually present, so a missing
    # year/title is not sent to ADS as the literal value None.
    query = {'first_author': authors[0]}
    year = ref.get('year')
    if year:
        query['year'] = year
    title = ref.get('title')
    if title:
        query['title'] = title

    # Request the fields used below up front; 'doi' would otherwise be
    # lazy-loaded with one additional request per paper.
    papers = list(ads.SearchQuery(
        fl=['year', 'author', 'title', 'bibcode', 'doi'],
        **query,
    ))

    if len(papers) == 1:
        p = papers[0]
        ref['bibcode'] = p.bibcode
        # The original code indexes p.doi, so it is list-like when present;
        # it is presumably empty/None for papers ADS holds no DOI for. In
        # that case keep whatever the record already had.
        ref['doi'] = p.doi[0] if p.doi else ref.get('doi', '')
        print(ref)
    elif len(papers) > 1:
        # Ambiguous match: list the candidates and leave the record untouched
        # so it can be resolved by hand.
        print('Warning: more than one paper matched')
        for p in papers:
            print(p.year, p.author[0], p.title, p.bibcode, p.doi)
    else:
        print('Warning: no paper matched for', ref.get('key'))
# Write the updated records back over references.json.
# The JSON text is built before the file is opened for writing, so a
# serialisation error cannot leave a truncated file behind.
out_refs = json.dumps(refs, sort_keys=False, indent=2)
with open('references.json', mode='w') as out_file:
    out_file.write(out_refs)