Skip to content

Commit a67d5cd

Browse files
committed
Add doi date script and pass through for emails
1 parent c71384b commit a67d5cd

File tree

2 files changed

+58
-12
lines changed

2 files changed

+58
-12
lines changed

add_doi_minting_date.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import os,requests
2+
from progressbar import progressbar
3+
from caltechdata_api import get_metadata, caltechdata_edit
4+
5+
def get_datacite_dates(prefix):
    '''Get the registration date and URL of every DataCite DOI under a prefix.

    Walks the paginated DataCite REST API listing for ``prefix`` and collects,
    for each DOI, the date portion of its ``registered`` timestamp and its
    ``url`` attribute.

    Args:
        prefix: DOI prefix to query for, e.g. ``'10.14291'``.

    Returns:
        A ``(doi_dates, doi_urls)`` tuple of dicts, both keyed by DOI id.
        ``doi_dates`` maps to the text before the ``'T'`` in the ``registered``
        timestamp; ``doi_urls`` maps to the DOI's ``url`` attribute.
        DOIs without a ``registered`` value are left out of both dicts.

    Raises:
        requests.HTTPError: if the DataCite API answers with an error status.
    '''
    doi_dates = {}
    doi_urls = {}
    next_link = ('https://api.datacite.org/dois?query=prefix:' + prefix
                 + '&page[cursor]=1&page[size]=500')

    # The first response supplies both the page count and the first page of
    # results, so it is reused below instead of being requested a second time.
    response = requests.get(next_link)
    response.raise_for_status()
    data = response.json()

    for page in progressbar(range(data['meta']['totalPages'])):
        if page > 0:
            response = requests.get(next_link)
            response.raise_for_status()
            data = response.json()
        for doi in data['data']:
            attributes = doi['attributes']
            registered = attributes.get('registered')
            if not registered:
                # No timestamp to take a date from; skip rather than crash
                continue
            doi_dates[doi['id']] = registered.split('T')[0]
            doi_urls[doi['id']] = attributes['url']
        next_link = data.get('links', {}).get('next')
        if not next_link:
            # No further page is advertised; stop even if totalPages
            # suggested more, otherwise the next request has no URL.
            break
    return doi_dates, doi_urls
24+
25+
# API token passed to caltechdata_edit, read from the environment
token = os.environ['TINDTOK']

# Record ids that are never modified by this script.
# NOTE(review): the reason these four are excluded is not stated here - confirm.
skipped_ids = {'252', '253', '254', '255'}

doi_dates, doi_urls = get_datacite_dates('10.14291')
for doi, url in doi_urls.items():
    # Only DOIs whose URL points at data.caltech.edu are processed;
    # a missing URL is skipped instead of raising on the membership test.
    if not url or 'data.caltech.edu' not in url:
        continue
    # The record id is the last path segment; tolerate a trailing slash
    caltech_id = url.rstrip('/').split('/')[-1]
    if caltech_id in skipped_ids:
        continue

    # emails=True so the metadata sent back in the edit still has emails
    metadata = get_metadata(caltech_id, emails=True)
    print(caltech_id)

    changed = False
    for date in metadata.get('dates', []):
        if date['dateType'] == 'Issued':
            print(date['date'], doi_dates[doi])
            if date['date'] != doi_dates[doi]:
                date['date'] = doi_dates[doi]
                changed = True

    # Write to production only when an Issued date was actually altered,
    # so re-running the script does not resubmit unchanged records.
    if changed:
        response = caltechdata_edit(token, caltech_id, metadata, production=True)
        print(response)
41+

caltechdata_api/get_metadata.py

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
from datacite import DataCiteMDSClient, schema40
44
from caltechdata_api import decustomize_schema
55

6-
def get_metadata(idv,production=True,auth=None):
7-
# Returns just DataCite metadata
6+
def get_metadata(idv,production=True,auth=None,emails=False):
7+
# Returns just DataCite metadata or DataCite metadata with emails
88

99
if production==True:
1010
api_url = "https://data.caltech.edu/api/record/"
@@ -20,15 +20,18 @@ def get_metadata(idv,production=True,auth=None):
2020
# r_data is not a string, so convert it before concatenating; otherwise
# building the message raises TypeError instead of this AssertionError.
raise AssertionError('expected as metadata property in response, got '+str(r_data))
2121
metadata = r_data['metadata']
2222

23-
metadata = decustomize_schema(metadata)
24-
try:
25-
assert schema40.validate(metadata)
26-
except AssertionError:
27-
v = schema40.validator.validate(metadata)
28-
errors = sorted(v.iter_errors(instance), key=lambda e:e.path)
29-
for error in errors:
30-
print(error.message)
31-
exit()
23+
if emails == True:
24+
metadata = decustomize_schema(metadata,pass_emails=True)
25+
else:
26+
metadata = decustomize_schema(metadata)
27+
try:
28+
assert schema40.validate(metadata)
29+
except AssertionError:
30+
v = schema40.validator.validate(metadata)
31+
errors = sorted(v.iter_errors(instance), key=lambda e:e.path)
32+
for error in errors:
33+
print(error.message)
34+
exit()
3235

3336
return metadata
3437

@@ -38,6 +41,7 @@ def get_metadata(idv,production=True,auth=None):
3841
and returns DataCite-compatable metadata")
3942
parser.add_argument('ids', metavar='ID', type=int, nargs='+',\
4043
help='The CaltechDATA ID for each record of interest')
44+
parser.add_argument('-emails',dest='emails', action='store_true')
4145
parser.add_argument('-test',dest='production', action='store_false')
4246
parser.add_argument('-xml',dest='save_xml', action='store_true')
4347
parser.add_argument('-auth_user',help='Username for basic authentication')
@@ -46,12 +50,13 @@ def get_metadata(idv,production=True,auth=None):
4650
args = parser.parse_args()
4751

4852
production = args.production
53+
emails = args.emails
4954
auth = None
5055
if args.auth_user != None:
5156
auth = (args.auth_user,args.auth_pass)
5257

5358
for idv in args.ids:
54-
metadata = get_metadata(idv,production,auth)
59+
# Pass options by keyword: get_metadata's positional order is
# (idv, production, auth, emails), so positional use is easy to misalign.
metadata = get_metadata(idv,production=production,auth=auth,emails=emails)
5560
outfile = open(str(idv)+'.json','w')
5661
outfile.write(json.dumps(metadata))
5762
outfile.close()

0 commit comments

Comments
 (0)