Skip to content

Commit 63bd8c0

Browse files
committed
New editing examples and error message cleanup
1 parent 280060b commit 63bd8c0

File tree

6 files changed

+127
-66
lines changed

6 files changed

+127
-66
lines changed

caltechdata_api/decustomize_schema.py

Lines changed: 45 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import json
33
import argparse
44

5-
def decustomize_schema(json_record):
5+
def decustomize_schema(json_record,pass_emails=False):
66

77
#Extract subjects to single string
88
if "subjects" in json_record:
@@ -62,14 +62,17 @@ def decustomize_schema(json_record):
6262
new['affiliations'] = [a['authorAffiliation']]
6363
if 'authorIdentifiers' in a:
6464
idv = []
65-
for cid in a['authorIdentifiers']:
66-
nid = {}
67-
nid['nameIdentifier'] =\
68-
cid.pop('authorIdentifier')
69-
nid['nameIdentifierScheme'] =\
70-
cid.pop('authorIdentifierScheme')
71-
idv.append(nid)
72-
new['nameIdentifiers']=idv
65+
if isinstance(a['authorIdentifiers'],list):
66+
for cid in a['authorIdentifiers']:
67+
nid = {}
68+
nid['nameIdentifier'] =\
69+
cid.pop('authorIdentifier')
70+
nid['nameIdentifierScheme'] =\
71+
cid.pop('authorIdentifierScheme')
72+
idv.append(nid)
73+
new['nameIdentifiers']=idv
74+
else:
75+
print("Author identifiers not an array - please check")
7376
del a['authorIdentifiers']
7477
new['creatorName'] = a['authorName']
7578
newa.append(new)
@@ -85,26 +88,23 @@ def decustomize_schema(json_record):
8588
else:
8689
c['affiliations'] = [c.pop('contributorAffiliation')]
8790
if 'contributorIdentifiers' in c:
88-
#if isinstance(c['contributorIdentifiers'],list):
89-
newa = []
90-
for cid in c['contributorIdentifiers']:
91-
new = {}
92-
new['nameIdentifier'] =\
91+
if isinstance(c['contributorIdentifiers'],list):
92+
newa = []
93+
for cid in c['contributorIdentifiers']:
94+
new = {}
95+
new['nameIdentifier'] =\
9396
cid.pop('contributorIdentifier')
94-
if 'contributorIdentifierScheme' in cid:
95-
new['nameIdentifierScheme'] =\
97+
if 'contributorIdentifierScheme' in cid:
98+
new['nameIdentifierScheme'] =\
9699
cid.pop('contributorIdentifierScheme')
97-
newa.append(new)
98-
c['nameIdentifiers']=newa
100+
newa.append(new)
101+
c['nameIdentifiers']=newa
102+
else:
103+
print("Contributor identifier not an array - please check")
99104
del c['contributorIdentifiers']
100-
#else:
101-
# c['contributorIdentifiers']['nameIdentifier'] =\
102-
# c['contributorIdentifiers'].pop('contributorIdentifier')
103-
# c['contributorIdentifiers']['nameIdentifierScheme'] =\
104-
# c['contributorIdentifiers'].pop('contributorIdentifierScheme')
105-
# c['nameIdentifiers'] = [c.pop('contributorIdentifiers')]
106-
if 'contributorEmail' in c:
107-
del c['contributorEmail']
105+
if pass_emails == False:
106+
if 'contributorEmail' in c:
107+
del c['contributorEmail']
108108
#format
109109
if "format" in json_record:
110110
if isinstance(json_record['format'],list):
@@ -124,21 +124,26 @@ def decustomize_schema(json_record):
124124
json_record['dates']=json_record.pop('relevantDates')
125125

126126
#set publicationYear
127-
year = json_record['publicationDate'].split('-')[0]
128-
json_record['publicationYear'] = year
129-
#If "Submitted' date type was not manually set in metadata
130-
#Or 'Issued was not manually set
131-
#We want to save the entire publicationDate
132-
if 'Submitted' in datetypes or 'Issued' in datetypes:
133-
print("Custom Dates Present-Dropping TIND Publication Date")
134-
else:
135-
if 'dates' in json_record:
136-
json_record['dates'].append({"date":json_record['publicationDate'],\
137-
"dateType": "Submitted"})
127+
if 'publicationDate' in json_record:
128+
year = json_record['publicationDate'].split('-')[0]
129+
json_record['publicationYear'] = year
130+
131+
#If 'Submitted' date type was not manually set in metadata
132+
#Or 'Issued' was not manually set
133+
#We want to save the entire publicationDate
134+
if 'Submitted' in datetypes or 'Issued' in datetypes:
135+
print("Custom Dates Present-Dropping TIND Publication Date")
138136
else:
139-
json_record['dates']=[{"date":json_record['publicationDate'],\
137+
if 'dates' in json_record:
138+
json_record['dates'].append({"date":json_record['publicationDate'],\
139+
"dateType": "Submitted"})
140+
else:
141+
json_record['dates']=[{"date":json_record['publicationDate'],\
140142
"dateType": "Submitted"}]
141-
del json_record['publicationDate']
143+
del json_record['publicationDate']
144+
145+
else:
146+
print("No publication date set - something is odd with the record")
142147

143148
#license - no url available
144149
if 'rightsList' not in json_record:
@@ -152,7 +157,7 @@ def decustomize_schema(json_record):
152157
if 'fundings' in json_record:
153158
#Metadata changes and all should all be DataCite standard
154159
#Clean out any residual issues
155-
print("Check funding information")
160+
print("Legacy funding information (fundings) not transferred")
156161
del json_record['fundings']
157162

158163
#Geo

edit_all_tccon.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import argparse, os, json, requests
2+
from caltechdata_api import caltechdata_edit, decustomize_schema
3+
4+
# Add a TCCON ResearchGroup contributor to every record returned by a
# TCCON subject search, unless the record already lists one.

# The TIND access token must already be in the environment
# (set it beforehand with `source token.bash`).
token = os.environ['TINDTOK']

production = True

# Pick the records endpoint that matches the production flag.
url = ('https://data.caltech.edu/api/records' if production
       else 'https://cd-sandbox.tind.io/api/records')

# Search for records carrying the TCCON subject.
response = requests.get(url+'/?size=1000&q=subjects:TCCON')
hits = response.json()

for hit in hits['hits']['hits']:
    rid = str(hit['id'])
    print(rid)
    # Second argument is pass_emails=True.
    record = decustomize_schema(hit['metadata'],True)

    # Built fresh for each record so no dict is shared between edits.
    tccon_group = {'contributorName':'TCCON','contributorType':'ResearchGroup'}

    # `update` stays None when the record needs no change.
    update = None
    if 'contributors' not in record:
        update = {'contributors':[tccon_group]}
    else:
        # Collect every contributor name up front (all entries are visited).
        names = [entry['contributorName'] for entry in record['contributors']]
        if 'TCCON' not in names:
            contributors = record['contributors']
            contributors.append(tccon_group)
            update = {'contributors':contributors}

    if update is not None:
        response = caltechdata_edit(token, rid, update, {}, {}, production)
        print(response)

edit_files.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@
1212
#Get access token from TIND set as environment variable with source token.bash
1313
token = os.environ['TINDTOK']
1414

15-
production = False
15+
production = True
16+
17+
print(args.delete)
1618

1719
response = caltechdata_edit(token, args.ids, {}, args.fnames, args.delete, production)
1820
print(response)

edit_tccon.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import sys, os, json, requests
2+
from caltechdata_api import caltechdata_edit, decustomize_schema
3+
#USAGE: python edit_tccon.py tccon.ggg2014.darwin01.R0.json 269 0 contact@example.edu
4+
5+
# Update the contributors of a single TCCON record.
#
# Command-line arguments (see the USAGE line at the top of the file):
#   argv[1]  path to the JSON metadata file for the record
#   argv[2]  ID of the record to edit
#   argv[3]  index into record['creators'] of the contact person
#   argv[4]  e-mail address of the contact person

# Read the TIND access token from the environment; it is expected to be
# set beforehand (e.g. with `source token.bash`).
token = os.environ['TINDTOK']

# Passed through to caltechdata_edit as its final positional argument.
production = True

# NOTE: an earlier version also queried the records API for every TCCON
# subject at this point but never used the result; that dead request
# (and the URL selection that only fed it) has been dropped.

# Load the metadata to edit; the context manager closes the file handle.
with open(sys.argv[1], 'r') as infile:
    record = json.load(infile)

rid = sys.argv[2]

group = {'contributorName':'TCCON','contributorType':'ResearchGroup'}

for c in record['contributors']:
    if c['contributorType'] == 'HostingInstitution':
        # This must be an assignment: it was previously written with `==`,
        # a comparison whose result was discarded, so the name was never
        # actually updated.
        c['contributorName'] = \
            'California Institute of Technology, Pasadena, CA (US)'
        c['contributorIdentifiers'] = \
            [{'contributorIdentifier': 'grid.20861.3d',
              'contributorIdentifierScheme': 'GRID'}]

v = record['contributors']
v.append(group)

# Re-label the chosen creator entry as a ContactPerson contributor.
# The creator dict is modified in place; only 'contributors' is sent below.
contact = record['creators'][int(sys.argv[3])]
contact['contributorName'] = contact.pop('creatorName')
contact['contributorEmail'] = sys.argv[4]
contact['contributorType'] = 'ContactPerson'
v.append(contact)

new = {'contributors':v}
response = caltechdata_edit(token, rid, new, {}, {}, production)
print(response)

example.py

Lines changed: 0 additions & 24 deletions
This file was deleted.

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from setuptools import setup, find_packages
22
setup(
33
name = 'caltechdata_api',
4-
version ='0.0.3',
4+
version ='0.0.5',
55
packages = find_packages(),
66
install_requires=[
77
'requests',

0 commit comments

Comments
 (0)