Skip to content

Commit beaddf2

Browse files
committed
Update edit examples
1 parent d75139a commit beaddf2

File tree

3 files changed

+75
-23
lines changed

3 files changed

+75
-23
lines changed

edit_all_geo.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,19 @@
1-
import argparse, os, json, requests,csv
1+
import argparse, os, json, requests, csv, dataset
22
from caltechdata_api import caltechdata_edit, decustomize_schema
33

44
#Get access token from TIND set as environment variable with source token.bash
55
token = os.environ['TINDTOK']
66

7+
collection = 'data/CaltechTHESIS.ds'
8+
79
production = True
810

911
if production == True:
1012
url = 'https://data.caltech.edu/api/records'
1113
else:
1214
url = 'https://cd-sandbox.tind.io/api/records'
1315

14-
response = requests.get(url+'/?size=1000&q=subjects:gps,thesis')
16+
response = requests.get(url+'/?size=1000&q=subjects:gps')
1517
hits = response.json()
1618

1719
#Set up dictionary of links between resolver and thesis IDs
@@ -31,10 +33,20 @@
3133
record = decustomize_schema(h['metadata'],True)
3234
if 'relatedIdentifiers' in record:
3335
for r in record['relatedIdentifiers']:
34-
if r['relationType']=='IsSupplementTo' and 'relatedIdentifierType'=='URL':
36+
if r['relationType']=='IsSupplementTo' and\
37+
r['relatedIdentifierType']=='URL':
3538
idv = record_list[r['relatedIdentifier']]
36-
print(idv)
37-
#metadata =\
38-
#{'descriptions':[{'description':description,'descriptionType':'Abstract'}]}
39-
#response = caltechdata_edit(token, rid, metadata, {}, {}, production)
40-
#print(response)
39+
thesis_metadata,err = dataset.read(collection,idv)
40+
pub_date = thesis_metadata['date']
41+
dates = [{"date":pub_date,"dateType":"Issued"}]
42+
for date in record['dates']:
43+
if date['dateType'] == 'Issued':
44+
dates.append({"date":date['date'],"dateType":"Updated"})
45+
elif date['dateType'] == 'Updated':
46+
pass
47+
elif date['dateType'] != 'Submitted':
48+
dates.append(date)
49+
print(dates)
50+
metadata ={'dates':dates}
51+
response = caltechdata_edit(token, rid, metadata, {}, {}, production)
52+
print(response)

edit_all_github.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import argparse, os, json, requests
2+
from caltechdata_api import caltechdata_edit, decustomize_schema
3+
4+
#Get access token from TIND set as environment variable with source token.bash
5+
token = os.environ['TINDTOK']
6+
7+
production = True
8+
9+
if production == True:
10+
url = 'https://data.caltech.edu/api/records'
11+
else:
12+
url = 'https://cd-sandbox.tind.io/api/records'
13+
14+
response = requests.get(url+'/?size=2000&q=cal_resource_type=software')
15+
hits = response.json()
16+
17+
for h in hits['hits']['hits']:
18+
rid = h['id']
19+
print(rid)
20+
record = decustomize_schema(h['metadata'],True)
21+
replace = False
22+
#to_update =\
23+
#[288,269,295,291,279,284,266,281,286,278,280,293,283,287,210,274,276,290,300,285,270,268,267,302,744,282,272,289]
24+
#if rid in to_update:
25+
# Find just GitHub records by title
26+
if '/' in record['titles'][0]['title']:
27+
add = True
28+
for s in record['subjects']:
29+
subject = s['subject']
30+
if subject == 'Github':
31+
add = False
32+
if subject == 'GitHub':
33+
add = False
34+
if subject == 'Bitbucket':
35+
add = False
36+
if add == True:
37+
record['subjects'].append({'subject':'GitHub'})
38+
print( record['titles'][0]['title'])
39+
response = caltechdata_edit(token, rid, record, {}, {}, production)
40+
print(response)

edit_all_tccon.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,23 +15,23 @@
1515
hits = response.json()
1616

1717
for h in hits['hits']['hits']:
18-
rid = str(h['id'])
18+
rid = h['id']
1919
print(rid)
2020
record = decustomize_schema(h['metadata'],True)
2121
replace = False
22-
if 'relatedIdentifiers' in record:
23-
for r in record['relatedIdentifiers']:
24-
if r['relationType']=='IsPreviousVersionOf':
25-
description = \
26-
"<br> These data are now obsolete and should be replaced by the most recent data: https://doi.org/"\
27-
+r['relatedIdentifier']+' <br><br>'
28-
description = description +\
29-
record['descriptions'][0]['description']
30-
replace = True
31-
32-
33-
if replace == True:
34-
metadata =\
35-
{'descriptions':[{'description':description,'descriptionType':'Abstract'}]}
22+
to_update =\
23+
[288,269,295,291,279,284,266,281,286,278,280,293,283,287,210,274,276,290,300,285,270,268,267,302,744,282,272,289]
24+
if rid in to_update:
25+
dates = []
26+
for d in record['dates']:
27+
if d['dateType']=='Issued':
28+
d['dateType'] = 'Submitted'
29+
dates.append(d)
30+
elif d['dateType']!='Submitted':
31+
dates.append(d)
32+
else:
33+
print("Duplicate ",d)
34+
metadata ={'dates':dates}
35+
print(metadata)
3636
response = caltechdata_edit(token, rid, metadata, {}, {}, production)
3737
print(response)

0 commit comments

Comments
 (0)