Update edit examples

tmorrell · tmorrell · commit beaddf2a1eb5 · 2019-04-29T12:44:33.000-07:00
diff --git a/edit_all_geo.py b/edit_all_geo.py
@@ -1,17 +1,19 @@
-import argparse, os, json, requests,csv
+import argparse, os, json, requests, csv, dataset
 from caltechdata_api import caltechdata_edit, decustomize_schema
 
 #Get access token from TIND sed as environment variable with source token.bash
 token = os.environ['TINDTOK']
 
+collection = 'data/CaltechTHESIS.ds'
+
 production = True
 
 if production == True:
     url = 'https://data.caltech.edu/api/records'
 else:
     url = 'https://cd-sandbox.tind.io/api/records'
 
-response = requests.get(url+'/?size=1000&q=subjects:gps,thesis')
+response = requests.get(url+'/?size=1000&q=subjects:gps')
 hits = response.json()
 
 #Set up dictionary of links between resolver and thesis IDs
@@ -31,10 +33,20 @@
     record = decustomize_schema(h['metadata'],True)
     if 'relatedIdentifiers' in record:
         for r in record['relatedIdentifiers']:
-            if r['relationType']=='IsSupplementTo' and 'relatedIdentifierType'=='URL':
+            if r['relationType']=='IsSupplementTo' and\
+            r['relatedIdentifierType']=='URL':
                 idv = record_list[r['relatedIdentifier']]
-                print(idv)
-        #metadata =\
-                #{'descriptions':[{'description':description,'descriptionType':'Abstract'}]}
-            #response = caltechdata_edit(token, rid, metadata, {}, {}, production)
-            #print(response)
+                thesis_metadata,err = dataset.read(collection,idv)
+                pub_date = thesis_metadata['date']
+                dates = [{"date":pub_date,"dateType":"Issued"}]
+                for date in record['dates']:
+                    if date['dateType'] == 'Issued':
+                        dates.append({"date":date['date'],"dateType":"Updated"})
+                    elif date['dateType'] == 'Updated':
+                        pass
+                    elif date['dateType'] != 'Submitted':
+                        dates.append(date)
+                print(dates)
+                metadata ={'dates':dates}
+                response = caltechdata_edit(token, rid, metadata, {}, {}, production)
+                print(response)
diff --git a/edit_all_github.py b/edit_all_github.py
@@ -0,0 +1,40 @@
+import argparse, os, json, requests
+from caltechdata_api import caltechdata_edit, decustomize_schema
+
+#Get access token from TIND, set as an environment variable with source token.bash
+token = os.environ['TINDTOK']
+
+production = True
+
+if production == True:
+    url = 'https://data.caltech.edu/api/records'
+else:
+    url = 'https://cd-sandbox.tind.io/api/records'
+
+response = requests.get(url+'/?size=2000&q=cal_resource_type=software')
+hits = response.json()
+
+for h in hits['hits']['hits']:
+        rid = h['id']
+        print(rid)
+        record = decustomize_schema(h['metadata'],True)
+        replace = False
+        #to_update =\
+                #[288,269,295,291,279,284,266,281,286,278,280,293,283,287,210,274,276,290,300,285,270,268,267,302,744,282,272,289]
+        #if rid in to_update:
+        # Find just GitHub records by title
+        if '/' in record['titles'][0]['title']:
+            add = True
+            for s in record['subjects']:
+                subject = s['subject']
+                if subject == 'Github':
+                    add = False
+                if subject == 'GitHub':
+                    add = False
+                if subject == 'Bitbucket':
+                    add = False
+            if add == True:
+                record['subjects'].append({'subject':'GitHub'})
+                print( record['titles'][0]['title'])
+                response = caltechdata_edit(token, rid, record, {}, {}, production)
+                print(response)
diff --git a/edit_all_tccon.py b/edit_all_tccon.py
@@ -15,23 +15,23 @@
 hits = response.json()
 
 for h in hits['hits']['hits']:
-        rid = str(h['id'])
+        rid = h['id']
         print(rid)
         record = decustomize_schema(h['metadata'],True)
         replace = False
-        if 'relatedIdentifiers' in record:
-            for r in record['relatedIdentifiers']:
-                if r['relationType']=='IsPreviousVersionOf':
-                    description = \
-"<br> These data are now obsolete and should be replaced by the most recent data: https://doi.org/"\
-                        +r['relatedIdentifier']+' <br><br>'
-                    description = description +\
-                    record['descriptions'][0]['description']
-                    replace = True
-
-
-        if replace == True:
-            metadata =\
-            {'descriptions':[{'description':description,'descriptionType':'Abstract'}]}
+        to_update =\
+        [288,269,295,291,279,284,266,281,286,278,280,293,283,287,210,274,276,290,300,285,270,268,267,302,744,282,272,289]
+        if rid in to_update:
+            dates = []
+            for d in record['dates']:
+                if d['dateType']=='Issued':
+                    d['dateType'] = 'Submitted'
+                    dates.append(d)
+                elif d['dateType']!='Submitted':
+                    dates.append(d)
+                else:
+                    print("Duplicate ",d)
+            metadata ={'dates':dates}
+            print(metadata)
             response = caltechdata_edit(token, rid, metadata, {}, {}, production)
             print(response)