"""Submit a DOI record for a KBase static Narrative to OSTI.

Scrapes the static Narrative HTML for title/authors/date/abstract/keywords,
pulls author contact details from a "User Super Summary" spreadsheet, and
posts (or reserves) the assembled record via the OSTI API.  OSTI's response
(which carries the issued DOI) is saved locally as an XML backup.

Usage: python submit_doi.py <account> <password> <user_data.xlsx> <narrative_url>
"""
import argparse
import datetime
import re

import pandas as pd

# Matches a DOI such as "10.25982/1.2/345678".  NOTE: the dot after "10" is
# escaped and the pattern is a raw string -- the original '10.[0-9]*/\S*'
# let "." match any character.
DOI_PATTERN = re.compile(r"10\.[0-9]*/\S*")


def parse_narrative_url(url):
    """Split a static Narrative URL into (wsid, version, doi_infix).

    Expects a trailing slash, e.g. "https://kbase.us/n/12345/6/" returns
    ("12345", "6", "12345.6").
    """
    wsid = url[url.find("/n/") + 3 : url.rfind("/", 0, -1)]
    version = url[url.rfind("/", 0, -1) + 1 : -1]
    return wsid, version, "{}.{}".format(wsid, version)


def extract_dois(texts):
    """Return the unique DOIs found in an iterable of strings, in order.

    Only strings containing 'doi' (lowercase, matching the original
    case-sensitive check) are scanned; trailing periods are stripped so
    sentence-final DOIs are not corrupted.
    """
    dois = []
    for text in texts:
        if "doi" not in text:
            continue
        match = DOI_PATTERN.search(text.lower())
        # Guard against "doi" appearing without a parseable identifier --
        # the original indexed re.search(...)[0] and raised TypeError here.
        if match is None:
            continue
        doi = match[0].strip(".")
        if doi not in dois:
            dois.append(doi)
    return dois


def _author_entry(anchor, usersummary):
    """Build one OSTI author dict from a Narrative author <a> element.

    Looks the display name up in the User Super Summary frame for email,
    ORCID and institution ("blank" marks a missing cell); prompts on stdin
    when the name cannot be split unambiguously into first/last.
    """
    author_dict = {}
    # Summary info pulled up separately in case these need batching later.
    author_frame = usersummary.loc[usersummary["display_name"] == anchor.text]
    parts = anchor.text.split(" ")
    if len(parts) == 2:
        # Simple "First Last" profile name.
        author_dict["first_name"], author_dict["last_name"] = parts
    else:
        # Middle name, compound last name, or a single name: ask the operator.
        print("Enter name for: {}".format(anchor.text))
        author_dict["last_name"] = input("Family name: ")
        author_dict["first_name"] = input("Given name: ")
        author_dict["middle_name"] = input("Middle name: ")
    if author_frame["email"].to_list()[0] != "blank":
        author_dict["private_email"] = author_frame["email"].to_list()[0]
    if author_frame["orcid"].to_list()[0] != "blank":
        author_dict["orcid_id"] = author_frame["orcid"].to_list()[0]
    if author_frame["institution"].to_list()[0] != "blank":
        author_dict["affiliation_name"] = author_frame["institution"].to_list()[0]
    return author_dict


def gen_record(url, usersummary):
    """Scrape a static Narrative and assemble the OSTI submission record.

    Prompts on stdin for any required field missing from the page
    (abstract, contract numbers, keywords, ambiguous author names).
    """
    # Third-party deps imported lazily so the pure helpers above stay
    # importable without requests/bs4 installed.
    import requests
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    # Constants required on every KBase submission.
    dataset_type = "GD"
    BER = "USDOE Office of Science (SC), Biological and Environmental Research (BER)"
    # From the static Narrative URL itself.
    _wsid, _version, infix = parse_narrative_url(url)
    title = soup.find("title").text
    author_list = []
    research_orgs = []
    keywords = ""
    contract_nos = ""
    abstract = ""
    # Initialized so a page without a 'branding' div fails loudly at the
    # record-build step instead of raising NameError (original bug).
    pub_date = None
    for d in soup.find_all("div"):
        if d.get("class") == ["kb-author-list"]:
            for a in d.find_all("a"):
                entry = _author_entry(a, usersummary)
                affiliation = entry.get("affiliation_name")
                if affiliation is not None and affiliation not in research_orgs:
                    research_orgs.append(affiliation)
                author_list.append(entry)
        if d.get("class") == ["branding"]:
            # Branding text is "<word> Month DD, YYYY"; drop through the
            # first space, then reformat to MM/DD/YYYY for OSTI.
            datestring = d.text.strip("\n")
            datestring = datestring[datestring.find(" ") + 1 :]
            pub_date = datetime.datetime.strptime(datestring, "%B %d, %Y").strftime(
                "%m/%d/%Y"
            )
        # User defined/custom classes.
        if d.get("class") == ["user-abstract"]:
            abstract = d.text
    for m in soup.find_all("meta"):
        if m.get("name") == "user-keywords":
            keywords = m.get("content")
        if m.get("name") == "user-doi-funding":
            contract_nos = m.get("content")

    research_org = ";".join(research_orgs)
    # App DOIs are already in <li> elements, so users are asked to cite
    # likewise; collect every unique DOI found in a list item.
    doi_list = extract_dois(li.text for li in soup.find_all("li"))
    related_identifiers = [
        {
            "related_identifier": doi,
            "relation_type": "Cites",
            "related_identifier_type": "DOI",
        }
        for doi in doi_list
    ]

    # Manually enter abstract/keywords/contract numbers not found in HTML.
    if abstract == "":
        abstract = input("No abstract found. Enter manual value: ")
    if contract_nos == "":
        contract_nos = input("No contract numbers found. Enter manual value: ")
    if keywords == "":
        keywords = input("No keywords found. Enter manual value: ")
    if contract_nos == "":
        contract_nos = "N/A"
    return {
        "title": title,
        "dataset_type": dataset_type,
        "authors": author_list,
        "publication_date": pub_date,
        "site_url": url,
        "contract_nos": contract_nos,
        "sponsor_org": BER,
        "keyword": keywords,
        "description": abstract,
        "research_org": research_org,
        "doi_infix": infix,
        "related_identifiers": related_identifiers,
    }


def main():
    # Lazy imports: only needed when actually submitting.
    import ostiapi
    from dict2xml import dict2xml

    parser = argparse.ArgumentParser()
    parser.add_argument("account", help="Enter account name for submitting records")
    parser.add_argument("password", help="Enter account password")
    parser.add_argument("user_data", help="Enter file path for User Super Summary")
    parser.add_argument(
        "url", help="Enter URL for the static Narrative to receive the DOI"
    )
    parser.add_argument(
        "--reserve", help="Enter True to only reserve, not submit", action="store_true"
    )
    parser.add_argument(
        "--test_mode", help="Enter True to send record in testmode", action="store_true"
    )
    parser.add_argument(
        "--update_record", help="Enter OSTI record ID to update an existing record"
    )
    args = parser.parse_args()

    if args.test_mode:
        print("Operating in testmode")
        ostiapi.testmode()
    if args.update_record:
        print("Updating record {}".format(args.update_record))

    # SECURITY: the password is deliberately not echoed (the original
    # printed it in clear text).
    print("account", args.account)
    print("User data file", args.user_data)
    print("SN URL", args.url)

    # "blank" marks missing spreadsheet cells for the helpers above.
    usersummary = pd.read_excel(args.user_data).fillna("blank")
    record = gen_record(args.url, usersummary)
    if args.reserve:
        submit = ostiapi.reserve(record, args.account, args.password)
    else:
        submit = ostiapi.post(record, args.account, args.password)
    # Save OSTI's response (which carries the issued DOI) as a backup.
    # BUG FIX: the DOI lives in the *response*, not in the payload we built;
    # the original read record['record']['doi'], which raises KeyError.
    fname = submit["record"]["doi"].replace(".", "_").replace("/", "-")
    with open("{}.xml".format(fname), "w") as f:
        f.write(dict2xml(submit))


if __name__ == "__main__":
    main()
"""Fetch all KBase DOI records from OSTI and write a CSV summary.

Usage: python retrieve_records.py <account> <password>
Output: KBase_dois_<year>_<month>_<day>.csv with DOI/Title/URL/Status columns.
"""
import argparse
from datetime import datetime

import pandas as pd


def simplify_records(records):
    """Map each OSTI record dict to doi -> [title, site_url, status].

    Records without a 'site_url' key get the literal string 'None',
    preserving the original CSV output.  (The original wrapped the lookup
    in a bare ``except:``; this uses a narrow ``dict.get`` instead.)
    """
    simplified = {}
    for rec in records:
        simplified[rec["doi"]] = [
            rec["title"],
            rec.get("site_url", "None"),
            rec["@status"],
        ]
    return simplified


def main():
    # Imported lazily: ostiapi is the only dependency beyond stdlib/pandas,
    # so the module stays importable (and simplify_records testable) without it.
    import ostiapi

    parser = argparse.ArgumentParser()
    parser.add_argument("account", help="Enter account name for submitting records")
    parser.add_argument("password", help="Enter account password")
    args = parser.parse_args()
    records = ostiapi.get(
        {"site_input_code": "KBASE", "rows": 500}, args.account, args.password
    )["record"]
    df = pd.DataFrame.from_dict(
        simplify_records(records), orient="index", columns=["Title", "URL", "Status"]
    )
    df.index.name = "DOI"
    today = datetime.now()
    df.to_csv("KBase_dois_{}_{}_{}.csv".format(today.year, today.month, today.day))


if __name__ == "__main__":
    main()
datetime import argparse + today = datetime.now() parser = argparse.ArgumentParser() parser.add_argument("account", help="Enter account name for submitting records") -parser.add_argument("password",help="Enter account password") +parser.add_argument("password", help="Enter account password") args = parser.parse_args() account = args.account pw = args.password -records = ostiapi.get({'site_input_code':'KBASE','rows':500},account,pw)['record'] +records = ostiapi.get({"site_input_code": "KBASE", "rows": 500}, account, pw)["record"] simplified_records = {} for i in records: - title = i['title'] - doi = i['doi'] - status = i['@status'] + title = i["title"] + doi = i["doi"] + status = i["@status"] try: - url = i['site_url'] + url = i["site_url"] except: - url = 'None' - simplified_records[doi]=[title,url,status] -df = pd.DataFrame.from_dict(simplified_records,orient='index',columns=['Title','URL','Status']) -df.index.name='DOI' -df.to_csv('KBase_dois_{}_{}_{}.csv'.format(today.year,today.month,today.day)) \ No newline at end of file + url = "None" + simplified_records[doi] = [title, url, status] +df = pd.DataFrame.from_dict( + simplified_records, orient="index", columns=["Title", "URL", "Status"] +) +df.index.name = "DOI" +df.to_csv("KBase_dois_{}_{}_{}.csv".format(today.year, today.month, today.day)) diff --git a/osti_scripts/submit_doi.py b/osti_scripts/submit_doi.py index 78c00fd..0b95718 100644 --- a/osti_scripts/submit_doi.py +++ b/osti_scripts/submit_doi.py @@ -8,14 +8,21 @@ from bs4 import BeautifulSoup import requests import datetime + parser = argparse.ArgumentParser() parser.add_argument("account", help="Enter account name for submitting records") -parser.add_argument("password",help="Enter account password") -parser.add_argument("user_data",help="Enter file path for User Super Summary") -parser.add_argument("url",help="Enter URL for the static Narrative to receive the DOI") -parser.add_argument('--reserve',help='Enter True to only reserve, not 
submit',action='store_true') -parser.add_argument("--test_mode",help="Enter True to send record in testmode",action='store_true') -parser.add_argument("--update_record",help="Enter OSTI record ID to update an existing record") +parser.add_argument("password", help="Enter account password") +parser.add_argument("user_data", help="Enter file path for User Super Summary") +parser.add_argument("url", help="Enter URL for the static Narrative to receive the DOI") +parser.add_argument( + "--reserve", help="Enter True to only reserve, not submit", action="store_true" +) +parser.add_argument( + "--test_mode", help="Enter True to send record in testmode", action="store_true" +) +parser.add_argument( + "--update_record", help="Enter OSTI record ID to update an existing record" +) args = parser.parse_args() if args.test_mode: @@ -25,115 +32,130 @@ osti_id = args.update_record print("Updating record {}".format(osti_id)) -print("account",args.account) -print("password",args.password) -print("User data file",args.user_data) -print("SN URL",args.url) +print("account", args.account) +print("password", args.password) +print("User data file", args.user_data) +print("SN URL", args.url) usersummary = pd.read_excel(args.user_data) -usersummary = usersummary.fillna('blank') +usersummary = usersummary.fillna("blank") + + def gen_record(url): - ''' + """ Read through a static Narrative to find all the information required for DOI submission, and prompt for anything missing. - ''' - soup = BeautifulSoup(requests.get(url).content,'html.parser') + """ + soup = BeautifulSoup(requests.get(url).content, "html.parser") ## Constants - site_code = 'KBASE (U.S. Department of Energy Systems Biology Knowledgebase)' - dataset_type = 'GD' - BER = 'USDOE Office of Science (SC), Biological and Environmental Research (BER)' + site_code = "KBASE (U.S. 
Department of Energy Systems Biology Knowledgebase)" + dataset_type = "GD" + BER = "USDOE Office of Science (SC), Biological and Environmental Research (BER)" ## From static Narrative - wsid = url[url.find('/n/')+3:url.rfind('/',0,-1)] - version = url[url.rfind('/',0,-1)+1:-1] - infix = '{}.{}'.format(wsid,version) - title = soup.find('title').text + wsid = url[url.find("/n/") + 3 : url.rfind("/", 0, -1)] + version = url[url.rfind("/", 0, -1) + 1 : -1] + infix = "{}.{}".format(wsid, version) + title = soup.find("title").text author_list = [] research_orgs = [] - keywords = '' - contract_nos = '' - abstract = '' - for d in soup.find_all('div'): - if d.get('class')==['kb-author-list']: - for a in d.find_all('a'): + keywords = "" + contract_nos = "" + abstract = "" + for d in soup.find_all("div"): + if d.get("class") == ["kb-author-list"]: + for a in d.find_all("a"): author_dict = {} # User super summary info pulled up separately in case I need to batch these in the future - author_frame = usersummary.loc[usersummary['display_name']==a.text] + author_frame = usersummary.loc[usersummary["display_name"] == a.text] # If they only have first name and last name in their profile - if len(a.text.split(' '))==2: - author_dict['last_name'] = a.text.split(' ')[1] - author_dict['first_name'] = a.text.split(' ')[0] + if len(a.text.split(" ")) == 2: + author_dict["last_name"] = a.text.split(" ")[1] + author_dict["first_name"] = a.text.split(" ")[0] # If they have middle name or compound last name or only a single name in their profile else: print("Enter name for: {}".format(a.text)) - author_dict['last_name'] = input('Family name: ') - author_dict['first_name'] = input('Given name: ') - author_dict['middle_name'] = input('Middle name: ') - if author_frame['email'].to_list()[0]!='blank': - author_dict['private_email']=author_frame['email'].to_list()[0] - if author_frame['orcid'].to_list()[0]!='blank': - author_dict['orcid_id']=author_frame['orcid'].to_list()[0] - if 
author_frame['institution'].to_list()[0]!='blank': - author_dict['affiliation_name']=author_frame['institution'].to_list()[0] - if author_frame['institution'].to_list()[0] not in research_orgs: - research_orgs.append(author_frame['institution'].to_list()[0]) + author_dict["last_name"] = input("Family name: ") + author_dict["first_name"] = input("Given name: ") + author_dict["middle_name"] = input("Middle name: ") + if author_frame["email"].to_list()[0] != "blank": + author_dict["private_email"] = author_frame["email"].to_list()[0] + if author_frame["orcid"].to_list()[0] != "blank": + author_dict["orcid_id"] = author_frame["orcid"].to_list()[0] + if author_frame["institution"].to_list()[0] != "blank": + author_dict["affiliation_name"] = author_frame[ + "institution" + ].to_list()[0] + if author_frame["institution"].to_list()[0] not in research_orgs: + research_orgs.append(author_frame["institution"].to_list()[0]) author_list.append(author_dict) - if d.get('class')==['branding']: - datestring = d.text.strip('\n') - datestring = datestring[datestring.find(' ')+1:] - pub_date = datetime.datetime.strptime(datestring, '%B %d, %Y').strftime('%m/%d/%Y') + if d.get("class") == ["branding"]: + datestring = d.text.strip("\n") + datestring = datestring[datestring.find(" ") + 1 :] + pub_date = datetime.datetime.strptime(datestring, "%B %d, %Y").strftime( + "%m/%d/%Y" + ) # User defined/custom classes - if d.get('class') == ['user-abstract']: + if d.get("class") == ["user-abstract"]: abstract = d.text - for m in soup.find_all('meta'): - if m.get('name') == 'user-keywords': - keywords = m.get('content') - if m.get('name') == 'user-doi-funding': - contract_nos = m.get('content') - - research_org = '' + for m in soup.find_all("meta"): + if m.get("name") == "user-keywords": + keywords = m.get("content") + if m.get("name") == "user-doi-funding": + contract_nos = m.get("content") + + research_org = "" for ro in research_orgs: - research_org+=ro+';' + research_org += ro + ";" 
research_org = research_org[:-1] doi_list = [] # Finding all the DOIs with regex. App DOIs are already in
  • s, so asking users to do likewise - for l in soup.find_all('li'): - if l.text.find('doi')!=-1: - doi = re.search('10.[0-9]*/\S*',l.text.lower())[0].strip('.') + for l in soup.find_all("li"): + if l.text.find("doi") != -1: + doi = re.search("10.[0-9]*/\S*", l.text.lower())[0].strip(".") if doi not in doi_list: doi_list.append(doi) - related_identifiers = [{'related_identifier':x,'relation_type':"Cites",'related_identifier_type':"DOI"} for x in doi_list] - + related_identifiers = [ + { + "related_identifier": x, + "relation_type": "Cites", + "related_identifier_type": "DOI", + } + for x in doi_list + ] + # Manually entering abstract, keywords, contract numbers if they didn't include in HTML - if abstract == '': - abstract = input('No abstract found. Enter manual value: ') - if contract_nos == '': - contract_nos = input('No contract numbers found. Enter manual value: ') - if keywords == '': - keywords = input('No keywords found. Enter manual value: ') - if contract_nos == '': - contract_nos = 'N/A' + if abstract == "": + abstract = input("No abstract found. Enter manual value: ") + if contract_nos == "": + contract_nos = input("No contract numbers found. Enter manual value: ") + if keywords == "": + keywords = input("No keywords found. 
Enter manual value: ") + if contract_nos == "": + contract_nos = "N/A" # Building record dict record = { - 'title':title, - 'dataset_type':dataset_type, - 'authors':author_list, - 'publication_date':pub_date, - 'site_url':url, - 'contract_nos':contract_nos, - 'sponsor_org':BER, - 'keyword':keywords, - 'description':abstract, - 'research_org':research_org, - 'doi_infix':infix, - 'related_identifiers':related_identifiers + "title": title, + "dataset_type": dataset_type, + "authors": author_list, + "publication_date": pub_date, + "site_url": url, + "contract_nos": contract_nos, + "sponsor_org": BER, + "keyword": keywords, + "description": abstract, + "research_org": research_org, + "doi_infix": infix, + "related_identifiers": related_identifiers, } - return(record) + return record + + record = gen_record(args.url) if args.reserve: - submit = ostiapi.reserve(record,args.account,args.password) + submit = ostiapi.reserve(record, args.account, args.password) else: - submit = ostiapi.post(record,args.account,args.password) + submit = ostiapi.post(record, args.account, args.password) # Save the record with DOI from OSTI's as a backup -fname = record['record']['doi'].replace('.','_').replace('/','-') +fname = record["record"]["doi"].replace(".", "_").replace("/", "-") xml = dict2xml(submit) -with open('{}.xml'.format(fname),'w') as f: +with open("{}.xml".format(fname), "w") as f: f.write(xml)