"""Retrieve KBase records from OSTI E-Link and write a DOI summary CSV.

Usage: retrieve_records.py ACCOUNT PASSWORD

Writes ``KBase_dois_<year>_<month>_<day>.csv`` in the current working
directory, one row per DOI with Title, URL and Status columns.
"""
import argparse
from datetime import datetime

import ostiapi
import pandas as pd


def _parse_args():
    """Return the parsed CLI namespace (OSTI account name and password)."""
    parser = argparse.ArgumentParser(
        description="Retrieve KBase DOI records from OSTI E-Link"
    )
    parser.add_argument("account", help="Enter account name for submitting records")
    parser.add_argument("password", help="Enter account password")
    return parser.parse_args()


def _simplify(records):
    """Map each record's DOI to ``[title, url, status]``.

    ``records`` is the list of record dicts returned by the OSTI API.
    Records lacking a "site_url" key get the literal string "None" so the
    URL column is never blank (was a bare ``except:`` in the original,
    which would also have hidden unrelated errors).
    """
    return {
        rec["doi"]: [rec["title"], rec.get("site_url", "None"), rec["@status"]]
        for rec in records
    }


def main():
    args = _parse_args()
    # Pull up to 500 KBASE-site records in one request from OSTI E-Link.
    records = ostiapi.get(
        {"site_input_code": "KBASE", "rows": 500}, args.account, args.password
    )["record"]
    df = pd.DataFrame.from_dict(
        _simplify(records), orient="index", columns=["Title", "URL", "Status"]
    )
    df.index.name = "DOI"
    today = datetime.now()
    # Filename format kept byte-compatible with the original script.
    df.to_csv(f"KBase_dois_{today.year}_{today.month}_{today.day}.csv")


if __name__ == "__main__":
    # Guarded so importing this module no longer triggers network I/O.
    main()
record in testmode", action="store_true" +) +parser.add_argument( + "--update_record", help="Enter OSTI record ID to update an existing record" +) + +args = parser.parse_args() +if args.test_mode: + print("Operating in testmode") + ostiapi.testmode() +if args.update_record: + osti_id = args.update_record + print("Updating record {}".format(osti_id)) + +print("account", args.account) +print("password", args.password) +print("User data file", args.user_data) +print("SN URL", args.url) + +usersummary = pd.read_excel(args.user_data) +usersummary = usersummary.fillna("blank") + + +def gen_record(url): + """ + Read through a static Narrative to find all the information required for DOI submission, and prompt for anything missing. + """ + soup = BeautifulSoup(requests.get(url).content, "html.parser") + ## Constants + site_code = "KBASE (U.S. Department of Energy Systems Biology Knowledgebase)" + dataset_type = "GD" + BER = "USDOE Office of Science (SC), Biological and Environmental Research (BER)" + ## From static Narrative + wsid = url[url.find("/n/") + 3 : url.rfind("/", 0, -1)] + version = url[url.rfind("/", 0, -1) + 1 : -1] + infix = "{}.{}".format(wsid, version) + title = soup.find("title").text + author_list = [] + research_orgs = [] + keywords = "" + contract_nos = "" + abstract = "" + for d in soup.find_all("div"): + if d.get("class") == ["kb-author-list"]: + for a in d.find_all("a"): + author_dict = {} + # User super summary info pulled up separately in case I need to batch these in the future + author_frame = usersummary.loc[usersummary["display_name"] == a.text] + # If they only have first name and last name in their profile + if len(a.text.split(" ")) == 2: + author_dict["last_name"] = a.text.split(" ")[1] + author_dict["first_name"] = a.text.split(" ")[0] + # If they have middle name or compound last name or only a single name in their profile + else: + print("Enter name for: {}".format(a.text)) + author_dict["last_name"] = input("Family name: ") + 
author_dict["first_name"] = input("Given name: ") + author_dict["middle_name"] = input("Middle name: ") + if author_frame["email"].to_list()[0] != "blank": + author_dict["private_email"] = author_frame["email"].to_list()[0] + if author_frame["orcid"].to_list()[0] != "blank": + author_dict["orcid_id"] = author_frame["orcid"].to_list()[0] + if author_frame["institution"].to_list()[0] != "blank": + author_dict["affiliation_name"] = author_frame[ + "institution" + ].to_list()[0] + if author_frame["institution"].to_list()[0] not in research_orgs: + research_orgs.append(author_frame["institution"].to_list()[0]) + author_list.append(author_dict) + if d.get("class") == ["branding"]: + datestring = d.text.strip("\n") + datestring = datestring[datestring.find(" ") + 1 :] + pub_date = datetime.datetime.strptime(datestring, "%B %d, %Y").strftime( + "%m/%d/%Y" + ) + # User defined/custom classes + if d.get("class") == ["user-abstract"]: + abstract = d.text + for m in soup.find_all("meta"): + if m.get("name") == "user-keywords": + keywords = m.get("content") + if m.get("name") == "user-doi-funding": + contract_nos = m.get("content") + + research_org = "" + for ro in research_orgs: + research_org += ro + ";" + research_org = research_org[:-1] + doi_list = [] + # Finding all the DOIs with regex. App DOIs are already in