Skip to content

Commit eae1945

Browse files
committed
Add download script and black formatting
1 parent 564776b commit eae1945

12 files changed

+233
-22
lines changed

caltechdata_api/1639.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"alternateIdentifiers": [{"alternateIdentifier": "1639", "alternateIdentifierType": "CaltechDATA_Identifier"}], "descriptions": [{"descriptionType": "Other", "description": "This repo contains Custom R scripts organized and summarized Choreography output files."}], "fundingReferences": [{"funderName": "Canadian Institutes of Health Research Doctoral Research Award"}, {"funderName": "Natural Sciences and Engineering Research Council"}, {"awardNumber": {"awardNumber": "PJT-165947"}, "funderName": "Canadian Institutes of Health Research"}], "language": "eng", "relatedIdentifiers": [{"relatedIdentifier": "10.17912/micropub.biology.000307", "relationType": "IsPartOf", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "https://github.com/troymcdiarmid/MWT_Wildtype_Auxin/releases/tag/v1.0", "relationType": "IsIdenticalTo", "relatedIdentifierType": "URL"}], "resourceType": {"resourceTypeGeneral": "Software"}, "rightsList": [{"rights": "cc-by", "rightsURI": "https://creativecommons.org/licenses/by/4.0/"}], "subjects": [{"subject": "C. elegans; Multi-Worm Tracker R scripts; behavioral analysis; Choreography software output"}], "version": "1.0", "titles": [{"title": "Auxin does not affect a suite of morphological or behavioral phenotypes in two wild-type <i>C. elegans</i> strains"}], "dates": [{"date": "2020-09-30", "dateType": "Accepted"}, {"date": "2020-10-02", "dateType": "Issued"}], "publicationYear": "2020", "publisher": "CaltechDATA", "creators": [{"affiliations": ["Djavad Mowafaghian Centre for Brain Health, University of British Columbia, 2211 Wesbrook Mall, Vancouver, British Columbia V6T 2B5, Canada"], "creatorName": "Troy McDiarmid"}]}

caltechdata_api/1639.xml

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
<?xml version="1.0" encoding="utf-8"?>
2+
<resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://datacite.org/schema/kernel-4" xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4/metadata.xsd">
3+
<identifier identifierType="DOI">10.22002/D1.1639</identifier>
4+
<creators>
5+
<creator>
6+
<creatorName>Troy McDiarmid</creatorName>
7+
<affiliation>Djavad Mowafaghian Centre for Brain Health, University of British Columbia, 2211 Wesbrook Mall, Vancouver, British Columbia V6T 2B5, Canada</affiliation>
8+
</creator>
9+
</creators>
10+
<titles>
11+
<title>Auxin does not affect a suite of morphological or behavioral phenotypes in two wild-type &lt;i&gt;C. elegans&lt;/i&gt; strains</title>
12+
</titles>
13+
<publisher>CaltechDATA</publisher>
14+
<publicationYear>2020</publicationYear>
15+
<subjects>
16+
<subject>C. elegans; Multi-Worm Tracker R scripts; behavioral analysis; Choreography software output</subject>
17+
</subjects>
18+
<dates>
19+
<date dateType="Accepted">2020-09-30</date>
20+
<date dateType="Issued">2020-10-02</date>
21+
</dates>
22+
<language>eng</language>
23+
<resourceType resourceTypeGeneral="Software"/>
24+
<alternateIdentifiers>
25+
<alternateIdentifier alternateIdentifierType="CaltechDATA_Identifier">1639</alternateIdentifier>
26+
</alternateIdentifiers>
27+
<relatedIdentifiers>
28+
<relatedIdentifier relatedIdentifierType="DOI" relationType="IsPartOf">10.17912/micropub.biology.000307</relatedIdentifier>
29+
<relatedIdentifier relatedIdentifierType="URL" relationType="IsIdenticalTo">https://github.com/troymcdiarmid/MWT_Wildtype_Auxin/releases/tag/v1.0</relatedIdentifier>
30+
</relatedIdentifiers>
31+
<version>1.0</version>
32+
<rightsList>
33+
<rights rightsURI="https://creativecommons.org/licenses/by/4.0/">cc-by</rights>
34+
</rightsList>
35+
<descriptions>
36+
<description descriptionType="Other">This repo contains Custom R scripts organized and summarized Choreography output files.</description>
37+
</descriptions>
38+
<fundingReferences>
39+
<fundingReference>
40+
<funderName>Canadian Institutes of Health Research Doctoral Research Award</funderName>
41+
</fundingReference>
42+
<fundingReference>
43+
<funderName>Natural Sciences and Engineering Research Council</funderName>
44+
</fundingReference>
45+
<fundingReference>
46+
<funderName>Canadian Institutes of Health Research</funderName>
47+
<awardNumber>PJT-165947</awardNumber>
48+
</fundingReference>
49+
</fundingReferences>
50+
</resource>

caltechdata_api/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@
33
from .customize_schema import customize_schema
44
from .decustomize_schema import decustomize_schema
55
from .get_metadata import get_metadata
6+
from .download_file import download_file

caltechdata_api/caltechdata_edit.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,9 @@ def caltechdata_unembargo(token, ids, production=False):
4949
return response.text
5050

5151

52-
def caltechdata_edit(token, ids, metadata={}, files={}, delete={}, production=False, schema="40"):
52+
def caltechdata_edit(
53+
token, ids, metadata={}, files={}, delete={}, production=False, schema="40"
54+
):
5355
"""Including files will only replaces files if they have the same name
5456
The delete option will delete any existing files with a given file extension
5557
There are more file operations that could be implemented"""
@@ -72,7 +74,9 @@ def caltechdata_edit(token, ids, metadata={}, files={}, delete={}, production=Fa
7274
api_url = "https://cd-sandbox.tind.io/api/record/"
7375

7476
if metadata:
75-
metadata = customize_schema.customize_schema(copy.deepcopy(metadata),schema=schema)
77+
metadata = customize_schema.customize_schema(
78+
copy.deepcopy(metadata), schema=schema
79+
)
7680

7781
for idv in ids:
7882
metadata["id"] = idv
@@ -135,7 +139,7 @@ def caltechdata_add(token, ids, metadata={}, files={}, production=False, schema=
135139
headers = {"Authorization": "Bearer %s" % token, "Content-type": "application/json"}
136140

137141
if metadata:
138-
metadata = customize_schema.customize_schema(copy.deepcopy(metadata),schema)
142+
metadata = customize_schema.customize_schema(copy.deepcopy(metadata), schema)
139143

140144
fjson = {}
141145

caltechdata_api/caltechdata_write.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ def caltechdata_write(metadata, token, files=[], production=False, schema="40"):
7979

8080
fileinfo = []
8181

82-
newdata = customize_schema.customize_schema(copy.deepcopy(metadata),schema=schema)
82+
newdata = customize_schema.customize_schema(copy.deepcopy(metadata), schema=schema)
8383

8484
if files:
8585
for f in files:

caltechdata_api/customize_schema.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ def customize_schema_4(json_record):
8282

8383
return json_record
8484

85+
8586
def customize_schema_43(json_record):
8687
json_record = customize_standard(json_record)
8788
# Extract identifiers and label as DOI or alternativeIdentifiers
@@ -143,7 +144,7 @@ def customize_schema_43(json_record):
143144
affiliations = []
144145
for aff in a["affiliation"]:
145146
affiliations.append(aff["name"])
146-
new['affiliation' ] = a['affiliation']
147+
new["affiliation"] = a["affiliation"]
147148
new["contributorAffiliation"] = affiliations
148149
new["contributorName"] = c["name"]
149150
if "contributorType" in c:
@@ -153,20 +154,21 @@ def customize_schema_43(json_record):
153154
newc.append(new)
154155
json_record["contributors"] = newc
155156

156-
#Funding organization
157+
# Funding organization
157158
if "fundingReferences" in json_record:
158159
for funding in json_record["fundingReferences"]:
159-
if 'awardNumber' in funding:
160-
funding['awardNumber'] = {'awardNumber':funding['awardNumber']}
160+
if "awardNumber" in funding:
161+
funding["awardNumber"] = {"awardNumber": funding["awardNumber"]}
161162

162-
#resourceTypeGeneral
163-
typeg = json_record['types']['resourceTypeGeneral']
164-
json_record['resourceType'] = {'resourceTypeGeneral':typeg}
163+
# resourceTypeGeneral
164+
typeg = json_record["types"]["resourceTypeGeneral"]
165+
json_record["resourceType"] = {"resourceTypeGeneral": typeg}
165166

166167
print(json_record)
167168

168169
return json_record
169170

171+
170172
def customize_standard(json_record):
171173

172174
# Extract subjects to single string
@@ -223,7 +225,7 @@ def customize_standard(json_record):
223225
d["relevantDateType"] = d.pop("dateType")
224226
json_record["relevantDates"] = json_record.pop("dates")
225227
else:
226-
json_record["publicationDate"] = date.today().isoformat()
228+
json_record["publicationDate"] = date.today().isoformat()
227229

228230
# license
229231
if "rightsList" in json_record:

caltechdata_api/decustomize_schema.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ def decustomize_standard(json_record, pass_emails, pass_media, pass_owner):
161161
"brief_title",
162162
"brief_summary",
163163
"resource_type",
164-
"final_actions"
164+
"final_actions",
165165
]
166166
if pass_owner == False:
167167
others.append("owners")
@@ -181,7 +181,10 @@ def decustomize_schema_43(json_record, pass_emails, pass_media, pass_owner):
181181
if "doi" in json_record:
182182
doi = json_record["doi"]
183183
identifiers.append(
184-
{"identifier": json_record["doi"], "identifierType": "DOI",}
184+
{
185+
"identifier": json_record["doi"],
186+
"identifierType": "DOI",
187+
}
185188
)
186189
del json_record["doi"]
187190

@@ -221,9 +224,9 @@ def decustomize_schema_43(json_record, pass_emails, pass_media, pass_owner):
221224
for a in authors:
222225
new = {}
223226
if "authorAffiliation" in a:
224-
#Prefer full affiliation block
227+
# Prefer full affiliation block
225228
if "affiliation" in a:
226-
new["affiliation"] = a["affiliation"]
229+
new["affiliation"] = a["affiliation"]
227230
else:
228231
if isinstance(a["authorAffiliation"], list) == False:
229232
a["authorAffiliation"] = [a["authorAffiliation"]]
@@ -259,7 +262,7 @@ def decustomize_schema_43(json_record, pass_emails, pass_media, pass_owner):
259262
new = {}
260263
if "contributorAffiliation" in c:
261264
if "affiliation" in c:
262-
new["affiliation"] = c["affiliation"]
265+
new["affiliation"] = c["affiliation"]
263266
else:
264267
if isinstance(c["contributorAffiliation"], list) == False:
265268
c["contributorAffiliation"] = [c["contributorAffiliation"]]

caltechdata_api/download_file.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import requests, argparse
2+
from tqdm import tnrange, tqdm_notebook
3+
4+
5+
def download_file(doi, fname=None, media_type=None):
    """Download a file listed in the media API for a DataCite DOI.

    Args:
        doi: DOI whose media entries are looked up via the DataCite API.
        fname: Path the download is written to. When omitted, the DOI with
            every "/" replaced by "-" is used.
        media_type: When given, the first media entry whose ``mediaType``
            equals this value is downloaded; otherwise the first entry
            listed for the DOI is used.

    Returns:
        The file name chosen for the download. The name is returned even
        when nothing was written (403 response or missing content-length),
        in which case a message is printed instead.

    Raises:
        ValueError: If the DOI has no media entries, or none of them has
            the requested ``media_type``.
    """
    api_url = "https://api.datacite.org/dois/" + doi + "/media"
    # An error payload has no "data" key; treat it like an empty listing so
    # the caller gets a clear ValueError instead of a KeyError/IndexError.
    data = requests.get(api_url).json().get("data") or []
    if media_type is None:
        if not data:
            raise ValueError("No media files are listed for DOI " + doi)
        url = data[0]["attributes"]["url"]
    else:
        url = None
        for media in data:
            if media["attributes"]["mediaType"] == media_type:
                # The download location is the "url" attribute, not the
                # whole attributes dict.
                url = media["attributes"]["url"]
                break
        if url is None:
            raise ValueError(
                "No media file of type " + media_type + " is listed for DOI " + doi
            )
    r = requests.get(url, stream=True)
    # Set file name
    if fname is None:
        fname = doi.replace("/", "-")
    # Download file with progress bar
    if r.status_code == 403:
        # Do not fall through: the 403 body must not be saved as the file.
        print("File Unavailable")
    elif "content-length" not in r.headers:
        print("Did not get file")
    else:
        total_length = int(r.headers.get("content-length"))
        # One progress step per 1024-byte chunk read below.
        pbar = tnrange(int(total_length / 1024), unit="KB")
        try:
            with open(fname, "wb") as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:
                        pbar.update()
                        f.write(chunk)
        finally:
            pbar.close()
    return fname
34+
35+
36+
if __name__ == "__main__":
    # Command-line entry point: download the media file for each DOI given.
    # Implicit string concatenation is used for the description so that
    # source indentation does not leak into the help output.
    parser = argparse.ArgumentParser(
        description="download_file queries the DataCite Media API "
        "and downloads the file associated with a DOI"
    )
    parser.add_argument(
        "dois",
        nargs="+",
        help="The DOI for files to be downloaded",
    )
    # NOTE: the same -fname is passed for every DOI, so with several DOIs
    # each download is written to the same path.
    parser.add_argument(
        "-fname",
        default=None,
        help="File name to save the download as "
        "(default: the DOI with '/' replaced by '-')",
    )
    parser.add_argument(
        "-media_type",
        default=None,
        help="Media type of the file to download "
        "(default: the first file listed for the DOI)",
    )

    args = parser.parse_args()

    for doi in args.dois:
        download_file(doi, args.fname, args.media_type)

caltechdata_api/get_metadata.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ def get_metadata(idv, production=True, auth=None, emails=False, schema="40"):
4444
errors = sorted(v.iter_errors(instance), key=lambda e: e.path)
4545
for error in errors:
4646
print(error.message)
47-
exit()
4847
if schema == "43":
4948
try:
5049
assert schema43.validate(metadata)
@@ -53,7 +52,6 @@ def get_metadata(idv, production=True, auth=None, emails=False, schema="40"):
5352
errors = sorted(v.iter_errors(instance), key=lambda e: e.path)
5453
for error in errors:
5554
print(error.message)
56-
exit()
5755

5856
return metadata
5957

@@ -75,7 +73,7 @@ def get_metadata(idv, production=True, auth=None, emails=False, schema="40"):
7573
parser.add_argument("-xml", dest="save_xml", action="store_true")
7674
parser.add_argument("-auth_user", help="Username for basic authentication")
7775
parser.add_argument("-auth_pass", help="Password for basic authentication")
78-
parser.add_argument("-schema", default = "40",help="Schema Version")
76+
parser.add_argument("-schema", default="40", help="Schema Version")
7977

8078
args = parser.parse_args()
8179

codemeta.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
"codeRepository": "https://github.com/caltechlibrary/caltechdata_api",
77
"issueTracker": "https://github.com/caltechlibrary/caltechdata_api/issues",
88
"license": "https://data.caltech.edu/license",
9-
"version": "0.1.6",
9+
"version": "0.1.7",
1010
"author": [
1111
{
1212
"@type": "Person",

0 commit comments

Comments
 (0)