Skip to content

Commit 896b63b

Browse files
committed
Fix record editing bugs and better handle file edits
1 parent 686e6c2 commit 896b63b

File tree

3 files changed

+95
-48
lines changed

3 files changed

+95
-48
lines changed

caltechdata_api/caltechdata_edit.py

Lines changed: 64 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,56 @@ def caltechdata_edit(
7575
if file_links:
7676
metadata = add_file_links(metadata, file_links)
7777

78+
if production == True:
79+
url = "https://data.caltech.edu"
80+
else:
81+
url = "https://data.caltechlibrary.dev"
82+
83+
headers = {
84+
"Authorization": "Bearer %s" % token,
85+
"Content-type": "application/json",
86+
}
87+
f_headers = {
88+
"Authorization": "Bearer %s" % token,
89+
"Content-type": "application/octet-stream",
90+
}
91+
92+
# Check status
93+
existing = requests.get(
94+
url + "/api/records/" + idv,
95+
headers=headers,
96+
)
97+
if existing.status_code != 200:
98+
# Might have a draft
99+
existing = requests.get(
100+
url + "/api/records/" + idv + "/draft",
101+
headers=headers,
102+
)
103+
if existing.status_code != 200:
104+
raise Exception(existing.text)
105+
106+
status = existing.json()["status"]
107+
108+
# Determine whether we need a new version
109+
version = False
110+
if status == "published" and files:
111+
version = True
112+
113+
if new_version:
114+
version = True
115+
116+
if version:
117+
# We need to make a new version
118+
result = requests.post(
119+
url + "/api/records/" + idv + "/versions",
120+
headers=headers,
121+
)
122+
if result.status_code != 201:
123+
raise Exception(result.text)
124+
# Get the id of the new version
125+
idv = result.json()["id"]
126+
127+
print(idv)
78128
# Pull out pid information
79129
if production == True:
80130
repo_prefix = "10.22002"
@@ -106,56 +156,34 @@ def caltechdata_edit(
106156
"provider": "oai",
107157
}
108158
oai = True
109-
#Records are not happy without the auto-assigned oai identifier
110-
if oai == False:
159+
# Existing records are not happy without the auto-assigned oai identifier
160+
if oai == False and version == False:
111161
pids["oai"] = {
112162
"identifier": f"oai:data.caltech.edu:{idv}",
113163
"provider": "oai",
114164
}
115-
#We do not want to lose the auto-assigned DOI
116-
#Users with custom DOIs must pass them in the metadata
117-
if doi == False:
165+
# We do not want to lose the auto-assigned DOI
166+
# Users with custom DOIs must pass them in the metadata
167+
if doi == False and version == False:
118168
pids["doi"] = {
119-
"identifier": f'{repo_prefix}/{idv}',
120-
"provider": "datacite",
121-
"client": "datacite",
122-
}
169+
"identifier": f"{repo_prefix}/{idv}",
170+
"provider": "datacite",
171+
"client": "datacite",
172+
}
123173
metadata["pids"] = pids
124174

125175
data = customize_schema.customize_schema(copy.deepcopy(metadata), schema=schema)
126176

127-
if production == True:
128-
url = "https://data.caltech.edu"
129-
else:
130-
url = "https://data.caltechlibrary.dev"
131-
132-
headers = {
133-
"Authorization": "Bearer %s" % token,
134-
"Content-type": "application/json",
135-
}
136-
f_headers = {
137-
"Authorization": "Bearer %s" % token,
138-
"Content-type": "application/octet-stream",
139-
}
140-
141-
if files or new_version:
142-
# We need to make new version
177+
if files:
143178
data["files"] = {"enabled": True}
144-
result = requests.post(
145-
url + "/api/records/" + idv + "/versions",
146-
headers=headers,
147-
)
148-
if result.status_code != 201:
149-
raise Exception(result.text)
150-
# Get the id of the new version
151-
idv = result.json()["id"]
152179
# Update metadata
153180
result = requests.put(
154181
url + "/api/records/" + idv + "/draft",
155182
headers=headers,
156183
json=data,
157184
)
158-
185+
if result.status_code != 200:
186+
raise Exception(result.text)
159187
file_link = result.json()["links"]["files"]
160188
write_files_rdm(files, file_link, headers, f_headers)
161189

@@ -181,7 +209,8 @@ def caltechdata_edit(
181209
if result.status_code != 200:
182210
raise Exception(result.text)
183211
# We want files to stay the same as the existing record
184-
data["files"] = result.json()["files"]
212+
data["files"] = existing.json()["files"]
213+
# Update metadata
185214
result = requests.put(
186215
url + "/api/records/" + idv + "/draft",
187216
headers=headers,

caltechdata_api/caltechdata_write.py

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,33 +8,51 @@
88
from caltechdata_api import customize_schema
99
from caltechdata_api.utils import humanbytes
1010

11+
1112
def write_files_rdm(files, file_link, headers, f_headers, s3=None):
1213
f_json = []
1314
f_list = {}
1415
for f in files:
1516
filename = f.split("/")[-1]
1617
f_json.append({"key": filename})
1718
f_list[filename] = f
19+
# Now we see if any existing draft files need to be replaced
20+
result = requests.get(file_link, headers=f_headers)
21+
if result.status_code == 200:
22+
ex_files = result.json()["entries"]
23+
for ex in ex_files:
24+
if ex["key"] in f_list:
25+
result = requests.delete(ex["links"]["self"], headers=f_headers)
26+
if result.status_code != 204:
27+
raise Exception(result.text)
28+
# Create new file upload links
1829
result = requests.post(file_link, headers=headers, json=f_json)
1930
if result.status_code != 201:
2031
raise Exception(result.text)
2132
# Now we have the upload links
2233
for entry in result.json()["entries"]:
34+
self = entry["links"]["self"]
2335
link = entry["links"]["content"]
2436
commit = entry["links"]["commit"]
2537
name = entry["key"]
26-
if s3:
27-
infile = s3.open(f_list[name], "rb")
38+
if name in f_list:
39+
if s3:
40+
infile = s3.open(f_list[name], "rb")
41+
else:
42+
infile = open(f_list[name], "rb")
43+
# size = infile.seek(0, 2)
44+
# infile.seek(0, 0) # reset at beginning
45+
result = requests.put(link, headers=f_headers, data=infile)
46+
if result.status_code != 200:
47+
raise Exception(result.text)
48+
result = requests.post(commit, headers=headers)
49+
if result.status_code != 200:
50+
raise Exception(result.text)
2851
else:
29-
infile = open(f_list[name], "rb")
30-
# size = infile.seek(0, 2)
31-
# infile.seek(0, 0) # reset at beginning
32-
result = requests.put(link, headers=f_headers, data=infile)
33-
if result.status_code != 200:
34-
raise Exception(result.text)
35-
result = requests.post(commit, headers=headers)
36-
if result.status_code != 200:
37-
raise Exception(result.text)
52+
# Delete any files not included in this write command
53+
result = requests.delete(self, headers=f_headers)
54+
if result.status_code != 204:
55+
raise Exception(result.text)
3856

3957

4058
def add_file_links(metadata, file_links):
@@ -47,7 +65,7 @@ def add_file_links(metadata, file_links):
4765
path = link.split(endpoint)[1]
4866
try:
4967
size = s3.info(path)["Size"]
50-
size = humanbytes(size)
68+
size = humanbytes(size)
5169
except:
5270
size = 0
5371
if link_string == "":

codemeta.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
"codeRepository": "https://github.com/caltechlibrary/caltechdata_api",
77
"issueTracker": "https://github.com/caltechlibrary/caltechdata_api/issues",
88
"license": "https://data.caltech.edu/license",
9-
"version": "1.1.1",
9+
"version": "1.2.0",
1010
"author": [
1111
{
1212
"@type": "Person",

0 commit comments

Comments
 (0)