|
7 | 7 | description="get_metadata queries the caltechDATA (Invenio 3) API\
|
8 | 8 | and returns DataCite-compatable metadata"
|
9 | 9 | )
|
10 |
| - parser.add_argument( |
11 |
| - "output", |
12 |
| - help="Output file name", |
13 |
| - ) |
| 10 | + parser.add_argument("output", help="Output file name") |
14 | 11 | parser.add_argument("-keywords", nargs="*")
|
15 | 12 |
|
16 | 13 | args = parser.parse_args()
|
17 | 14 |
|
18 |
| - url = 'https://data.caltech.edu/api/records/?size=5000' |
| 15 | + url = "https://data.caltech.edu/api/records/?size=5000" |
19 | 16 |
|
20 |
| - search = '' |
| 17 | + search = "" |
21 | 18 | if args.keywords:
|
22 | 19 | for key in args.keywords:
|
23 |
| - if search == '': |
| 20 | + if search == "": |
24 | 21 | search = f'&q=subjects:"{key}"'
|
25 | 22 | else:
|
26 |
| - search = search+f'+"{key}"' |
| 23 | + search = search + f'+"{key}"' |
27 | 24 | url = url + search
|
28 | 25 |
|
29 | 26 | response = requests.get(url)
|
30 | 27 | hits = response.json()
|
31 | 28 |
|
32 |
| - outfile = open(args.output,'w') |
| 29 | + outfile = open(args.output, "w") |
33 | 30 | writer = csv.writer(outfile)
|
34 |
| - writer.writerow(['wkt','name','year','doi']) |
35 |
| - |
36 |
| - for h in hits['hits']['hits']: |
37 |
| - metadata = decustomize_schema(h['metadata']) |
38 |
| - if 'geoLocations' in metadata: |
39 |
| - doi = 'https://doi.org/'+metadata['identifier']['identifier'] |
40 |
| - title=metadata['titles'][0]['title'].split(':')[0] |
41 |
| - geo = metadata['geoLocations'] |
42 |
| - year = metadata['publicationYear'] |
| 31 | + writer.writerow(["wkt", "name", "year", "doi"]) |
| 32 | + |
| 33 | + for h in hits["hits"]["hits"]: |
| 34 | + metadata = decustomize_schema(h["metadata"]) |
| 35 | + if "geoLocations" in metadata: |
| 36 | + doi = "https://doi.org/" + metadata["identifier"]["identifier"] |
| 37 | + title = metadata["titles"][0]["title"].split(":")[0] |
| 38 | + geo = metadata["geoLocations"] |
| 39 | + year = metadata["publicationYear"] |
43 | 40 | for g in geo:
|
44 |
| - if 'geoLocationBox' in g: |
45 |
| - box = g['geoLocationBox'] |
| 41 | + if "geoLocationBox" in g: |
| 42 | + box = g["geoLocationBox"] |
46 | 43 | p1 = f"{box['eastBoundLongitude']} {box['northBoundLatitude']}"
|
47 | 44 | p2 = f"{box['westBoundLongitude']} {box['northBoundLatitude']}"
|
48 | 45 | p3 = f"{box['westBoundLongitude']} {box['southBoundLatitude']}"
|
49 | 46 | p4 = f"{box['eastBoundLongitude']} {box['southBoundLatitude']}"
|
50 |
| - wkt = f'POLYGON (({p1}, {p2}, {p3}, {p4}, {p1}))' |
51 |
| - writer.writerow([wkt,title,year,doi]) |
52 |
| - |
53 |
| - if 'geoLocationPoint' in g: |
54 |
| - point = g['geoLocationPoint'] |
55 |
| - wkt = f"POINT ({point['pointLongitude']} {point['pointLatitude']})" |
56 |
| - writer.writerow([wkt,title,year,doi]) |
| 47 | + wkt = f"POLYGON (({p1}, {p2}, {p3}, {p4}, {p1}))" |
| 48 | + writer.writerow([wkt, title, year, doi]) |
57 | 49 |
|
| 50 | + if "geoLocationPoint" in g: |
| 51 | + point = g["geoLocationPoint"] |
| 52 | + wkt = f"POINT ({point['pointLongitude']} {point['pointLatitude']})" |
| 53 | + writer.writerow([wkt, title, year, doi]) |
0 commit comments