|
| 1 | +import json |
| 2 | +from urllib.request import Request, urlopen |
| 3 | +from urllib.parse import quote |
| 4 | +from util import * |
| 5 | + |
| 6 | + |
def main(entry):
    """
    Build a list of citable sources from an arXiv author search.

    Receives a single list entry from the arxiv data file, queries the arXiv
    API for works matching the entry's "author" value, and returns one source
    per returned work.

    Args:
        entry: a single entry from the arxiv data file. Its "author" value is
            read via get_safe and used as the search term. All of its fields
            are copied onto every returned source.

    Returns:
        List of dicts, one per Atom <entry> in the response that has a
        non-empty <id>. Each dict has an "id" of the form "arxiv:<identifier>"
        plus every field of entry. Because the fields of entry are applied
        after "id" is set, an "id" key in entry overrides the generated one.

    Raises:
        Exception: if the entry has no (or an empty) "author" value.
    """

    # local import, as in the original block
    import xml.etree.ElementTree as ET

    # arxiv api endpoint ($AUTHOR is substituted with the url-quoted author;
    # the query asks for up to 1000 results starting at offset 0)
    endpoint = "http://export.arxiv.org/api/query?search_query=au:$AUTHOR&start=0&max_results=1000"

    # get author from entry
    author = get_safe(entry, "author", "")
    if not author:
        raise Exception('No "author" key')

    # query api
    # NOTE(review): expire is presumably in seconds (i.e. one day) -- confirm
    # against the cache helper provided by util
    @log_cache
    @cache.memoize(name=__file__, expire=1 * (60 * 60 * 24))
    def query(author):
        url = endpoint.replace("$AUTHOR", quote(author))
        request = Request(url=url)
        # context manager closes the connection deterministically instead of
        # leaving the open response for the garbage collector
        with urlopen(request) as reply:
            return reply.read().decode('utf-8')

    response = query(author)

    # parse XML response
    root = ET.fromstring(response)

    # namespaces used to resolve prefixes in the find/findall paths below
    ns = {'atom': 'http://www.w3.org/2005/Atom',
          'arxiv': 'http://arxiv.org/schemas/atom'}

    # list of sources to return
    sources = []

    # go through response and format sources
    for work in root.findall('atom:entry', ns):
        id_element = work.find('atom:id', ns)

        # skip works without a usable id so that no source is ever built from
        # a missing or stale identifier
        if id_element is None or not id_element.text:
            continue

        # extract arxiv id from the id field: keep whatever follows the last
        # "/abs/" (the whole text if "/abs/" does not occur)
        arxiv_id = id_element.text.split('/abs/')[-1]

        # create source with arxiv identifier for Manubot
        source = {"id": f"arxiv:{arxiv_id}"}

        # copy fields from entry to source
        source.update(entry)

        # add source to list
        sources.append(source)

    return sources
0 commit comments