-
Notifications
You must be signed in to change notification settings - Fork 134
Add BibTeX export for ScopusSearch #302
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,7 @@ | ||
from collections import namedtuple | ||
from typing import List, NamedTuple, Optional, Tuple, Union | ||
import bibtexparser | ||
from bibtexparser import * | ||
|
||
from pybliometrics.scopus.superclasses import Search | ||
from pybliometrics.scopus.utils import check_integrity, chained_get,\ | ||
|
@@ -215,6 +217,179 @@ def get_eids(self): | |
"""EIDs of retrieved documents.""" | ||
return [d['eid'] for d in self._json] | ||
|
||
def add_bibtex_field(self, bibtex_fields: list, key: str, value) -> list:
    """Append a BibTeX field to *bibtex_fields* unless *value* is empty.

    :param bibtex_fields: List of bibtexparser Field objects, mutated
        in place and also returned for chaining.
    :param key: Name of the BibTeX field (e.g. "author").
    :param value: Value of the field; `None` and empty strings are
        skipped so entries only carry populated fields.
    :return: The (possibly extended) *bibtex_fields* list.
    """
    # Only skip missing values.  A plain truthiness check would also
    # drop valid falsy data such as citedby_count == 0 or
    # openaccess == 0, which callers pass in.
    if value is None or value == "":
        return bibtex_fields
    bibtex_fields.append(bibtexparser.model.Field(key, value))
    return bibtex_fields
|
||
def export_bibtex(self, path: str, imitate_scopus_export: bool = False) -> None:
    """Export the search results as a BibTeX file.

    :param path: Path of the file the BibTeX output is written to
        (UTF-8 encoded, overwritten if it already exists).
    :param imitate_scopus_export: If True, mimic the native Scopus
        BibTeX export: keep whitespace in citation keys and use
        bibtexparser's default formatting instead of a compact one.
    :raises ValueError: If a document's subtype/aggregation-type
        combination cannot be mapped onto a BibTeX entry type.
    """
    # BibTeX entry types produced by the mapping below.
    bib_tex_type_article = "Article"
    bib_tex_type_in_proceedings = "InProceedings"
    bib_tex_type_in_collection = "InCollection"
    bib_tex_type_book = "Book"
    bib_tex_type_techreport = "TechReport"

    # Scopus aggregation types.
    agg_proceedings = "Conference Proceeding"
    agg_journal = "Journal"
    agg_trade_journal = "Trade Journal"
    agg_book_series = "Book Series"
    agg_book = "Book"
    agg_report = "Report"

    # Scopus document subtypes that map onto @Article, grouped by the
    # aggregation type they appear under.
    journal_doctypes = frozenset({
        "Article", "Review", "Short Survey", "Editorial", "Note",
        "Letter", "Data Paper", "Erratum", "Conference Review",
        "Conference Paper", "Retracted", None,
    })
    trade_journal_doctypes = frozenset({"Article", "Review", "Short Survey", "Note"})
    # Subtypes that map onto @InCollection when published in a book.
    book_collection_doctypes = frozenset({"Book Chapter", "Article", "Editorial"})

    bib_library = bibtexparser.Library()

    results = self.results

    if results:
        for result in results:
            document_type = result.subtypeDescription
            aggregation_type = result.aggregationType

            # Citation key: first author's surname plus cover year.
            year = result.coverDate[0:4]
            key_author = ""
            author_names = result.author_names
            if author_names:
                key_author = author_names.split(",", 1)[0]
            if not imitate_scopus_export:
                # Scopus' own export keeps blanks in citation keys;
                # strip them otherwise.
                key_author = "".join(key_author.split())
            key = "".join([key_author, year])

            # Authors, joined the BibTeX way.
            authors = ""
            if author_names:
                authors = " and ".join(author_names.split(";"))

            # Pages, with the hyphen replaced by a spaced en dash.
            pages = None
            page_range = result.pageRange
            if page_range:
                pages = page_range.replace("-", " – ")

            # Affiliations, separated by "; " for readability.
            affiliation = result.affilname
            if affiliation:
                affiliation = "; ".join(affiliation.split(";"))

            # Author keywords, which Scopus separates with " | ".
            author_keywords = result.authkeywords
            if author_keywords:
                author_keywords = "; ".join(author_keywords.split(" | "))

            # Map the (subtype, aggregation type) pair onto a BibTeX
            # entry type.
            bib_tex_type = None
            if ((aggregation_type == agg_journal and document_type in journal_doctypes)
                    or (aggregation_type == agg_trade_journal and document_type in trade_journal_doctypes)
                    or (aggregation_type is None and document_type == "Article")):
                bib_tex_type = bib_tex_type_article
            # Deliberately a plain `if`, not `elif`: conference
            # proceedings take precedence over the journal mapping.
            if (aggregation_type == agg_proceedings
                    or (document_type == "Conference Paper" and aggregation_type in (agg_book, None))):
                bib_tex_type = bib_tex_type_in_proceedings
            elif (aggregation_type == agg_book_series
                    or (aggregation_type == agg_book and document_type in book_collection_doctypes)):
                bib_tex_type = bib_tex_type_in_collection
            elif document_type == "Book" and aggregation_type == agg_book:
                bib_tex_type = bib_tex_type_book
            elif document_type == "Report" and aggregation_type == agg_report:
                bib_tex_type = bib_tex_type_techreport
            if bib_tex_type is None:
                raise ValueError(f"Unsupported type | Document type: {document_type} | Aggregation type: {aggregation_type} | DOI: https://doi.org/{result.doi}")

            # Assemble the entry's fields; empty values are skipped by
            # add_bibtex_field().
            fields = []
            fields = self.add_bibtex_field(fields, "author", authors)
            fields = self.add_bibtex_field(fields, "title", result.title)
            fields = self.add_bibtex_field(fields, "date", result.coverDate)
            if aggregation_type == agg_journal:
                fields = self.add_bibtex_field(fields, "journal", result.publicationName)
                fields = self.add_bibtex_field(fields, "volume", result.volume)
                fields = self.add_bibtex_field(fields, "number", result.issueIdentifier)
            elif aggregation_type in (agg_proceedings, agg_book_series):
                fields = self.add_bibtex_field(fields, "booktitle", result.publicationName)
            elif bib_tex_type == bib_tex_type_techreport:
                fields = self.add_bibtex_field(fields, "institution", affiliation)
            fields = self.add_bibtex_field(fields, "pages", pages)
            fields = self.add_bibtex_field(fields, "doi", result.doi)
            fields = self.add_bibtex_field(fields, "url", "https://api.elsevier.com/content/abstract/scopus_id/" + result.eid.rsplit("-", 1)[1])
            if bib_tex_type != bib_tex_type_techreport:
                fields = self.add_bibtex_field(fields, "affiliation", affiliation)
            fields = self.add_bibtex_field(fields, "abstract", result.description)
            fields = self.add_bibtex_field(fields, "author_keywords", author_keywords)
            if bib_tex_type == bib_tex_type_book:
                fields = self.add_bibtex_field(fields, "isbn", result.volume)
            fields = self.add_bibtex_field(fields, "issn", result.issn)
            fields = self.add_bibtex_field(fields, "type", document_type)
            fields = self.add_bibtex_field(fields, "scopus_aggregation_type", aggregation_type)
            fields = self.add_bibtex_field(fields, "citedby_count", result.citedby_count)
            fields = self.add_bibtex_field(fields, "openaccess", result.openaccess)
            fields = self.add_bibtex_field(fields, "fund_sponsor", result.fund_sponsor)
            fields = self.add_bibtex_field(fields, "source", "Scopus")

            entry = bibtexparser.model.Entry(bib_tex_type, key, fields)
            bib_library.add(entry)

            # bibtexparser rejects an entry whose key already exists in
            # the library.  Retry with the key suffixes "a" to "z"
            # (hence range(26)) until the entry is accepted.
            for i in range(26):
                failed_blocks = bib_library.failed_blocks
                if not failed_blocks:
                    break
                failed_block = failed_blocks[0]
                if i == 0:
                    # Append a dummy suffix so the slicing below also
                    # works on the first retry.
                    entry.key += "a"
                entry.key = entry.key[:-1] + chr(ord("a") + i)
                if isinstance(failed_block, bibtexparser.model.DuplicateBlockKeyBlock):
                    # Library.replace() causes issues here, so remove
                    # the failed block and re-add the re-keyed entry.
                    bib_library.remove(failed_block)
                    bib_library.add(entry)

    # Compact formatting unless the native Scopus export is imitated
    # (None makes write_string fall back to bibtexparser's defaults).
    bibtex_format = None
    if not imitate_scopus_export:
        bibtex_format = bibtexparser.BibtexFormat()
        bibtex_format.indent = " "
        bibtex_format.block_separator = "\n"

    # Workaround: bibtexparser.write_file() currently fails on UTF-8
    # content, so serialize to a string and write the file manually.
    export_bib = bibtexparser.write_string(bib_library, bibtex_format=bibtex_format)
    with open(path, "w", encoding="utf-8") as f:
        f.write(export_bib)
|
||
|
||
def _join(item, key, sep=";"): | ||
"""Auxiliary function to join same elements of a list of dictionaries if | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
All these variables just contain one string. Isn't it easier to just use the strings instead of the variables?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also possible. I chose this way to allow reuse, a better structure, making similar strings distinguishable (bib_tex vs. Scopus), and possibly in the future allowing modularity (for example by storing such strings in a different file).
But I'll adhere to the project's style for that!