From 12d7f57c05b543741d1e60ba85c42d1bc59a97f5 Mon Sep 17 00:00:00 2001
From: Claudius Ellsel
Date: Tue, 17 Oct 2023 16:01:45 +0200
Subject: [PATCH] Add BibTeX export for ScopusSearch

First implementation, might need some cleanup.
---
 pybliometrics/scopus/scopus_search.py | 175 ++++++++++++++++++++++++++
 1 file changed, 175 insertions(+)

diff --git a/pybliometrics/scopus/scopus_search.py b/pybliometrics/scopus/scopus_search.py
index 45dc82fc..d14fd78c 100644
--- a/pybliometrics/scopus/scopus_search.py
+++ b/pybliometrics/scopus/scopus_search.py
@@ -1,5 +1,7 @@
 from collections import namedtuple
 from typing import List, NamedTuple, Optional, Tuple, Union
+import bibtexparser
+from bibtexparser import *
 
 from pybliometrics.scopus.superclasses import Search
 from pybliometrics.scopus.utils import check_integrity, chained_get,\
@@ -215,6 +217,179 @@ def get_eids(self):
         """EIDs of retrieved documents."""
         return [d['eid'] for d in self._json]
 
+    def add_bibtex_field(self, bibtex_fields: list, key: str, value: str) -> list:
+        # Check whether value is not empty:
+        if value:
+            bibtex_fields.append(bibtexparser.model.Field(key, value))
+        return bibtex_fields
+
+    def export_bibtex(self, path: str, imitate_scopus_export: bool = False) -> None:
+        type_conference_paper = "Conference Paper"
+        type_conference_review = "Conference Review"
+        type_article = "Article"
+        type_review = "Review"
+        type_short_survey = "Short Survey"
+        type_editorial = "Editorial"
+        type_note = "Note"
+        type_letter = "Letter"
+        type_data_paper = "Data Paper"
+        type_erratum = "Erratum"
+        type_book_chapter = "Book Chapter"
+        type_book = "Book"
+        type_report = "Report"
+        type_retracted = "Retracted"
+        type_none = None
+
+        aggregation_type_conference_proceedings = "Conference Proceeding"
+        aggregation_type_journal = "Journal"
+        aggregation_type_trade_journal = "Trade Journal"
+        aggregation_type_book_series = "Book Series"
+        aggregation_type_book = "Book"
+        aggregation_type_report = "Report"
+        aggregation_type_none = None
+
+        bib_tex_type_article = "Article"
+        bib_tex_type_in_proceedings = "InProceedings"
+        bib_tex_type_in_collection = "InCollection"
+        bib_tex_type_book = "Book"
+        bib_tex_type_techreport = "TechReport"
+
+        bib_library = bibtexparser.Library()
+
+        results = self.results
+
+        if results:
+            for result in results:
+                # print(result)
+
+                document_type = result.subtypeDescription
+                aggregation_type = result.aggregationType
+
+
+                # Item key
+                year = result.coverDate[0:4]
+
+                key_author: str = ""
+
+                author_names = result.author_names
+
+                if author_names:
+                    key_author = author_names.split(",", 1)[0]
+
+                    if not imitate_scopus_export:
+                        # Remove potential white spaces
+                        key_author = "".join(key_author.split())
+
+                key = "".join([key_author, year])
+
+                # Authors
+                authors = ""
+                if author_names:
+                    authors = " and ".join(author_names.split(";"))
+
+                # Pages
+                pages = None
+                page_range = result.pageRange
+                if page_range:
+                    pages = page_range.replace("-", " – ")
+
+                # Affiliation
+                affiliation: str = result.affilname
+                if affiliation:
+                    affiliation = "; ".join(affiliation.split(";"))
+
+                # Author keywords
+                author_keywords: str = result.authkeywords
+                if author_keywords:
+                    author_keywords = "; ".join(author_keywords.split(" | "))
+
+                # All information
+                bib_tex_type = None
+                if (document_type in [type_article, type_review, type_short_survey, type_editorial, type_note, type_letter, type_data_paper, type_erratum, type_conference_review, type_conference_paper, type_retracted, type_none] and aggregation_type == aggregation_type_journal) or (document_type in [type_article, type_review, type_short_survey, type_note] and aggregation_type == aggregation_type_trade_journal) or (document_type == type_article and aggregation_type == aggregation_type_none):
+                    bib_tex_type = bib_tex_type_article
+                if aggregation_type == aggregation_type_conference_proceedings or (document_type == type_conference_paper and aggregation_type in [aggregation_type_book, aggregation_type_none]):
+                    bib_tex_type = bib_tex_type_in_proceedings
+                elif aggregation_type == aggregation_type_book_series or (document_type in [type_book_chapter, type_article, type_editorial] and aggregation_type == aggregation_type_book):
+                    bib_tex_type = bib_tex_type_in_collection
+                elif document_type == type_book and aggregation_type == aggregation_type_book:
+                    bib_tex_type = bib_tex_type_book
+                elif document_type == type_report and aggregation_type == aggregation_type_report:
+                    bib_tex_type = bib_tex_type_techreport
+                if bib_tex_type == None:
+                    raise ValueError(f"Unsupported type | Document type: {document_type} | Aggregation type: {aggregation_type} | DOI: https://doi.org/{result.doi}")
+
+
+                fields = []
+
+                fields = self.add_bibtex_field(fields, "author", authors)
+                fields = self.add_bibtex_field(fields, "title", result.title)
+                fields = self.add_bibtex_field(fields, "date", result.coverDate)
+                if aggregation_type == aggregation_type_journal:
+                    fields = self.add_bibtex_field(fields, "journal", result.publicationName)
+                    fields = self.add_bibtex_field(fields, "volume", result.volume)
+                    fields = self.add_bibtex_field(fields, "number", result.issueIdentifier)
+                elif aggregation_type == aggregation_type_conference_proceedings or aggregation_type == aggregation_type_book_series:
+                    fields = self.add_bibtex_field(fields, "booktitle", result.publicationName)
+                elif bib_tex_type == bib_tex_type_techreport:
+                    fields = self.add_bibtex_field(fields, "institution", affiliation)
+                fields = self.add_bibtex_field(fields, "pages", pages)
+                fields = self.add_bibtex_field(fields, "doi", result.doi)
+                fields = self.add_bibtex_field(fields, "url", "https://api.elsevier.com/content/abstract/scopus_id/" + result.eid.rsplit("-", 1)[1])
+                if not bib_tex_type == bib_tex_type_techreport:
+                    fields = self.add_bibtex_field(fields, "affiliation", affiliation)
+                fields = self.add_bibtex_field(fields, "abstract", result.description)
+                fields = self.add_bibtex_field(fields, "author_keywords", author_keywords)
+                if bib_tex_type == bib_tex_type_book:
+                    fields = self.add_bibtex_field(fields, "isbn", result.volume)
+                fields = self.add_bibtex_field(fields, "issn", result.issn)
+                fields = self.add_bibtex_field(fields, "type", document_type)
+                fields = self.add_bibtex_field(fields, "scopus_aggregation_type", aggregation_type)
+                fields = self.add_bibtex_field(fields, "citedby_count", result.citedby_count)
+                fields = self.add_bibtex_field(fields, "openaccess", result.openaccess)
+                fields = self.add_bibtex_field(fields, "fund_sponsor", result.fund_sponsor)
+                fields = self.add_bibtex_field(fields, "source", "Scopus")
+
+                entry = bibtexparser.model.Entry(bib_tex_type, key, fields)
+
+                bib_library.add(entry)
+
+                # Check whether the addition was successful or resulted in a duplicate block that needs fixing.
+                for i in range(26):
+                    failed_blocks = bib_library.failed_blocks
+                    if failed_blocks:
+                        failed_block = failed_blocks[0]
+                        # Add any additional ending, so that the slicing also works for first iteration.
+                        if i == 0:
+                            entry.key += "a"
+                        entry.key = entry.key[:-1] + chr(ord("a") + i)
+                        if type(failed_block) == bibtexparser.model.DuplicateBlockKeyBlock:
+                            # Causes issues:
+                            # bib_library.replace(failed_block, entry)
+                            # This works:
+                            bib_library.remove(failed_block)
+                            bib_library.add(entry)
+                    else:
+                        break
+
+                # print(bib_library.entries_dict)
+
+        bibtex_format = None
+
+        if not imitate_scopus_export:
+            bibtex_format = bibtexparser.BibtexFormat()
+            bibtex_format.indent = " "
+            bibtex_format.block_separator = "\n"
+
+
+        # print(bib_library.failed_blocks)
+
+        # bibtexparser.write_file(path, bib_library, bibtex_format=bibtex_format)
+
+        # Workaround since UTF-8 encoding seems to fail with the write_file() function as of now:
+        export_bib = bibtexparser.write_string(bib_library, bibtex_format=bibtex_format)
+        with open(path, "w", encoding="utf-8") as f:
+            f.write(export_bib)
+
 
 def _join(item, key, sep=";"):
     """Auxiliary function to join same elements of a list of dictionaries if
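
Usage note (not part of the patch): a minimal sketch of how the new export could be called, assuming pybliometrics is already configured with a valid Scopus API key and that the query returns results. The query string and output paths below are placeholders.

    from pybliometrics.scopus import ScopusSearch

    # Hypothetical query; any valid Scopus advanced-search string works here.
    s = ScopusSearch("TITLE-ABS-KEY(bibliometrics)")

    # Write all retrieved documents to a BibTeX file with the patch's
    # default formatting (custom indentation and block separator).
    s.export_bibtex("results.bib")

    # Keep formatting closer to Scopus' native BibTeX export instead
    # (entry keys keep author-name whitespace, default bibtexparser layout).
    s.export_bibtex("results_scopus_style.bib", imitate_scopus_export=True)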