From cbe9d0aa5807957af8035563c2a7795ef1dcce98 Mon Sep 17 00:00:00 2001 From: Liam Sebestyen Date: Mon, 31 Mar 2025 20:00:54 -0700 Subject: [PATCH 1/5] Fixed issue where poor title is selected. added calculateTitleScore method, and modified mergeCanditates method. --- .../fileformat/PdfMergeMetadataImporter.java | 58 +++++++++++++++++-- 1 file changed, 54 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/jabref/logic/importer/fileformat/PdfMergeMetadataImporter.java b/src/main/java/org/jabref/logic/importer/fileformat/PdfMergeMetadataImporter.java index 6fb2ab2e7df..903800bf935 100644 --- a/src/main/java/org/jabref/logic/importer/fileformat/PdfMergeMetadataImporter.java +++ b/src/main/java/org/jabref/logic/importer/fileformat/PdfMergeMetadataImporter.java @@ -161,17 +161,67 @@ private void fetchData(BibEntry candidate, StandardField field, IdBasedFetcher f } private static BibEntry mergeCandidates(Stream candidates) { - final BibEntry entry = new BibEntry(); - candidates.forEach(entry::mergeWith); + // Convert the stream to a list so we can iterate over the list twice + List candidateList = candidates.toList(); - // Retain online links only - List onlineLinks = entry.getFiles().stream().filter(LinkedFile::isOnlineLink).toList(); + + BibEntry entry = new BibEntry(); + + // Score titles to find the "best" title among candidates + int bestTitleScore = -1; + String bestTitle = null; + + for (BibEntry candidate : candidateList) { + Optional candidateTitle = candidate.getField(StandardField.TITLE); + if (candidateTitle.isPresent()) { + int score = calculateTitleScore(candidateTitle.get()); + if (score > bestTitleScore) { + bestTitleScore = score; + bestTitle = candidateTitle.get(); + } + } + } + + // Merge all fields from the candidates, same as previous method + candidateList.forEach(entry::mergeWith); + + // Override the best title we found + if (bestTitle != null) { + entry.setField(StandardField.TITLE, bestTitle); + } + + + List onlineLinks = entry.getFiles().stream() + .filter(LinkedFile::isOnlineLink) + .toList(); entry.clearField(StandardField.FILE); entry.addFiles(onlineLinks); return entry; } + private static int calculateTitleScore(String Title) { + //for every word in the title, plus one point + int wordcount = Title.trim().split("\\s+").length; + if(wordcount > 35){ + wordcount = -2; //super long titles are less favourable + } + + //if the title ends in .ccc or .cccc where c is any alphabetic char, minus 10 points + int endsinExtension = Title.matches(".*\\.[a-zA-Z]{3,4}") ? -10 : 0; + + int endsWithFileExtension = 0; + + if (Title.matches("(?i).*(\\.(pdf|docx?|txt|jpg|png))$")){ + //Check for some common file extensions, remove points if contains these common filepath endings. + endsWithFileExtension = -10; // subtract ten more points for file extension ending, very undesirable. + } + return wordcount + endsinExtension + endsWithFileExtension; + } + + + + /** * Imports the BibTeX data from the given PDF file and relativized the paths of each linked file based on the context and the file preferences. */ From 50b2a65b71468a461ab3b76dfe2bb8cc7b779f13 Mon Sep 17 00:00:00 2001 From: liamsebestyen <134403694+liamsebestyen@users.noreply.github.com> Date: Mon, 31 Mar 2025 23:28:19 -0700 Subject: [PATCH 2/5] Update src/main/java/org/jabref/logic/importer/fileformat/PdfMergeMetadataImporter.java Co-authored-by: Ethan S <111109194+eswain99@users.noreply.github.com> --- .../logic/importer/fileformat/PdfMergeMetadataImporter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/jabref/logic/importer/fileformat/PdfMergeMetadataImporter.java b/src/main/java/org/jabref/logic/importer/fileformat/PdfMergeMetadataImporter.java index 903800bf935..15750dec71c 100644 --- a/src/main/java/org/jabref/logic/importer/fileformat/PdfMergeMetadataImporter.java +++ b/src/main/java/org/jabref/logic/importer/fileformat/PdfMergeMetadataImporter.java @@ -200,7 +200,7 @@ private static BibEntry mergeCandidates(Stream candidates) { return entry; } - private static int calculateTitleScore(String Title) { + private static int calculateTitleScore(String title) { //for every word in the title, plus one point int wordcount = Title.trim().split("\\s+").length; if(wordcount > 35){ From 6dc434c4634fc1d56b78f5a64426e4cfd8697110 Mon Sep 17 00:00:00 2001 From: liamsebestyen <134403694+liamsebestyen@users.noreply.github.com> Date: Mon, 31 Mar 2025 23:28:34 -0700 Subject: [PATCH 3/5] Update src/main/java/org/jabref/logic/importer/fileformat/PdfMergeMetadataImporter.java Co-authored-by: Ethan S <111109194+eswain99@users.noreply.github.com> --- .../logic/importer/fileformat/PdfMergeMetadataImporter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/jabref/logic/importer/fileformat/PdfMergeMetadataImporter.java b/src/main/java/org/jabref/logic/importer/fileformat/PdfMergeMetadataImporter.java index 15750dec71c..0f50a7b3374 100644 --- a/src/main/java/org/jabref/logic/importer/fileformat/PdfMergeMetadataImporter.java +++ b/src/main/java/org/jabref/logic/importer/fileformat/PdfMergeMetadataImporter.java @@ -202,7 +202,7 @@ private static BibEntry mergeCandidates(Stream candidates) { private static int calculateTitleScore(String title) { //for every word in the title, plus one point - int wordcount = Title.trim().split("\\s+").length; + int wordCount = title.trim().split("\\s+").length; if(wordcount > 35){ wordcount = -2; //super long titles are less favourable } From ca6c832851a760ce9ecdf0f0968f0070050bdb39 Mon Sep 17 00:00:00 2001 From: liamsebestyen <134403694+liamsebestyen@users.noreply.github.com> Date: Mon, 31 Mar 2025 23:28:51 -0700 Subject: [PATCH 4/5] Update src/main/java/org/jabref/logic/importer/fileformat/PdfMergeMetadataImporter.java Code case fixes Co-authored-by: Ethan S <111109194+eswain99@users.noreply.github.com> --- .../logic/importer/fileformat/PdfMergeMetadataImporter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/jabref/logic/importer/fileformat/PdfMergeMetadataImporter.java b/src/main/java/org/jabref/logic/importer/fileformat/PdfMergeMetadataImporter.java index 0f50a7b3374..8d853a764a8 100644 --- a/src/main/java/org/jabref/logic/importer/fileformat/PdfMergeMetadataImporter.java +++ b/src/main/java/org/jabref/logic/importer/fileformat/PdfMergeMetadataImporter.java @@ -208,7 +208,7 @@ private static int calculateTitleScore(String title) { } //if the title ends in .ccc or .cccc where c is any alphabetic char, minus 10 points - int endsinExtension = Title.matches(".*\\.[a-zA-Z]{3,4}") ? -10 : 0; + int endsInExtension= title.matches(".*\\.[a-zA-Z]{3,4}") ? -10 : 0; int endsWithFileExtension = 0; From 0fe2a264a7390ab9e43152793df26cc922cfe839 Mon Sep 17 00:00:00 2001 From: liamsebestyen <134403694+liamsebestyen@users.noreply.github.com> Date: Mon, 31 Mar 2025 23:30:09 -0700 Subject: [PATCH 5/5] Update src/main/java/org/jabref/logic/importer/fileformat/PdfMergeMetadataImporter.java fix code case Co-authored-by: Ethan S <111109194+eswain99@users.noreply.github.com> --- .../logic/importer/fileformat/PdfMergeMetadataImporter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/jabref/logic/importer/fileformat/PdfMergeMetadataImporter.java b/src/main/java/org/jabref/logic/importer/fileformat/PdfMergeMetadataImporter.java index 8d853a764a8..aab96b90e09 100644 --- a/src/main/java/org/jabref/logic/importer/fileformat/PdfMergeMetadataImporter.java +++ b/src/main/java/org/jabref/logic/importer/fileformat/PdfMergeMetadataImporter.java @@ -212,7 +212,7 @@ private static int calculateTitleScore(String title) { int endsWithFileExtension = 0; - if (Title.matches("(?i).*(\\.(pdf|docx?|txt|jpg|png))$")){ + if (title.matches("(?i).*(\\.(pdf|docx?|txt|jpg|png))$")){ //Check for some common file extensions, remove points if contains these common filepath endings. endsWithFileExtension = -10; // subtract ten more points for file extension ending, very undesirable. }