From 279502600af35b02435c27e365e415db82276cb4 Mon Sep 17 00:00:00 2001 From: Michael Froh Date: Tue, 28 Jan 2025 17:05:36 -0800 Subject: [PATCH] Upgrade opennlp and codec 10x (#14177) * Upgrade OpenNLP from 2.3.2 to 2.5.3 (#14130) * Upgrade commons-codec from 1.13.0 to 1.17.2 (#14129) --- lucene/CHANGES.txt | 3 +++ lucene/analysis.tests/src/test/module-info.java | 1 + .../analysis/opennlp/tools/NLPPOSTaggerOp.java | 6 +++--- lucene/licenses/commons-codec-1.13.jar.sha1 | 1 - lucene/licenses/commons-codec-1.17.2.jar.sha1 | 1 + lucene/licenses/opennlp-tools-2.3.2.jar.sha1 | 1 - lucene/licenses/opennlp-tools-2.5.3.jar.sha1 | 1 + lucene/licenses/slf4j-api-1.7.36.jar.sha1 | 1 - lucene/licenses/slf4j-api-2.0.16.jar.sha1 | 1 + versions.lock | 12 ++++++------ versions.toml | 4 ++-- 11 files changed, 18 insertions(+), 14 deletions(-) delete mode 100644 lucene/licenses/commons-codec-1.13.jar.sha1 create mode 100644 lucene/licenses/commons-codec-1.17.2.jar.sha1 delete mode 100644 lucene/licenses/opennlp-tools-2.3.2.jar.sha1 create mode 100644 lucene/licenses/opennlp-tools-2.5.3.jar.sha1 delete mode 100644 lucene/licenses/slf4j-api-1.7.36.jar.sha1 create mode 100644 lucene/licenses/slf4j-api-2.0.16.jar.sha1 diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 65ef123e65cb..faebe6ec8a3b 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -68,6 +68,9 @@ Other * GITHUB#14116 Use CDL to block threads to avoid flaky tests. (Ao Li) +* GITHUB#14130: Upgrade OpenNLP from 2.3.2 to 2.5.3, which transitively upgrades Slf4j + from 1.7.36 to 2.0.16. (Michael Froh) + ======================= Lucene 10.1.0 ======================= API Changes diff --git a/lucene/analysis.tests/src/test/module-info.java b/lucene/analysis.tests/src/test/module-info.java index 3a67c75febb0..d4d8957252b2 100644 --- a/lucene/analysis.tests/src/test/module-info.java +++ b/lucene/analysis.tests/src/test/module-info.java @@ -33,6 +33,7 @@ requires org.apache.lucene.analysis.smartcn; requires org.apache.lucene.analysis.stempel; requires org.apache.lucene.test_framework; + requires org.apache.commons.codec; exports org.apache.lucene.analysis.tests; } diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPPOSTaggerOp.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPPOSTaggerOp.java index dee4afefc58a..ef7a6fb62452 100644 --- a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPPOSTaggerOp.java +++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPPOSTaggerOp.java @@ -17,8 +17,8 @@ package org.apache.lucene.analysis.opennlp.tools; -import java.io.IOException; import opennlp.tools.postag.POSModel; +import opennlp.tools.postag.POSTagFormat; import opennlp.tools.postag.POSTagger; import opennlp.tools.postag.POSTaggerME; @@ -29,8 +29,8 @@ public class NLPPOSTaggerOp { private final POSTagger tagger; - public NLPPOSTaggerOp(POSModel model) throws IOException { - tagger = new POSTaggerME(model); + public NLPPOSTaggerOp(POSModel model) { + tagger = new POSTaggerME(model, POSTagFormat.PENN); } public synchronized String[] getPOSTags(String[] words) { diff --git a/lucene/licenses/commons-codec-1.13.jar.sha1 b/lucene/licenses/commons-codec-1.13.jar.sha1 deleted file mode 100644 index 4d9344b4a4e6..000000000000 --- a/lucene/licenses/commons-codec-1.13.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -3f18e1aa31031d89db6f01ba05d501258ce69d2c diff --git a/lucene/licenses/commons-codec-1.17.2.jar.sha1 b/lucene/licenses/commons-codec-1.17.2.jar.sha1 new file mode 100644 index 000000000000..3ef561c0262f --- /dev/null +++ b/lucene/licenses/commons-codec-1.17.2.jar.sha1 @@ -0,0 +1 @@ +cd6bb9d856db5f61871a94d5801efd0b93b7fcb2 diff --git a/lucene/licenses/opennlp-tools-2.3.2.jar.sha1 b/lucene/licenses/opennlp-tools-2.3.2.jar.sha1 deleted file mode 100644 index 94b2924f8fa7..000000000000 --- a/lucene/licenses/opennlp-tools-2.3.2.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -d739edba1e729691ed5ab80e1ccf330555a02ea7 diff --git a/lucene/licenses/opennlp-tools-2.5.3.jar.sha1 b/lucene/licenses/opennlp-tools-2.5.3.jar.sha1 new file mode 100644 index 000000000000..fb01299fa29d --- /dev/null +++ b/lucene/licenses/opennlp-tools-2.5.3.jar.sha1 @@ -0,0 +1 @@ +4b544138ec079c1c73dc2c1b928506871c4b1b47 diff --git a/lucene/licenses/slf4j-api-1.7.36.jar.sha1 b/lucene/licenses/slf4j-api-1.7.36.jar.sha1 deleted file mode 100644 index 828b7cf7e056..000000000000 --- a/lucene/licenses/slf4j-api-1.7.36.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -6c62681a2f655b49963a5983b8b0950a6120ae14 diff --git a/lucene/licenses/slf4j-api-2.0.16.jar.sha1 b/lucene/licenses/slf4j-api-2.0.16.jar.sha1 new file mode 100644 index 000000000000..b1bb75be39b1 --- /dev/null +++ b/lucene/licenses/slf4j-api-2.0.16.jar.sha1 @@ -0,0 +1 @@ +0172931663a09a1fa515567af5fbef00897d3c04 diff --git a/versions.lock b/versions.lock index 26de44f99e2d..07f8ff30543d 100644 --- a/versions.lock +++ b/versions.lock @@ -4,7 +4,7 @@ "main_dependencies" : { "com.carrotsearch.randomizedtesting:randomizedtesting-runner:2.8.1" : "fa9ef26b,refs=4", "com.ibm.icu:icu4j:74.2" : "47ea4550,refs=6", - "commons-codec:commons-codec:1.13" : "e9962aab,refs=4", + "commons-codec:commons-codec:1.17.2" : "e9962aab,refs=4", "io.sgr:s2-geometry-library-java:1.0.0" : "cbc357ab,refs=4", "junit:junit:4.13.1" : "fa9ef26b,refs=4", "net.sf.jopt-simple:jopt-simple:5.0.4" : "85a1e4c6,refs=2", @@ -12,7 +12,7 @@ "org.antlr:antlr4-runtime:4.11.1" : "d9953130,refs=4", "org.apache.commons:commons-compress:1.19" : "5ce8cdc6,refs=2", "org.apache.commons:commons-math3:3.6.1" : "85a1e4c6,refs=2", - "org.apache.opennlp:opennlp-tools:2.3.2" : "2f760bab,refs=4", + "org.apache.opennlp:opennlp-tools:2.5.3" : "2f760bab,refs=4", "org.carrot2:morfologik-fsa:2.1.9" : "79af844b,refs=4", "org.carrot2:morfologik-polish:2.1.9" : "fe494320,refs=3", "org.carrot2:morfologik-stemming:2.1.9" : "79af844b,refs=4", @@ -22,7 +22,7 @@ "org.ow2.asm:asm:9.6" : "d9953130,refs=4", "org.ow2.asm:asm-commons:9.6" : "d9953130,refs=4", "org.ow2.asm:asm-tree:9.6" : "d9953130,refs=4", - "org.slf4j:slf4j-api:1.7.36" : "2f760bab,refs=4", + "org.slf4j:slf4j-api:2.0.16" : "2f760bab,refs=4", "ua.net.nlp:morfologik-ukrainian-search:4.9.1" : "fe494320,refs=3", "xerces:xercesImpl:2.12.0" : "5ce8cdc6,refs=2" }, @@ -46,7 +46,7 @@ "com.google.j2objc:j2objc-annotations:1.3" : "6897bc09,refs=38", "com.google.protobuf:protobuf-java:3.19.2" : "6897bc09,refs=38", "com.ibm.icu:icu4j:74.2" : "ffa00415,refs=8", - "commons-codec:commons-codec:1.13" : "733734f0,refs=6", + "commons-codec:commons-codec:1.17.2" : "733734f0,refs=6", "io.github.java-diff-utils:java-diff-utils:4.0" : "6897bc09,refs=38", "io.sgr:s2-geometry-library-java:1.0.0" : "1d5a4b2b,refs=4", "javax.inject:javax.inject:1" : "6897bc09,refs=38", @@ -56,7 +56,7 @@ "org.antlr:antlr4-runtime:4.11.1" : "6fbc4021,refs=5", "org.apache.commons:commons-compress:1.19" : "6f16ff86,refs=2", "org.apache.commons:commons-math3:3.6.1" : "152d9f78,refs=3", - "org.apache.opennlp:opennlp-tools:2.3.2" : "b91715f0,refs=6", + "org.apache.opennlp:opennlp-tools:2.5.3" : "b91715f0,refs=6", "org.assertj:assertj-core:3.21.0" : "b7ba1646,refs=2", "org.carrot2:morfologik-fsa:2.1.9" : "e077a675,refs=8", "org.carrot2:morfologik-polish:2.1.9" : "cb00cecf,refs=5", @@ -73,7 +73,7 @@ "org.ow2.asm:asm-commons:9.6" : "6fbc4021,refs=5", "org.ow2.asm:asm-tree:9.6" : "6fbc4021,refs=5", "org.pcollections:pcollections:3.1.4" : "6897bc09,refs=38", - "org.slf4j:slf4j-api:1.7.36" : "b91715f0,refs=6", + "org.slf4j:slf4j-api:2.0.16" : "b91715f0,refs=6", "ua.net.nlp:morfologik-ukrainian-search:4.9.1" : "cb00cecf,refs=5", "xerces:xercesImpl:2.12.0" : "6f16ff86,refs=2" } diff --git a/versions.toml b/versions.toml index 80dc51f39bf2..679287f9d7db 100644 --- a/versions.toml +++ b/versions.toml @@ -2,7 +2,7 @@ antlr = "4.11.1" asm = "9.6" assertj = "3.21.0" -commons-codec = "1.13" +commons-codec = "1.17.2" commons-compress = "1.19" ecj = "3.36.0" errorprone = "2.18.0" @@ -25,7 +25,7 @@ minJava = "21" morfologik = "2.1.9" morfologik-ukrainian = "4.9.1" nekohtml = "1.9.17" -opennlp = "2.3.2" +opennlp = "2.5.3" procfork = "1.0.6" randomizedtesting = "2.8.1" rat = "0.14"