From 24376a60b2c31f3ad2fa1330707024df7d512830 Mon Sep 17 00:00:00 2001 From: Erich Schubert Date: Thu, 12 Jan 2017 15:59:52 +0100 Subject: [PATCH 1/8] Add (unfinished) unit test for english. --- pom.xml | 11 + .../test/english/TestEnglishRules.java | 303 ++++++++++++++++++ test/test.props | 78 +++++ 3 files changed, 392 insertions(+) create mode 100644 test/de/unihd/dbs/heideltime/test/english/TestEnglishRules.java create mode 100644 test/test.props diff --git a/pom.xml b/pom.xml index efa8c991..00beefb6 100644 --- a/pom.xml +++ b/pom.xml @@ -24,6 +24,8 @@ UTF-8 + true + true @@ -50,6 +52,8 @@ src ${basedir}/class + test + ${basedir}/testclass ${basedir} @@ -206,5 +210,12 @@ 0.1 provided + + + junit + junit + [4.12,5) + test + diff --git a/test/de/unihd/dbs/heideltime/test/english/TestEnglishRules.java b/test/de/unihd/dbs/heideltime/test/english/TestEnglishRules.java new file mode 100644 index 00000000..18a41d18 --- /dev/null +++ b/test/de/unihd/dbs/heideltime/test/english/TestEnglishRules.java @@ -0,0 +1,303 @@ +package de.unihd.dbs.heideltime.test.english; + +import static org.junit.Assert.fail; + +import org.apache.uima.cas.text.AnnotationIndex; +import org.apache.uima.jcas.JCas; +import org.junit.Before; +import org.junit.Test; + +import de.unihd.dbs.heideltime.standalone.DocumentType; +import de.unihd.dbs.heideltime.standalone.HeidelTimeStandalone; +import de.unihd.dbs.heideltime.standalone.OutputType; +import de.unihd.dbs.heideltime.standalone.POSTagger; +import de.unihd.dbs.heideltime.standalone.components.ResultFormatter; +import de.unihd.dbs.heideltime.standalone.exceptions.DocumentCreationTimeMissingException; +import de.unihd.dbs.uima.annotator.heideltime.resources.Language; +import de.unihd.dbs.uima.types.heideltime.Timex3; + +public class TestEnglishRules { + String[][] CASES = { // Rule name, sample text, [expected covered text] + { "date_historic_1a-BCADhint", "190 BC" }, // 1- to 4-digit year + { "date_historic_1b-BCADhint", "BC 190" }, // 1- to 4-digit year + { "date_historic_1c-BCADhint", "190 or 180 BC" }, // find "190 BC"; 1- to 4-digit year + { "date_historic_2a-BCADhint", "March 190 BC" }, // 1- to 4-digit year + { "date_historic_2b", "March 190" }, // 3-digit year + { "date_historic_2c", "in March 90", "March 90" }, // 2-digit year + { "date_historic_2d", "March of 90", "March of 90" }, // 2-digit year + { "date_historic_3a-BCADhint", "March 29, 190 BC" }, // 1- to 4-digit year + { "date_historic_3b-BCADhint", "29 March 190 BC" }, // 1- to 4-digit year + { "date_historic_3c-BCADhint", "29th of March 190 BC" }, // 1- to 4-digit year + { "date_historic_3d-BCADhint", "March 29, 190" }, // 3-digit year + { "date_historic_3e-BCADhint", "March 29, 90" }, // 2-digit year + { "date_historic_4a-BCADhint", "summer of 190 BC" }, // 1- to 4-digit year + { "date_historic_5a-BCADhint", "the 2nd century BC" }, // + { "date_historic_5b-BCADhint", "beginning of the 2nd century BC" }, // + { "date_historic_5c-BCADhint", "2nd or 3rd century BC" }, // find "2nd century BC" + { "date_historic_5d-BCADhint", "beginning of the 2nd or 3rd century BC" }, // find "beginning 2nd century BC" + { "date_historic_6a-BCADhint", "1990s BC" }, // + { "date_historic_6b-BCADhint", "190s BC" }, // + { "date_historic_6c-BCADhint", "90s BC" }, // + { "date_historic_7ab", "in 190", "190" }, // 3-digit year + { "date_historic_7ab", "in 190,", "190" }, // 3-digit year + { "date_historic_7c", "\n190\n", "190" }, // (2- to 4-digit year + { "date_historic_7d", "year of 90" }, // 2-digit year + { "date_historic_7e", "year of 190" }, // 3-digit year + { "date_historic_8ab", "in 90,", "90" }, // 2-digit year + { "date_historic_8ab", "in 90", "90" }, // 2-digit year + { "date_historic_0ab", "in 90 cases", "" }, // 2- to 4-digit year + { "date_historic_0ab", "in 90 nice cases", "" }, // 2- to 4-digit year + { "date_historic_0ab", "in 90 nice law cases", "" }, // 2- to 4-digit year + { "date_historic_0d", "in 90 percent", "" }, // 2- to 4-digit year + { "date_r0a", "2010-01-29" }, // + { "date_r0b", "10-29-99" }, // + { "date_r0c", "09/26/1999" }, // + { "date_r0d", "09/26/99" }, // + { "date_r0e", "7-14 (AP)", "7-14" }, // find 7-14 + { "date_r0g", "1.3.99" }, // + { "date_r0h", "1.3.1999" }, // + { "date_r1a", "February 25, 2009" }, // + { "date_r1a", "Feb. 25, 2009" }, // + { "date_r1a", "Feb. 25, 2009, Monday" }, // + { "date_r1b", "25 February 2009" }, // + { "date_r1c", "25 of February 2009" }, // + { "date_r2a", "November 19" }, // + { "date_r2a", "Nov 19" }, // + { "date_r2a", "January 19th" }, // + { "date_r2a", "January nineteenth" }, // + { "date_r2b", "November 19-20" }, // find November 20 + { "date_r2c", "19 November" }, // + { "date_r2c", "19 Nov" }, // + { "date_r2c", "19th of November" }, // + { "date_r2d", "3 to 6 May" }, // find May 3 + { "date_r2e", "3 to 6 May 2004" }, // find May 3, 2004 + { "date_r2a2", "January 19th of that year" }, // + { "date_r2c2", "19th of January of the same year" }, // + { "date_r3a", "Friday October 13" }, // + { "date_r3a", "Monday, Oct 12" }, // + { "date_r3b", "Friday October 13 2009" }, // + { "date_r3b", "Monday, October 12th 2009" }, // + { "date_r4a", "September 14 and 18, 2010" }, // find September 14 2010 + { "date_r4b", "September 14 and 18, 2010" }, // find September 18 2010 + { "date_r5a", "tomorrow" }, // + { "date_r5b", "earlier yesterday" }, // + { "date_r5c", "Monday" }, // + { "date_r5d", "earlier Monday" }, // + { "date_r61", "the weekend" }, // + { "date_r7a", "November 2001" }, // + { "date_r7a", "Nov. 2001" }, // + { "date_r7a", "February of 1999" }, // + { "date_r7b", "May and June 2011" }, // find May 2001 AND June 2011 + { "date_r8a", "November next year" }, // + { "date_r8a", "May last year" }, // + { "date_r9a", "summer" }, // + { "date_r9b", "winter 2001" }, // + { "date_r9b", "winter of 2001" }, // + { "date_r9c", "summer of 69" }, // + { "date_r10a", "the third quarter of 2001" }, // + { "date_r10b", "the second half" }, // + { "date_r10c", "the 2001 third quarter" }, // + { "date_r11a", "this year's third quarter" }, // + { "date_r11a", "next year's first quarter" }, // + { "date_r11b", "the year-earlier first half" }, // + { "date_r11c", "the second half of this year" }, // + { "date_r12a", "2009" }, // + { "date_r12b", "1850-58" }, // find: 1858 + { "date_r12c", "nineteen ninety-one" }, // + { "date_r12d", "two-thousand ten" }, // + { "date_r13a", "the 1990s" }, // + { "date_r13b", "the 90s" }, // + { "date_r13c", "the seventies" }, // + { "date_r13d", "the nineteen seventies" }, // + { "date_r14a", "the early 1990s" }, // + { "date_r14b", "the mid-90s" }, // + { "date_r14c", "the late seventies" }, // + { "date_r14d", "the early nineteen seventies" }, // + { "date_r15a", "the 19th century" }, // + { "date_r15a", "the seventh century" }, // + { "date_r16a", "March" }, // + { "date_r16b", "Early 2001" }, // + { "date_r16c", "the beginning of November 1999" }, // + { "date_r16d", "the middle of September" }, // + { "date_r17a", "this year" }, // + { "date_r17b", "this November" }, // + { "date_r17c", "this November 24" }, // + { "date_r17d", "this Monday" }, // + { "date_r17e", "this summer" }, // + { "date_r17f", "this day" }, // using UNDEF-REF normalization + { "date_r18a", "the beginning of this year" }, // + { "date_r18b", "the beginning of this November" }, // + { "date_r18c", "the beginning of this November 24" }, // + { "date_r18d", "the beginning of this Monday" }, // + { "date_r18e", "the beginning of this summer" }, // + { "date_r19a", "at least several years ago" }, // + { "date_r19b", "about twenty years ago" }, // + { "date_r19c", "about 20 years ago" }, // + { "date_r19d", "a month ago" }, // + { "date_r20a", "some days later" }, // + { "date_r20b", "about twenty days later" }, // + { "date_r20c", "about 20 days later" }, // + { "date_r20d", "a week later" }, // + { "date_r21a", "twenty days earlier" }, // + { "date_r21b", "about 20 days earlier" }, // + { "date_r21c", "a week earlier" }, // + { "date_r22a", "a year ago" }, // + { "date_r22b", "a year later" }, // + { "date_r23a", "the year-earlier first quarter" }, // + { "date_r23b", "the year-earlier quarter" }, // + { "date_r23c", "the quarter" }, // + { "date_r24a", "Christmas" }, // + { "date_r24b", "Christmas 2010" }, // + { "date_r24cd", "Christmas 87" }, // + { "date_r24cd", "Christmas '87" }, // + { "date_r25a", "Easter Sunday" }, // + { "date_r25b", "Easter Sunday 2010" }, // + { "date_r25cd", "Easter Sunday 87" }, // + { "date_r25cd", "Easter Sunday '87" }, // + { "date_r1a_negative", "as soon as" }, // do not match soon if it is in "as soon as" + { "date_r2a_negative", "they march the way" }, // if it is a verb + { "date_r2b_negative", "they march the way" }, // if it is a verb + { "date_r2c_negative", "may" }, // if it is a verb + { "date_r2d_negative", "may" }, // or march, fall -- if it is lower case and without any further temporal stuff around it... + { "date_r3a_negative", "2000 soldiers" }, // four digit number followed by a plural noun + { "date_r3b_negative", "2000 dead soldiers" }, // four digit number followed by an adjective and a plural noun + { "date_r3c_negative", "2000 kilometer" }, // four digit number followed a non-temporal unit + { "date_r4a_negative", "W2000.1920" }, // + { "x_date_r11a_negative", "in his 20s" }, // + { "duration_r1ad", "less than sixty days" }, // + { "duration_r1e12", "less than 60 days" }, // + { "duration_r1cf", "several days" }, // + { "duration_r1ad", "less than sixty minutes" }, // + { "duration_r1e12", "less than 60 minutes" }, // + { "duration_r1cf", "several minutes" }, // + { "duration_r2ad", "at least the last twenty years" }, // + { "duration_r2be", "at least the last 20 years" }, // + { "duration_r2cf", "at least the last several years" }, // + { "duration_r2ad", "at least the last twenty minutes" }, // + { "duration_r2be", "at least the last 20 minutes" }, // + { "duration_r2cf", "at least the last several minutes" }, // + { "duration_r3ac", "a three-year period" }, // + { "duration_r3bd", "a 300 year period" }, // + { "duration_r3ac", "a three-hour period" }, // + { "duration_r3bd", "a 300 hour period" }, // + { "duration_r5_a", "two and six days", "two" }, + { "duration_r1a_negative", "about 200 years older" }, // + { "duration_r1b_negative", "several days old" }, // + { "duration_r1c_negative", "59-year-old" }, // + /* */ + /* + { "interval_interval_01", "from 1999 to 2012" }, // + { "interval_interval_02", "between March and May" }, // + { "interval_interval_03", "20.3.2003 - 1.5.2003" }, // + { "interval_interval_04", "20.3.2003 to 1.5.2003" }, // + { "interval_interval_05", "on 20.3.2003 the war began and it lastet until 1.5.2003" }, // + { "interval_interval_06", "for December after leaving in February" }, // + { "interval_interval_07", "began on March 20 in 2003 and ended on May 1" }, // + { "interval_interval_08", "in 1999/2000" }, // + { "interval_interval_09", "War ended in May, after fighting from March on" }, // + { "interval_interval_10", "March, April and May" }, // + { "interval_interval_11", "Monday, Thuesday, Wednesday and Thursday" }, // + { "set_r1a", "each day" }, // + { "set_r1b", "every Monday" }, // + { "set_r1c", "each September" }, // + { "set_r1d", "every summer" }, // + { "set_r2a", "once a week" }, // + { "set_r2b", "twice a month" }, // + { "set_r2c", "three times a month" }, // + { "set_r2d", "40 times per month" }, // + { "set_r2e", "a month" }, // + { "set_r2f", "a minute" }, // + { "set_r3a", "every 5 years" }, // + { "set_r3b", "every two days" }, // + { "set_r4a", "2 days each week" }, // + { "set_r5a", "annually" }, // + { "set_r6a", "Monday afternoons" }, // + { "set_r6b", "Monday and Tuesday nights" }, // find: Monday nights + */ + { "time_r1a", "2009-12-19T17:00:00" }, // + { "time_r1a", "2009-12-19 17:00:00" }, // + { "time_r1b", "2009-12-19T17:00" }, // + { "time_r1c", "12/29/2000 20:29" }, // + { "time_r1d", "12/29/2000 20:29:29" }, // + { "time_r1e", "12/29/2000 20:29:29.79" }, // + { "time_r2a", "09-24-99 1145EST" }, // TimeStamp style with timezone information + { "time_r2b", "November 24, 2011 1535 GMT" }, // + { "time_r2d", "Wed, 29 Dec 2004 00:28:16 +0000" }, // + { "time_r2d", "Sat, 29 Jan 2005 17:21:13 -0600" }, // + { "time_r2d", "1 Feb 2005 16:13:33 +1300" }, // + { "time_r3a", "midnight Monday" }, // + { "time_r3b", "Monday night" }, // + { "time_r3b2", "early Friday morning" }, // + { "time_r3c", "midnight today" }, // + { "time_r3d", "yesterday morning" }, // + { "time_r3d2", "late yesterday evening" }, // + { "time_r3e", "last Friday morning" }, // + { "time_r4a", "earlier this afternoon" }, // + { "time_r4a", "later last night" }, // + { "time_r4b", "tonight" }, // + { "time_r5a", "circa 9 a.m." }, // + { "time_r5b", "11 PM" }, // + { "time_r5c", "11:30 a.m." }, // + { "time_r5d", "9:30 p.m." }, // + { "time_r5e", "10:30:34 a.m." }, // + { "time_r5f", "10:30:34 p.m." }, // + { "time_r6a", "9 am Wednesday" }, // + { "time_r6b", "9 pm Wednesday" }, // + { "time_r6c", "9:30 a.m. Wednesday" }, // + { "time_r6d", "9:30 p.m. Wednesday" }, // + { "time_r8a", "the morning of April 18, 1775" }, // + { "time_r8b", "the morning of April 18" }, // + /* */ + }; + + private HeidelTimeStandalone standalone; + + @Before + public void init() { + standalone = new HeidelTimeStandalone(Language.ENGLISH, DocumentType.NARRATIVES, // + OutputType.XMI, "test/test.props", POSTagger.NO); + } + + @Test + public void testEnglishRules() { + for (String[] set : CASES) { + testSingleCase(set[0], set[1], set.length >= 3 ? set[2] : set[1]); + } + } + + ResultFormatter formatter = new TestResultFormatter(); + + private static class TestResultFormatter implements ResultFormatter { + @Override + public String format(JCas jcas) throws Exception { + StringBuilder buf = new StringBuilder(); + String text = jcas.getDocumentText(); + AnnotationIndex times = jcas.getAnnotationIndex(Timex3.type); + for (Timex3 timex3 : times) { + buf.append(timex3.getFoundByRule()); + buf.append('\t'); + buf.append(text.substring(timex3.getBegin(), timex3.getEnd())); + buf.append('\n'); + } + return buf.toString(); + } + } + + // NOT a @Test, only a part. + private void testSingleCase(String rule, String fragment, String expectf) { + String expected = rule + "\t" + expectf; + if (expected.contains("negative")) + expected = ""; + try { + String result = standalone.process(fragment, null, formatter); + String[] parts = result.split("\n"); + for (String part : parts) { + if (expected.equals(part.replaceAll("-(relative|explicit)", ""))) + continue; + System.err.println(rule + "\t" + fragment + " -> " + part); + } + } catch (DocumentCreationTimeMissingException e) { + fail(e.getMessage()); + } + } +} diff --git a/test/test.props b/test/test.props new file mode 100644 index 00000000..ad97718a --- /dev/null +++ b/test/test.props @@ -0,0 +1,78 @@ +################################ +## MAIN ## +################################ +# Consideration of different timex3-types +# Date +considerDate = true + +# Duration +considerDuration = true + +# Set +considerSet = true + +# Time +considerTime = true + +# Temponyms (make sure you know what you do if you set this to "true") +considerTemponym = false + +################################### +# Path to TreeTagger home directory +################################### +# Ensure there is no white space in path (try to escape white spaces) +treeTaggerHome = SET ME IN CONFIG.PROPS! (e.g., /home/jannik/treetagger) +# This one is only necessary if you want to process chinese documents. +chineseTokenizerPath = SET ME IN CONFIG.PROPS! (e.g., /home/jannik/treetagger/chinese-tokenizer) + +################################## +# paths to JVnTextPro model paths: +################################## +sent_model_path = SET ME IN CONFIG.PROPS! (e.g., /home/jannik/jvntextpro/models/jvnsensegmenter) +word_model_path = SET ME IN CONFIG.PROPS! (e.g., /home/jannik/jvntextpro/models/jvnsegmenter) +pos_model_path = SET ME IN CONFIG.PROPS! (e.g., /home/jannik/jvntextpro/models/jvnpostag/maxent) + +##################################################### +# paths to Stanford POS Tagger model or config files: +##################################################### +model_path = SET ME IN CONFIG.PROPS! (e.g., /home/jannik/stanford-postagger-full-2014-01-04/models/arabic.tagger) +# leave this unset if you do not need one (e.g., /home/jannik/stanford-postagger-full-2014-01-04/tagger.config) +config_path = + +######################################## +## paths to hunpos and its tagger files: +######################################## +hunpos_path = SET ME IN CONFIG.PROPS! (e.g., /home/jannik/hunpos) +hunpos_model_name = SET ME IN CONFIG.PROPS! (e.g., model.hunpos.mte5.defnpout) + + + +# DO NOT CHANGE THE FOLLOWING +################################ +# Relative path of type system in HeidelTime home directory +typeSystemHome = desc/type/HeidelTime_TypeSystem.xml + +# Relative path of dkpro type system in HeidelTime home directory +typeSystemHome_DKPro = desc/type/DKPro_TypeSystem.xml + +# Name of uima-context variables... +# ...for date-consideration +uimaVarDate = Date + +# ...for duration-consideration +uimaVarDuration = Duration + +# ...for language +uimaVarLanguage = Language + +# ...for set-consideration +uimaVarSet = Set + +# ...for time-consideration +uimaVarTime = Time + +# ...for temponym-consideration +uimaVarTemponym = Temponym + +# ...for type to process +uimaVarTypeToProcess = Type From df7ec1a010824a3261a799534d2717a8e1baea24 Mon Sep 17 00:00:00 2001 From: Erich Schubert Date: Mon, 16 Jan 2017 16:37:38 +0100 Subject: [PATCH 2/8] Refactoring of unit tests. Some fail (needs more cleanup) E.g. wrong rule matching correctly, negative tests not working due to missing POS tags. Interval tagging does not yet work at all. --- .../test/english/AbstractHeideltimeTest.java | 144 ++++ .../test/english/EnglishDateHistoricTest.java | 188 ++++++ .../test/english/EnglishDateTest.java | 631 ++++++++++++++++++ .../test/english/EnglishDurationTest.java | 87 +++ .../test/english/EnglishIntervalTest.java | 106 +++ .../test/english/EnglishSetRules.java | 103 +++ .../test/english/EnglishTimeTest.java | 192 ++++++ .../test/english/TestEnglishRules.java | 303 --------- 8 files changed, 1451 insertions(+), 303 deletions(-) create mode 100644 test/de/unihd/dbs/heideltime/test/english/AbstractHeideltimeTest.java create mode 100644 test/de/unihd/dbs/heideltime/test/english/EnglishDateHistoricTest.java create mode 100644 test/de/unihd/dbs/heideltime/test/english/EnglishDateTest.java create mode 100644 test/de/unihd/dbs/heideltime/test/english/EnglishDurationTest.java create mode 100644 test/de/unihd/dbs/heideltime/test/english/EnglishIntervalTest.java create mode 100644 test/de/unihd/dbs/heideltime/test/english/EnglishSetRules.java create mode 100644 test/de/unihd/dbs/heideltime/test/english/EnglishTimeTest.java delete mode 100644 test/de/unihd/dbs/heideltime/test/english/TestEnglishRules.java diff --git a/test/de/unihd/dbs/heideltime/test/english/AbstractHeideltimeTest.java b/test/de/unihd/dbs/heideltime/test/english/AbstractHeideltimeTest.java new file mode 100644 index 00000000..b102b069 --- /dev/null +++ b/test/de/unihd/dbs/heideltime/test/english/AbstractHeideltimeTest.java @@ -0,0 +1,144 @@ +package de.unihd.dbs.heideltime.test.english; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.uima.UIMAFramework; +import org.apache.uima.cas.text.AnnotationIndex; +import org.apache.uima.jcas.JCas; +import org.apache.uima.resource.metadata.TypeSystemDescription; +import org.apache.uima.util.XMLInputSource; +import org.junit.Before; + +import de.unihd.dbs.heideltime.standalone.Config; +import de.unihd.dbs.heideltime.standalone.DocumentType; +import de.unihd.dbs.heideltime.standalone.HeidelTimeStandalone; +import de.unihd.dbs.heideltime.standalone.components.impl.JCasFactoryImpl; +import de.unihd.dbs.heideltime.standalone.components.impl.UimaContextImpl; +import de.unihd.dbs.uima.annotator.heideltime.HeidelTime; +import de.unihd.dbs.uima.annotator.heideltime.resources.Language; +import de.unihd.dbs.uima.types.heideltime.Sentence; +import de.unihd.dbs.uima.types.heideltime.Timex3; +import de.unihd.dbs.uima.types.heideltime.Token; + +/** + * Abstract base class for unit testing Heideltime annotations. + * + * @author Erich Schubert + */ +public class AbstractHeideltimeTest { + + private JCasFactoryImpl jcasFactory; + protected HeidelTime heideltime; + private boolean debugTokenization = false; + static final Pattern LINEWRAP = Pattern.compile("\\s*[\\n\\r]+\\s*"); + static final Pattern WORDS = Pattern.compile("([^\\s\\w]*)(\\w+)([^\\s\\w]*)"); + + @Before + public void init() { + try { + if (!Config.isInitialized()) + HeidelTimeStandalone.readConfigFile("test/test.props"); + TypeSystemDescription[] descriptions = new TypeSystemDescription[] { + UIMAFramework.getXMLParser().parseTypeSystemDescription(new XMLInputSource(this.getClass().getClassLoader().getResource(Config.get(Config.TYPESYSTEMHOME)))) }; + jcasFactory = new JCasFactoryImpl(descriptions); + heideltime = new HeidelTime(); + heideltime.initialize(new UimaContextImpl(Language.ENGLISH, DocumentType.NARRATIVES, false)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + public AbstractHeideltimeTest() { + super(); + } + + protected JCas tokenize(String fragment) { + JCas jcas = null; + try { + jcas = jcasFactory.createJCas(); + jcas.setDocumentText(fragment); + } catch (Exception e) { + fail("Cas object could not be generated"); + } + int last = 0; + for (Matcher sm = LINEWRAP.matcher(fragment); sm.find();) { + int ss = sm.start(), se = sm.end(); + if (last < ss) + tokenizeSentence(fragment, jcas, last, ss); + last = se; + } + if (last < fragment.length()) + tokenizeSentence(fragment, jcas, last, fragment.length()); + return jcas; + } + + private void tokenizeSentence(String fragment, JCas jcas, int ss, int se) { + // A single sentence: + Sentence s = new Sentence(jcas); + s.setBegin(ss); + s.setEnd(se); + s.addToIndexes(); + // Hard-coded tokenization: + for (Matcher m = WORDS.matcher(fragment).region(ss, se); m.find();) { + for (int i = 1; i <= 3; i++) { + int start = m.start(i), end = m.end(i); + if (start == end) + continue; + Token t = new Token(jcas); + t.setBegin(start); + t.setEnd(end); + t.setPos(""); + t.addToIndexes(); + if (debugTokenization) + System.out.print(fragment.substring(start, end) + "<=>"); + } + } + if (debugTokenization) + System.out.println(); + } + + protected JCas analyze(String fragment) { + try { + JCas jcas = tokenize(fragment); + heideltime.process(jcas); + // intervaltagger.process(jcas); + return jcas; + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + protected void testSingleCase(String fragment, String[]... expectf) { + JCas jcas = analyze(fragment); + AnnotationIndex times = jcas.getAnnotationIndex(Timex3.type); + int cnt = 0; + for (Timex3 timex3 : times) { + ++cnt; + String mrule = timex3.getFoundByRule().replaceAll("-(relative|explicit)", ""); + String mstr = fragment.substring(timex3.getBegin(), timex3.getEnd()); + String mres = timex3.getTimexValue(); + boolean samerule = false, samestring = false, sameres = false; + for (String[] expect : expectf) { + samerule |= expect[0].equals(mrule); + samestring |= (expect.length > 1 ? expect[1] : "").equals(mstr); + sameres |= (expect.length > 2) ? expect[2].equals(mres) : false; + } + if (!samerule || !samestring || !sameres) { + System.err.println("Received: " + timex3); + for (String[] expect : expectf) { + System.err.println("Expected: " + String.join("\t", expect)); + } + } + assertTrue("Fragment >>" + fragment + "<< matched in a different part: >>" + mstr + "<< (rule " + mrule + ")", samestring); + assertTrue("Fragment >>" + fragment + "<< returned a different result: >>" + mres + "<< (rule " + mrule + ")", sameres); + assertTrue("Fragment >>" + fragment + "<< matched by different rule: " + mrule, samerule); + } + assertEquals("Number of results do not match.", expectf.length, cnt); + } + +} \ No newline at end of file diff --git a/test/de/unihd/dbs/heideltime/test/english/EnglishDateHistoricTest.java b/test/de/unihd/dbs/heideltime/test/english/EnglishDateHistoricTest.java new file mode 100644 index 00000000..bbbea12e --- /dev/null +++ b/test/de/unihd/dbs/heideltime/test/english/EnglishDateHistoricTest.java @@ -0,0 +1,188 @@ +package de.unihd.dbs.heideltime.test.english; + +import org.junit.Test; + +public class EnglishDateHistoricTest extends AbstractHeideltimeTest { + @Test + public void testdate_historic_1a_BCADhint() { + // 1- to 4-digit year + testSingleCase("190 BC", // + new String[] { "date_historic_1a-BCADhint", "190 BC", "BC0190" }); + } + + @Test + public void testdate_historic_1b_BCADhint() { + // 1- to 4-digit year + testSingleCase("BC 190", // + new String[] { "date_historic_1b-BCADhint", "BC 190", "BC0190" }); + } + + @Test + public void testdate_historic_1c_BCADhint() { + // find "190 BC"; 1- to 4-digit year + testSingleCase("190 or 180 BC", // + new String[] { "x_date_historic_1c-BCADhint", "190", "BC0190" }, // + new String[] { "date_historic_1a-BCADhint", "180 BC", "BC0180" }); + } + + @Test + public void testdate_historic_2a_BCADhint() { + // 1- to 4-digit year + testSingleCase("March 190 BC", // + new String[] { "date_historic_2a-BCADhint", "March 190 BC", "BC0190-03" }); + } + + @Test + public void testdate_historic_2b() { + // 3-digit year + testSingleCase("March 190", // + new String[] { "date_historic_2b", "March 190", "0190-03" }); + } + + @Test + public void testdate_historic_2c() { + // 2-digit year + testSingleCase("in March 90", new String[] { "date_historic_2c", "March 90", "0090-03" }); + } + + @Test + public void testdate_historic_2d() { + // 2-digit year + testSingleCase("March of 90", new String[] { "date_historic_2d", "March of 90", "0090-03" }); + } + + @Test + public void testdate_historic_3a_BCADhint() { + // 1- to 4-digit year + testSingleCase("March 29, 190 BC", // + new String[] { "date_historic_3a-BCADhint", "March 29, 190 BC", "BC0190-03-29" }); + } + + @Test + public void testdate_historic_3b_BCADhint() { + // 1- to 4-digit year + testSingleCase("29 March 190 BC", // + new String[] { "date_historic_3b-BCADhint", "29 March 190 BC", "BC0190-03-29" }); + } + + @Test + public void testdate_historic_3c_BCADhint() { + // 1- to 4-digit year + testSingleCase("29th of March 190 BC", // + new String[] { "date_historic_3c-BCADhint", "29th of March 190 BC", "BC0190-03-29" }); + } + + @Test + public void testdate_historic_3d() { + // 3-digit year + testSingleCase("March 29, 190", // + new String[] { "date_historic_3d", "March 29, 190", "0190-03-29" }); + } + + @Test + public void testdate_historic_3e() { + // 2-digit year + testSingleCase("March 29, 90", // + new String[] { "date_historic_3e", "March 29, 90", "0090-03-29" }); + } + + @Test + public void testdate_historic_4a_BCADhint() { + // 1- to 4-digit year + testSingleCase("summer of 190 BC", // + new String[] { "date_historic_4a-BCADhint", "summer of 190 BC", "BC0190-SU" }); + } + + @Test + public void testdate_historic_5a_BCADhint() { + testSingleCase("the 2nd century BC", // + new String[] { "date_historic_5a-BCADhint", "the 2nd century BC", "BC01" }); + } + + @Test + public void testdate_historic_5b_BCADhint() { + testSingleCase("beginning of the 2nd century BC", // + new String[] { "date_historic_5b-BCADhint", "beginning of the 2nd century BC", "BC01" }); + } + + @Test + public void testdate_historic_5c_BCADhint() { + // find "2nd century BC" + testSingleCase("2nd or 3rd century BC", // + new String[] { "date_historic_5c-BCADhint", "2nd", "BC01" }, // + new String[] { "date_historic_5c-BCADhint", "3rd century BC", "BC02" }); + } + + @Test + public void testdate_historic_5ad_BCADhint() { + // find "beginning 2nd century BC" + testSingleCase("beginning of the 2nd or 3rd century BC", // + new String[] { "date_historic_5d-BCADhint", "beginning of the 2nd", "BC01" }, // + new String[] { "date_historic_5a-BCADhint", "3rd century BC", "BC02" }); + } + + @Test + public void testdate_historic_6a_BCADhint() { + testSingleCase("1990s BC", // + new String[] { "date_historic_6a-BCADhint", "1990s BC", "BC199" }); + } + + @Test + public void testdate_historic_6b_BCADhint() { + testSingleCase("190s BC", // + new String[] { "date_historic_6b-BCADhint", "190s BC", "BC019" }); + } + + @Test + public void testdate_historic_6c_BCADhint() { + testSingleCase("90s BC", // + new String[] { "date_historic_6c-BCADhint", "90s BC", "BC009" }); + } + + @Test + public void testdate_historic_7ab() { + // 3-digit year + testSingleCase("in 190", new String[] { "date_historic_7ab", "190", "0190" }); + } + + @Test + public void testdate_historic_7c() { + // (2- to 4-digit year + testSingleCase("\n190\n", new String[] { "date_historic_7c", "190", "0190" }); + } + + @Test + public void testdate_historic_7d() { + // 2-digit year + testSingleCase("year of 90", // + new String[] { "date_historic_7d", "year of 90", "0090" }); + } + + @Test + public void testdate_historic_7e() { + // 3-digit year + testSingleCase("year of 190", // + new String[] { "date_historic_7e", "year of 190", "0190" }); + } + + @Test + public void testdate_historic_8ab() { + // 2-digit year + testSingleCase("in 90,", new String[] { "date_historic_8ab", "0090" }); + testSingleCase("in 90", new String[] { "date_historic_8ab", "0090" }); + } + + @Test + public void testdate_historic_0ab_negative() { + // 2- to 4-digit year + testSingleCase("in 90 cases"); + testSingleCase("in 90 nice cases"); + testSingleCase("in 90 nice law cases"); + } + + @Test + public void testdate_historic_0d_negative() { + // 2- to 4-digit year + testSingleCase("in 90 percent"); // EMPTY! + } +} diff --git a/test/de/unihd/dbs/heideltime/test/english/EnglishDateTest.java b/test/de/unihd/dbs/heideltime/test/english/EnglishDateTest.java new file mode 100644 index 00000000..18551323 --- /dev/null +++ b/test/de/unihd/dbs/heideltime/test/english/EnglishDateTest.java @@ -0,0 +1,631 @@ +package de.unihd.dbs.heideltime.test.english; + +import org.junit.Test; + +public class EnglishDateTest extends AbstractHeideltimeTest { + @Test + public void testdate_r0a() { + testSingleCase("2010-01-29", // + new String[] { "date_r0a", "2010-01-29", "2010-01-29" }); + } + + @Test + public void testdate_r0b() { + testSingleCase("10-29-99", // + new String[] { "date_r0b", "10-29-99", "0099-10-29" }); + // TODO: really? better 1999 + } + + @Test + public void testdate_r0c() { + testSingleCase("09/26/1999", // + new String[] { "date_r0c", "09/26/1999", "1999-09-26" }); + } + + @Test + public void testdate_r0d() { + testSingleCase("09/26/99", // + new String[] { "date_r0d", "09/26/99", "0099-09-26" }); + // TODO: really? better 1999 + } + + @Test + public void testdate_r0e() { + // find 7-14 + testSingleCase("7-14 (AP)", new String[] { "date_r0e", "7-14", "XXXX-07-14" }); + } + + @Test + public void testdate_r0g() { + testSingleCase("1.3.99", // + new String[] { "date_r0g", "1.3.99", "0099-03-01" }); + } + + @Test + public void testdate_r0h() { + testSingleCase("1.3.1999", // + new String[] { "date_r0h", "1.3.1999", "1999-03-01" }); + } + + @Test + public void testdate_r1a() { + testSingleCase("February 25, 2009", // + new String[] { "date_r1a", "February 25, 2009", "2009-02-25" }); + testSingleCase("Feb. 25, 2009", // + new String[] { "date_r1a", "Feb. 25, 2009", "2009-02-25" }); + testSingleCase("Feb. 25, 2009, Monday", // + new String[] { "date_r1a", "Feb. 25, 2009, Monday", "2009-02-25" }); + } + + @Test + public void testdate_r1b() { + testSingleCase("25 February 2009", // + new String[] { "date_r1b", "25 February 2009", "2009-02-25" }); + } + + @Test + public void testdate_r1c() { + testSingleCase("25 of February 2009", // + new String[] { "date_r1c", "25 of February 2009", "2009-02-25" }); + } + + @Test + public void testdate_r2a() { + testSingleCase("November 19", // + new String[] { "date_r2a", "November 19", "XXXX-11-19" }); + testSingleCase("Nov 19", // + new String[] { "date_r2a", "Nov 19", "XXXX-11-19" }); + testSingleCase("January 19th", // + new String[] { "date_r2a", "January 19th", "XXXX-01-19" }); + testSingleCase("January nineteenth", // + new String[] { "date_r2a", "January nineteenth", "XXXX-01-19" }); + } + + @Test + public void testdate_r2b() { + testSingleCase("November 19-20", // + new String[] { "date_r2a", "November 19", "XXXX-11-19" }, // + new String[] { "date_r2b", "20", "XXXX-11-20" }); + } + + @Test + public void testdate_r2c() { + testSingleCase("19 November", // + new String[] { "date_r2c", "19 November", "XXXX-11-19" }); + testSingleCase("19 Nov", // + new String[] { "date_r2c", "19 Nov", "XXXX-11-19" }); + testSingleCase("19th of November", // + new String[] { "date_r2c", "19th of November", "XXXX-11-19" }); + } + + @Test + public void testdate_r2d() { + // find May 3 + testSingleCase("3 to 6 May", // + new String[] { "date_r2d", "3", "XXXX-05-03" }, // + new String[] { "date_r2c", "6 May", "XXXX-05-06" }); + } + + @Test + public void testdate_r2e() { + // find May 3, 2004 + testSingleCase("3 to 6 May 2004", // + new String[] { "date_r2e", "3", "2004-05-03" }, // + new String[] { "date_r1b", "6 May 2004", "2004-05-06" }); + } + + @Test + public void testdate_r2a2() { + testSingleCase("January 19th of that year", // + new String[] { "date_r2a2", "January 19th of that year", "XXXX-01-19" }); + } + + @Test + public void testdate_r2c2() { + testSingleCase("19th of January of the same year", // + new String[] { "date_r2c2", "19th of January of the same year", "XXXX-01-19" }); + } + + @Test + public void testdate_r3a() { + testSingleCase("Friday October 13", // + new String[] { "date_r3a", "Friday October 13", "XXXX-10-13" }); + testSingleCase("Monday, Oct 12", // + new String[] { "date_r3a", "Monday, Oct 12", "XXXX-10-12" }); + testSingleCase("Friday October 13 2009", // + new String[] { "date_r3b", "Friday October 13 2009", "2009-10-13" }); + testSingleCase("Monday, October 12th 2009", // + new String[] { "date_r3b", "Monday, October 12th 2009", "2009-10-12" }); + } + + @Test + public void testdate_r4ab() { + // find September 18 2010 + testSingleCase("September 14 and 18, 2010", // + new String[] { "date_r4a", "September 14", "2010-09-14" }, // + new String[] { "date_r4b", "18, 2010", "2010-09-18" }); + } + + @Test + public void testdate_r5a() { + testSingleCase("tomorrow", // + new String[] { "date_r5a", "tomorrow", "XXXX-XX-XX" }); + } + + @Test + public void testdate_r5b() { + testSingleCase("earlier yesterday", // + new String[] { "date_r5b", "earlier yesterday", "XXXX-XX-XX" }); + } + + @Test + public void testdate_r5c() { + testSingleCase("Monday", // + new String[] { "date_r5c", "Monday", "XXXX-XX-XX" }); + } + + @Test + public void testdate_r5d() { + testSingleCase("earlier Monday", // + new String[] { "date_r5d", "earlier Monday", "XXXX-XX-XX" }); + } + + @Test + public void testdate_r61() { + testSingleCase("the weekend", // + new String[] { "date_r61", "the weekend", "XXXX-WXX-WE" }); + } + + @Test + public void testdate_r7a() { + testSingleCase("November 2001", // + new String[] { "date_r7a", "November 2001", "2001-11" }); + testSingleCase("Nov. 2001", // + new String[] { "date_r7a", "Nov. 2001", "2001-11" }); + testSingleCase("February of 1999", // + new String[] { "date_r7a", "February of 1999", "1999-02" }); + } + + @Test + public void testdate_r7ac() { + // find May 2001 AND June 2011 + testSingleCase("May and June 2011", // + new String[] { "date_r7c", "May", "2011-05" }, // + new String[] { "date_r7a", "June 2011", "2011-06" }); + } + + @Test + public void testdate_r8a() { + testSingleCase("November next year", // + new String[] { "date_r8a", "November next year", "XXXX-11" }); + testSingleCase("May last year", // + new String[] { "date_r8a", "May last year", "XXXX-05" }); + } + + @Test + public void testdate_r9a() { + testSingleCase("summer", // + new String[] { "date_r9a", "summer", "XXXX-SU" }); + } + + @Test + public void testdate_r9b() { + testSingleCase("winter 2001", // + new String[] { "date_r9b", "winter 2001", "2001-WI" }); + testSingleCase("winter of 2001", // + new String[] { "date_r9b", "winter of 2001", "2001-WI" }); + } + + @Test + public void testdate_r9c() { + testSingleCase("summer of 69", // + new String[] { "date_r9c", "summer of 69", "0069-SU" }); + // FIXME: shouldn't this be 1969-SU? + } + + @Test + public void testdate_r10a() { + testSingleCase("the third quarter of 2001", // + new String[] { "date_r10a", "the third quarter of 2001", "2001-Q3" }); + } + + @Test + public void testdate_r10b() { + testSingleCase("the second half", // + new String[] { "date_r10b", "the second half", "XXXX-H2" }); + } + + @Test + public void testdate_r10c() { + testSingleCase("the 2001 third quarter", // + new String[] { "date_r10c", "the 2001 third quarter", "2001-Q3" }); + } + + @Test + public void testdate_r11a() { + testSingleCase("this year's third quarter", // + new String[] { "date_r11a", "this year's third quarter", "XXXX-Q3" }); + testSingleCase("next year's first quarter", // + new String[] { "date_r11a", "next year's first quarter", "XXXX-Q1" }); + } + + @Test + public void testdate_r11b() { + testSingleCase("the year-earlier first half", // + new String[] { "date_r11b", "the year-earlier first half", "XXXX-H1" }); + } + + @Test + public void testdate_r11c() { + testSingleCase("the second half of this year", // + new String[] { "date_r11c", "the second half of this year", "XXXX-H2" }); + } + + @Test + public void testdate_r12a() { + testSingleCase("2009", // + new String[] { "date_r12a", "2009", "2009" }); + } + + @Test + public void testdate_r12b() { + testSingleCase("1850-58", // + new String[] { "date_r12a", "1850", "1850" }, // + new String[] { "date_r12b", "58", "1858" }); + } + + @Test + public void testdate_r12c() { + testSingleCase("nineteen ninety-one", // + new String[] { "date_r12c", "nineteen ninety-one", "1991" }); + } + + @Test + public void testdate_r12d() { + testSingleCase("two-thousand ten", // + new String[] { "date_r12d", "two-thousand ten", "2010" }); + } + + @Test + public void testdate_r13a() { + testSingleCase("the 1990s", // + new String[] { "date_r13a", "the 1990s", "199" }); + } + + @Test + public void testdate_r13b() { + testSingleCase("the 90s", // + new String[] { "date_r13b", "the 90s", "199" }); + } + + @Test + public void testdate_r13c() { + testSingleCase("the seventies", // + new String[] { "date_r13c", "the seventies", "197" }); + } + + @Test + public void testdate_r13d() { + testSingleCase("the nineteen seventies", // + new String[] { "date_r13d", "the nineteen seventies", "197" }); + } + + @Test + public void testdate_r14a() { + testSingleCase("the early 1990s", // + new String[] { "date_r14a", "the early 1990s", "199" }); + } + + @Test + public void testdate_r14b() { + testSingleCase("the mid-90s", // + new String[] { "date_r14b", "the mid-90s", "199" }); + } + + @Test + public void testdate_r14c() { + testSingleCase("the late seventies", // + new String[] { "date_r14c", "the late seventies", "197" }); + } + + @Test + public void testdate_r14d() { + testSingleCase("the early nineteen seventies", // + new String[] { "date_r14d", "the early nineteen seventies", "197" }); + } + + @Test + public void testdate_r15a() { + testSingleCase("the 19th century", // + new String[] { "date_r15a", "the 19th century", "18" }); + testSingleCase("the seventh century", // + new String[] { "date_r15a", "the seventh century", "06" }); + } + + @Test + public void testdate_r16a() { + testSingleCase("March", // + new String[] { "date_r16a", "March", "XXXX-03" }); + } + + @Test + public void testdate_r16b() { + testSingleCase("Early 2001", // + new String[] { "date_r16b", "Early 2001", "2001" }); + } + + @Test + public void testdate_r16c() { + testSingleCase("the beginning of November 1999", // + new String[] { "date_r16c", "the beginning of November 1999", "1999-11" }); + } + + @Test + public void testdate_r16d() { + testSingleCase("the middle of September", // + new String[] { "date_r16d", "the middle of September", "XXXX-09" }); + } + + @Test + public void testdate_r17a() { + testSingleCase("this year", // + new String[] { "date_r17a", "this year", "XXXX" }); + } + + @Test + public void testdate_r17b() { + testSingleCase("this November", // + new String[] { "date_r17b", "this November", "XXXX-11" }); + } + + @Test + public void testdate_r17c() { + testSingleCase("this November 24", // + new String[] { "date_r17c", "this November 24", "XXXX-11-24" }); + } + + @Test + public void testdate_r17d() { + testSingleCase("this Monday", // + new String[] { "date_r17d", "this Monday", "XXXX-WXX-1" }); + } + + @Test + public void testdate_r17e() { + testSingleCase("this summer", // + new String[] { "date_r17e", "this summer", "XXXX-SU" }); + } + + @Test + public void testdate_r17f() { + // using UNDEF-REF normalization + testSingleCase("this day", // + new String[] { "date_r17f", "this day", "XXXX-XX-XX" }); + } + + @Test + public void testdate_r18a() { + testSingleCase("the beginning of this year", // + new String[] { "date_r18a", "the beginning of this year", "XXXX" }); + } + + @Test + public void testdate_r18b() { + testSingleCase("the beginning of this November", // + new String[] { "date_r18b", "the beginning of this November", "XXXX-11" }); + } + + @Test + public void testdate_r18c() { + testSingleCase("the beginning of this November 24", // + new String[] { "date_r18c", "the beginning of this November 24", "XXXX-11-24" }); + } + + @Test + public void testdate_r18d() { + testSingleCase("the beginning of this Monday", // + new String[] { "date_r18d", "the beginning of this Monday", "XXXX-WXX-1" }); + } + + @Test + public void testdate_r18e() { + testSingleCase("the beginning of this summer", // + new String[] { "date_r18e", "the beginning of this summer", "XXXX-SU" }); + } + + @Test + public void testdate_r19a() { + testSingleCase("at least several years ago", // + new String[] { "date_r19a", "at least several years ago", "PAST_REF" }); + } + + @Test + public void testdate_r19b() { + testSingleCase("about twenty years ago", // + new String[] { "date_r19b", "about twenty years ago", "XXXX" }); + } + + @Test + public void testdate_r19c() { + testSingleCase("about 20 years ago", // + new String[] { "date_r19c", "about 20 years ago", "XXXX" }); + } + + @Test + public void testdate_r19d() { + testSingleCase("a month ago", // + new String[] { "date_r19d", "a month ago", "XXXX-XX" }); + } + + @Test + public void testdate_r20a() { + testSingleCase("some days later", // + new String[] { "date_r20a", "some days later", "FUTURE_REF" }); + } + + @Test + public void testdate_r20b() { + testSingleCase("about twenty days later", // + new String[] { "date_r20b", "about twenty days later", "XXXX-XX-XX" }); + } + + @Test + public void testdate_r20c() { + testSingleCase("about 20 days later", // + new String[] { "date_r20c", "about 20 days later", "XXXX-XX-XX" }); + } + + @Test + public void testdate_r20d() { + testSingleCase("a week later", // + new String[] { "date_r20d", "a week later", "XXXX-XX-XX" }); + } + + @Test + public void testdate_r21a() { + testSingleCase("twenty days earlier", // + new String[] { "date_r21a", "twenty days earlier", "XXXX-XX-XX" }); + } + + @Test + public void testdate_r21b() { + testSingleCase("about 20 days earlier", // + new String[] { "date_r21b", "about 20 days earlier", "XXXX-XX-XX" }); + } + + @Test + public void testdate_r21c() { + testSingleCase("a week earlier", // + new String[] { "date_r21c", "a week earlier", "XXXX-XX-XX" }); + } + + @Test + public void testdate_r22a() { + testSingleCase("a year ago", // + new String[] { "date_r22a", "a year ago", "XXXX" }); + } + + @Test + public void testdate_r22b() { + testSingleCase("a year later", // + new String[] { "date_r22b", "a year later", "XXXX" }); + } + + @Test + public void testdate_r23a() { + testSingleCase("the year-earlier first quarter", // + new String[] { "date_r23a", "the year-earlier first quarter", "XXXX-Q1" }); + } + + @Test + public void testdate_r23b() { + testSingleCase("the year-earlier quarter", // + new String[] { "date_r23b", "the year-earlier quarter", "XXXX-XX" }); + } + + @Test + public void testdate_r23c() { + testSingleCase("the quarter", // + new String[] { "date_r23c", "the quarter", "XXXX-XX" }); + } + + @Test + public void testdate_r24a() { + testSingleCase("Christmas", // + new String[] { "date_r24a", "Christmas", "XXXX-12-25" }); + } + + @Test + public void testdate_r24b() { + testSingleCase("Christmas 2010", // + new String[] { "date_r24b", "Christmas 2010", "2010-12-25" }); + } + + @Test + public void testdate_r24cd() { + testSingleCase("Christmas 87", // + new String[] { "date_r24cd", "Christmas 87", "0087-12-25" }); + testSingleCase("Christmas '87", // + new String[] { "date_r24cd", "Christmas '87", "0087-12-25" }); + } + + @Test + public void testdate_r25a() { + testSingleCase("Easter Sunday", // + new String[] { "date_r25a", "Easter Sunday", "XXXX-XX-XX" }); + } + + @Test + public void testdate_r25b() { + testSingleCase("Easter Sunday 2010", // + new String[] { "date_r25b", "Easter Sunday 2010", "2010-04-04" }); + } + + @Test + public void testdate_r25cd() { + testSingleCase("Easter Sunday 87", // + new String[] { "date_r25cd", "Easter Sunday 87", "0087-04-06" }); + // TODO: 1987? + testSingleCase("Easter Sunday '87", // + new String[] { "date_r25cd", "Easter Sunday '87", "0087-04-06" }); + // TODO: 1987? + } + + @Test + public void testdate_r1a_negative() { + // do not match soon if it is in "as soon as" + testSingleCase("as soon as"); + } + + @Test + public void testdate_r2a_negative() { + // if it is a verb + testSingleCase("they march the way"); + } + + @Test + public void testdate_r2b_negative() { + // if it is a verb + testSingleCase("they march the way"); + } + + @Test + public void testdate_r2c_negative() { + // if it is a verb + testSingleCase("may"); + } + + @Test + public void testdate_r2d_negative() { + // or march, fall -- if it is lower case and without any further temporal stuff around it... + testSingleCase("may"); + } + + @Test + public void testdate_r3a_negative() { + // four digit number followed by a plural noun + testSingleCase("2000 soldiers"); + } + + @Test + public void testdate_r3b_negative() { + // four digit number followed by an adjective and a plural noun + testSingleCase("2000 dead soldiers"); + } + + @Test + public void testdate_r3c_negative() { + // four digit number followed a non-temporal unit + testSingleCase("2000 kilometer"); + } + + @Test + public void testdate_r4a_negative() { + testSingleCase("W2000.1920"); + } + + @Test + public void testx_date_r11a_negative() { + testSingleCase("in his 20s"); + } +} diff --git a/test/de/unihd/dbs/heideltime/test/english/EnglishDurationTest.java b/test/de/unihd/dbs/heideltime/test/english/EnglishDurationTest.java new file mode 100644 index 00000000..29f13740 --- /dev/null +++ b/test/de/unihd/dbs/heideltime/test/english/EnglishDurationTest.java @@ -0,0 +1,87 @@ +package de.unihd.dbs.heideltime.test.english; + +import org.junit.Test; + +public class EnglishDurationTest extends AbstractHeideltimeTest { + @Test + public void testduration_r1ad() { + testSingleCase("less than sixty days", // + new String[] { "duration_r1a", "less than sixty days", "P60D" }); + testSingleCase("less than sixty minutes", // + new String[] { "duration_r1d", "less than sixty minutes", "PT1H" }); + } + + @Test + public void testduration_r1e12() { + testSingleCase("less than 60 days", // + new String[] { "duration_r1e1", "less than 60 days", "P60D" }); + testSingleCase("less than 60 minutes", // + new String[] { "duration_r1e2", "less than 60 minutes", "PT1H" }); + } + + @Test + public void testduration_r1cf() { + testSingleCase("several days", // + new String[] { "duration_r1c", "several days", "PXD" }); + testSingleCase("several minutes", // + new String[] { "duration_r1f", "several minutes", "PTXM" }); + } + + @Test + public void testduration_r2ad() { + testSingleCase("at least the last twenty years", // + new String[] { "duration_r2a", "at least the last twenty years", "P20Y" }); + testSingleCase("at least the last twenty minutes", // + new String[] { "duration_r2d", "at least the last twenty minutes", "PT20M" }); + } + + @Test + public void testduration_r2be() { + testSingleCase("at least the last 20 years", // + new String[] { "duration_r2b", "at least the last 20 years", "P20Y" }); + testSingleCase("at least the last 20 minutes", // + new String[] { "duration_r2e", "at least the last 20 minutes", "PT20M" }); + } + + @Test + public void testduration_r2cf() { + testSingleCase("at least the last several years", // + new String[] { "duration_r2c", "at least the last several years", "PXY" }); + testSingleCase("at least the last several minutes", // + new String[] { "duration_r2f", "at least the last several minutes", "PTXM" }); + } + + @Test + public void testduration_r3a() { + testSingleCase("a three-year period", // + new String[] { "duration_r3a", "a three-year period", "P3Y" }); + } + + @Test + public void testduration_r3b() { + testSingleCase("a 300 year period", // + new String[] { "duration_r3b", "a 300 year period", "P300Y" }); + } + + @Test + public void testduration_r5b1() { + testSingleCase("two and six days", // + new String[] { "duration_r5b1", "two", "P2D" }, // + new String[] { "duration_r1a", "six days", "P6D" }); + } + + @Test + public void testduration_r1a_negative() { + testSingleCase("about 200 years older"); // EMPTY! + } + + @Test + public void testduration_r1b_negative() { + testSingleCase("several days old"); // EMPTY! + } + + @Test + public void testduration_r1c_negative() { + testSingleCase("59-year-old"); // EMPTY! + } +} diff --git a/test/de/unihd/dbs/heideltime/test/english/EnglishIntervalTest.java b/test/de/unihd/dbs/heideltime/test/english/EnglishIntervalTest.java new file mode 100644 index 00000000..6d65ea03 --- /dev/null +++ b/test/de/unihd/dbs/heideltime/test/english/EnglishIntervalTest.java @@ -0,0 +1,106 @@ +package de.unihd.dbs.heideltime.test.english; + +import org.apache.uima.jcas.JCas; +import org.junit.Test; + +import de.unihd.dbs.heideltime.standalone.components.impl.StandaloneConfigContext; +import de.unihd.dbs.uima.annotator.intervaltagger.IntervalTagger; + +public class EnglishIntervalTest extends AbstractHeideltimeTest { + protected IntervalTagger intervaltagger; + + @Override + public void init() { + super.init(); + try { + intervaltagger = new IntervalTagger(); + StandaloneConfigContext aContext = new StandaloneConfigContext(); + + // construct a context for the uima engine + aContext.setConfigParameterValue(IntervalTagger.PARAM_LANGUAGE, "english"); + aContext.setConfigParameterValue(IntervalTagger.PARAM_INTERVALS, Boolean.TRUE); + aContext.setConfigParameterValue(IntervalTagger.PARAM_INTERVAL_CANDIDATES, Boolean.FALSE); + + intervaltagger.initialize(aContext); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Test + public void testinterval_01() { + testSingleCase("from 1999 to 2012", // + new String[] { "interval_01", "from 1999 to 2012" }); + } + + @Test + public void testinterval_02() { + testSingleCase("between March and May", // + new String[] { "interval_02", "between March and May" }); + } + + @Test + public void testinterval_03() { + testSingleCase("20.3.2003 - 1.5.2003", // + new String[] { "interval_03", "20.3.2003 - 1.5.2003" }); + } + + @Test + public void testinterval_04() { + testSingleCase("20.3.2003 to 1.5.2003", // + new String[] { "interval_04", "20.3.2003 to 1.5.2003" }); + } + + @Test + public void testinterval_05() { + testSingleCase("on 20.3.2003 the war began and it lasted until 1.5.2003", // + new String[] { "interval_05", "on 20.3.2003 the war began and it lasted until 1.5.2003" }); + } + + @Test + public void testinterval_06() { + testSingleCase("for December after leaving in February", // + new String[] { "interval_06", "for December after leaving in February" }); + } + + @Test + public void testinterval_07() { + testSingleCase("began on March 20 in 2003 and ended on May 1", // + new String[] { "interval_07", "began on March 20 in 2003 and ended on May 1" }); + } + + @Test + public void testinterval_08() { + testSingleCase("in 1999/2000", // + new String[] { "interval_08", "in 1999/2000" }); + } + + @Test + public void testinterval_09() { + testSingleCase("War ended in May, after fighting from March on", // + new String[] { "interval_09", "War ended in May, after fighting from March on" }); + } + + @Test + public void testinterval_10() { + testSingleCase("March, April and May", // + new String[] { "interval_10", "March, April and May" }); + } + + @Test + public void testinterval_11() { + testSingleCase("Monday, Thuesday, Wednesday and Thursday", // + new String[] { "interval_11", "Monday, Thuesday, Wednesday and Thursday" }); + } + + protected JCas analyze(String fragment) { + try { + JCas jcas = tokenize(fragment); + heideltime.process(jcas); + intervaltagger.process(jcas); + return jcas; + } catch (Exception e) { + throw new RuntimeException(e); + } + } +} diff --git a/test/de/unihd/dbs/heideltime/test/english/EnglishSetRules.java b/test/de/unihd/dbs/heideltime/test/english/EnglishSetRules.java new file mode 100644 index 00000000..2ec8a4c0 --- /dev/null +++ b/test/de/unihd/dbs/heideltime/test/english/EnglishSetRules.java @@ -0,0 +1,103 @@ +package de.unihd.dbs.heideltime.test.english; + +import org.junit.Test; + +public class EnglishSetRules extends AbstractHeideltimeTest { + @Test + public void testset_r1a() { + testSingleCase("each day", // + new String[] { "set_r1a", "each day", "P1D" }); + } + + @Test + public void testset_r1b() { + testSingleCase("every Monday", // + new String[] { "set_r1b", "every Monday", "XXXX-WXX-1" }); + } + + @Test + public void testset_r1c() { + testSingleCase("each September", // + new String[] { "set_r1c", "each September", "XXXX-09" }); + } + + @Test + public void testset_r1d() { + testSingleCase("every summer", // + new String[] { "set_r1d", "every summer", "XXXX-SU" }); + } + + @Test + public void testset_r2a() { + testSingleCase("once a week", // + new String[] { "set_r2a", "once a week", "P1W" }); + } + + @Test + public void testset_r2b() { + testSingleCase("twice a month", // + new String[] { "set_r2b", "twice a month", "P1M" }); + } + + @Test + public void testset_r2c() { + testSingleCase("three times a month", // + new String[] { "set_r2c", "three times a month", "P1M" }); + } + + @Test + public void testset_r2d() { + testSingleCase("40 times per month", // + new String[] { "set_r2d", "40 times per month", "P1M" }); + } + + @Test + public void testset_r2e() { + testSingleCase("a month", // + new String[] { "set_r2e", "a month", "P1M" }); + } + + @Test + public void testset_r2f() { + testSingleCase("a minute", // + new String[] { "set_r2f", "a minute", "PT1M" }); + } + + @Test + public void testset_r3a() { + testSingleCase("every 5 years", // + new String[] { "set_r3a", "every 5 years", "P5Y" }); + } + + @Test + public void testset_r3b() { + testSingleCase("every two days", // + new String[] { "set_r3b", "every two days", "P2D" }); + } + + @Test + public void testset_r4a() { + testSingleCase("2 days each week", // + new String[] { "set_r4a", "2 days each week", "P1W" }); + } + + @Test + public void testset_r5a() { + testSingleCase("annually", // + new String[] { "set_r5a", "annually", "XXXX" }); + } + + @Test + public void testset_r6a() { + testSingleCase("Monday afternoons", // + new String[] { "set_r6a", "Monday afternoons", "XXXX-WXX-1TAF" }); + } + + @Test + public void testset_r6b() { + // find: Monday nights + testSingleCase("Monday and Tuesday nights", // + new String[] { "set_r6b", "Monday", "XXXX-WXX-1TNI" }, // + new String[] { "set_r6a", "Tuesday nights", "XXXX-WXX-2TNI" }); + } +} diff --git a/test/de/unihd/dbs/heideltime/test/english/EnglishTimeTest.java b/test/de/unihd/dbs/heideltime/test/english/EnglishTimeTest.java new file mode 100644 index 00000000..002db864 --- /dev/null +++ b/test/de/unihd/dbs/heideltime/test/english/EnglishTimeTest.java @@ -0,0 +1,192 @@ +package de.unihd.dbs.heideltime.test.english; + +import org.junit.Test; + +public class EnglishTimeTest extends AbstractHeideltimeTest { + @Test + public void testtime_r1a() { + testSingleCase("2009-12-19T17:00:00", // + new String[] { "time_r1a", "2009-12-19T17:00:00", "2009-12-19T17:00:00" }); + testSingleCase("2009-12-19 17:00:00", // + new String[] { "time_r1a", "2009-12-19 17:00:00", "2009-12-19T17:00:00" }); + } + + @Test + public void testtime_r1b() { + testSingleCase("2009-12-19T17:00", // + new String[] { "time_r1b", "2009-12-19T17:00", "2009-12-19T17:00" }); + } + + @Test + public void testtime_r1c() { + testSingleCase("12/29/2000 20:29", // + new String[] { "time_r1c", "12/29/2000 20:29", "2000-12-29T20:29" }); + } + + @Test + public void testtime_r1d() { + testSingleCase("12/29/2000 20:29:29", // + new String[] { "time_r1d", "12/29/2000 20:29:29", "2000-12-29T20:29:29" }); + } + + @Test + public void testtime_r1e() { + testSingleCase("12/29/2000 20:29:29.79", // + new String[] { "time_r1e", "12/29/2000 20:29:29.79", "2000-12-29T20:29:29.79" }); + } + + @Test + public void testtime_r2a() { + testSingleCase("09-24-99 1145EST", // + new String[] { "time_r2a", "09-24-99 1145EST", "0099-09-24T11:45-05" }); + // TODO: 0099? can't we do better? + } + + @Test + public void testtime_r2b() { + testSingleCase("November 24, 2011 1535 GMT", // + new String[] { "time_r2b", "November 24, 2011 1535 GMT", "2011-11-24T15:35" }); + } + + @Test + public void testtime_r2d() { + testSingleCase("Wed, 29 Dec 2004 00:28:16 +0000", // + new String[] { "time_r2d", "Wed, 29 Dec 2004 00:28:16 +0000", "2004-12-29T00:28:16+00" }); + testSingleCase("Sat, 29 Jan 2005 17:21:13 -0600", // + new String[] { "time_r2d", "Sat, 29 Jan 2005 17:21:13 -0600", "2005-01-29T17:21:13-06" }); + testSingleCase("1 Feb 2005 16:13:33 +1300", // + new String[] { "time_r2d", "1 Feb 2005 16:13:33 +1300", "2005-02-01T16:13:33+13" }); + } + + @Test + public void testtime_r3a() { + testSingleCase("midnight Monday", // + new String[] { "time_r3a", "midnight Monday", "XXXX-XX-XXT24:00" }); + // TODO: 'monday' is lost? + } + + @Test + public void testtime_r3b() { + testSingleCase("Monday night", // + new String[] { "time_r3b", "Monday night", "XXXX-XX-XXTNI" }); + // TODO: 'monday' is lost? + } + + @Test + public void testtime_r3b2() { + testSingleCase("early Friday morning", // + new String[] { "time_r3b2", "early Friday morning", "XXXX-XX-XXTMO" }); + // TODO: 'friday' is lost? + } + + @Test + public void testtime_r3c() { + testSingleCase("midnight today", // + new String[] { "time_r3c", "midnight today", "XXXX-XX-XXT24:00" }); + } + + @Test + public void testtime_r3d() { + testSingleCase("yesterday morning", // + new String[] { "time_r3d", "yesterday morning", "XXXX-XX-XXTMO" }); + } + + @Test + public void testtime_r3d2() { + testSingleCase("late yesterday evening", // + new String[] { "time_r3d2", "late yesterday evening", "XXXX-XX-XXTEV" }); + } + + @Test + public void testtime_r3e() { + testSingleCase("last Friday morning", // + new String[] { "time_r3e", "last Friday morning", "XXXX-XX-XXTMO" }); + // TODO: 'friday' is lost? + } + + @Test + public void testtime_r4a() { + testSingleCase("earlier this afternoon", // + new String[] { "time_r4a", "earlier this afternoon", "XXXX-XX-XXTAF" }); + testSingleCase("later last night", // + new String[] { "time_r4a", "later last night", "XXXX-XX-XXTNI" }); + } + + @Test + public void testtime_r4b() { + testSingleCase("tonight", // + new String[] { "time_r4b", "tonight", "XXXX-XX-XXTNI" }); + } + + @Test + public void testtime_r5a() { + testSingleCase("circa 9 a.m.", // + new String[] { "time_r5a", "circa 9 a.m.", "XXXX-XX-XXT09:00" }); + } + + @Test + public void testtime_r5b() { + testSingleCase("11 PM", // + new String[] { "time_r5b", "11 PM", "XXXX-XX-XXT23:00" }); + } + + @Test + public void testtime_r5c() { + testSingleCase("11:30 a.m.", // + new String[] { "time_r5c", "11:30 a.m.", "XXXX-XX-XXT11:30" }); + } + + @Test + public void testtime_r5d() { + testSingleCase("9:30 p.m.", // + new String[] { "time_r5d", "9:30 p.m.", "XXXX-XX-XXT21:30" }); + } + + @Test + public void testtime_r5e() { + testSingleCase("10:30:34 a.m.", // + new String[] { "time_r5e", "10:30:34 a.m.", "XXXX-XX-XXT10:30:34" }); + } + + @Test + public void testtime_r5f() { + testSingleCase("10:30:34 p.m.", // + new String[] { "time_r5f", "10:30:34 p.m.", "XXXX-XX-XXT22:30:34" }); + } + + @Test + public void testtime_r6a() { + testSingleCase("9 am Wednesday", // + new String[] { "time_r6a", "9 am Wednesday", "XXXX-XX-XXT09:00" }); + } + + @Test + public void testtime_r6b() { + testSingleCase("9 pm Wednesday", // + new String[] { "time_r6b", "9 pm Wednesday", "XXXX-XX-XXT21:00" }); + } + + @Test + public void testtime_r6c() { + testSingleCase("9:30 a.m. Wednesday", // + new String[] { "time_r6c", "9:30 a.m. Wednesday", "XXXX-XX-XXT09:30" }); + } + + @Test + public void testtime_r6d() { + testSingleCase("9:30 p.m. Wednesday", // + new String[] { "time_r6d", "9:30 p.m. Wednesday", "XXXX-XX-XXT21:30" }); + } + + @Test + public void testtime_r8a() { + testSingleCase("the morning of April 18, 1775", // + new String[] { "time_r8a", "the morning of April 18, 1775", "1775-04-18TMO" }); + } + + @Test + public void testtime_r8b() { + testSingleCase("the morning of April 18", // + new String[] { "time_r8b", "the morning of April 18", "XXXX-04-18TMO" }); + } +} diff --git a/test/de/unihd/dbs/heideltime/test/english/TestEnglishRules.java b/test/de/unihd/dbs/heideltime/test/english/TestEnglishRules.java deleted file mode 100644 index 18a41d18..00000000 --- a/test/de/unihd/dbs/heideltime/test/english/TestEnglishRules.java +++ /dev/null @@ -1,303 +0,0 @@ -package de.unihd.dbs.heideltime.test.english; - -import static org.junit.Assert.fail; - -import org.apache.uima.cas.text.AnnotationIndex; -import org.apache.uima.jcas.JCas; -import org.junit.Before; -import org.junit.Test; - -import de.unihd.dbs.heideltime.standalone.DocumentType; -import de.unihd.dbs.heideltime.standalone.HeidelTimeStandalone; -import de.unihd.dbs.heideltime.standalone.OutputType; -import de.unihd.dbs.heideltime.standalone.POSTagger; -import de.unihd.dbs.heideltime.standalone.components.ResultFormatter; -import de.unihd.dbs.heideltime.standalone.exceptions.DocumentCreationTimeMissingException; -import de.unihd.dbs.uima.annotator.heideltime.resources.Language; -import de.unihd.dbs.uima.types.heideltime.Timex3; - -public class TestEnglishRules { - String[][] CASES = { // Rule name, sample text, [expected covered text] - { "date_historic_1a-BCADhint", "190 BC" }, // 1- to 4-digit year - { "date_historic_1b-BCADhint", "BC 190" }, // 1- to 4-digit year - { "date_historic_1c-BCADhint", "190 or 180 BC" }, // find "190 BC"; 1- to 4-digit year - { "date_historic_2a-BCADhint", "March 190 BC" }, // 1- to 4-digit year - { "date_historic_2b", "March 190" }, // 3-digit year - { "date_historic_2c", "in March 90", "March 90" }, // 2-digit year - { "date_historic_2d", "March of 90", "March of 90" }, // 2-digit year - { "date_historic_3a-BCADhint", "March 29, 190 BC" }, // 1- to 4-digit year - { "date_historic_3b-BCADhint", "29 March 190 BC" }, // 1- to 4-digit year - { "date_historic_3c-BCADhint", "29th of March 190 BC" }, // 1- to 4-digit year - { "date_historic_3d-BCADhint", "March 29, 190" }, // 3-digit year - { "date_historic_3e-BCADhint", "March 29, 90" }, // 2-digit year - { "date_historic_4a-BCADhint", "summer of 190 BC" }, // 1- to 4-digit year - { "date_historic_5a-BCADhint", "the 2nd century BC" }, // - { "date_historic_5b-BCADhint", "beginning of the 2nd century BC" }, // - { "date_historic_5c-BCADhint", "2nd or 3rd century BC" }, // find "2nd century BC" - { "date_historic_5d-BCADhint", "beginning of the 2nd or 3rd century BC" }, // find "beginning 2nd century BC" - { "date_historic_6a-BCADhint", "1990s BC" }, // - { "date_historic_6b-BCADhint", "190s BC" }, // - { "date_historic_6c-BCADhint", "90s BC" }, // - { "date_historic_7ab", "in 190", "190" }, // 3-digit year - { "date_historic_7ab", "in 190,", "190" }, // 3-digit year - { "date_historic_7c", "\n190\n", "190" }, // (2- to 4-digit year - { "date_historic_7d", "year of 90" }, // 2-digit year - { "date_historic_7e", "year of 190" }, // 3-digit year - { "date_historic_8ab", "in 90,", "90" }, // 2-digit year - { "date_historic_8ab", "in 90", "90" }, // 2-digit year - { "date_historic_0ab", "in 90 cases", "" }, // 2- to 4-digit year - { "date_historic_0ab", "in 90 nice cases", "" }, // 2- to 4-digit year - { "date_historic_0ab", "in 90 nice law cases", "" }, // 2- to 4-digit year - { "date_historic_0d", "in 90 percent", "" }, // 2- to 4-digit year - { "date_r0a", "2010-01-29" }, // - { "date_r0b", "10-29-99" }, // - { "date_r0c", "09/26/1999" }, // - { "date_r0d", "09/26/99" }, // - { "date_r0e", "7-14 (AP)", "7-14" }, // find 7-14 - { "date_r0g", "1.3.99" }, // - { "date_r0h", "1.3.1999" }, // - { "date_r1a", "February 25, 2009" }, // - { "date_r1a", "Feb. 25, 2009" }, // - { "date_r1a", "Feb. 25, 2009, Monday" }, // - { "date_r1b", "25 February 2009" }, // - { "date_r1c", "25 of February 2009" }, // - { "date_r2a", "November 19" }, // - { "date_r2a", "Nov 19" }, // - { "date_r2a", "January 19th" }, // - { "date_r2a", "January nineteenth" }, // - { "date_r2b", "November 19-20" }, // find November 20 - { "date_r2c", "19 November" }, // - { "date_r2c", "19 Nov" }, // - { "date_r2c", "19th of November" }, // - { "date_r2d", "3 to 6 May" }, // find May 3 - { "date_r2e", "3 to 6 May 2004" }, // find May 3, 2004 - { "date_r2a2", "January 19th of that year" }, // - { "date_r2c2", "19th of January of the same year" }, // - { "date_r3a", "Friday October 13" }, // - { "date_r3a", "Monday, Oct 12" }, // - { "date_r3b", "Friday October 13 2009" }, // - { "date_r3b", "Monday, October 12th 2009" }, // - { "date_r4a", "September 14 and 18, 2010" }, // find September 14 2010 - { "date_r4b", "September 14 and 18, 2010" }, // find September 18 2010 - { "date_r5a", "tomorrow" }, // - { "date_r5b", "earlier yesterday" }, // - { "date_r5c", "Monday" }, // - { "date_r5d", "earlier Monday" }, // - { "date_r61", "the weekend" }, // - { "date_r7a", "November 2001" }, // - { "date_r7a", "Nov. 2001" }, // - { "date_r7a", "February of 1999" }, // - { "date_r7b", "May and June 2011" }, // find May 2001 AND June 2011 - { "date_r8a", "November next year" }, // - { "date_r8a", "May last year" }, // - { "date_r9a", "summer" }, // - { "date_r9b", "winter 2001" }, // - { "date_r9b", "winter of 2001" }, // - { "date_r9c", "summer of 69" }, // - { "date_r10a", "the third quarter of 2001" }, // - { "date_r10b", "the second half" }, // - { "date_r10c", "the 2001 third quarter" }, // - { "date_r11a", "this year's third quarter" }, // - { "date_r11a", "next year's first quarter" }, // - { "date_r11b", "the year-earlier first half" }, // - { "date_r11c", "the second half of this year" }, // - { "date_r12a", "2009" }, // - { "date_r12b", "1850-58" }, // find: 1858 - { "date_r12c", "nineteen ninety-one" }, // - { "date_r12d", "two-thousand ten" }, // - { "date_r13a", "the 1990s" }, // - { "date_r13b", "the 90s" }, // - { "date_r13c", "the seventies" }, // - { "date_r13d", "the nineteen seventies" }, // - { "date_r14a", "the early 1990s" }, // - { "date_r14b", "the mid-90s" }, // - { "date_r14c", "the late seventies" }, // - { "date_r14d", "the early nineteen seventies" }, // - { "date_r15a", "the 19th century" }, // - { "date_r15a", "the seventh century" }, // - { "date_r16a", "March" }, // - { "date_r16b", "Early 2001" }, // - { "date_r16c", "the beginning of November 1999" }, // - { "date_r16d", "the middle of September" }, // - { "date_r17a", "this year" }, // - { "date_r17b", "this November" }, // - { "date_r17c", "this November 24" }, // - { "date_r17d", "this Monday" }, // - { "date_r17e", "this summer" }, // - { "date_r17f", "this day" }, // using UNDEF-REF normalization - { "date_r18a", "the beginning of this year" }, // - { "date_r18b", "the beginning of this November" }, // - { "date_r18c", "the beginning of this November 24" }, // - { "date_r18d", "the beginning of this Monday" }, // - { "date_r18e", "the beginning of this summer" }, // - { "date_r19a", "at least several years ago" }, // - { "date_r19b", "about twenty years ago" }, // - { "date_r19c", "about 20 years ago" }, // - { "date_r19d", "a month ago" }, // - { "date_r20a", "some days later" }, // - { "date_r20b", "about twenty days later" }, // - { "date_r20c", "about 20 days later" }, // - { "date_r20d", "a week later" }, // - { "date_r21a", "twenty days earlier" }, // - { "date_r21b", "about 20 days earlier" }, // - { "date_r21c", "a week earlier" }, // - { "date_r22a", "a year ago" }, // - { "date_r22b", "a year later" }, // - { "date_r23a", "the year-earlier first quarter" }, // - { "date_r23b", "the year-earlier quarter" }, // - { "date_r23c", "the quarter" }, // - { "date_r24a", "Christmas" }, // - { "date_r24b", "Christmas 2010" }, // - { "date_r24cd", "Christmas 87" }, // - { "date_r24cd", "Christmas '87" }, // - { "date_r25a", "Easter Sunday" }, // - { "date_r25b", "Easter Sunday 2010" }, // - { "date_r25cd", "Easter Sunday 87" }, // - { "date_r25cd", "Easter Sunday '87" }, // - { "date_r1a_negative", "as soon as" }, // do not match soon if it is in "as soon as" - { "date_r2a_negative", "they march the way" }, // if it is a verb - { "date_r2b_negative", "they march the way" }, // if it is a verb - { "date_r2c_negative", "may" }, // if it is a verb - { "date_r2d_negative", "may" }, // or march, fall -- if it is lower case and without any further temporal stuff around it... - { "date_r3a_negative", "2000 soldiers" }, // four digit number followed by a plural noun - { "date_r3b_negative", "2000 dead soldiers" }, // four digit number followed by an adjective and a plural noun - { "date_r3c_negative", "2000 kilometer" }, // four digit number followed a non-temporal unit - { "date_r4a_negative", "W2000.1920" }, // - { "x_date_r11a_negative", "in his 20s" }, // - { "duration_r1ad", "less than sixty days" }, // - { "duration_r1e12", "less than 60 days" }, // - { "duration_r1cf", "several days" }, // - { "duration_r1ad", "less than sixty minutes" }, // - { "duration_r1e12", "less than 60 minutes" }, // - { "duration_r1cf", "several minutes" }, // - { "duration_r2ad", "at least the last twenty years" }, // - { "duration_r2be", "at least the last 20 years" }, // - { "duration_r2cf", "at least the last several years" }, // - { "duration_r2ad", "at least the last twenty minutes" }, // - { "duration_r2be", "at least the last 20 minutes" }, // - { "duration_r2cf", "at least the last several minutes" }, // - { "duration_r3ac", "a three-year period" }, // - { "duration_r3bd", "a 300 year period" }, // - { "duration_r3ac", "a three-hour period" }, // - { "duration_r3bd", "a 300 hour period" }, // - { "duration_r5_a", "two and six days", "two" }, - { "duration_r1a_negative", "about 200 years older" }, // - { "duration_r1b_negative", "several days old" }, // - { "duration_r1c_negative", "59-year-old" }, // - /* */ - /* - { "interval_interval_01", "from 1999 to 2012" }, // - { "interval_interval_02", "between March and May" }, // - { "interval_interval_03", "20.3.2003 - 1.5.2003" }, // - { "interval_interval_04", "20.3.2003 to 1.5.2003" }, // - { "interval_interval_05", "on 20.3.2003 the war began and it lastet until 1.5.2003" }, // - { "interval_interval_06", "for December after leaving in February" }, // - { "interval_interval_07", "began on March 20 in 2003 and ended on May 1" }, // - { "interval_interval_08", "in 1999/2000" }, // - { "interval_interval_09", "War ended in May, after fighting from March on" }, // - { "interval_interval_10", "March, April and May" }, // - { "interval_interval_11", "Monday, Thuesday, Wednesday and Thursday" }, // - { "set_r1a", "each day" }, // - { "set_r1b", "every Monday" }, // - { "set_r1c", "each September" }, // - { "set_r1d", "every summer" }, // - { "set_r2a", "once a week" }, // - { "set_r2b", "twice a month" }, // - { "set_r2c", "three times a month" }, // - { "set_r2d", "40 times per month" }, // - { "set_r2e", "a month" }, // - { "set_r2f", "a minute" }, // - { "set_r3a", "every 5 years" }, // - { "set_r3b", "every two days" }, // - { "set_r4a", "2 days each week" }, // - { "set_r5a", "annually" }, // - { "set_r6a", "Monday afternoons" }, // - { "set_r6b", "Monday and Tuesday nights" }, // find: Monday nights - */ - { "time_r1a", "2009-12-19T17:00:00" }, // - { "time_r1a", "2009-12-19 17:00:00" }, // - { "time_r1b", "2009-12-19T17:00" }, // - { "time_r1c", "12/29/2000 20:29" }, // - { "time_r1d", "12/29/2000 20:29:29" }, // - { "time_r1e", "12/29/2000 20:29:29.79" }, // - { "time_r2a", "09-24-99 1145EST" }, // TimeStamp style with timezone information - { "time_r2b", "November 24, 2011 1535 GMT" }, // - { "time_r2d", "Wed, 29 Dec 2004 00:28:16 +0000" }, // - { "time_r2d", "Sat, 29 Jan 2005 17:21:13 -0600" }, // - { "time_r2d", "1 Feb 2005 16:13:33 +1300" }, // - { "time_r3a", "midnight Monday" }, // - { "time_r3b", "Monday night" }, // - { "time_r3b2", "early Friday morning" }, // - { "time_r3c", "midnight today" }, // - { "time_r3d", "yesterday morning" }, // - { "time_r3d2", "late yesterday evening" }, // - { "time_r3e", "last Friday morning" }, // - { "time_r4a", "earlier this afternoon" }, // - { "time_r4a", "later last night" }, // - { "time_r4b", "tonight" }, // - { "time_r5a", "circa 9 a.m." }, // - { "time_r5b", "11 PM" }, // - { "time_r5c", "11:30 a.m." }, // - { "time_r5d", "9:30 p.m." }, // - { "time_r5e", "10:30:34 a.m." }, // - { "time_r5f", "10:30:34 p.m." }, // - { "time_r6a", "9 am Wednesday" }, // - { "time_r6b", "9 pm Wednesday" }, // - { "time_r6c", "9:30 a.m. Wednesday" }, // - { "time_r6d", "9:30 p.m. Wednesday" }, // - { "time_r8a", "the morning of April 18, 1775" }, // - { "time_r8b", "the morning of April 18" }, // - /* */ - }; - - private HeidelTimeStandalone standalone; - - @Before - public void init() { - standalone = new HeidelTimeStandalone(Language.ENGLISH, DocumentType.NARRATIVES, // - OutputType.XMI, "test/test.props", POSTagger.NO); - } - - @Test - public void testEnglishRules() { - for (String[] set : CASES) { - testSingleCase(set[0], set[1], set.length >= 3 ? set[2] : set[1]); - } - } - - ResultFormatter formatter = new TestResultFormatter(); - - private static class TestResultFormatter implements ResultFormatter { - @Override - public String format(JCas jcas) throws Exception { - StringBuilder buf = new StringBuilder(); - String text = jcas.getDocumentText(); - AnnotationIndex times = jcas.getAnnotationIndex(Timex3.type); - for (Timex3 timex3 : times) { - buf.append(timex3.getFoundByRule()); - buf.append('\t'); - buf.append(text.substring(timex3.getBegin(), timex3.getEnd())); - buf.append('\n'); - } - return buf.toString(); - } - } - - // NOT a @Test, only a part. - private void testSingleCase(String rule, String fragment, String expectf) { - String expected = rule + "\t" + expectf; - if (expected.contains("negative")) - expected = ""; - try { - String result = standalone.process(fragment, null, formatter); - String[] parts = result.split("\n"); - for (String part : parts) { - if (expected.equals(part.replaceAll("-(relative|explicit)", ""))) - continue; - System.err.println(rule + "\t" + fragment + " -> " + part); - } - } catch (DocumentCreationTimeMissingException e) { - fail(e.getMessage()); - } - } -} From 7b28d22947a830a6e47933299da9f5bbd10cc007 Mon Sep 17 00:00:00 2001 From: Erich Schubert Date: Wed, 18 Jan 2017 18:37:04 +0100 Subject: [PATCH 3/8] Add @Ignore to tests that require POS tags for now. --- .../test/english/EnglishDateHistoricTest.java | 11 +++++++---- .../dbs/heideltime/test/english/EnglishDateTest.java | 10 +++++++++- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/test/de/unihd/dbs/heideltime/test/english/EnglishDateHistoricTest.java b/test/de/unihd/dbs/heideltime/test/english/EnglishDateHistoricTest.java index bbbea12e..177069f4 100644 --- a/test/de/unihd/dbs/heideltime/test/english/EnglishDateHistoricTest.java +++ b/test/de/unihd/dbs/heideltime/test/english/EnglishDateHistoricTest.java @@ -1,5 +1,6 @@ package de.unihd.dbs.heideltime.test.english; +import org.junit.Ignore; import org.junit.Test; public class EnglishDateHistoricTest extends AbstractHeideltimeTest { @@ -106,11 +107,11 @@ public void testdate_historic_5b_BCADhint() { } @Test - public void testdate_historic_5c_BCADhint() { + public void testdate_historic_5ca_BCADhint() { // find "2nd century BC" testSingleCase("2nd or 3rd century BC", // new String[] { "date_historic_5c-BCADhint", "2nd", "BC01" }, // - new String[] { "date_historic_5c-BCADhint", "3rd century BC", "BC02" }); + new String[] { "date_historic_5a-BCADhint", "3rd century BC", "BC02" }); } @Test @@ -168,10 +169,12 @@ public void testdate_historic_7e() { @Test public void testdate_historic_8ab() { // 2-digit year - testSingleCase("in 90,", new String[] { "date_historic_8ab", "0090" }); - testSingleCase("in 90", new String[] { "date_historic_8ab", "0090" }); + testSingleCase("in 90,", new String[] { "date_historic_8ab", "90", "0090" }); + testSingleCase("in 90", new String[] { "date_historic_8ab", "90", "0090" }); } + // FIXME: add POS tags for unit test + @Ignore("Needs POS") @Test public void testdate_historic_0ab_negative() { // 2- to 4-digit year diff --git a/test/de/unihd/dbs/heideltime/test/english/EnglishDateTest.java b/test/de/unihd/dbs/heideltime/test/english/EnglishDateTest.java index 18551323..dfd23395 100644 --- a/test/de/unihd/dbs/heideltime/test/english/EnglishDateTest.java +++ b/test/de/unihd/dbs/heideltime/test/english/EnglishDateTest.java @@ -1,5 +1,6 @@ package de.unihd.dbs.heideltime.test.english; +import org.junit.Ignore; import org.junit.Test; public class EnglishDateTest extends AbstractHeideltimeTest { @@ -251,8 +252,11 @@ public void testdate_r11a() { @Test public void testdate_r11b() { + // FIXME: this is supposed to match r11b, but is matched by date_r23a-relative + // As far as I can tell, they should both be good. testSingleCase("the year-earlier first half", // - new String[] { "date_r11b", "the year-earlier first half", "XXXX-H1" }); + new String[] { "date_r23a", "the year-earlier first half", "XXXX-H1" }); + // new String[] { "date_r11b", "the year-earlier first half", "XXXX-H1" }); } @Test @@ -601,12 +605,16 @@ public void testdate_r2d_negative() { testSingleCase("may"); } + // FIXME: add POS information + @Ignore("Requires POS tagging") @Test public void testdate_r3a_negative() { // four digit number followed by a plural noun testSingleCase("2000 soldiers"); } + // FIXME: add POS information + @Ignore("Requires POS tagging") @Test public void testdate_r3b_negative() { // four digit number followed by an adjective and a plural noun From 927a4b17f78ade1104f3517ed8a52d325a46f845 Mon Sep 17 00:00:00 2001 From: Erich Schubert Date: Thu, 19 Jan 2017 10:43:19 +0100 Subject: [PATCH 4/8] Disable "second half" which causes sports false positives --- .../heideltime/test/english/EnglishDateTest.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/test/de/unihd/dbs/heideltime/test/english/EnglishDateTest.java b/test/de/unihd/dbs/heideltime/test/english/EnglishDateTest.java index dfd23395..e9f54ec9 100644 --- a/test/de/unihd/dbs/heideltime/test/english/EnglishDateTest.java +++ b/test/de/unihd/dbs/heideltime/test/english/EnglishDateTest.java @@ -230,6 +230,7 @@ public void testdate_r10a() { new String[] { "date_r10a", "the third quarter of 2001", "2001-Q3" }); } + @Ignore("Disabled, false positives: shot a goal in the second half") @Test public void testdate_r10b() { testSingleCase("the second half", // @@ -485,6 +486,18 @@ public void testdate_r20d() { new String[] { "date_r20d", "a week later", "XXXX-XX-XX" }); } + @Test + public void testdate_r20f() { + testSingleCase("on 30 minutes something happened", // + new String[] { "date_r20f", "on 30 minutes", "UNDEF-REF-minute-PLUS-30" }); + } + + @Test + public void testdate_r20g() { + testSingleCase("on approximately thirty minutes something happened", // + new String[] { "date_r20g", "on approximately thirty minutes", "UNDEF-REF-minute-PLUS-30" }); + } + @Test public void testdate_r21a() { testSingleCase("twenty days earlier", // From e5c1e8315a34e684396ee347d87d9cd8a09982e8 Mon Sep 17 00:00:00 2001 From: Erich Schubert Date: Thu, 19 Jan 2017 16:38:03 +0100 Subject: [PATCH 5/8] Improve tests, by usually using colloquial (except for historic) --- .../test/english/AbstractHeideltimeTest.java | 6 +- .../test/english/EnglishDateHistoricTest.java | 29 +++++- .../test/english/EnglishDateTest.java | 92 +++++++++++-------- .../test/english/EnglishTimeTest.java | 3 +- 4 files changed, 88 insertions(+), 42 deletions(-) diff --git a/test/de/unihd/dbs/heideltime/test/english/AbstractHeideltimeTest.java b/test/de/unihd/dbs/heideltime/test/english/AbstractHeideltimeTest.java index b102b069..a1f206cb 100644 --- a/test/de/unihd/dbs/heideltime/test/english/AbstractHeideltimeTest.java +++ b/test/de/unihd/dbs/heideltime/test/english/AbstractHeideltimeTest.java @@ -32,7 +32,7 @@ */ public class AbstractHeideltimeTest { - private JCasFactoryImpl jcasFactory; + protected JCasFactoryImpl jcasFactory; protected HeidelTime heideltime; private boolean debugTokenization = false; static final Pattern LINEWRAP = Pattern.compile("\\s*[\\n\\r]+\\s*"); @@ -47,7 +47,7 @@ public void init() { UIMAFramework.getXMLParser().parseTypeSystemDescription(new XMLInputSource(this.getClass().getClassLoader().getResource(Config.get(Config.TYPESYSTEMHOME)))) }; jcasFactory = new JCasFactoryImpl(descriptions); heideltime = new HeidelTime(); - heideltime.initialize(new UimaContextImpl(Language.ENGLISH, DocumentType.NARRATIVES, false)); + heideltime.initialize(new UimaContextImpl(Language.ENGLISH, DocumentType.COLLOQUIAL, false)); } catch (Exception e) { throw new RuntimeException(e); } @@ -141,4 +141,4 @@ protected void testSingleCase(String fragment, String[]... expectf) { assertEquals("Number of results do not match.", expectf.length, cnt); } -} \ No newline at end of file +} diff --git a/test/de/unihd/dbs/heideltime/test/english/EnglishDateHistoricTest.java b/test/de/unihd/dbs/heideltime/test/english/EnglishDateHistoricTest.java index 177069f4..3b84c045 100644 --- a/test/de/unihd/dbs/heideltime/test/english/EnglishDateHistoricTest.java +++ b/test/de/unihd/dbs/heideltime/test/english/EnglishDateHistoricTest.java @@ -1,9 +1,36 @@ package de.unihd.dbs.heideltime.test.english; +import org.apache.uima.UIMAFramework; +import org.apache.uima.resource.metadata.TypeSystemDescription; +import org.apache.uima.util.XMLInputSource; +import org.junit.Before; import org.junit.Ignore; import org.junit.Test; +import de.unihd.dbs.heideltime.standalone.Config; +import de.unihd.dbs.heideltime.standalone.DocumentType; +import de.unihd.dbs.heideltime.standalone.HeidelTimeStandalone; +import de.unihd.dbs.heideltime.standalone.components.impl.JCasFactoryImpl; +import de.unihd.dbs.heideltime.standalone.components.impl.UimaContextImpl; +import de.unihd.dbs.uima.annotator.heideltime.HeidelTime; +import de.unihd.dbs.uima.annotator.heideltime.resources.Language; + public class EnglishDateHistoricTest extends AbstractHeideltimeTest { + @Before + public void init() { + try { + if (!Config.isInitialized()) + HeidelTimeStandalone.readConfigFile("test/test.props"); + TypeSystemDescription[] descriptions = new TypeSystemDescription[] { + UIMAFramework.getXMLParser().parseTypeSystemDescription(new XMLInputSource(this.getClass().getClassLoader().getResource(Config.get(Config.TYPESYSTEMHOME)))) }; + jcasFactory = new JCasFactoryImpl(descriptions); + heideltime = new HeidelTime(); + heideltime.initialize(new UimaContextImpl(Language.ENGLISH, DocumentType.NARRATIVES, false)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + @Test public void testdate_historic_1a_BCADhint() { // 1- to 4-digit year @@ -146,9 +173,9 @@ public void testdate_historic_7ab() { testSingleCase("in 190", new String[] { "date_historic_7ab", "190", "0190" }); } + @Ignore("Disabled, as this is also matched by the regular year pattern") @Test public void testdate_historic_7c() { - // (2- to 4-digit year testSingleCase("\n190\n", new String[] { "date_historic_7c", "190", "0190" }); } diff --git a/test/de/unihd/dbs/heideltime/test/english/EnglishDateTest.java b/test/de/unihd/dbs/heideltime/test/english/EnglishDateTest.java index e9f54ec9..49511846 100644 --- a/test/de/unihd/dbs/heideltime/test/english/EnglishDateTest.java +++ b/test/de/unihd/dbs/heideltime/test/english/EnglishDateTest.java @@ -13,8 +13,7 @@ public void testdate_r0a() { @Test public void testdate_r0b() { testSingleCase("10-29-99", // - new String[] { "date_r0b", "10-29-99", "0099-10-29" }); - // TODO: really? better 1999 + new String[] { "date_r0b", "10-29-99", "1999-10-29" }); } @Test @@ -26,8 +25,7 @@ public void testdate_r0c() { @Test public void testdate_r0d() { testSingleCase("09/26/99", // - new String[] { "date_r0d", "09/26/99", "0099-09-26" }); - // TODO: really? better 1999 + new String[] { "date_r0d", "09/26/99", "1999-09-26" }); } @Test @@ -39,7 +37,7 @@ public void testdate_r0e() { @Test public void testdate_r0g() { testSingleCase("1.3.99", // - new String[] { "date_r0g", "1.3.99", "0099-03-01" }); + new String[] { "date_r0g", "1.3.99", "1999-03-01" }); } @Test @@ -62,6 +60,8 @@ public void testdate_r1a() { public void testdate_r1b() { testSingleCase("25 February 2009", // new String[] { "date_r1b", "25 February 2009", "2009-02-25" }); + testSingleCase("On 1 July 1913,", // + new String[] { "date_r1b", "1 July 1913", "1913-07-01" }); } @Test @@ -220,8 +220,7 @@ public void testdate_r9b() { @Test public void testdate_r9c() { testSingleCase("summer of 69", // - new String[] { "date_r9c", "summer of 69", "0069-SU" }); - // FIXME: shouldn't this be 1969-SU? + new String[] { "date_r9c", "summer of 69", "1969-SU" }); } @Test @@ -373,20 +372,23 @@ public void testdate_r16d() { @Test public void testdate_r17a() { - testSingleCase("this year", // - new String[] { "date_r17a", "this year", "XXXX" }); + testSingleCase("In 2010, this year", // + new String[] { "date_r12a", "2010", "2010" }, // + new String[] { "date_r17a", "this year", "2010" }); } @Test public void testdate_r17b() { - testSingleCase("this November", // - new String[] { "date_r17b", "this November", "XXXX-11" }); + testSingleCase("In 1999, this November", // + new String[] { "date_r12a", "1999", "1999" }, // + new String[] { "date_r17b", "this November", "1999-11" }); } @Test public void testdate_r17c() { - testSingleCase("this November 24", // - new String[] { "date_r17c", "this November 24", "XXXX-11-24" }); + testSingleCase("In 1998, this November 24", // + new String[] { "date_r12a", "1998", "1998" }, // + new String[] { "date_r17c", "this November 24", "1998-11-24" }); } @Test @@ -403,9 +405,9 @@ public void testdate_r17e() { @Test public void testdate_r17f() { - // using UNDEF-REF normalization - testSingleCase("this day", // - new String[] { "date_r17f", "this day", "XXXX-XX-XX" }); + testSingleCase("On November 24 1998, this day", // + new String[] { "date_r1a", "November 24 1998", "1998-11-24" }, // + new String[] { "date_r17f", "this day", "1998-11-24" }); } @Test @@ -446,8 +448,9 @@ public void testdate_r19a() { @Test public void testdate_r19b() { - testSingleCase("about twenty years ago", // - new String[] { "date_r19b", "about twenty years ago", "XXXX" }); + testSingleCase("In 2010, about twenty years ago", // + new String[] { "date_r12a", "2010", "2010" }, // + new String[] { "date_r19b", "about twenty years ago", "1990" }); } @Test @@ -458,8 +461,9 @@ public void testdate_r19c() { @Test public void testdate_r19d() { - testSingleCase("a month ago", // - new String[] { "date_r19d", "a month ago", "XXXX-XX" }); + testSingleCase("January 24 1998, a month ago", // + new String[] { "date_r1a", "January 24 1998", "1998-01-24" }, // + new String[] { "date_r19d", "a month ago", "1997-12" }); } @Test @@ -482,8 +486,9 @@ public void testdate_r20c() { @Test public void testdate_r20d() { - testSingleCase("a week later", // - new String[] { "date_r20d", "a week later", "XXXX-XX-XX" }); + testSingleCase("December 29 1998, a week later", // + new String[] { "date_r1a", "December 29 1998", "1998-12-29" }, // + new String[] { "date_r20d", "a week later", "1999-01-05" }); } @Test @@ -500,14 +505,16 @@ public void testdate_r20g() { @Test public void testdate_r21a() { - testSingleCase("twenty days earlier", // - new String[] { "date_r21a", "twenty days earlier", "XXXX-XX-XX" }); + testSingleCase("14 January 1998, twenty days earlier", // + new String[] { "date_r1b", "14 January 1998", "1998-01-14" }, // + new String[] { "date_r21a", "twenty days earlier", "1997-12-25" }); } @Test public void testdate_r21b() { - testSingleCase("about 20 days earlier", // - new String[] { "date_r21b", "about 20 days earlier", "XXXX-XX-XX" }); + testSingleCase("14 January 1998, about 20 days earlier", // + new String[] { "date_r1b", "14 January 1998", "1998-01-14" }, // + new String[] { "date_r21b", "about 20 days earlier", "1997-12-25" }); } @Test @@ -518,14 +525,18 @@ public void testdate_r21c() { @Test public void testdate_r22a() { + testSingleCase("14 January 1998, a year ago", // + new String[] { "date_r1b", "14 January 1998", "1998-01-14" }, // + new String[] { "date_r22a", "a year ago", "1997-01-14" }); testSingleCase("a year ago", // new String[] { "date_r22a", "a year ago", "XXXX" }); } @Test public void testdate_r22b() { - testSingleCase("a year later", // - new String[] { "date_r22b", "a year later", "XXXX" }); + testSingleCase("14 January 1998, a year later", // + new String[] { "date_r1b", "14 January 1998", "1998-01-14" }, // + new String[] { "date_r22b", "a year later", "1999-01-14" }); } @Test @@ -561,15 +572,16 @@ public void testdate_r24b() { @Test public void testdate_r24cd() { testSingleCase("Christmas 87", // - new String[] { "date_r24cd", "Christmas 87", "0087-12-25" }); + new String[] { "date_r24cd", "Christmas 87", "1987-12-25" }); testSingleCase("Christmas '87", // - new String[] { "date_r24cd", "Christmas '87", "0087-12-25" }); + new String[] { "date_r24cd", "Christmas '87", "1987-12-25" }); } @Test public void testdate_r25a() { - testSingleCase("Easter Sunday", // - new String[] { "date_r25a", "Easter Sunday", "XXXX-XX-XX" }); + testSingleCase("In 2010, on Easter Sunday", // + new String[] { "date_r12a", "2010", "2010" }, // + new String[] { "date_r25a", "Easter Sunday", "2010-04-04" }); } @Test @@ -581,11 +593,9 @@ public void testdate_r25b() { @Test public void testdate_r25cd() { testSingleCase("Easter Sunday 87", // - new String[] { "date_r25cd", "Easter Sunday 87", "0087-04-06" }); - // TODO: 1987? + new String[] { "date_r25cd", "Easter Sunday 87", "1987-04-19" }); testSingleCase("Easter Sunday '87", // - new String[] { "date_r25cd", "Easter Sunday '87", "0087-04-06" }); - // TODO: 1987? + new String[] { "date_r25cd", "Easter Sunday '87", "1987-04-19" }); } @Test @@ -649,4 +659,14 @@ public void testdate_r4a_negative() { public void testx_date_r11a_negative() { testSingleCase("in his 20s"); } + + @Test + public void testTokenBoundaryFilter() { + testSingleCase("$2016 is not a date."); + testSingleCase("2016° is too hot"); + testSingleCase("1234.2016 or 2016.1234 are not a date either."); + testSingleCase("2016dimensional nonsense"); + testSingleCase("Okay: (2016).", // + new String[] { "date_r12a", "2016", "2016" }); + } } diff --git a/test/de/unihd/dbs/heideltime/test/english/EnglishTimeTest.java b/test/de/unihd/dbs/heideltime/test/english/EnglishTimeTest.java index 002db864..d92e6950 100644 --- a/test/de/unihd/dbs/heideltime/test/english/EnglishTimeTest.java +++ b/test/de/unihd/dbs/heideltime/test/english/EnglishTimeTest.java @@ -38,8 +38,7 @@ public void testtime_r1e() { @Test public void testtime_r2a() { testSingleCase("09-24-99 1145EST", // - new String[] { "time_r2a", "09-24-99 1145EST", "0099-09-24T11:45-05" }); - // TODO: 0099? can't we do better? + new String[] { "time_r2a", "09-24-99 1145EST", "1999-09-24T11:45-05" }); } @Test From fc1f3e79bb2ca71cae3759f432016b16bc2a2e71 Mon Sep 17 00:00:00 2001 From: Erich Schubert Date: Wed, 31 May 2017 16:06:39 +0200 Subject: [PATCH 6/8] Test matching of 24:00 --- .../dbs/heideltime/test/english/EnglishTimeTest.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/de/unihd/dbs/heideltime/test/english/EnglishTimeTest.java b/test/de/unihd/dbs/heideltime/test/english/EnglishTimeTest.java index d92e6950..91e74ac4 100644 --- a/test/de/unihd/dbs/heideltime/test/english/EnglishTimeTest.java +++ b/test/de/unihd/dbs/heideltime/test/english/EnglishTimeTest.java @@ -177,6 +177,14 @@ public void testtime_r6d() { new String[] { "time_r6d", "9:30 p.m. Wednesday", "XXXX-XX-XXT21:30" }); } + @Test + public void testtime_r7a() { + testSingleCase("16:00 CET", // + new String[] { "time_r7a", "16:00 CET", "XXXX-XX-XXT16:00" }); + testSingleCase("1600 CET", // + new String[] { "time_r7a", "1600 CET", "XXXX-XX-XXT16:00" }); + } + @Test public void testtime_r8a() { testSingleCase("the morning of April 18, 1775", // From 7391a322110990be942c799b45737731bc260973 Mon Sep 17 00:00:00 2001 From: Erich Schubert Date: Thu, 8 Jun 2017 14:08:11 +0200 Subject: [PATCH 7/8] Allow testing with dct --- .../test/english/AbstractHeideltimeTest.java | 17 +++++++++++++---- .../test/english/EnglishDateTest.java | 13 ++++++++++++- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/test/de/unihd/dbs/heideltime/test/english/AbstractHeideltimeTest.java b/test/de/unihd/dbs/heideltime/test/english/AbstractHeideltimeTest.java index a1f206cb..864ee044 100644 --- a/test/de/unihd/dbs/heideltime/test/english/AbstractHeideltimeTest.java +++ b/test/de/unihd/dbs/heideltime/test/english/AbstractHeideltimeTest.java @@ -21,6 +21,7 @@ import de.unihd.dbs.heideltime.standalone.components.impl.UimaContextImpl; import de.unihd.dbs.uima.annotator.heideltime.HeidelTime; import de.unihd.dbs.uima.annotator.heideltime.resources.Language; +import de.unihd.dbs.uima.types.heideltime.Dct; import de.unihd.dbs.uima.types.heideltime.Sentence; import de.unihd.dbs.uima.types.heideltime.Timex3; import de.unihd.dbs.uima.types.heideltime.Token; @@ -36,7 +37,7 @@ public class AbstractHeideltimeTest { protected HeidelTime heideltime; private boolean debugTokenization = false; static final Pattern LINEWRAP = Pattern.compile("\\s*[\\n\\r]+\\s*"); - static final Pattern WORDS = Pattern.compile("([^\\s\\w]*)(\\w+)([^\\s\\w]*)"); + static final Pattern WORDS = Pattern.compile("(?U)([^\\s\\w]*)([\\w/]+(?:\\.\\d+)?)([^\\s\\w]*)"); @Before public void init() { @@ -102,9 +103,14 @@ private void tokenizeSentence(String fragment, JCas jcas, int ss, int se) { System.out.println(); } - protected JCas analyze(String fragment) { + protected JCas analyze(String fragment, String dctv) { try { JCas jcas = tokenize(fragment); + if (dctv != null) { + Dct dct = new Dct(jcas); + dct.setValue(dctv); + dct.addToIndexes(); + } heideltime.process(jcas); // intervaltagger.process(jcas); return jcas; @@ -114,7 +120,11 @@ protected JCas analyze(String fragment) { } protected void testSingleCase(String fragment, String[]... expectf) { - JCas jcas = analyze(fragment); + testSingleCase(fragment, null, expectf); + } + + protected void testSingleCase(String fragment, String dctv, String[]... expectf) { + JCas jcas = analyze(fragment, dctv); AnnotationIndex times = jcas.getAnnotationIndex(Timex3.type); int cnt = 0; for (Timex3 timex3 : times) { @@ -140,5 +150,4 @@ protected void testSingleCase(String fragment, String[]... expectf) { } assertEquals("Number of results do not match.", expectf.length, cnt); } - } diff --git a/test/de/unihd/dbs/heideltime/test/english/EnglishDateTest.java b/test/de/unihd/dbs/heideltime/test/english/EnglishDateTest.java index 49511846..036444dd 100644 --- a/test/de/unihd/dbs/heideltime/test/english/EnglishDateTest.java +++ b/test/de/unihd/dbs/heideltime/test/english/EnglishDateTest.java @@ -80,6 +80,9 @@ public void testdate_r2a() { new String[] { "date_r2a", "January 19th", "XXXX-01-19" }); testSingleCase("January nineteenth", // new String[] { "date_r2a", "January nineteenth", "XXXX-01-19" }); + // Test with dct: + testSingleCase("Nov. 21", "19981102", // + new String[] { "date_r2a", "Nov. 21", "1998-11-21" }); } @Test @@ -163,6 +166,9 @@ public void testdate_r5b() { public void testdate_r5c() { testSingleCase("Monday", // new String[] { "date_r5c", "Monday", "XXXX-XX-XX" }); + // Test with dct: + testSingleCase("Monday", "19981104", // + new String[] { "date_r5c", "Monday", "1998-11-02" }); } @Test @@ -229,11 +235,13 @@ public void testdate_r10a() { new String[] { "date_r10a", "the third quarter of 2001", "2001-Q3" }); } - @Ignore("Disabled, false positives: shot a goal in the second half") + // @Ignore("Disabled, false positives: shot a goal in the second half") @Test public void testdate_r10b() { testSingleCase("the second half", // new String[] { "date_r10b", "the second half", "XXXX-H2" }); + testSingleCase("the third-quarter", "2010-12-01", // + new String[] { "date_r10b", "the third-quarter", "2010-Q3" }); } @Test @@ -543,6 +551,8 @@ public void testdate_r22b() { public void testdate_r23a() { testSingleCase("the year-earlier first quarter", // new String[] { "date_r23a", "the year-earlier first quarter", "XXXX-Q1" }); + testSingleCase("the year-earlier first quarter", "2010-12-01", // + new String[] { "date_r23a", "the year-earlier first quarter", "2009-Q1" }); } @Test @@ -653,6 +663,7 @@ public void testdate_r3c_negative() { @Test public void testdate_r4a_negative() { testSingleCase("W2000.1920"); + testSingleCase("to 1462.93."); } @Test From b32a882efc0daf296b0be000977622e4ff65c906 Mon Sep 17 00:00:00 2001 From: Erich Schubert Date: Fri, 9 Jun 2017 17:01:11 +0200 Subject: [PATCH 8/8] Test matching of `1940/1941` etc. --- .../test/english/EnglishDateTest.java | 35 +++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/test/de/unihd/dbs/heideltime/test/english/EnglishDateTest.java b/test/de/unihd/dbs/heideltime/test/english/EnglishDateTest.java index 036444dd..81767460 100644 --- a/test/de/unihd/dbs/heideltime/test/english/EnglishDateTest.java +++ b/test/de/unihd/dbs/heideltime/test/english/EnglishDateTest.java @@ -194,11 +194,14 @@ public void testdate_r7a() { } @Test - public void testdate_r7ac() { + public void testdate_r7cd() { // find May 2001 AND June 2011 testSingleCase("May and June 2011", // new String[] { "date_r7c", "May", "2011-05" }, // - new String[] { "date_r7a", "June 2011", "2011-06" }); + new String[] { "date_r7d", "June 2011", "2011-06" }); + testSingleCase("May/June 2011", // + new String[] { "date_r7c", "May", "2011-05" }, // + new String[] { "date_r7d", "June 2011", "2011-06" }); } @Test @@ -298,6 +301,13 @@ public void testdate_r12d() { new String[] { "date_r12d", "two-thousand ten", "2010" }); } + @Test + public void testdate_r12f() { + testSingleCase("1940/1941", // + new String[] { "date_r12f1", "1940", "1940" }, // + new String[] { "date_r12f2", "1941", "1941" }); + } + @Test public void testdate_r13a() { testSingleCase("the 1990s", // @@ -354,6 +364,20 @@ public void testdate_r15a() { new String[] { "date_r15a", "the seventh century", "06" }); } + @Test + public void testdate_r15c() { + testSingleCase("19th and 20th century", // + new String[] { "date_r15c", "19th", "18" }, // + new String[] { "date_r15a", "20th century", "19" }); + } + + @Test + public void testdate_r15b() { + testSingleCase("19th and early 20th century", // + new String[] { "date_r15c", "19th", "18" }, // + new String[] { "date_r15b", "early 20th century", "19" }); + } + @Test public void testdate_r16a() { testSingleCase("March", // @@ -680,4 +704,11 @@ public void testTokenBoundaryFilter() { testSingleCase("Okay: (2016).", // new String[] { "date_r12a", "2016", "2016" }); } + + @Test + public void testNextQuarter() { + testSingleCase("November 2015, 1 quarter later", // + new String[] { "date_r7a", "November 2015", "2015-11" }, // + new String[] { "date_r20c", "1 quarter later", "2016-Q1" }); + } }