From 173d73f119939ed3165bff260b464288b51be4ae Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Tue, 31 Dec 2019 00:54:39 +0100 Subject: [PATCH] #1453 - Better I/O testing facilities - Introduce and document ReaderAssert and WriterAssert --- .../asciidoc/developer-guide/testing.adoc | 40 +- .../core/io/brat/BratReaderWriterTest.java | 24 +- .../io/conll/Conll2006ReaderWriterTest.java | 35 +- .../core/io/conll/ConllUReaderWriterTest.java | 27 +- .../test/resources/conll/2003/en-orig.conll | 1 + .../test/resources/conll/2006/fi-ref.conll | 1 + .../conllu-morphological_annotation.conllu | 1 + .../u_v2/conllu-syntactic_annotation.conllu | 1 + .../conll/u_v2/conllu-words_and_tokens.conllu | 1 + dkpro-core-testing-asl/pom.xml | 8 + .../org/dkpro/core/testing/IOTestRunner.java | 21 +- .../org/dkpro/core/testing/ReaderAssert.java | 299 ++++++++++++ .../org/dkpro/core/testing/WriterAssert.java | 452 ++++++++++++++++++ 13 files changed, 865 insertions(+), 46 deletions(-) create mode 100644 dkpro-core-testing-asl/src/main/java/org/dkpro/core/testing/ReaderAssert.java create mode 100644 dkpro-core-testing-asl/src/main/java/org/dkpro/core/testing/WriterAssert.java diff --git a/dkpro-core-doc/src/main/asciidoc/developer-guide/testing.adoc b/dkpro-core-doc/src/main/asciidoc/developer-guide/testing.adoc index 0a0a398d2c..3f2f79b4a6 100644 --- a/dkpro-core-doc/src/main/asciidoc/developer-guide/testing.adoc +++ b/dkpro-core-doc/src/main/asciidoc/developer-guide/testing.adoc @@ -92,31 +92,39 @@ of annotation supported by DKPro Core, e.g.: == Testing I/O componets -The IOTestRunner class offers convenient methods to test I/O components: +The `ReaderAssert` and `WriterAssert` classes can be used to test I/O components. They allow building +AssertJ-style unit tests with DKPro Core reader and writer components. 
-* `testRoundTrip` can be used to test converting a format to CAS, converting it back and comparing - it to the original -* `testOneWay` instead is useful to read data and compare it to a reference file in a different - format (e.g. CasDumpWriter format). It can also be used if there a full round-trip is not possible - because some information is lost or cannot be exported exactly as ingested from the original file. +One of the simplest tests is a *round-trip test* where an input file is read using a reader for a +particular format, then written out again using a writer for the same format. -The input file and reference file path given to these methods is always considered relative to -`src/test/resources`. - -.Example using `testRoundTrip` with extra parameters (Conll2006ReaderWriterTest) +.Example of a round-trip test [source,java,indent=0] ---- include::{source-dir}dkpro-core-io-conll-asl/src/test/java/de/tudarmstadt/ukp/dkpro/core/io/conll/Conll2006ReaderWriterTest.java[tags=testRoundTrip] ---- -.Example using `testOneWay` with extra parameters (Conll2006ReaderWriterTest) +The reader is set up to read the test input file. Instead of setting `PARAM_SOURCE_LOCATION`, it is +also possible to set the input location using `readingFrom()`. The writer automatically makes use of +a test output folder provided by a `DkproTestContext` - therefore a target location does not need to +be configured explicitly. + +Assuming the writer produces only a single output file, this file can be accessed for +assertions using `outputAsString()`. If multiple output files are created, an argument can be passed +to that method, e.g. `outputAsString("output.txt")`. This will look for a file at the target location whose +name ends in `output.txt`. If there is none or more than one matching file, the test will fail. 
+ +If the original input file is in a different format or cannot be fully reproduced by the writer, +then it is easy to set up a *one way test*, simply by changing the final comparison. The following +example also shows how to specify additional parameters on the reader or writer. + +.Example of a one-way test [source,java,indent=0] ---- include::{source-dir}dkpro-core-io-conll-asl/src/test/java/de/tudarmstadt/ukp/dkpro/core/io/conll/Conll2006ReaderWriterTest.java[tags=testOneWay] ---- -.Example using `testRoundTrip` with extra parameters (BratReaderWriterTest) -[source,java,indent=0]] ----- -include::{source-dir}dkpro-core-io-brat-asl/src/test/java/de/tudarmstadt/ukp/dkpro/core/io/brat/BratReaderWriterTest.java[tags=testOneWay] ----- +In order to test the ability of readers to read multiple files, the `asJCasList()` method can be used. +While pipelines typically re-use a single CAS which is repeatedly reset and refilled, this method +generates a list of separate CAS instances which can be individually validated after the test. To +access elements of the list use `element(n)`. 
\ No newline at end of file diff --git a/dkpro-core-io-brat-asl/src/test/java/org/dkpro/core/io/brat/BratReaderWriterTest.java b/dkpro-core-io-brat-asl/src/test/java/org/dkpro/core/io/brat/BratReaderWriterTest.java index 75c309191d..dfcb4e18ce 100644 --- a/dkpro-core-io-brat-asl/src/test/java/org/dkpro/core/io/brat/BratReaderWriterTest.java +++ b/dkpro-core-io-brat-asl/src/test/java/org/dkpro/core/io/brat/BratReaderWriterTest.java @@ -17,37 +17,36 @@ */ package org.dkpro.core.io.brat; +import static java.nio.charset.StandardCharsets.UTF_8; import static java.util.Arrays.asList; import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription; import static org.apache.uima.fit.factory.CollectionReaderFactory.createReaderDescription; +import static org.assertj.core.api.Assertions.contentOf; import static org.dkpro.core.testing.IOTestRunner.testOneWay; import static org.dkpro.core.testing.IOTestRunner.testRoundTrip; -import org.dkpro.core.io.brat.BratReader; -import org.dkpro.core.io.brat.BratWriter; +import java.io.File; + import org.dkpro.core.io.conll.Conll2009Reader; import org.dkpro.core.io.conll.Conll2012Reader; import org.dkpro.core.testing.DkproTestContext; +import org.dkpro.core.testing.ReaderAssert; import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; -//NOTE: This file contains Asciidoc markers for partial inclusion of this file in the documentation -//Do not remove these tags! 
public class BratReaderWriterTest { @Test public void testConll2009() throws Exception { -// tag::testOneWay[] testOneWay( createReaderDescription(Conll2009Reader.class), // the reader createEngineDescription(BratWriter.class, // the writer BratWriter.PARAM_WRITE_RELATION_ATTRIBUTES, true), "conll/2009/en-ref.ann", // the reference file for the output "conll/2009/en-orig.conll"); // the input file for the test -// end::testOneWay[] } @Test @@ -273,14 +272,17 @@ BratWriter.PARAM_RELATION_TYPES, asList( public void testBratWithDiscontinuousFragmentNear() throws Exception { - testRoundTrip(createReaderDescription(BratReader.class, + ReaderAssert.assertThat(BratReader.class, BratReader.PARAM_TEXT_ANNOTATION_TYPE_MAPPINGS, asList("Token -> de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token", "Organization -> de.tudarmstadt.ukp.dkpro.core.api.ner.type.Organization", - "Location -> de.tudarmstadt.ukp.dkpro.core.api.ner.type.Location")), - createEngineDescription(BratWriter.class, BratWriter.PARAM_ENABLE_TYPE_MAPPINGS, - true), - "brat/document0c.ann"); + "Location -> de.tudarmstadt.ukp.dkpro.core.api.ner.type.Location")) + .readingFrom("src/test/resources/brat/document0c.ann") + .usingWriter(BratWriter.class, + BratWriter.PARAM_ENABLE_TYPE_MAPPINGS, true) + .outputAsString("document0c.ann") + .isEqualToNormalizingNewlines( + contentOf(new File("src/test/resources/brat/document0c.ann"), UTF_8)); } @Test diff --git a/dkpro-core-io-conll-asl/src/test/java/org/dkpro/core/io/conll/Conll2006ReaderWriterTest.java b/dkpro-core-io-conll-asl/src/test/java/org/dkpro/core/io/conll/Conll2006ReaderWriterTest.java index 87395bf350..b282a325a8 100644 --- a/dkpro-core-io-conll-asl/src/test/java/org/dkpro/core/io/conll/Conll2006ReaderWriterTest.java +++ b/dkpro-core-io-conll-asl/src/test/java/org/dkpro/core/io/conll/Conll2006ReaderWriterTest.java @@ -17,11 +17,13 @@ */ package org.dkpro.core.io.conll; -import static org.dkpro.core.testing.IOTestRunner.testOneWay; -import static 
org.dkpro.core.testing.IOTestRunner.testRoundTrip; +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.assertj.core.api.Assertions.contentOf; + +import java.io.File; import org.dkpro.core.testing.DkproTestContext; -import org.junit.Ignore; +import org.dkpro.core.testing.ReaderAssert; import org.junit.Rule; import org.junit.Test; @@ -29,19 +31,17 @@ //Do not remove these tags! public class Conll2006ReaderWriterTest { - // Deleted the test file here because it was malformed *and* we had no provenance info. - // However, leaving the test in right now and ignoring it because it is used in the - // documentation. - @Ignore() @Test public void roundTrip() throws Exception { // tag::testRoundTrip[] - testRoundTrip( - Conll2006Reader.class, // the reader - Conll2006Writer.class, // the writer - "conll/2006/fk003_2006_08_ZH1.conll"); // the input also used as output reference + ReaderAssert.assertThat(Conll2006Reader.class) // the reader + .readingFrom("src/test/resources/conll/2006/fi-ref.conll") // the test input file + .usingWriter(Conll2006Writer.class) // the writer + .outputAsString() // access writer output + .isEqualToNormalizingNewlines( // compare to input file + contentOf(new File("src/test/resources/conll/2006/fi-ref.conll"), UTF_8)); // end::testRoundTrip[] } @@ -50,11 +50,14 @@ public void testFinnTreeBank() throws Exception { // tag::testOneWay[] - testOneWay( - Conll2006Reader.class, // the reader - Conll2006Writer.class, // the writer - "conll/2006/fi-ref.conll", // the reference file for the output - "conll/2006/fi-orig.conll"); // the input file for the test + ReaderAssert.assertThat(Conll2006Reader.class, // the reader + Conll2006Reader.PARAM_SOURCE_ENCODING, "UTF-8") // reader parameter + .readingFrom("src/test/resources/conll/2006/fi-orig.conll") // the test input file + .usingWriter(Conll2006Writer.class, // the writer + Conll2006Writer.PARAM_TARGET_ENCODING, "UTF-8") // writer parameter + .outputAsString("fi-orig.conll") // 
access writer output + .isEqualToNormalizingNewlines( // compare to input file + contentOf(new File("src/test/resources/conll/2006/fi-ref.conll"), UTF_8)); // end::testOneWay[] } diff --git a/dkpro-core-io-conll-asl/src/test/java/org/dkpro/core/io/conll/ConllUReaderWriterTest.java b/dkpro-core-io-conll-asl/src/test/java/org/dkpro/core/io/conll/ConllUReaderWriterTest.java index 8a8dc2a260..ededb68113 100644 --- a/dkpro-core-io-conll-asl/src/test/java/org/dkpro/core/io/conll/ConllUReaderWriterTest.java +++ b/dkpro-core-io-conll-asl/src/test/java/org/dkpro/core/io/conll/ConllUReaderWriterTest.java @@ -17,16 +17,26 @@ */ package org.dkpro.core.io.conll; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription; import static org.apache.uima.fit.factory.CollectionReaderFactory.createReaderDescription; +import static org.apache.uima.fit.util.JCasUtil.select; +import static org.assertj.core.api.Assertions.contentOf; +import static org.assertj.core.api.Assertions.tuple; import static org.dkpro.core.testing.IOTestRunner.testOneWay; import static org.dkpro.core.testing.IOTestRunner.testRoundTrip; +import java.io.File; + import org.dkpro.core.testing.DkproTestContext; +import org.dkpro.core.testing.ReaderAssert; import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; +import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; +import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; + public class ConllUReaderWriterTest { @Test @@ -82,7 +92,22 @@ public void roundTripV2SentenceBoundariesAndComments() public void roundTripV2SyntacticAnnotation() throws Exception { - testRoundTrip(ConllUReader.class, ConllUWriter.class, "conll/u_v2/conllu-syntactic_annotation.conllu"); + ReaderAssert.assertThat(ConllUReader.class) + .readingFrom("src/test/resources/conll/u_v2/conllu-syntactic_annotation.conllu") + .asJCasList() + .extracting( + jcas -> select(jcas, 
Sentence.class).size(), + jcas -> select(jcas, Token.class).size()) + .containsExactly( + tuple(1, 6)); + + ReaderAssert.assertThat(ConllUReader.class) + .readingFrom("src/test/resources/conll/u_v2/conllu-syntactic_annotation.conllu") + .usingWriter(ConllUWriter.class) + .outputAsString() + .isEqualToNormalizingNewlines(contentOf( + new File("src/test/resources/conll/u_v2/conllu-syntactic_annotation.conllu"), + UTF_8)); } @Ignore("This unfortunately doesn't work yet.") diff --git a/dkpro-core-io-conll-asl/src/test/resources/conll/2003/en-orig.conll b/dkpro-core-io-conll-asl/src/test/resources/conll/2003/en-orig.conll index a560e6f650..23cb26ad5e 100644 --- a/dkpro-core-io-conll-asl/src/test/resources/conll/2003/en-orig.conll +++ b/dkpro-core-io-conll-asl/src/test/resources/conll/2003/en-orig.conll @@ -5,3 +5,4 @@ heads VBZ I-VP O for IN I-PP O Baghdad NNP I-NP I-LOC . . O O + diff --git a/dkpro-core-io-conll-asl/src/test/resources/conll/2006/fi-ref.conll b/dkpro-core-io-conll-asl/src/test/resources/conll/2006/fi-ref.conll index 473c8e0bea..813c864e3c 100644 --- a/dkpro-core-io-conll-asl/src/test/resources/conll/2006/fi-ref.conll +++ b/dkpro-core-io-conll-asl/src/test/resources/conll/2006/fi-ref.conll @@ -33,3 +33,4 @@ 3 Nimi nimi N N N Nom Sg 0 main _ _ 4 ja ja CC CC CC 5 phrm _ _ 5 tarkoitus tarkoitus N N N Nom Sg 3 conjunct _ _ + diff --git a/dkpro-core-io-conll-asl/src/test/resources/conll/u_v2/conllu-morphological_annotation.conllu b/dkpro-core-io-conll-asl/src/test/resources/conll/u_v2/conllu-morphological_annotation.conllu index 9c3427428d..14f16a3887 100644 --- a/dkpro-core-io-conll-asl/src/test/resources/conll/u_v2/conllu-morphological_annotation.conllu +++ b/dkpro-core-io-conll-asl/src/test/resources/conll/u_v2/conllu-morphological_annotation.conllu @@ -5,3 +5,4 @@ 4 elva elva NUM RG.NOM Case=Nom|NumType=Card _ _ _ _ 5 år år NOUN NN.NEU.PLU.IND.NOM Case=Nom|Definite=Ind|Gender=Neut|Number=Plur _ _ _ _ 6 . . 
PUNCT DL.MAD _ _ _ _ _ + diff --git a/dkpro-core-io-conll-asl/src/test/resources/conll/u_v2/conllu-syntactic_annotation.conllu b/dkpro-core-io-conll-asl/src/test/resources/conll/u_v2/conllu-syntactic_annotation.conllu index 9c08004b0d..e3cf89b165 100644 --- a/dkpro-core-io-conll-asl/src/test/resources/conll/u_v2/conllu-syntactic_annotation.conllu +++ b/dkpro-core-io-conll-asl/src/test/resources/conll/u_v2/conllu-syntactic_annotation.conllu @@ -5,3 +5,4 @@ 4 sell sell VERB VBP Number=Plur|Person=3|Tense=Pres 2 conj 0:root|2:conj _ 5 books book NOUN NNS Number=Plur 2 obj 2:obj|4:obj _ 6 . . PUNCT . _ 2 punct 2:punct _ + diff --git a/dkpro-core-io-conll-asl/src/test/resources/conll/u_v2/conllu-words_and_tokens.conllu b/dkpro-core-io-conll-asl/src/test/resources/conll/u_v2/conllu-words_and_tokens.conllu index 2107acac9e..8fbc4bfa05 100644 --- a/dkpro-core-io-conll-asl/src/test/resources/conll/u_v2/conllu-words_and_tokens.conllu +++ b/dkpro-core-io-conll-asl/src/test/resources/conll/u_v2/conllu-words_and_tokens.conllu @@ -6,3 +6,4 @@ 3 a a _ _ _ _ _ _ _ 4 el el _ _ _ _ _ _ _ 5 mar mar _ _ _ _ _ _ _ + diff --git a/dkpro-core-testing-asl/pom.xml b/dkpro-core-testing-asl/pom.xml index 01635f590d..23d117679e 100644 --- a/dkpro-core-testing-asl/pom.xml +++ b/dkpro-core-testing-asl/pom.xml @@ -97,6 +97,10 @@ junit junit + + org.assertj + assertj-core + log4j log4j @@ -105,6 +109,10 @@ org.slf4j slf4j-log4j12 + + org.slf4j + slf4j-api + diff --git a/dkpro-core-testing-asl/src/main/java/org/dkpro/core/testing/IOTestRunner.java b/dkpro-core-testing-asl/src/main/java/org/dkpro/core/testing/IOTestRunner.java index f23af181ca..fd4b5076ab 100644 --- a/dkpro-core-testing-asl/src/main/java/org/dkpro/core/testing/IOTestRunner.java +++ b/dkpro-core-testing-asl/src/main/java/org/dkpro/core/testing/IOTestRunner.java @@ -17,12 +17,15 @@ */ package org.dkpro.core.testing; +import static java.nio.charset.StandardCharsets.UTF_8; import static 
org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription; import static org.apache.uima.fit.factory.CollectionReaderFactory.createReaderDescription; import static org.apache.uima.fit.factory.ConfigurationParameterFactory.canParameterBeSet; import static org.apache.uima.fit.factory.ConfigurationParameterFactory.getParameterSettings; import static org.apache.uima.fit.factory.ConfigurationParameterFactory.setParameter; import static org.apache.uima.fit.pipeline.SimplePipeline.runPipeline; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.contentOf; import static org.junit.Assert.assertEquals; import java.io.File; @@ -51,12 +54,20 @@ public class IOTestRunner private static final String RESOURCE_COLLECTION_READER_BASE = "org.dkpro.core.api.io.ResourceCollectionReaderBase"; private static final String JCAS_FILE_WRITER_IMPL_BASE = "org.dkpro.core.api.io.JCasFileWriter_ImplBase"; + /** + * @deprecated Use {@link ReaderAssert} instead. 
+ */ + @Deprecated public static void testRoundTrip(Class aReader, Class aWriter, String aFile) throws Exception { - testOneWay(createReaderDescription(aReader), createEngineDescription(aWriter), aFile, - aFile); + ReaderAssert.assertThat(aReader) + .readingFrom("src/test/resources/" + aFile) + .usingWriter(aWriter) + .outputAsString(FilenameUtils.getName(aFile)) + .satisfies(output -> assertThat(output.trim()).isEqualToNormalizingNewlines( + contentOf(new File("src/test/resources/" + aFile), UTF_8).trim())); } public static void testRoundTrip(Class aReader, @@ -78,6 +89,12 @@ public static void testRoundTrip(CollectionReaderDescription aReader, AnalysisEngineDescription aWriter, String aFile) throws Exception { +// ReaderAssert.assertThat(aReader) +// .readingFrom("src/test/resources/" + aFile) +// .usingWriter(aWriter) +// .asString() +// .isEqualToNormalizingNewlines( +// contentOf(new File("src/test/resources/" + aFile), UTF_8)); testOneWay(aReader, aWriter, aFile, aFile); } diff --git a/dkpro-core-testing-asl/src/main/java/org/dkpro/core/testing/ReaderAssert.java b/dkpro-core-testing-asl/src/main/java/org/dkpro/core/testing/ReaderAssert.java new file mode 100644 index 0000000000..ea42ee3dbe --- /dev/null +++ b/dkpro-core-testing-asl/src/main/java/org/dkpro/core/testing/ReaderAssert.java @@ -0,0 +1,299 @@ +/* + * Copyright 2019 + * Ubiquitous Knowledge Processing (UKP) Lab + * Technische Universität Darmstadt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.dkpro.core.testing; + +import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription; +import static org.apache.uima.fit.factory.CollectionReaderFactory.createReaderDescription; +import static org.apache.uima.fit.factory.ConfigurationParameterFactory.canParameterBeSet; +import static org.apache.uima.fit.factory.ConfigurationParameterFactory.getParameterSettings; +import static org.apache.uima.fit.factory.ConfigurationParameterFactory.setParameter; +import static org.dkpro.core.api.parameter.ComponentParameters.PARAM_SOURCE_LOCATION; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +import org.apache.uima.analysis_component.AnalysisComponent; +import org.apache.uima.analysis_engine.AnalysisEngineDescription; +import org.apache.uima.cas.CAS; +import org.apache.uima.cas.SerialFormat; +import org.apache.uima.collection.CollectionReader; +import org.apache.uima.collection.CollectionReaderDescription; +import org.apache.uima.fit.pipeline.JCasIterable; +import org.apache.uima.jcas.JCas; +import org.apache.uima.resource.ResourceInitializationException; +import org.apache.uima.resource.metadata.TypeSystemDescription; +import org.apache.uima.util.CasCreationUtils; +import org.apache.uima.util.CasIOUtils; +import org.assertj.core.api.AbstractAssert; +import org.assertj.core.api.ListAssert; +import org.assertj.core.internal.Failures; +import org.dkpro.core.api.parameter.ComponentParameters; +import org.dkpro.core.testing.IOTestRunner.Validator; +import org.dkpro.core.testing.validation.checks.Check; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData; + +public class ReaderAssert + extends 
AbstractAssert +{ + private Logger LOG = LoggerFactory.getLogger(getClass()); + + private Object requestedSourceLocation; + + private AnalysisEngineDescription[] engines; + + private boolean stripDocumentMetadata = true; + private boolean validate = true; + private TestOptions validationOptions = new TestOptions(); + + public ReaderAssert(CollectionReaderDescription aReader) + { + super(aReader, ReaderAssert.class); + + isNotNull(); + } + + public static ReaderAssert assertThat(Class aReaderClass, + Object... aConfigurationData) + throws ResourceInitializationException + { + return assertThat(createReaderDescription(aReaderClass, aConfigurationData)); + } + + public static ReaderAssert assertThat(CollectionReaderDescription aReader) + { + return new ReaderAssert(aReader); + } + + /** + * Configure the reader to read from the given file. + * + * @param aLocation + * a file location. + * @return the assert for chaining. + * @see #readingFrom(String) + */ + public ReaderAssert readingFrom(File aLocation) + { + return _readingFrom(aLocation); + } + + /** + * Configure the reader to read from the given location. The source location can either be + * configured using this method or by setting {@link ComponentParameters#PARAM_SOURCE_LOCATION} + * in the reader description. + * + * @param aLocation + * a location. + * @return the assert for chaining. + */ + public ReaderAssert readingFrom(String aLocation) + { + return _readingFrom(aLocation); + } + + protected ReaderAssert _readingFrom(Object aLocation) + { + isNotNull(); + + if (requestedSourceLocation != null) { + failWithMessage("Source location has already been set to [%s]", + requestedSourceLocation); + } + + requestedSourceLocation = aLocation; + + if (!canParameterBeSet(actual, PARAM_SOURCE_LOCATION)) { + failWithMessage("Parameter [%s] cannot be set on reader [%s]", + PARAM_SOURCE_LOCATION, actual.getImplementationName()); + } + + // Is the source location defined in the reader parameters? 
+ Map readerParameters = getParameterSettings(actual); + if (readerParameters.containsKey(PARAM_SOURCE_LOCATION)) { + throw Failures.instance().failure(String.format( + "Source location [%s] already defined in the reader parameters.", + readerParameters.get(PARAM_SOURCE_LOCATION))); + } + + setParameter(actual, PARAM_SOURCE_LOCATION, requestedSourceLocation); + + return this; + } + + public ReaderAssert usingEngines(AnalysisEngineDescription... aEngines) + { + isNotNull(); + + engines = aEngines; + + return this; + } + + public WriterAssert usingWriter(Class aComponentClass, + Object... aConfigurationData) + throws ResourceInitializationException + { + return usingWriter(createEngineDescription(aComponentClass, aConfigurationData)); + } + + public WriterAssert usingWriter(AnalysisEngineDescription aWriter) + { + isNotNull(); + + try { + return WriterAssert.assertThat(aWriter).consuming(toJCasIterable()); + } + catch (ResourceInitializationException e) { + AssertionError error = Failures.instance() + .failure(String.format("Error constucting reading pipeline.")); + error.initCause(e); + throw error; + } + } + + /** + * Normally fields such as {@link DocumentMetaData#getDocumentUri()} which include the full + * document path and which are not consistent between different test environments are cleared. + * If this is not desired, invoke this method. + */ + public void keepDocumentMetadata() + { + stripDocumentMetadata = false; + } + + /** + * Normally, the output of the reader is sanity-checked. If this is not desired, invoke this + * method. + */ + public void skipValidation() + { + validate = false; + } + + /** + * Skip the given checks during reader output validation. + * + * @param aCheck + * the checks to skip. + */ + public void skipChecks(Class aCheck) + { + validationOptions.skipCheck(aCheck); + } + + /** + * Infers the actual source location. + * + * @return the source location. 
+ */ + protected Object sourceLocation() + { + Map readerParameters = getParameterSettings(actual); + + // Was the source location set explicitly? + if (requestedSourceLocation == null) { + // Is the source location known from the reader parameters? + if (readerParameters.containsKey(PARAM_SOURCE_LOCATION)) { + return readerParameters.get(PARAM_SOURCE_LOCATION); + } + + // Can we get one from the DKPro Core test context? Only dereference it when present. + if (DkproTestContext.get() != null) { + String contextOutputFolderName = "target/test-output/" + + DkproTestContext.get().getTestOutputFolderName(); + readingFrom(contextOutputFolderName); // also sets PARAM_SOURCE_LOCATION on the reader + return contextOutputFolderName; + } + + // No success? Fail the assertion with a hint on how to provide a location. + throw Failures.instance() + .failure(String.format("Unable to determine source location. Use a @Rule " + + DkproTestContext.class.getSimpleName() + + " or set the location using `readingFrom()`")); + } + else { + return requestedSourceLocation; + } + } + + protected List processors() throws ResourceInitializationException + { + List processors = new ArrayList<>(); + + // By default we sanity-check the output of the reader. 
+ if (validate) { + processors.add(createEngineDescription(Validator.class)); + Validator.options = validationOptions; + } + + // By default, we strip the document metadata if no options are specified + if (stripDocumentMetadata) { + processors.add(createEngineDescription(DocumentMetaDataStripper.class)); + } + + return processors; + } + + public ListAssert asJCasList() + { + List casses = new ArrayList<>(); + + try { + for (JCas jcas : toJCasIterable()) { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + CasIOUtils.save(jcas.getCas(), bos, SerialFormat.SERIALIZED_TSI); + CAS copy = CasCreationUtils.createCas((TypeSystemDescription) null, null, null); + CasIOUtils.load(new ByteArrayInputStream(bos.toByteArray()), copy); + casses.add(copy.getJCas()); + } + } + catch (Exception e) { + AssertionError error = Failures.instance() + .failure(String.format("Pipeline execution failed: %s", e.getMessage())); + error.initCause(e); + throw error; + } + + return new ListAssert<>(casses); + } + + public JCasIterable toJCasIterable() throws ResourceInitializationException + { + // Obtains the actual source location, also ensuring that it was actually defined. 
+ Object actualSourceLocation = sourceLocation(); + + LOG.debug("Reading from source location: {}", actualSourceLocation); + + List allProcessors = new ArrayList<>(); + allProcessors.addAll(processors()); + if (engines != null) { + allProcessors.addAll(Arrays.asList(engines)); + } + + return new JCasIterable(actual, + allProcessors.stream().toArray(AnalysisEngineDescription[]::new)); + } +} diff --git a/dkpro-core-testing-asl/src/main/java/org/dkpro/core/testing/WriterAssert.java b/dkpro-core-testing-asl/src/main/java/org/dkpro/core/testing/WriterAssert.java new file mode 100644 index 0000000000..59d6036b23 --- /dev/null +++ b/dkpro-core-testing-asl/src/main/java/org/dkpro/core/testing/WriterAssert.java @@ -0,0 +1,452 @@ +/* + * Copyright 2019 + * Ubiquitous Knowledge Processing (UKP) Lab + * Technische Universität Darmstadt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.dkpro.core.testing; + +import static org.apache.commons.lang3.StringUtils.replaceOnce; +import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngine; +import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription; +import static org.apache.uima.fit.factory.ConfigurationParameterFactory.canParameterBeSet; +import static org.apache.uima.fit.factory.ConfigurationParameterFactory.getParameterSettings; +import static org.apache.uima.fit.factory.ConfigurationParameterFactory.setParameter; +import static org.dkpro.core.api.parameter.ComponentParameters.PARAM_TARGET_LOCATION; + +import java.io.File; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import org.apache.commons.io.FileUtils; +import org.apache.uima.analysis_component.AnalysisComponent; +import org.apache.uima.analysis_engine.AnalysisEngine; +import org.apache.uima.analysis_engine.AnalysisEngineDescription; +import org.apache.uima.fit.pipeline.JCasIterable; +import org.apache.uima.fit.util.LifeCycleUtil; +import org.apache.uima.jcas.JCas; +import org.apache.uima.resource.ResourceInitializationException; +import org.assertj.core.api.AbstractAssert; +import org.assertj.core.api.FileAssert; +import org.assertj.core.api.ListAssert; +import org.assertj.core.api.StringAssert; +import org.assertj.core.internal.Failures; +import org.assertj.core.util.Files; +import org.dkpro.core.api.parameter.ComponentParameters; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class WriterAssert + extends AbstractAssert +{ + private Logger LOG = LoggerFactory.getLogger(getClass()); + + public static final String VAR_TARGET = "${TARGET}"; + + // See JCasFileWriter_ImplBase + private static final String PARAM_SINGULAR_TARGET = "singularTarget"; + private static final String PARAM_STRIP_EXTENSION = "stripExtension"; + + private JCasIterable jcasIterable; 
+ + private Object requestedTargetLocation; + private boolean singularTargetAnnounced = false; + private boolean stripExtension = true; + + public WriterAssert(AnalysisEngineDescription aWriter) + { + super(aWriter, WriterAssert.class); + + isNotNull(); + + if (!actual.isPrimitive()) { + failWithMessage("Writer cannot be an aggregate. Use `usingEngine` if you need to add " + + "additional analysis engines or secondary writers."); + } + } + + public static WriterAssert assertThat(Class aWriterClass, + Object... aConfigurationData) + throws ResourceInitializationException + { + return assertThat(createEngineDescription(aWriterClass, aConfigurationData)); + } + + public static WriterAssert assertThat(AnalysisEngineDescription aWriter) + { + return new WriterAssert(aWriter); + } + + public WriterAssert consuming(JCasIterable aJCasIterable) + { + jcasIterable = aJCasIterable; + + return this; + } + + /** + * By default, the original extension is stripped from the original file name and the writer's + * extension is then added. By calling this method, the original extension is retained and + * in addition the writer's extension is added. + * + * @return the assert for chaining. + */ + public WriterAssert keepOriginalExtension() + { + stripExtension = false; + + return this; + } + + /** + * Configure the writer to write to the given file. + * + * @param aLocation + * a location. + * @return the assert for chaining. + * @see #writingTo(String) + */ + public WriterAssert writingTo(File aLocation) + { + return _writingTo(aLocation); + } + + /** + * Configure the writer to write to the given location. The target location can either be + * configured using this method or by setting {@link ComponentParameters#PARAM_TARGET_LOCATION} + * in the reader description. + * + * @param aLocation + * a location. + * @return the assert for chaining. 
+ */ + public WriterAssert writingTo(String aLocation) + { + return _writingTo(aLocation); + } + + /** + * Configure the writer to write all output into a single file at the given location. The + * location is the final file name, not a folder name. The singular target flag can either be + * configured using this method or by setting {@code PARAM_SINGULAR_TARGET} to {@code true} + * in the writer description. This method can also be used to indicate that a component + * implicitly writes a singular target, even if it does not support + * {@code PARAM_SINGULAR_TARGET}. This affects e.g. how {@link #asFiles()} interprets the + * target location. + * + * @param aLocation + * a location. + * @return the assert for chaining. + * @see #writingTo(String) + */ + public WriterAssert writingToSingular(String aLocation) + { + singularTargetAnnounced = true; + + // If the parameter can be set on the component, set it - otherwise assume that the + // component implicitly creates a singular target from the target location + if (canParameterBeSet(actual, PARAM_SINGULAR_TARGET)) { + Map writerParameters = getParameterSettings(actual); + if (Boolean.TRUE.equals(writerParameters.get(PARAM_SINGULAR_TARGET))) { + failWithMessage("PARAM_SINGULAR_TARGET already set in the writer parameters."); + + } + setParameter(actual, PARAM_SINGULAR_TARGET, true); + } + + return _writingTo(aLocation); + } + + public WriterAssert _writingTo(Object aLocation) + { + isNotNull(); + + if (requestedTargetLocation != null) { + failWithMessage("Target location has already been set to [%s]", + requestedTargetLocation); + } + + requestedTargetLocation = aLocation; + + if (!canParameterBeSet(actual, PARAM_TARGET_LOCATION)) { + failWithMessage("Parameter [%s] cannot be set on writer [%s]", + PARAM_TARGET_LOCATION, actual.getImplementationName()); + } + + // Is the target location defined in the writer parameters? 
+ Map writerParameters = getParameterSettings(actual); + if (writerParameters.containsKey(PARAM_TARGET_LOCATION)) { + throw Failures.instance().failure(String.format( + "Target location [%s] already defined in the writer parameters.", + writerParameters.get(PARAM_TARGET_LOCATION))); + } + + requestedTargetLocation = resolvePlaceholders(requestedTargetLocation); + + setParameter(actual, PARAM_TARGET_LOCATION, requestedTargetLocation); + + return this; + } + + protected static T resolvePlaceholders(T aLocation) + { + if (aLocation instanceof String) { + String location = (String) aLocation; + + if (location.startsWith(VAR_TARGET)) { + if (DkproTestContext.get() == null) { + throw Failures.instance() + .failure(String.format("Cannot substitute `%s` - no %s found.", + VAR_TARGET, DkproTestContext.class.getSimpleName())); + } + + File contextOutputFolder = new File("target/test-output/" + + DkproTestContext.get().getTestOutputFolderName()); + if (contextOutputFolder.exists()) { + FileUtils.deleteQuietly(contextOutputFolder); + } + + return (T) replaceOnce(location, VAR_TARGET, contextOutputFolder.getPath()); + } + } + + return aLocation; + } + + /** + * Infers the actual target location. + * + * @return the target location. + */ + protected Object targetLocation() + { + Map writerParameters = getParameterSettings(actual); + + // Was the target location set explicitly? + if (requestedTargetLocation == null) { + // Is the target location known from the writer parameters? + if (writerParameters.containsKey(PARAM_TARGET_LOCATION)) { + return writerParameters.get(PARAM_TARGET_LOCATION); + } + + // Can we get one from the DKPro Core test context? + if (DkproTestContext.get() != null) { + writingTo(VAR_TARGET); + return getParameterSettings(actual).get(PARAM_TARGET_LOCATION); + } + + // No success? + throw Failures.instance() + .failure(String.format("Unable to determine target location. 
Use a @Rule " + + DkproTestContext.class.getSimpleName() + + " or set the location using `writingTo()")); + } + else { + return requestedTargetLocation; + } + } + + protected List listTargetLocationFiles() + { + Object location = targetLocation(); + + if (location instanceof String) { + location = new File((String) location); + } + + if (location instanceof File) { + File fileLocation = (File) location; + + if (!fileLocation.exists()) { + throw Failures.instance().failure( + String.format("Target location [%s] does not exist.", fileLocation)); + } + + if (isSingularTarget()) { + return Arrays.asList(fileLocation); + } + + return Arrays.asList(fileLocation.listFiles()); + } + + throw Failures.instance().failure(String + .format("Target location [%s] cannot be interpreted as a directory.", location)); + } + + protected boolean isSingularTarget() + { + Map writerParameters = getParameterSettings(actual); + + if (Boolean.TRUE.equals(writerParameters.get(PARAM_SINGULAR_TARGET))) { + return true; + } + + return singularTargetAnnounced; + } + + protected void configureWriter() + { + // By default, we strip the original extension when writing to avoid extension accumulation + if (stripExtension && canParameterBeSet(actual, PARAM_STRIP_EXTENSION)) { + setParameter(actual, PARAM_STRIP_EXTENSION, true); + } + + // If the target location is specified in the writer descriptor only, replace any variable + // in it if possible + if (canParameterBeSet(actual, PARAM_TARGET_LOCATION)) { + Map writerParameters = getParameterSettings(actual); + if (writerParameters.containsKey(PARAM_TARGET_LOCATION)) { + Object location = writerParameters.get(PARAM_TARGET_LOCATION); + setParameter(actual, PARAM_TARGET_LOCATION, resolvePlaceholders(location)); + } + } + } + + /** + * Gets the output written to the target location as a string. This method fails if more than + * one output file was created or if no output was created. + *

+ * This method triggers the execution of the text pipeline. + * + * @return the output written to the target location as a string. + */ + public StringAssert outputAsString() + { + return outputAsString(null); + } + + /** + * Gets the output written to the target location as a string. + *

+ * This method triggers the execution of the text pipeline. + * + * @param aPathSuffix + * a path/filename suffix which uniquely identifies the requested output file. + * + * @return the output written to the target location as a string. + */ + public StringAssert outputAsString(String aPathSuffix) + { + run(); + + List files = listTargetLocationFiles(); + + if (aPathSuffix != null) { + files = files.stream() + .filter(file -> file.getPath().endsWith(aPathSuffix)) + .collect(Collectors.toList()); + } + + if (files.isEmpty()) { + if (aPathSuffix != null) { + failWithMessage("Not output file ending in [%s] found at target location [%s].", + aPathSuffix, requestedTargetLocation); + } + else { + failWithMessage("Not output file found at target location [%s].", + requestedTargetLocation); + } + } + + if (files.size() > 1) { + if (aPathSuffix != null) { + failWithMessage( + "Expected single output file ending in [%s] at target location [%s] but " + + "found multiple: %s.", + aPathSuffix, requestedTargetLocation, files); + } + else { + failWithMessage( + "Expected single output file at target location [%s] but found multiple: %s.", + requestedTargetLocation, files); + } + + } + + return new StringAssert(Files.contentOf(files.get(0), StandardCharsets.UTF_8)); + } + /** + * Gets the output written to the target location as a file. This method fails if more than + * one output file was created or if no output was created. + *

+ * This method triggers the execution of the text pipeline. + * + * @return the output written to the target location as a file. + */ + public FileAssert asFile() + { + run(); + + List files = listTargetLocationFiles(); + + if (files.isEmpty()) { + failWithMessage("Not output found at target location [%s].", requestedTargetLocation); + } + + if (files.size() > 1) { + failWithMessage( + "Expected single output file at target location [%s] but found multiple: %s.", + requestedTargetLocation, files); + } + + return new FileAssert(files.get(0)); + } + /** + * Gets the files written to the target location. + *

+ * This method triggers the execution of the text pipeline. + * + * @return the files written to the target location. + */ + public ListAssert asFiles() + { + run(); + + return new ListAssert<>(listTargetLocationFiles()); + } + + protected void run() + { + configureWriter(); + + // Obtains the actual target location, also ensuring that it was actually defined. + Object actualTargetLocation = targetLocation(); + + LOG.debug("Writing to target location : {}", actualTargetLocation); + LOG.debug("- is singular target : {}", isSingularTarget()); + + AnalysisEngine writer = null; + try { + writer = createEngine(actual); + + for (JCas jcas : jcasIterable) { + writer.process(jcas); + } + + LifeCycleUtil.collectionProcessComplete(writer); + } + catch (Exception e) { + AssertionError error = Failures.instance().failure(String.format( + "Pipeline execution failed: %s", e.getMessage())); + error.initCause(e); + throw error; + } + finally { + LifeCycleUtil.destroy(writer); + } + } +}