diff --git a/.github/workflows/dotnetcore.yml b/.github/workflows/dotnetcore.yml index d8ed894..0cae18d 100644 --- a/.github/workflows/dotnetcore.yml +++ b/.github/workflows/dotnetcore.yml @@ -2,7 +2,7 @@ name: .NET Core on: push: - branches: [ main ] + branches: [ main, develop ] jobs: build: @@ -10,11 +10,11 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Setup .NET Core - uses: actions/setup-dotnet@v1 + uses: actions/setup-dotnet@v4 with: - dotnet-version: 5.0.402 + dotnet-version: 8.0.110 - name: Install dependencies run: dotnet restore working-directory: ./Conllu diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..5c7247b --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,7 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [] +} \ No newline at end of file diff --git a/Conllu/Conllu/Conllu.csproj b/Conllu/Conllu/Conllu.csproj index 378d878..0bb1149 100644 --- a/Conllu/Conllu/Conllu.csproj +++ b/Conllu/Conllu/Conllu.csproj @@ -1,17 +1,17 @@ - net5.0 + net8.0 Conllu Arthur Hemmer A lightweight C# CoNLL-U parser. - Arthur Hemmer 2021 + Arthur Hemmer 2024 https://github.com/ArthurDevNL/CoNLL-U/blob/main/LICENSE https://github.com/ArthurDevNL/CoNLL-U Git CONLLU;NLP;PARSER CoNLL-U - 1.1.1 + 1.1.2 diff --git a/Conllu/Conllu/Extensions/StringExtensions.cs b/Conllu/Conllu/Extensions/StringExtensions.cs index 68f800d..0cab404 100644 --- a/Conllu/Conllu/Extensions/StringExtensions.cs +++ b/Conllu/Conllu/Extensions/StringExtensions.cs @@ -5,11 +5,17 @@ namespace Conllu.Extensions { internal static class StringExtensions { - public static string? ValueOrNull(this string? x) + public static string ValueOrNull(this string x) => x == "_" ? null : x; - public static string ValueOrUnderscore(this string? x) - => x?.Trim().IsNullOrEmpty() == false ? x : "_"; + public static string ValueOrUnderscore(this string x) + { + if (x is null) + return "_"; + if (x.Trim().IsNullOrEmpty()) + return "_"; + return x; + } public static IEnumerable SplitLines(this string x) => x.Split( diff --git a/Conllu/Conllu/Token.cs b/Conllu/Conllu/Token.cs index 6f68628..801f7e1 100644 --- a/Conllu/Conllu/Token.cs +++ b/Conllu/Conllu/Token.cs @@ -183,7 +183,7 @@ public override int GetHashCode() return Identifier.GetHashCode(); } - public override bool Equals(object? obj) + public override bool Equals(object obj) { if (obj is Token t) return Identifier.Equals(t.Identifier); diff --git a/Conllu/Conllu/TokenIdentifier.cs b/Conllu/Conllu/TokenIdentifier.cs index 97255f7..98e1386 100644 --- a/Conllu/Conllu/TokenIdentifier.cs +++ b/Conllu/Conllu/TokenIdentifier.cs @@ -84,7 +84,7 @@ public int CompareTo(TokenIdentifier other) return Id.CompareTo(other.Id); } - public override bool Equals(object? obj) + public override bool Equals(object obj) { if (obj is TokenIdentifier ti) return CompareTo(ti) == 0; diff --git a/Conllu/Conllu/Tree.cs b/Conllu/Conllu/Tree.cs index d8edc10..a4afc50 100644 --- a/Conllu/Conllu/Tree.cs +++ b/Conllu/Conllu/Tree.cs @@ -81,7 +81,7 @@ public override int GetHashCode() return HashCode.Combine(Value.GetHashCode(), Connection.GetHashCode(), Children.GetHashCode()); } - public override bool Equals(object? obj) + public override bool Equals(object obj) { if (obj is Tree t) return Value.Equals(t.Value) && Connection.Equals(t.Connection) && Children.SequenceEqual(t.Children); diff --git a/Conllu/ConlluTests/ConlluTests.cs b/Conllu/ConlluTests/ConlluTests.cs index 7b3e3bc..d296e03 100644 --- a/Conllu/ConlluTests/ConlluTests.cs +++ b/Conllu/ConlluTests/ConlluTests.cs @@ -26,30 +26,30 @@ public void TestParse() var text = reader.ReadToEnd(); var result = ConlluParser.ParseText(text).ToList(); - Assert.AreEqual(1, result.Count); + Assert.That(result.Count, Is.EqualTo(1)); var sentence = result.First(); - Assert.AreEqual(sentence.Tokens.Count, 10); - Assert.False(sentence.IsEmpty()); + Assert.That(sentence.Tokens.Count, Is.EqualTo(10)); + Assert.That(sentence.IsEmpty(), Is.False); var token = sentence.Tokens.First(); - Assert.AreEqual("The", token.Form); - Assert.AreEqual("the", token.Lemma); - Assert.AreEqual("DET", token.Upos); - Assert.AreEqual(PosTag.Det, token.UposEnum); - Assert.AreEqual("DT", token.Xpos); - Assert.AreEqual(2, token.Feats.Count); - Assert.IsTrue(token.Feats.ContainsKey("Definite")); - Assert.AreEqual("Def", token.Feats["Definite"]); - Assert.IsTrue(token.Feats.ContainsKey("PronType")); - Assert.AreEqual("Art", token.Feats["PronType"]); - Assert.AreEqual(4, token.Head); - Assert.AreEqual("det", token.DepRel); - Assert.AreEqual(DependencyRelation.Det, token.DepRelEnum); - Assert.IsNotNull(token.Deps); - Assert.IsTrue(!token.Deps.Any()); - Assert.IsNull(token.Misc); - Assert.AreEqual("1 The the DET DT Definite=Def|PronType=Art 4 det _ _", token.RawLine); + Assert.That(token.Form, Is.EqualTo("The")); + Assert.That(token.Lemma, Is.EqualTo("the")); + Assert.That(token.Upos, Is.EqualTo("DET")); + Assert.That(token.UposEnum, Is.EqualTo(PosTag.Det)); + Assert.That(token.Xpos, Is.EqualTo("DT")); + Assert.That(token.Feats.Count, Is.EqualTo(2)); + Assert.That(token.Feats.ContainsKey("Definite"), Is.True); + Assert.That(token.Feats["Definite"], Is.EqualTo("Def")); + Assert.That(token.Feats.ContainsKey("PronType"), Is.True); + Assert.That(token.Feats["PronType"], Is.EqualTo("Art")); + Assert.That(token.Head, Is.EqualTo(4)); + Assert.That(token.DepRel, Is.EqualTo("det")); + Assert.That(token.DepRelEnum, Is.EqualTo(DependencyRelation.Det)); + Assert.That(token.Deps, Is.Not.Null); + Assert.That(token.Deps.Any(), Is.False); + Assert.That(token.Misc, Is.Null); + Assert.That(token.RawLine, Is.EqualTo("1 The the DET DT Definite=Def|PronType=Art 4 det _ _")); } [Test] @@ -61,11 +61,11 @@ public void TestSentenceMetadata() using var reader = new StreamReader(stream); var text = reader.ReadToEnd(); var result = ConlluParser.ParseText(text).ToList(); - Assert.AreEqual(1, result.Count); + Assert.That(result.Count, Is.EqualTo(1)); var s = result.First(); - Assert.IsTrue(s.Metadata.ContainsKey("text")); - Assert.AreEqual("The quick brown fox jumps over the lazy dog.", s.Metadata["text"]); + Assert.That(s.Metadata.ContainsKey("text"), Is.True); + Assert.That(s.Metadata["text"], Is.EqualTo("The quick brown fox jumps over the lazy dog.")); } [Test] @@ -77,9 +77,9 @@ public void TestParseLargeFile() using var reader = new StreamReader(stream); var text = reader.ReadToEnd(); var result = ConlluParser.ParseText(text).ToList(); - Assert.AreEqual(2002, result.Count); - Assert.IsTrue(result.All(x => !x.IsEmpty())); - Assert.IsTrue(result.All(s => s.AsDependencyTree() != null)); + Assert.That(result.Count, Is.EqualTo(2002)); + Assert.That(result.All(x => !x.IsEmpty()), Is.True); + Assert.That(result.All(s => s.AsDependencyTree() != null), Is.True); } [Test] @@ -111,17 +111,17 @@ public void TestSerializeParse() using var reader = new StreamReader(stream); var text = reader.ReadToEnd(); var textLines = text.SplitLines(); - Assert.AreEqual(textLines, serializedLines); + Assert.That(textLines, Is.EqualTo(serializedLines)); // Re-parse var parsed = ConlluParser.ParseText(text).ToList(); - Assert.AreEqual(1, parsed.Count); + Assert.That(parsed.Count, Is.EqualTo(1)); var s = parsed.First(); - Assert.AreEqual(10, s.Tokens.Count); - Assert.AreEqual(1, s.Metadata.Count); - Assert.IsTrue(s.Metadata.ContainsKey("text")); - Assert.AreEqual("The quick brown fox jumps over the lazy dog.", s.Metadata["text"]); + Assert.That(s.Tokens.Count, Is.EqualTo(10)); + Assert.That(s.Metadata.Count, Is.EqualTo(1)); + Assert.That(s.Metadata.ContainsKey("text"), Is.True); + Assert.That(s.Metadata["text"], Is.EqualTo("The quick brown fox jumps over the lazy dog.")); } [Test] @@ -134,14 +134,14 @@ public void TestCreateParseTree() var text = reader.ReadToEnd(); var result = ConlluParser.ParseText(text).FirstOrDefault(); - Assert.NotNull(result); + Assert.That(result, Is.Not.Null); var tree = result.AsDependencyTree(); - Assert.AreEqual(5, tree.Value.Id); - Assert.AreEqual("jumps", tree.Value.Form); - Assert.AreEqual(3, tree.Children.Count()); - Assert.AreEqual(3, tree.Children.ToList()[0].Children.Count()); - Assert.AreEqual(3, tree.Children.ToList()[1].Children.Count()); - Assert.AreEqual(0, tree.Children.ToList()[2].Children.Count()); + Assert.That(tree.Value.Id, Is.EqualTo(5)); + Assert.That(tree.Value.Form, Is.EqualTo("jumps")); + Assert.That(tree.Children.Count(), Is.EqualTo(3)); + Assert.That(tree.Children.ToList()[0].Children.Count(), Is.EqualTo(3)); + Assert.That(tree.Children.ToList()[1].Children.Count(), Is.EqualTo(3)); + Assert.That(tree.Children.ToList()[2].Children.Count(), Is.EqualTo(0)); } [Test] @@ -161,7 +161,7 @@ public void TestRawTokenSequence() Token.FromLine("10 . . PUNCT . _ 5 punct _ _") }); - Assert.AreEqual("The quick brown fox jumps over the lazy dog.", sentence.RawTokenSequence()); + Assert.That(sentence.RawTokenSequence(), Is.EqualTo("The quick brown fox jumps over the lazy dog.")); } [Test] @@ -175,7 +175,7 @@ public void TestDependencyTreeSubTypes() var result = ConlluParser.ParseText(text).ToList(); var s = result[150]; - Assert.AreEqual(4, s.AsDependencyTree().Children.Count); + Assert.That(s.AsDependencyTree().Children.Count, Is.EqualTo(4)); } [Test] @@ -183,44 +183,44 @@ public void TestCompareTokenIdentifiers() { var ti1 = new TokenIdentifier("1"); var ti2 = new TokenIdentifier("2"); - Assert.IsTrue(ti1 < ti2); - Assert.IsTrue(ti2 > ti1); - Assert.IsTrue(ti1 <= ti2); - Assert.IsTrue(ti2 >= ti1); - Assert.IsFalse(ti1 == ti2); + Assert.That(ti1 < ti2, Is.True); + Assert.That(ti2 > ti1, Is.True); + Assert.That(ti1 <= ti2, Is.True); + Assert.That(ti2 >= ti1, Is.True); + Assert.That(ti1 == ti2, Is.False); var ti13 = new TokenIdentifier("1-3"); var ti4 = new TokenIdentifier("4.1"); - Assert.IsTrue(ti13.IsInRange(ti2.Id)); - Assert.IsFalse(ti13.IsInRange(ti4.Id)); - Assert.IsTrue(ti13.IsMultiwordIndex); + Assert.That(ti13.IsInRange(ti2.Id), Is.True); + Assert.That(ti13.IsInRange(ti4.Id), Is.False); + Assert.That(ti13.IsMultiwordIndex, Is.True); var ti14 = new TokenIdentifier("1-4"); - Assert.IsTrue(ti14 != ti13); + Assert.That(ti14 != ti13, Is.True); var ti42 = new TokenIdentifier("4.2"); - Assert.IsTrue(ti4 != ti42); - Assert.IsTrue(ti4 != ti1); + Assert.That(ti4 != ti42, Is.True); + Assert.That(ti4 != ti1, Is.True); - Assert.AreNotEqual(ti1, ti2); - Assert.IsFalse(ti1.Equals(ti2)); + Assert.That(ti1 != ti2, Is.True); + Assert.That(ti1.Equals(ti2), Is.False); var ti1Again = new TokenIdentifier("1"); - Assert.AreEqual(ti1, ti1Again); - Assert.IsTrue(ti1.Equals(ti1Again)); + Assert.That(ti1 == ti1Again, Is.True); + Assert.That(ti1.Equals(ti1Again), Is.True); } [Test] public void TestIdentifierSerialize() { var ti1 = new TokenIdentifier("1"); - Assert.AreEqual(ti1.Serialize(), "1"); + Assert.That(ti1.Serialize(), Is.EqualTo("1")); var ti13 = new TokenIdentifier("1-3"); - Assert.AreEqual(ti13.Serialize(), "1-3"); + Assert.That(ti13.Serialize(), Is.EqualTo("1-3")); var ti4 = new TokenIdentifier("4.1"); - Assert.AreEqual(ti4.Serialize(), "4.1"); + Assert.That(ti4.Serialize(), Is.EqualTo("4.1")); } [Test] @@ -256,11 +256,11 @@ public void TestTreeEquality() var s1Tree = s1.AsDependencyTree(); var s2Tree = s2.AsDependencyTree(); - Assert.IsTrue(s1Tree.Equals(s2Tree)); + Assert.That(s1Tree.Equals(s2Tree), Is.True); var t = Token.FromLine("11 test test ADJ JJ Degree=Pos 9 amod _ _"); s1Tree.AddChild(new Tree(t, DependencyRelation.Cc)); - Assert.IsFalse(s1Tree.Equals(s2Tree)); + Assert.That(s1Tree.Equals(s2Tree), Is.False); } [Test] @@ -282,12 +282,12 @@ public void TestTreeWhere() var s1Tree = s1.AsDependencyTree(); var over = s1Tree.WhereBfs(x => x.Form == "over").FirstOrDefault(); - Assert.IsNotNull(over); - Assert.AreEqual(over.Value.Id, 6); + Assert.That(over, Is.Not.Null); + Assert.That(over.Value.Id, Is.EqualTo(6)); var lazy = s1Tree.WhereDfs(x => x.DepRelEnum == DependencyRelation.Amod).FirstOrDefault(); - Assert.IsNotNull(lazy); - Assert.AreEqual(lazy.Value.Id, 8); + Assert.That(lazy, Is.Not.Null); + Assert.That(lazy.Value.Id, Is.EqualTo(8)); } } } \ No newline at end of file diff --git a/Conllu/ConlluTests/ConlluTests.csproj b/Conllu/ConlluTests/ConlluTests.csproj index 0730bad..9aceb5b 100644 --- a/Conllu/ConlluTests/ConlluTests.csproj +++ b/Conllu/ConlluTests/ConlluTests.csproj @@ -1,14 +1,14 @@ - net5.0 + net8.0 false - - - + + + diff --git a/Conllu/ConlluTests/EnumerableExtensions.cs b/Conllu/ConlluTests/EnumerableExtensions.cs index 280e435..97af4aa 100644 --- a/Conllu/ConlluTests/EnumerableExtensions.cs +++ b/Conllu/ConlluTests/EnumerableExtensions.cs @@ -5,13 +5,13 @@ namespace Conllu.Extensions { internal static class EnumerableExtensions { - public static bool IsNullOrEmpty(this IEnumerable? list) + public static bool IsNullOrEmpty(this IEnumerable list) => list == null || !list.Any(); - public static IEnumerable EmptyIfNull(this IEnumerable? list) + public static IEnumerable EmptyIfNull(this IEnumerable list) => list ?? Enumerable.Empty(); - public static IEnumerable? WhereNotNull(this IEnumerable? list) + public static IEnumerable WhereNotNull(this IEnumerable list) => list?.Where(x => x != null).Cast(); } } \ No newline at end of file diff --git a/Conllu/global.json b/Conllu/global.json new file mode 100644 index 0000000..2326795 --- /dev/null +++ b/Conllu/global.json @@ -0,0 +1,5 @@ +{ + "sdk": { + "version": "8.0.110" + } +} \ No newline at end of file