core: refactor ProtoEncoder for Jelly-Patch

Ostrzyciel · Ostrzyciel · commit 19072fe45d72 · 2025-02-17T10:56:22.000+01:00
Related to: Jelly-RDF/jelly-protobuf#11. This introduces a few refactors around the ProtoEncoder to allow us to reuse its code in the core-patch module later. This includes: - Allowing NodeEncoder to append to anything that can consume lookup entries, via a dedicated interface (RowBufferAppender). - De-inlining the protected methods in ProtoEncoder. I don't think the inlining was working anyway: the JVM is smart enough to do inlining by itself, and the inlines were messing with public/private code guarantees. - Creating the core.internal package to group the messier internal classes together and keep the top-level package clean.
diff --git a/core/src/main/java/eu/ostrzyciel/jelly/core/NodeEncoder.java b/core/src/main/java/eu/ostrzyciel/jelly/core/NodeEncoder.java
@@ -1,24 +1,21 @@
 package eu.ostrzyciel.jelly.core;
 
+import eu.ostrzyciel.jelly.core.internal.RowBufferAppender;
 import eu.ostrzyciel.jelly.core.proto.v1.*;
 import scala.collection.mutable.Buffer;
 
 import java.util.LinkedHashMap;
+import java.util.function.Consumer;
 import java.util.function.Function;
 
 /**
  * Encodes RDF nodes native to the used RDF library (e.g., Apache Jena, RDF4J) into Jelly's protobuf objects.
  * This class performs a lot of caching to avoid encoding the same node multiple times. It is absolutely NOT
  * thread-safe, and should only be ever used by a single instance of ProtoEncoder.
- * 
- * <p>
- * This class is marked as public because make* methods in ProtoEncoder are inlined, and the inlining
- * requires the NodeEncoder to be public. Do NOT use this class outside of ProtoEncoder. It is not
- * considered part of the public API.
- * </p>
+ *
  * @param <TNode> The type of RDF nodes used by the RDF library.
  */
-public final class NodeEncoder<TNode> {
+final class NodeEncoder<TNode> {
     /**
      * A cached node that depends on other lookups (RdfIri and RdfLiteral in the datatype variant).
      */
@@ -61,6 +58,8 @@ protected boolean removeEldestEntry(java.util.Map.Entry<K, V> eldest) {
     private final EncoderLookup prefixLookup;
     private final EncoderLookup nameLookup;
 
+    private final RowBufferAppender bufferAppender;
+
     // We split the node caches in three – the first two are for nodes that depend on the lookups
     // (IRIs and datatype literals). The third one is for nodes that don't depend on the lookups.
     private final NodeCache<Object, DependentNode> iriNodeCache;
@@ -75,11 +74,18 @@ protected boolean removeEldestEntry(java.util.Map.Entry<K, V> eldest) {
     /**
      * Creates a new NodeEncoder.
      * @param opt Jelly RDF stream options
+     * @param bufferAppender consumer of the lookup entry rows
      * @param nodeCacheSize The size of the node cache (for nodes that don't depend on lookups)
      * @param iriNodeCacheSize The size of the IRI dependent node cache (for prefix+name encoding)
      * @param dtLiteralNodeCacheSize The size of the datatype literal dependent node cache
      */
-    public NodeEncoder(RdfStreamOptions opt, int nodeCacheSize, int iriNodeCacheSize, int dtLiteralNodeCacheSize) {
+    public NodeEncoder(
+        RdfStreamOptions opt,
+        RowBufferAppender bufferAppender,
+        int nodeCacheSize,
+        int iriNodeCacheSize,
+        int dtLiteralNodeCacheSize
+    ) {
         datatypeLookup = new EncoderLookup(opt.maxDatatypeTableSize(), true);
         this.maxPrefixTableSize = opt.maxPrefixTableSize();
         if (maxPrefixTableSize > 0) {
@@ -96,18 +102,18 @@ public NodeEncoder(RdfStreamOptions opt, int nodeCacheSize, int iriNodeCacheSize
         dtLiteralNodeCache = new NodeCache<>(dtLiteralNodeCacheSize);
         nameLookup = new EncoderLookup(opt.maxNameTableSize(), maxPrefixTableSize > 0);
         nodeCache = new NodeCache<>(nodeCacheSize);
+        this.bufferAppender = bufferAppender;
     }
 
     /**
      * Encodes a datatype literal using two layers of caching – both for the entire literal, and the datatype name.
      * @param key The literal key (the unencoded literal node)
      * @param lex The lexical form of the literal
      * @param datatypeName The name of the datatype
-     * @param rowsBuffer The buffer to which the new datatype entry should be appended
      * @return The encoded literal
      */
     public UniversalTerm encodeDtLiteral(
-            TNode key, String lex, String datatypeName, Buffer<RdfStreamRow> rowsBuffer
+        TNode key, String lex, String datatypeName
     ) {
         var cachedNode = dtLiteralNodeCache.computeIfAbsent(key, k -> new DependentNode());
         // Check if the value is still valid
@@ -121,15 +127,13 @@ public UniversalTerm encodeDtLiteral(
         // The node is not encoded, but we may already have the datatype encoded
         var dtEntry = datatypeLookup.getOrAddEntry(datatypeName);
         if (dtEntry.newEntry) {
-            rowsBuffer.append(new RdfStreamRow(
-                new RdfDatatypeEntry(dtEntry.setId, datatypeName)
-            ));
+            bufferAppender.appendLookupEntry(new RdfDatatypeEntry(dtEntry.setId, datatypeName));
         }
         int dtId = dtEntry.getId;
         cachedNode.lookupPointer1 = dtId;
         cachedNode.lookupSerial1 = datatypeLookup.serials[dtId];
         cachedNode.encoded = new RdfLiteral(
-                lex, new RdfLiteral$LiteralKind$Datatype(dtId)
+            lex, new RdfLiteral$LiteralKind$Datatype(dtId)
         );
 
         return cachedNode.encoded;
@@ -138,17 +142,14 @@ public UniversalTerm encodeDtLiteral(
     /**
      * Encodes an IRI using two layers of caching – both for the entire IRI, and the prefix and name tables.
      * @param iri The IRI to encode
-     * @param rowsBuffer The buffer to which the new name and prefix lookup entries should be appended
      * @return The encoded IRI
      */
-    public UniversalTerm encodeIri(String iri, Buffer<RdfStreamRow> rowsBuffer) {
+    public UniversalTerm encodeIri(String iri) {
         if (maxPrefixTableSize == 0) {
             // Fast path for no prefixes
             var nameEntry = nameLookup.getOrAddEntry(iri);
             if (nameEntry.newEntry) {
-                rowsBuffer.append(new RdfStreamRow(
-                        new RdfNameEntry(nameEntry.setId, iri)
-                ));
+                bufferAppender.appendLookupEntry(new RdfNameEntry(nameEntry.setId, iri));
             }
             int nameId = nameEntry.getId;
             if (lastIriNameId + 1 == nameId) {
@@ -192,14 +193,10 @@ public UniversalTerm encodeIri(String iri, Buffer<RdfStreamRow> rowsBuffer) {
         var prefixEntry = prefixLookup.getOrAddEntry(prefix);
         var nameEntry = nameLookup.getOrAddEntry(postfix);
         if (prefixEntry.newEntry) {
-            rowsBuffer.append(new RdfStreamRow(
-                new RdfPrefixEntry(prefixEntry.setId, prefix)
-            ));
+            bufferAppender.appendLookupEntry(new RdfPrefixEntry(prefixEntry.setId, prefix));
         }
         if (nameEntry.newEntry) {
-            rowsBuffer.append(new RdfStreamRow(
-                new RdfNameEntry(nameEntry.setId, postfix)
-            ));
+            bufferAppender.appendLookupEntry(new RdfNameEntry(nameEntry.setId, postfix));
         }
         int nameId = nameEntry.getId;
         int prefixId = prefixEntry.getId;
diff --git a/core/src/main/scala/eu/ostrzyciel/jelly/core/ProtoDecoderImpl.scala b/core/src/main/scala/eu/ostrzyciel/jelly/core/ProtoDecoderImpl.scala
@@ -2,6 +2,7 @@ package eu.ostrzyciel.jelly.core
 
 import eu.ostrzyciel.jelly.core.proto.v1.*
 import eu.ostrzyciel.jelly.core.ConverterFactory.NamespaceHandler
+import eu.ostrzyciel.jelly.core.internal.{DecoderLookup, LastNodeHolder}
 
 import scala.annotation.switch
 import scala.collection.mutable.ListBuffer
diff --git a/core/src/main/scala/eu/ostrzyciel/jelly/core/ProtoEncoder.scala b/core/src/main/scala/eu/ostrzyciel/jelly/core/ProtoEncoder.scala
@@ -1,5 +1,6 @@
 package eu.ostrzyciel.jelly.core
 
+import eu.ostrzyciel.jelly.core.internal.{LastNodeHolder, RowBufferAppender}
 import eu.ostrzyciel.jelly.core.proto.v1.*
 
 import scala.collection.mutable
@@ -37,7 +38,9 @@ object ProtoEncoder:
  * Take care to ensure the correctness of the transmitted data, or use the specialized wrappers from the stream package.
  * @param params parameters object for the encoder
  */
-abstract class ProtoEncoder[TNode, -TTriple, -TQuad, -TQuoted](params: ProtoEncoder.Params):
+abstract class ProtoEncoder[TNode, -TTriple, -TQuad, -TQuoted](params: ProtoEncoder.Params)
+  extends RowBufferAppender:
+
   import ProtoEncoder.*
 
   // *** 1. THE PUBLIC INTERFACE ***
@@ -147,7 +150,7 @@ abstract class ProtoEncoder[TNode, -TTriple, -TQuad, -TQuoted](params: ProtoEnco
   /**
    * Turn an RDF node (S, P, or O) into its protobuf representation.
    *
-   * Use the protected final inline make* methods in this class to create the nodes.
+   * Use the protected final make* methods in this class to create the nodes.
    *
    * @param node RDF node
    * @return the encoded term
@@ -158,7 +161,7 @@ abstract class ProtoEncoder[TNode, -TTriple, -TQuad, -TQuoted](params: ProtoEnco
   /**
    * Turn an RDF graph node into its protobuf representation.
    *
-   * Use the protected final inline make*Graph methods in this class to create the nodes.
+   * Use the protected final make*Graph methods in this class to create the nodes.
    *
    * @param node RDF graph node
    * @return the encoded term
@@ -169,25 +172,25 @@ abstract class ProtoEncoder[TNode, -TTriple, -TQuad, -TQuoted](params: ProtoEnco
 
   // *** 3. THE PROTECTED INTERFACE ***
   // **********************************
-  protected final inline def makeIriNode(iri: String): UniversalTerm =
-    nodeEncoder.encodeIri(iri, rowBuffer)
+  protected final def makeIriNode(iri: String): UniversalTerm =
+    nodeEncoder.encodeIri(iri)
 
-  protected final inline def makeBlankNode(label: String): UniversalTerm =
+  protected final def makeBlankNode(label: String): UniversalTerm =
     nodeEncoder.encodeOther(label, _ => RdfTerm.Bnode(label))
 
-  protected final inline def makeSimpleLiteral(lex: String): UniversalTerm =
+  protected final def makeSimpleLiteral(lex: String): UniversalTerm =
     nodeEncoder.encodeOther(lex, _ => RdfLiteral(lex, RdfLiteral.LiteralKind.Empty))
 
-  protected final inline def makeLangLiteral(lit: TNode, lex: String, lang: String): UniversalTerm =
+  protected final def makeLangLiteral(lit: TNode, lex: String, lang: String): UniversalTerm =
     nodeEncoder.encodeOther(lit, _ => RdfLiteral(lex, RdfLiteral.LiteralKind.Langtag(lang)))
 
-  protected final inline def makeDtLiteral(lit: TNode, lex: String, dt: String): UniversalTerm =
-    nodeEncoder.encodeDtLiteral(lit, lex, dt, rowBuffer)
+  protected final def makeDtLiteral(lit: TNode, lex: String, dt: String): UniversalTerm =
+    nodeEncoder.encodeDtLiteral(lit, lex, dt)
 
-  protected final inline def makeTripleNode(triple: TQuoted): RdfTriple =
+  protected final def makeTripleNode(triple: TQuoted): RdfTriple =
     quotedToProto(triple)
 
-  protected final inline def makeDefaultGraph: RdfDefaultGraph =
+  protected final def makeDefaultGraph: RdfDefaultGraph =
     RdfDefaultGraph.defaultInstance
 
   // *** 3. PRIVATE FIELDS AND METHODS ***
@@ -197,6 +200,7 @@ abstract class ProtoEncoder[TNode, -TTriple, -TQuad, -TQuoted](params: ProtoEnco
   private val iResponsibleForBufferClear: Boolean = maybeRowBuffer.isEmpty
   private val nodeEncoder = new NodeEncoder[TNode](
     options,
+    this, // RowBufferAppender
     // Make the node cache size between 256 and 1024, depending on the user's maxNameTableSize.
     Math.max(Math.min(options.maxNameTableSize, 1024), 256),
     options.maxNameTableSize,
@@ -209,6 +213,9 @@ abstract class ProtoEncoder[TNode, -TTriple, -TQuad, -TQuoted](params: ProtoEnco
   private val lastObject: LastNodeHolder[TNode] = new LastNodeHolder()
   private var lastGraph: TNode | LastNodeHolder.NoValue.type = LastNodeHolder.NoValue
 
+  private[core] override def appendLookupEntry(entry: RdfLookupEntryRowValue): Unit =
+    rowBuffer.append(RdfStreamRow(entry))
+
   private def nodeToProtoWrapped(node: TNode, lastNodeHolder: LastNodeHolder[TNode]): SpoTerm =
     if node.equals(lastNodeHolder.node) then null
     else
diff --git a/core/src/main/scala/eu/ostrzyciel/jelly/core/internal/DecoderLookup.scala b/core/src/main/scala/eu/ostrzyciel/jelly/core/internal/DecoderLookup.scala
@@ -1,4 +1,4 @@
-package eu.ostrzyciel.jelly.core
+package eu.ostrzyciel.jelly.core.internal
 
 import scala.reflect.ClassTag
 
diff --git a/core/src/main/scala/eu/ostrzyciel/jelly/core/internal/LastNodeHolder.scala b/core/src/main/scala/eu/ostrzyciel/jelly/core/internal/LastNodeHolder.scala
@@ -1,4 +1,4 @@
-package eu.ostrzyciel.jelly.core
+package eu.ostrzyciel.jelly.core.internal
 
 private[core] object LastNodeHolder:
   /**
diff --git a/core/src/main/scala/eu/ostrzyciel/jelly/core/internal/RowBufferAppender.scala b/core/src/main/scala/eu/ostrzyciel/jelly/core/internal/RowBufferAppender.scala
@@ -0,0 +1,9 @@
+package eu.ostrzyciel.jelly.core.internal
+
+import eu.ostrzyciel.jelly.core.proto.v1.RdfLookupEntryRowValue
+
+/**
+ * Internal trait for appending lookup entries to the row buffer.
+ */
+private[core] trait RowBufferAppender:
+  private[core] def appendLookupEntry(entry: RdfLookupEntryRowValue): Unit
diff --git a/core/src/main/scala/eu/ostrzyciel/jelly/core/proto/v1/RdfStreamRowValue.scala b/core/src/main/scala/eu/ostrzyciel/jelly/core/proto/v1/RdfStreamRowValue.scala
@@ -23,3 +23,5 @@ private[core] trait RdfStreamRowValue:
   def name: RdfNameEntry = null
   def prefix: RdfPrefixEntry = null
   def datatype: RdfDatatypeEntry = null
+
+private[core] trait RdfLookupEntryRowValue extends RdfStreamRowValue
diff --git a/core/src/test/scala/eu/ostrzyciel/jelly/core/NodeEncoderSpec.scala b/core/src/test/scala/eu/ostrzyciel/jelly/core/NodeEncoderSpec.scala
diff --git a/project/Transform3.scala b/project/Transform3.scala

| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | | `@@ -1,4 +1,4 @@` |
| `1` | | `-package eu.ostrzyciel.jelly.core` |
| | `1` | `+package eu.ostrzyciel.jelly.core.internal` |
| `2` | `2` | |
| `3` | `3` | `import scala.reflect.ClassTag` |
| `4` | `4` | |