Skip to content

Commit 19072fe

Browse files
committed
core: refactor ProtoEncoder for Jelly-Patch
Related to: Jelly-RDF/jelly-protobuf#11
This introduces a few refactors around the ProtoEncoder to allow us to reuse its code in the core-patch module later. This includes:
- Allowing NodeEncoder to append to anything that can consume lookup entries, via a dedicated interface.
- De-inlining the protected methods in ProtoEncoder. I don't think the inlining was working anyway. The JVM is smart enough to do inlining by itself, and the inlines were messing with public/private code guarantees.
- Creating the core.internal package to group the messier internal classes together and keep the top-level package clean.
1 parent 381c2c3 commit 19072fe

File tree

9 files changed

+90
-69
lines changed

9 files changed

+90
-69
lines changed

core/src/main/java/eu/ostrzyciel/jelly/core/NodeEncoder.java

+22-25
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,21 @@
11
package eu.ostrzyciel.jelly.core;
22

3+
import eu.ostrzyciel.jelly.core.internal.RowBufferAppender;
34
import eu.ostrzyciel.jelly.core.proto.v1.*;
45
import scala.collection.mutable.Buffer;
56

67
import java.util.LinkedHashMap;
8+
import java.util.function.Consumer;
79
import java.util.function.Function;
810

911
/**
1012
* Encodes RDF nodes native to the used RDF library (e.g., Apache Jena, RDF4J) into Jelly's protobuf objects.
1113
* This class performs a lot of caching to avoid encoding the same node multiple times. It is absolutely NOT
1214
* thread-safe, and should only ever be used by a single instance of ProtoEncoder.
13-
*
14-
* <p>
15-
* This class is marked as public because make* methods in ProtoEncoder are inlined, and the inlining
16-
* requires the NodeEncoder to be public. Do NOT use this class outside of ProtoEncoder. It is not
17-
* considered part of the public API.
18-
* </p>
15+
*
1916
* @param <TNode> The type of RDF nodes used by the RDF library.
2017
*/
21-
public final class NodeEncoder<TNode> {
18+
final class NodeEncoder<TNode> {
2219
/**
2320
* A cached node that depends on other lookups (RdfIri and RdfLiteral in the datatype variant).
2421
*/
@@ -61,6 +58,8 @@ protected boolean removeEldestEntry(java.util.Map.Entry<K, V> eldest) {
6158
private final EncoderLookup prefixLookup;
6259
private final EncoderLookup nameLookup;
6360

61+
private final RowBufferAppender bufferAppender;
62+
6463
// We split the node caches in three – the first two are for nodes that depend on the lookups
6564
// (IRIs and datatype literals). The third one is for nodes that don't depend on the lookups.
6665
private final NodeCache<Object, DependentNode> iriNodeCache;
@@ -75,11 +74,18 @@ protected boolean removeEldestEntry(java.util.Map.Entry<K, V> eldest) {
7574
/**
7675
* Creates a new NodeEncoder.
7776
* @param opt Jelly RDF stream options
77+
* @param bufferAppender consumer of the lookup entry rows
7878
* @param nodeCacheSize The size of the node cache (for nodes that don't depend on lookups)
7979
* @param iriNodeCacheSize The size of the IRI dependent node cache (for prefix+name encoding)
8080
* @param dtLiteralNodeCacheSize The size of the datatype literal dependent node cache
8181
*/
82-
public NodeEncoder(RdfStreamOptions opt, int nodeCacheSize, int iriNodeCacheSize, int dtLiteralNodeCacheSize) {
82+
public NodeEncoder(
83+
RdfStreamOptions opt,
84+
RowBufferAppender bufferAppender,
85+
int nodeCacheSize,
86+
int iriNodeCacheSize,
87+
int dtLiteralNodeCacheSize
88+
) {
8389
datatypeLookup = new EncoderLookup(opt.maxDatatypeTableSize(), true);
8490
this.maxPrefixTableSize = opt.maxPrefixTableSize();
8591
if (maxPrefixTableSize > 0) {
@@ -96,18 +102,18 @@ public NodeEncoder(RdfStreamOptions opt, int nodeCacheSize, int iriNodeCacheSize
96102
dtLiteralNodeCache = new NodeCache<>(dtLiteralNodeCacheSize);
97103
nameLookup = new EncoderLookup(opt.maxNameTableSize(), maxPrefixTableSize > 0);
98104
nodeCache = new NodeCache<>(nodeCacheSize);
105+
this.bufferAppender = bufferAppender;
99106
}
100107

101108
/**
102109
* Encodes a datatype literal using two layers of caching – both for the entire literal, and the datatype name.
103110
* @param key The literal key (the unencoded literal node)
104111
* @param lex The lexical form of the literal
105112
* @param datatypeName The name of the datatype
106-
* @param rowsBuffer The buffer to which the new datatype entry should be appended
107113
* @return The encoded literal
108114
*/
109115
public UniversalTerm encodeDtLiteral(
110-
TNode key, String lex, String datatypeName, Buffer<RdfStreamRow> rowsBuffer
116+
TNode key, String lex, String datatypeName
111117
) {
112118
var cachedNode = dtLiteralNodeCache.computeIfAbsent(key, k -> new DependentNode());
113119
// Check if the value is still valid
@@ -121,15 +127,13 @@ public UniversalTerm encodeDtLiteral(
121127
// The node is not encoded, but we may already have the datatype encoded
122128
var dtEntry = datatypeLookup.getOrAddEntry(datatypeName);
123129
if (dtEntry.newEntry) {
124-
rowsBuffer.append(new RdfStreamRow(
125-
new RdfDatatypeEntry(dtEntry.setId, datatypeName)
126-
));
130+
bufferAppender.appendLookupEntry(new RdfDatatypeEntry(dtEntry.setId, datatypeName));
127131
}
128132
int dtId = dtEntry.getId;
129133
cachedNode.lookupPointer1 = dtId;
130134
cachedNode.lookupSerial1 = datatypeLookup.serials[dtId];
131135
cachedNode.encoded = new RdfLiteral(
132-
lex, new RdfLiteral$LiteralKind$Datatype(dtId)
136+
lex, new RdfLiteral$LiteralKind$Datatype(dtId)
133137
);
134138

135139
return cachedNode.encoded;
@@ -138,17 +142,14 @@ public UniversalTerm encodeDtLiteral(
138142
/**
139143
* Encodes an IRI using two layers of caching – both for the entire IRI, and the prefix and name tables.
140144
* @param iri The IRI to encode
141-
* @param rowsBuffer The buffer to which the new name and prefix lookup entries should be appended
142145
* @return The encoded IRI
143146
*/
144-
public UniversalTerm encodeIri(String iri, Buffer<RdfStreamRow> rowsBuffer) {
147+
public UniversalTerm encodeIri(String iri) {
145148
if (maxPrefixTableSize == 0) {
146149
// Fast path for no prefixes
147150
var nameEntry = nameLookup.getOrAddEntry(iri);
148151
if (nameEntry.newEntry) {
149-
rowsBuffer.append(new RdfStreamRow(
150-
new RdfNameEntry(nameEntry.setId, iri)
151-
));
152+
bufferAppender.appendLookupEntry(new RdfNameEntry(nameEntry.setId, iri));
152153
}
153154
int nameId = nameEntry.getId;
154155
if (lastIriNameId + 1 == nameId) {
@@ -192,14 +193,10 @@ public UniversalTerm encodeIri(String iri, Buffer<RdfStreamRow> rowsBuffer) {
192193
var prefixEntry = prefixLookup.getOrAddEntry(prefix);
193194
var nameEntry = nameLookup.getOrAddEntry(postfix);
194195
if (prefixEntry.newEntry) {
195-
rowsBuffer.append(new RdfStreamRow(
196-
new RdfPrefixEntry(prefixEntry.setId, prefix)
197-
));
196+
bufferAppender.appendLookupEntry(new RdfPrefixEntry(prefixEntry.setId, prefix));
198197
}
199198
if (nameEntry.newEntry) {
200-
rowsBuffer.append(new RdfStreamRow(
201-
new RdfNameEntry(nameEntry.setId, postfix)
202-
));
199+
bufferAppender.appendLookupEntry(new RdfNameEntry(nameEntry.setId, postfix));
203200
}
204201
int nameId = nameEntry.getId;
205202
int prefixId = prefixEntry.getId;

core/src/main/scala/eu/ostrzyciel/jelly/core/ProtoDecoderImpl.scala

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package eu.ostrzyciel.jelly.core
22

33
import eu.ostrzyciel.jelly.core.proto.v1.*
44
import eu.ostrzyciel.jelly.core.ConverterFactory.NamespaceHandler
5+
import eu.ostrzyciel.jelly.core.internal.{DecoderLookup, LastNodeHolder}
56

67
import scala.annotation.switch
78
import scala.collection.mutable.ListBuffer

core/src/main/scala/eu/ostrzyciel/jelly/core/ProtoEncoder.scala

+19-12
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package eu.ostrzyciel.jelly.core
22

3+
import eu.ostrzyciel.jelly.core.internal.{LastNodeHolder, RowBufferAppender}
34
import eu.ostrzyciel.jelly.core.proto.v1.*
45

56
import scala.collection.mutable
@@ -37,7 +38,9 @@ object ProtoEncoder:
3738
* Take care to ensure the correctness of the transmitted data, or use the specialized wrappers from the stream package.
3839
* @param params parameters object for the encoder
3940
*/
40-
abstract class ProtoEncoder[TNode, -TTriple, -TQuad, -TQuoted](params: ProtoEncoder.Params):
41+
abstract class ProtoEncoder[TNode, -TTriple, -TQuad, -TQuoted](params: ProtoEncoder.Params)
42+
extends RowBufferAppender:
43+
4144
import ProtoEncoder.*
4245

4346
// *** 1. THE PUBLIC INTERFACE ***
@@ -147,7 +150,7 @@ abstract class ProtoEncoder[TNode, -TTriple, -TQuad, -TQuoted](params: ProtoEnco
147150
/**
148151
* Turn an RDF node (S, P, or O) into its protobuf representation.
149152
*
150-
* Use the protected final inline make* methods in this class to create the nodes.
153+
* Use the protected final make* methods in this class to create the nodes.
151154
*
152155
* @param node RDF node
153156
* @return the encoded term
@@ -158,7 +161,7 @@ abstract class ProtoEncoder[TNode, -TTriple, -TQuad, -TQuoted](params: ProtoEnco
158161
/**
159162
* Turn an RDF graph node into its protobuf representation.
160163
*
161-
* Use the protected final inline make*Graph methods in this class to create the nodes.
164+
* Use the protected final make*Graph methods in this class to create the nodes.
162165
*
163166
* @param node RDF graph node
164167
* @return the encoded term
@@ -169,25 +172,25 @@ abstract class ProtoEncoder[TNode, -TTriple, -TQuad, -TQuoted](params: ProtoEnco
169172

170173
// *** 3. THE PROTECTED INTERFACE ***
171174
// **********************************
172-
protected final inline def makeIriNode(iri: String): UniversalTerm =
173-
nodeEncoder.encodeIri(iri, rowBuffer)
175+
protected final def makeIriNode(iri: String): UniversalTerm =
176+
nodeEncoder.encodeIri(iri)
174177

175-
protected final inline def makeBlankNode(label: String): UniversalTerm =
178+
protected final def makeBlankNode(label: String): UniversalTerm =
176179
nodeEncoder.encodeOther(label, _ => RdfTerm.Bnode(label))
177180

178-
protected final inline def makeSimpleLiteral(lex: String): UniversalTerm =
181+
protected final def makeSimpleLiteral(lex: String): UniversalTerm =
179182
nodeEncoder.encodeOther(lex, _ => RdfLiteral(lex, RdfLiteral.LiteralKind.Empty))
180183

181-
protected final inline def makeLangLiteral(lit: TNode, lex: String, lang: String): UniversalTerm =
184+
protected final def makeLangLiteral(lit: TNode, lex: String, lang: String): UniversalTerm =
182185
nodeEncoder.encodeOther(lit, _ => RdfLiteral(lex, RdfLiteral.LiteralKind.Langtag(lang)))
183186

184-
protected final inline def makeDtLiteral(lit: TNode, lex: String, dt: String): UniversalTerm =
185-
nodeEncoder.encodeDtLiteral(lit, lex, dt, rowBuffer)
187+
protected final def makeDtLiteral(lit: TNode, lex: String, dt: String): UniversalTerm =
188+
nodeEncoder.encodeDtLiteral(lit, lex, dt)
186189

187-
protected final inline def makeTripleNode(triple: TQuoted): RdfTriple =
190+
protected final def makeTripleNode(triple: TQuoted): RdfTriple =
188191
quotedToProto(triple)
189192

190-
protected final inline def makeDefaultGraph: RdfDefaultGraph =
193+
protected final def makeDefaultGraph: RdfDefaultGraph =
191194
RdfDefaultGraph.defaultInstance
192195

193196
// *** 4. PRIVATE FIELDS AND METHODS ***
@@ -197,6 +200,7 @@ abstract class ProtoEncoder[TNode, -TTriple, -TQuad, -TQuoted](params: ProtoEnco
197200
private val iResponsibleForBufferClear: Boolean = maybeRowBuffer.isEmpty
198201
private val nodeEncoder = new NodeEncoder[TNode](
199202
options,
203+
this, // RowBufferAppender
200204
// Make the node cache size between 256 and 1024, depending on the user's maxNameTableSize.
201205
Math.max(Math.min(options.maxNameTableSize, 1024), 256),
202206
options.maxNameTableSize,
@@ -209,6 +213,9 @@ abstract class ProtoEncoder[TNode, -TTriple, -TQuad, -TQuoted](params: ProtoEnco
209213
private val lastObject: LastNodeHolder[TNode] = new LastNodeHolder()
210214
private var lastGraph: TNode | LastNodeHolder.NoValue.type = LastNodeHolder.NoValue
211215

216+
private[core] override def appendLookupEntry(entry: RdfLookupEntryRowValue): Unit =
217+
rowBuffer.append(RdfStreamRow(entry))
218+
212219
private def nodeToProtoWrapped(node: TNode, lastNodeHolder: LastNodeHolder[TNode]): SpoTerm =
213220
if node.equals(lastNodeHolder.node) then null
214221
else

core/src/main/scala/eu/ostrzyciel/jelly/core/DecoderLookup.scala core/src/main/scala/eu/ostrzyciel/jelly/core/internal/DecoderLookup.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package eu.ostrzyciel.jelly.core
1+
package eu.ostrzyciel.jelly.core.internal
22

33
import scala.reflect.ClassTag
44

core/src/main/scala/eu/ostrzyciel/jelly/core/LastNodeHolder.scala core/src/main/scala/eu/ostrzyciel/jelly/core/internal/LastNodeHolder.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package eu.ostrzyciel.jelly.core
1+
package eu.ostrzyciel.jelly.core.internal
22

33
private[core] object LastNodeHolder:
44
/**
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
package eu.ostrzyciel.jelly.core.internal
2+
3+
import eu.ostrzyciel.jelly.core.proto.v1.RdfLookupEntryRowValue
4+
5+
/**
6+
* Internal trait for appending lookup entries to the row buffer.
7+
*/
8+
private[core] trait RowBufferAppender:
9+
private[core] def appendLookupEntry(entry: RdfLookupEntryRowValue): Unit

core/src/main/scala/eu/ostrzyciel/jelly/core/proto/v1/RdfStreamRowValue.scala

+2
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,5 @@ private[core] trait RdfStreamRowValue:
2323
def name: RdfNameEntry = null
2424
def prefix: RdfPrefixEntry = null
2525
def datatype: RdfDatatypeEntry = null
26+
27+
private[core] trait RdfLookupEntryRowValue extends RdfStreamRowValue

0 commit comments

Comments
 (0)