Skip to content

Reusage schemas fix #1252

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jun 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 25 additions & 8 deletions core/api/core.api
Original file line number Diff line number Diff line change
Expand Up @@ -5940,7 +5940,7 @@ public final class org/jetbrains/kotlinx/dataframe/impl/io/FastDoubleParser {

public final class org/jetbrains/kotlinx/dataframe/impl/schema/DataFrameSchemaImpl : org/jetbrains/kotlinx/dataframe/schema/DataFrameSchema {
public fun <init> (Ljava/util/Map;)V
public fun compare (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;Z)Lorg/jetbrains/kotlinx/dataframe/schema/CompareResult;
public fun compare (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;Lorg/jetbrains/kotlinx/dataframe/schema/ComparisonMode;)Lorg/jetbrains/kotlinx/dataframe/schema/CompareResult;
public fun equals (Ljava/lang/Object;)Z
public fun getColumns ()Ljava/util/Map;
public fun hashCode ()I
Expand Down Expand Up @@ -6636,18 +6636,20 @@ public final class org/jetbrains/kotlinx/dataframe/math/SumKt {
}

public abstract class org/jetbrains/kotlinx/dataframe/schema/ColumnSchema {
public fun <init> ()V
public final fun compare (Lorg/jetbrains/kotlinx/dataframe/schema/ColumnSchema;)Lorg/jetbrains/kotlinx/dataframe/schema/CompareResult;
public final fun compare (Lorg/jetbrains/kotlinx/dataframe/schema/ColumnSchema;Lorg/jetbrains/kotlinx/dataframe/schema/ComparisonMode;)Lorg/jetbrains/kotlinx/dataframe/schema/CompareResult;
public static synthetic fun compare$default (Lorg/jetbrains/kotlinx/dataframe/schema/ColumnSchema;Lorg/jetbrains/kotlinx/dataframe/schema/ColumnSchema;Lorg/jetbrains/kotlinx/dataframe/schema/ComparisonMode;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/schema/CompareResult;
public fun equals (Ljava/lang/Object;)Z
public abstract fun getContentType ()Lkotlin/reflect/KType;
public abstract fun getKind ()Lorg/jetbrains/kotlinx/dataframe/columns/ColumnKind;
public abstract fun getNullable ()Z
public abstract fun getType ()Lkotlin/reflect/KType;
public fun hashCode ()I
}

public final class org/jetbrains/kotlinx/dataframe/schema/ColumnSchema$Frame : org/jetbrains/kotlinx/dataframe/schema/ColumnSchema {
public fun <init> (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;ZLkotlin/reflect/KType;)V
public final fun compare (Lorg/jetbrains/kotlinx/dataframe/schema/ColumnSchema$Frame;)Lorg/jetbrains/kotlinx/dataframe/schema/CompareResult;
public final fun compare (Lorg/jetbrains/kotlinx/dataframe/schema/ColumnSchema$Frame;Lorg/jetbrains/kotlinx/dataframe/schema/ComparisonMode;)Lorg/jetbrains/kotlinx/dataframe/schema/CompareResult;
public static synthetic fun compare$default (Lorg/jetbrains/kotlinx/dataframe/schema/ColumnSchema$Frame;Lorg/jetbrains/kotlinx/dataframe/schema/ColumnSchema$Frame;Lorg/jetbrains/kotlinx/dataframe/schema/ComparisonMode;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/schema/CompareResult;
public fun getContentType ()Lkotlin/reflect/KType;
public fun getKind ()Lorg/jetbrains/kotlinx/dataframe/columns/ColumnKind;
public fun getNullable ()Z
Expand All @@ -6657,7 +6659,8 @@ public final class org/jetbrains/kotlinx/dataframe/schema/ColumnSchema$Frame : o

public final class org/jetbrains/kotlinx/dataframe/schema/ColumnSchema$Group : org/jetbrains/kotlinx/dataframe/schema/ColumnSchema {
public fun <init> (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;Lkotlin/reflect/KType;)V
public final fun compare (Lorg/jetbrains/kotlinx/dataframe/schema/ColumnSchema$Group;)Lorg/jetbrains/kotlinx/dataframe/schema/CompareResult;
public final fun compare (Lorg/jetbrains/kotlinx/dataframe/schema/ColumnSchema$Group;Lorg/jetbrains/kotlinx/dataframe/schema/ComparisonMode;)Lorg/jetbrains/kotlinx/dataframe/schema/CompareResult;
public static synthetic fun compare$default (Lorg/jetbrains/kotlinx/dataframe/schema/ColumnSchema$Group;Lorg/jetbrains/kotlinx/dataframe/schema/ColumnSchema$Group;Lorg/jetbrains/kotlinx/dataframe/schema/ComparisonMode;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/schema/CompareResult;
public fun getContentType ()Lkotlin/reflect/KType;
public fun getKind ()Lorg/jetbrains/kotlinx/dataframe/columns/ColumnKind;
public fun getNullable ()Z
Expand All @@ -6667,7 +6670,8 @@ public final class org/jetbrains/kotlinx/dataframe/schema/ColumnSchema$Group : o

public final class org/jetbrains/kotlinx/dataframe/schema/ColumnSchema$Value : org/jetbrains/kotlinx/dataframe/schema/ColumnSchema {
public fun <init> (Lkotlin/reflect/KType;)V
public final fun compare (Lorg/jetbrains/kotlinx/dataframe/schema/ColumnSchema$Value;)Lorg/jetbrains/kotlinx/dataframe/schema/CompareResult;
public final fun compare (Lorg/jetbrains/kotlinx/dataframe/schema/ColumnSchema$Value;Lorg/jetbrains/kotlinx/dataframe/schema/ComparisonMode;)Lorg/jetbrains/kotlinx/dataframe/schema/CompareResult;
public static synthetic fun compare$default (Lorg/jetbrains/kotlinx/dataframe/schema/ColumnSchema$Value;Lorg/jetbrains/kotlinx/dataframe/schema/ColumnSchema$Value;Lorg/jetbrains/kotlinx/dataframe/schema/ComparisonMode;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/schema/CompareResult;
public fun getContentType ()Lkotlin/reflect/KType;
public fun getKind ()Lorg/jetbrains/kotlinx/dataframe/columns/ColumnKind;
public fun getNullable ()Z
Expand All @@ -6692,9 +6696,22 @@ public final class org/jetbrains/kotlinx/dataframe/schema/CompareResult$Companio
public final fun compareNullability (ZZ)Lorg/jetbrains/kotlinx/dataframe/schema/CompareResult;
}

public final class org/jetbrains/kotlinx/dataframe/schema/CompareResultKt {
public static final fun plus (Lorg/jetbrains/kotlinx/dataframe/schema/CompareResult;Lorg/jetbrains/kotlinx/dataframe/schema/CompareResult;)Lorg/jetbrains/kotlinx/dataframe/schema/CompareResult;
}

public final class org/jetbrains/kotlinx/dataframe/schema/ComparisonMode : java/lang/Enum {
public static final field LENIENT Lorg/jetbrains/kotlinx/dataframe/schema/ComparisonMode;
public static final field STRICT Lorg/jetbrains/kotlinx/dataframe/schema/ComparisonMode;
public static final field STRICT_FOR_NESTED_SCHEMAS Lorg/jetbrains/kotlinx/dataframe/schema/ComparisonMode;
public static fun getEntries ()Lkotlin/enums/EnumEntries;
public static fun valueOf (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/schema/ComparisonMode;
public static fun values ()[Lorg/jetbrains/kotlinx/dataframe/schema/ComparisonMode;
}

public abstract interface class org/jetbrains/kotlinx/dataframe/schema/DataFrameSchema {
public abstract fun compare (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;Z)Lorg/jetbrains/kotlinx/dataframe/schema/CompareResult;
public static synthetic fun compare$default (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/schema/CompareResult;
public abstract fun compare (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;Lorg/jetbrains/kotlinx/dataframe/schema/ComparisonMode;)Lorg/jetbrains/kotlinx/dataframe/schema/CompareResult;
public static synthetic fun compare$default (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;Lorg/jetbrains/kotlinx/dataframe/schema/ComparisonMode;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/schema/CompareResult;
public abstract fun getColumns ()Ljava/util/Map;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,9 @@ internal fun MutableMap<ColumnPath, Int>.putColumnsOrder(schema: DataFrameSchema
val columnPath = path + name
this[columnPath] = i
when (column) {
is ColumnSchema.Frame -> {
putColumnsOrder(column.schema, columnPath)
}

is ColumnSchema.Group -> {
putColumnsOrder(column.schema, columnPath)
}
is ColumnSchema.Frame -> putColumnsOrder(column.schema, columnPath)
is ColumnSchema.Group -> putColumnsOrder(column.schema, columnPath)
is ColumnSchema.Value -> Unit
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,13 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
import org.jetbrains.kotlinx.dataframe.impl.toSnakeCase
import org.jetbrains.kotlinx.dataframe.keywords.HardKeywords
import org.jetbrains.kotlinx.dataframe.keywords.ModifierKeywords
import org.jetbrains.kotlinx.dataframe.schema.ComparisonMode
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema

private fun renderNullability(nullable: Boolean) = if (nullable) "?" else ""

internal fun Iterable<Marker>.filterRequiredForSchema(schema: DataFrameSchema) =
filter { it.isOpen && it.schema.compare(schema).isSuperOrEqual() }
filter { it.isOpen && it.schema.compare(schema, ComparisonMode.STRICT_FOR_NESTED_SCHEMAS).isSuperOrEqual() }

internal val charsToQuote = """[ `(){}\[\].<>'"/|\\!?@:;%^&*#$-]""".toRegex()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import org.jetbrains.kotlinx.dataframe.codeGen.MarkerVisibility
import org.jetbrains.kotlinx.dataframe.codeGen.SchemaProcessor
import org.jetbrains.kotlinx.dataframe.codeGen.ValidFieldName
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import org.jetbrains.kotlinx.dataframe.schema.ComparisonMode
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema

internal class SchemaProcessorImpl(
Expand All @@ -23,7 +24,7 @@ internal class SchemaProcessorImpl(

private fun DataFrameSchema.getAllSuperMarkers() =
registeredMarkers
.filter { it.isOpen && it.schema.compare(this).isSuperOrEqual() }
.filter { it.isOpen && it.schema.compare(this, ComparisonMode.STRICT_FOR_NESTED_SCHEMAS).isSuperOrEqual() }

private fun List<Marker>.onlyLeafs(): List<Marker> {
val skip = flatMap { it.allSuperMarkers.keys }.toSet()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,34 +3,58 @@ package org.jetbrains.kotlinx.dataframe.impl.schema
import org.jetbrains.kotlinx.dataframe.impl.renderType
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import org.jetbrains.kotlinx.dataframe.schema.CompareResult
import org.jetbrains.kotlinx.dataframe.schema.CompareResult.Equals
import org.jetbrains.kotlinx.dataframe.schema.CompareResult.IsDerived
import org.jetbrains.kotlinx.dataframe.schema.CompareResult.IsSuper
import org.jetbrains.kotlinx.dataframe.schema.CompareResult.None
import org.jetbrains.kotlinx.dataframe.schema.ComparisonMode
import org.jetbrains.kotlinx.dataframe.schema.ComparisonMode.STRICT
import org.jetbrains.kotlinx.dataframe.schema.ComparisonMode.STRICT_FOR_NESTED_SCHEMAS
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
import org.jetbrains.kotlinx.dataframe.schema.plus
import kotlin.collections.forEach

public class DataFrameSchemaImpl(override val columns: Map<String, ColumnSchema>) : DataFrameSchema {

override fun compare(other: DataFrameSchema, strictlyEqualNestedSchemas: Boolean): CompareResult {
override fun compare(other: DataFrameSchema, comparisonMode: ComparisonMode): CompareResult {
require(other is DataFrameSchemaImpl)
if (this === other) return CompareResult.Equals
var result = CompareResult.Equals
columns.forEach {
val otherColumn = other.columns[it.key]
if (otherColumn == null) {
result = result.combine(if (strictlyEqualNestedSchemas) CompareResult.None else CompareResult.IsDerived)
} else {
result = result.combine(it.value.compareStrictlyEqualNestedSchemas(otherColumn))
if (this === other) return Equals

var result: CompareResult = Equals

// check for each column in this schema if there is a column with the same name in the other schema
// - if so, check those schemas for equality, taking comparisonMode into account
// - if not, consider the other schema derived from this (or unrelated (None) if comparisonMode == STRICT)
this.columns.forEach { (thisColName, thisSchema) ->
val otherSchema = other.columns[thisColName]
result += when {
otherSchema != null -> {
// increase comparisonMode strictness when dealing with nested schemas of FrameColumns or ColumnGroups
val newComparisonMode =
if (comparisonMode == STRICT_FOR_NESTED_SCHEMAS && thisSchema !is ColumnSchema.Value) {
STRICT
} else {
comparisonMode
}

thisSchema.compare(other = otherSchema, comparisonMode = newComparisonMode)
}

else -> if (comparisonMode == STRICT) None else IsDerived
}
if (result == CompareResult.None) return CompareResult.None
if (result == None) return None
}
other.columns.forEach {
val thisField = columns[it.key]
if (thisField == null) {
result = result.combine(if (strictlyEqualNestedSchemas) CompareResult.None else CompareResult.IsSuper)
if (result == CompareResult.None) return CompareResult.None
}
// then check for each column in the other schema if there is a column with the same name in this schema
// if not, consider the other schema as super to this (or unrelated (None) if comparisonMode == STRICT)
other.columns.forEach { (otherColName, _) ->
if (this.columns[otherColName] != null) return@forEach
result += if (comparisonMode == STRICT) None else IsSuper
if (result == None) return None
}
return result
}

override fun equals(other: Any?): Boolean = other is DataFrameSchema && compare(other).isEqual()
override fun equals(other: Any?): Boolean = other is DataFrameSchema && this.compare(other).isEqual()

override fun toString(): String = render()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@ import org.jetbrains.kotlinx.dataframe.AnyRow
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.columns.ColumnKind
import org.jetbrains.kotlinx.dataframe.schema.ComparisonMode.LENIENT
import org.jetbrains.kotlinx.dataframe.schema.ComparisonMode.STRICT
import kotlin.reflect.KType
import kotlin.reflect.full.isSubtypeOf
import kotlin.reflect.full.isSupertypeOf
import kotlin.reflect.typeOf

public abstract class ColumnSchema {
public sealed class ColumnSchema {

/** Either [Value] or [Group] or [Frame]. */
public abstract val kind: ColumnKind
Expand Down Expand Up @@ -39,9 +41,10 @@ public abstract class ColumnSchema {
override val nullable: Boolean = type.isMarkedNullable
override val contentType: KType? = null

public fun compare(other: Value): CompareResult =
public fun compare(other: Value, comparisonMode: ComparisonMode = LENIENT): CompareResult =
when {
type == other.type -> CompareResult.Equals
comparisonMode == STRICT -> CompareResult.None
type.isSubtypeOf(other.type) -> CompareResult.IsDerived
type.isSupertypeOf(other.type) -> CompareResult.IsSuper
else -> CompareResult.None
Expand All @@ -55,10 +58,11 @@ public abstract class ColumnSchema {
override val nullable: Boolean = false
override val type: KType get() = typeOf<AnyRow>()

public fun compare(other: Group): CompareResult = schema.compare(other.schema)

internal fun compareStrictlyEqualNestedSchemas(other: Group): CompareResult =
schema.compare(other.schema, strictlyEqualNestedSchemas = true)
public fun compare(other: Group, comparisonMode: ComparisonMode = LENIENT): CompareResult =
schema.compare(
other = other.schema,
comparisonMode = comparisonMode,
)
}

public class Frame(
Expand All @@ -69,14 +73,11 @@ public abstract class ColumnSchema {
public override val kind: ColumnKind = ColumnKind.Frame
override val type: KType get() = typeOf<AnyFrame>()

public fun compare(other: Frame): CompareResult =
schema.compare(other.schema).combine(CompareResult.compareNullability(nullable, other.nullable))

internal fun compareStrictlyEqualNestedSchemas(other: Frame): CompareResult =
public fun compare(other: Frame, comparisonMode: ComparisonMode = LENIENT): CompareResult =
schema.compare(
other.schema,
strictlyEqualNestedSchemas = true,
).combine(CompareResult.compareNullability(nullable, other.nullable))
other = other.schema,
comparisonMode = comparisonMode,
) + CompareResult.compareNullability(thisIsNullable = nullable, otherIsNullable = other.nullable)
}

/** Checks equality just on kind, type, or schema. */
Expand All @@ -88,37 +89,27 @@ public abstract class ColumnSchema {
is Value -> type == (otherType as Value).type
is Group -> schema == (otherType as Group).schema
is Frame -> schema == (otherType as Frame).schema
else -> throw NotImplementedError()
}
}

public fun compare(other: ColumnSchema): CompareResult = compare(other, false)

internal fun compareStrictlyEqualNestedSchemas(other: ColumnSchema): CompareResult = compare(other, true)

private fun compare(other: ColumnSchema, strictlyEqualNestedSchemas: Boolean): CompareResult {
public fun compare(other: ColumnSchema, comparisonMode: ComparisonMode = LENIENT): CompareResult {
if (kind != other.kind) return CompareResult.None
if (this === other) return CompareResult.Equals
return when (this) {
is Value -> compare(other as Value)

is Group -> if (strictlyEqualNestedSchemas) {
compareStrictlyEqualNestedSchemas(
other as Group,
)
} else {
compare(other as Group)
}

is Frame -> if (strictlyEqualNestedSchemas) {
compareStrictlyEqualNestedSchemas(
other as Frame,
)
} else {
compare(other as Frame)
}
is Value -> compare(other as Value, comparisonMode)
is Group -> compare(other as Group, comparisonMode)
is Frame -> compare(other as Frame, comparisonMode)
}
}

else -> throw NotImplementedError()
/**
 * Hash code built from [nullable], [kind] and the kind-specific payload:
 * the value type for [Value], the nested schema for [Group] and [Frame].
 */
override fun hashCode(): Int {
    val header = 31 * nullable.hashCode() + kind.hashCode()
    // The `when` is exhaustive over the sealed hierarchy, so no `else` branch is needed.
    val payload = when (this) {
        is Value -> type.hashCode()
        is Group -> schema.hashCode()
        is Frame -> schema.hashCode()
    }
    return 31 * header + payload
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,5 @@ public enum class CompareResult {
}
}
}

/**
 * Operator form of [CompareResult.combine]: `a + b` is the same as `a.combine(b)`.
 *
 * @param other the result to combine with this one.
 * @return whatever [CompareResult.combine] returns for this receiver and [other].
 */
public operator fun CompareResult.plus(other: CompareResult): CompareResult = combine(other)
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package org.jetbrains.kotlinx.dataframe.schema

/**
 * Controls how strictly two schemas are compared by the `compare` functions of
 * [DataFrameSchema] and [ColumnSchema].
 */
public enum class ComparisonMode {
/**
 * In this mode, all [CompareResults][CompareResult] can occur.
 *
 * A column that exists only in this schema contributes [CompareResult.IsDerived] to the result;
 * a column that exists only in the other schema contributes [CompareResult.IsSuper].
 * Value columns whose types differ are compared by their subtype/supertype relation.
 *
 * This is the default mode of [ColumnSchema.compare].
 */
LENIENT,

/**
 * Columns must all be present in the other schema with the same name and type.
 *
 * A column that is missing on either side, or a value column whose type is not equal to the
 * other's, yields [CompareResult.None] instead of [CompareResult.IsDerived] or [CompareResult.IsSuper].
 */
STRICT,

/**
 * Works like [LENIENT] at the top-level, but turns to [STRICT] for nested schemas,
 * i.e. for the schemas inside column groups and frame columns.
 */
STRICT_FOR_NESTED_SCHEMAS,
}
Loading