Skip to content

Commit d272018

Browse files
gather kdocs
1 parent 2bf037a commit d272018

File tree

2 files changed

+203
-0
lines changed

2 files changed

+203
-0
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,132 @@ import org.jetbrains.kotlinx.dataframe.RowValueFilter
66
import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
77
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
88
import org.jetbrains.kotlinx.dataframe.annotations.Refine
9+
import org.jetbrains.kotlinx.dataframe.api.GatherDocs.Grammar
10+
import org.jetbrains.kotlinx.dataframe.api.Update.UPDATE_OPERATION
11+
import org.jetbrains.kotlinx.dataframe.api.notNull
12+
import org.jetbrains.kotlinx.dataframe.api.where
913
import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor
1014
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
1115
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
1216
import org.jetbrains.kotlinx.dataframe.impl.api.gatherImpl
1317
import org.jetbrains.kotlinx.dataframe.impl.columnName
1418
import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
19+
import org.jetbrains.kotlinx.dataframe.documentation.*
1520
import kotlin.reflect.KProperty
1621
import kotlin.reflect.KType
1722
import kotlin.reflect.typeOf
1823

1924
// region gather
2025

26+
/**
27+
* Reshapes the [DataFrame] by gathering specified [\columns] into two columns: keys and values
28+
* (or into just one of them).
29+
*
30+
* By default, the "key" column contains the names of the gathered columns,
31+
* and the "value" column holds the corresponding cell values.
32+
* The original columns selected for gathering are removed from the result,
33+
* while all other columns remain unchanged —
34+
* except that their values are duplicated for each generated key-value pair.
35+
*
36+
* This function does not perform the reshaping immediately — it returns
37+
* a [Gather] object, which serves as an intermediate step.
38+
* Before applying the final transformation, you may:
39+
* - filter values ([where][Gather.where], [notNull][Gather.notNull]);
40+
* - explode list-columns ([explodeLists][Gather.explodeLists]);
41+
* - transform keys ([mapKeys][Gather.mapKeys]) or values ([mapValues][Gather.mapValues]);
42+
* - cast the value type ([cast][Gather.cast]).
43+
*
44+
* Finally, reshape the DataFrame using one of the following methods:
45+
* - [into][Gather.into]
46+
* - [keysInto][Gather.keysInto]
47+
* - [valuesInto][Gather.valuesInto]
48+
*
49+
* Each of these methods returns a new reshaped [DataFrame].
50+
*
51+
* This operation is the reverse of [pivot].
52+
*
53+
* @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
54+
*
55+
* For more information: {@include [DocumentationUrls.Gather]}
56+
*
57+
* See [Grammar].
58+
*/
59+
internal interface GatherDocs {
60+
61+
/**
62+
* ## {@get [GATHER_OPERATION]} Operation Grammar
63+
* {@include [LineBreak]}
64+
* {@include [DslGrammarLink]}
65+
* {@include [LineBreak]}
66+
*
67+
* **[`gather`][gather]****` { `**`columnsSelector: `[`ColumnsSelector`][ColumnsSelector]**` }`**
68+
*
69+
* {@include [Indent]}
70+
* `\[ `__`.`__[**`explodeLists`**][Gather.explodeLists]**`() `**`]`
71+
*
72+
* {@include [Indent]}
73+
* `\[ `__`.`__[**`cast`**][Gather.cast]**`<T>() `**`]`
74+
*
75+
* {@include [Indent]}
76+
* `\[ `__`.`__[**`notNull`**][Gather.notNull]**`() `**`]`
77+
*
78+
* {@include [Indent]}
79+
* `\[ `__`.`__[**`where`**][Gather.where]**` { `**`filter: `[`RowValueFilter`][RowValueFilter]**` } `**`]`
80+
*
81+
* {@include [Indent]}
82+
* `\[ `__`.`__[**`mapKeys`**][Gather.mapKeys]**` { `**`transform: (`[`String`][String]**`) -> K } `**`]`
83+
*
84+
* {@include [Indent]}
85+
* `\[ `__`.`__[**`mapValues`**][Gather.mapValues]**` { `**`transform: (`**`C`**`) -> R`**` } `**`]`
86+
*
87+
* {@include [Indent]}
88+
* __`.`__[**`into`**][Gather.into]**`(`**`keyColumn: `[`String`][String]**`, `**`valueColumn: `[`String`][String]**`)`**
89+
*
90+
* {@include [Indent]}
91+
* `| `__`.`__[**`keysInto`**][Gather.keysInto]**`(`**`keyColumn: `[`String`][String]**`)`**
92+
*
93+
* {@include [Indent]}
94+
* `| `__`.`__[**`valuesInto`**][Gather.valuesInto]**`(`**`valueColumn: `[`String`][String]**`)`**
95+
* {@set [GATHER_OPERATION] [**`gather`**][gather]}{@comment The default name of the `gather` operation function.}
96+
*/
97+
interface Grammar
98+
99+
/*
100+
* This argument provides the (clickable) name of the gather-like function.
101+
* Note: If clickable, make sure to [alias][your type].
102+
*/
103+
@Suppress("ClassName")
104+
@ExcludeFromSources
105+
interface GATHER_OPERATION
106+
}
107+
108+
/** {@set [SelectingColumns.OPERATION] [gather][gather]} */
109+
@ExcludeFromSources
110+
private interface SetGatherOperationArg
111+
112+
/**
113+
* {@include [GatherDocs]}
114+
* ### This Gather Overload
115+
*/
116+
@ExcludeFromSources
117+
private interface CommonGatherDocs
118+
119+
/**
120+
* @include [CommonGatherDocs]
121+
* @include [SelectingColumns.Dsl] {@include [SetGatherOperationArg]}
122+
* ### Examples:
123+
* ```kotlin
124+
* // Gather `resultA` and `resultB` columns into a single "value" column,
125+
* // with the "series" column containing a key derived from the last letter
126+
* // of the corresponding original column name (i.e., 'A' or 'B').
127+
* df.gather { resultA and resultB }.mapKeys { it.last() }.into("series", "value")
128+
*
129+
* // Gather values of all `String` columns (at any depth)
130+
* // into a single "tag" column, omitting the key column.
131+
* df.gather { colsOf<String>() }.valuesInto("tag")
132+
* ```
133+
* @param [selector] The [Columns Selector][ColumnsSelector] used to select the columns of this [DataFrame] to gather.
134+
*/
21135
@Interpretable("Gather0")
22136
public fun <T, C> DataFrame<T>.gather(selector: ColumnsSelector<T, C>): Gather<T, C, String, C> =
23137
Gather(
@@ -29,6 +143,15 @@ public fun <T, C> DataFrame<T>.gather(selector: ColumnsSelector<T, C>): Gather<T
29143
valueTransform = null,
30144
)
31145

146+
/**
147+
* @include [CommonGatherDocs]
148+
* @include [SelectingColumns.ColumnNames] {@include [SetGatherOperationArg]}
149+
* ### Example:
150+
* ```kotlin
151+
* df.gather("resultA", "resultB").mapKeys { it.last() }.into("series", "value")
152+
* ```
153+
* @param [columns] The [Column Names][String] used to select the columns of this [DataFrame] to gather.
154+
*/
32155
public fun <T> DataFrame<T>.gather(vararg columns: String): Gather<T, Any?, String, Any?> =
33156
gather { columns.toColumnSet() }
34157

@@ -44,6 +167,34 @@ public fun <T, C> DataFrame<T>.gather(vararg columns: KProperty<C>): Gather<T, C
44167

45168
// endregion
46169

170+
/**
171+
* Filters values in columns previously selected by [gather] using a [filter][RowValueFilter].
172+
*
173+
* [RowValueFilter] provides each value as a lambda argument, allowing you
174+
* to filter rows using a Boolean condition.
175+
*
176+
* It's an intermediate step; returns a new [Gather] with filtered value columns.
177+
*
178+
* For more information: {@include [DocumentationUrls.Gather]}
179+
*
180+
* See [Grammar].
181+
*
182+
* ### Examples
183+
* ```kotlin
184+
* // Gather `resultA` and `resultB` columns into a single "value" column,
185+
* // with the "series" column containing a key.
186+
* // Only values greater than or equal to `pValue` are included.
187+
* df.gather { resultA and resultB }.where { it >= pValue }.into("series", "value")
188+
*
189+
* // Gather values of all `String` columns (at any nesting level)
190+
* // into a single "tag" column, omitting the key column.
191+
* // Only non-empty strings are included.
192+
* df.gather { colsOf<String>() }.where { it.isNotEmpty() }.valuesInto("tag")
193+
* ```
194+
*
195+
* @param filter The [RowValueFilter] used to specify the filtering condition for gathered values.
196+
* @return A new [Gather] with the filtered rows.
197+
*/
47198
@Interpretable("GatherWhere")
48199
public fun <T, C, K, R> Gather<T, C, K, R>.where(filter: RowValueFilter<T, C>): Gather<T, C, K, R> =
49200
Gather(
@@ -56,9 +207,58 @@ public fun <T, C, K, R> Gather<T, C, K, R>.where(filter: RowValueFilter<T, C>):
56207
explode = explode,
57208
)
58209

210+
/**
211+
* Filters out `null` values from the columns previously selected by [gather],
212+
* keeping only non-null entries.
213+
*
214+
* A special case of [where].
215+
*
216+
* It's an intermediate step; returns a new [Gather] with filtered value columns.
217+
*
218+
* For more information: {@include [DocumentationUrls.Gather]}
219+
*
220+
* See [Grammar].
221+
*
222+
* ### Example
223+
* ```kotlin
224+
* // Gather `resultA` and `resultB` columns into a single "value" column,
225+
* // with the "series" column containing a key.
226+
* // If these columns contain nullable `Double` values, `notNull` filters out nulls,
227+
* // allowing subsequent transformations like `mapValues` to treat values as non-null `Double`.
228+
* df.gather { resultA and resultB }
229+
* .notNull()
230+
* .mapValues { (it + 0.5).toFloat() }
231+
* .into("series", "value")
232+
* ```
233+
* @return A new [Gather] instance with only non-null values retained.
234+
*/
59235
@Interpretable("GatherChangeType")
60236
public fun <T, C, K, R> Gather<T, C?, K, R>.notNull(): Gather<T, C, K, R> = where { it != null } as Gather<T, C, K, R>
61237

238+
/**
239+
* Explodes [List] values in the columns previously selected by [gather] into their elements.
240+
*
241+
* After this step, [where], [notNull], [cast] and [mapValues] are applied to list elements instead of whole lists.
242+
*
243+
* It's an intermediate step; returns a new [Gather] with list explosion enabled.
244+
*
245+
* For more information: {@include [DocumentationUrls.Gather]}
246+
*
247+
* See [Grammar].
248+
*
249+
* ### Example
250+
* ```kotlin
251+
* // Gather `resultA` and `resultB` columns into a single "value" column,
252+
* // with the "series" column containing a key.
253+
* // If these columns contain `List<Double>` values, `explodeLists` splits every list into its elements,
254+
* // and `cast` then declares the element type of the gathered values.
255+
* df.gather { resultA and resultB }
256+
* .explodeLists()
257+
* .cast<Double>()
258+
* .into("series", "value")
259+
* ```
260+
* @return A new [Gather] instance in which list values are exploded into their elements.
261+
*/
62262
@Interpretable("GatherExplodeLists")
63263
public fun <T, C, K, R> Gather<T, C, K, R>.explodeLists(): Gather<T, C, K, R> =
64264
Gather(

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,4 +101,7 @@ internal interface DocumentationUrls {
101101

102102
/** [See `convert` on the documentation website.]({@include [Url]}/convert.html) */
103103
interface Convert
104+
105+
/** [See `gather` on the documentation website.]({@include [Url]}/gather.html) */
106+
interface Gather
104107
}

0 commit comments

Comments
 (0)