Skip to content

Commit 47291fe

Browse files
gather kdocs complete
1 parent 3eca678 commit 47291fe

File tree

1 file changed

+222
-14
lines changed
  • core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api

1 file changed

+222
-14
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt

Lines changed: 222 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -119,14 +119,14 @@ private interface CommonGatherDocs
119119
/**
120120
* @include [CommonGatherDocs]
121121
* @include [SelectingColumns.Dsl] {@include [SetGatherOperationArg]}
122-
* ### Examples:
122+
* ### Examples
123123
* ```kotlin
124124
* // Gather `resultA` and `resultB` columns into a single "value" column,
125125
* // with the "series" column containing a key derived from the last letter
126126
* // of the corresponding original column name (i.e., 'A' or 'B').
127127
* df.gather { resultA and resultB }.mapKeys { it.last() }.into("series", "value")
128128
*
129-
* // Gather values of all `String` columns (at any depth)
129+
* // Gather values of all `String` columns
130130
* // into a single "tag" column, omitting the key column.
131131
* df.gather { colsOf<String>() }.valuesInto("tag")
132132
* ```
@@ -146,7 +146,7 @@ public fun <T, C> DataFrame<T>.gather(selector: ColumnsSelector<T, C>): Gather<T
146146
/**
147147
* @include [CommonGatherDocs]
148148
* @include [SelectingColumns.ColumnNames] {@include [SetGatherOperationArg]}
149-
* ### Example:
149+
* ### Example
150150
* ```kotlin
151151
* df.gather("resultA", "resultB").mapKeys { it.last() }.into("series", "value")
152152
* ```
@@ -186,7 +186,7 @@ public fun <T, C> DataFrame<T>.gather(vararg columns: KProperty<C>): Gather<T, C
186186
* // Only values greater than or equal to `pValue` are included.
187187
* df.gather { resultA and resultB }.where { it >= pValue }.into("series", "value")
188188
*
189-
* // Gather values of all `String` columns (at any nesting level)
189+
* // Gather values of all `String` columns
190190
* // into a single "tag" column, omitting the key column.
191191
* // Only non-empty strings are included.
192192
* df.gather { colsOf<String>() }.where { it.isNotEmpty() }.valuesInto("tag")
@@ -223,7 +223,7 @@ public fun <T, C, K, R> Gather<T, C, K, R>.where(filter: RowValueFilter<T, C>):
223223
* ```kotlin
224224
* // Gather `resultA` and `resultB` columns into a single "value" column,
225225
* // with the "series" column containing a key.
226-
* // If these columns contain nullable `Double` values, `notNull` filters out nulls,
226+
* // Assuming these columns contain nullable `Double` values, `notNull` filters out nulls,
227227
* // allowing subsequent transformations like `mapValues` to treat values as non-null `Double`.
228228
* df.gather { resultA and resultB }
229229
* .notNull()
@@ -236,28 +236,35 @@ public fun <T, C, K, R> Gather<T, C, K, R>.where(filter: RowValueFilter<T, C>):
236236
public fun <T, C, K, R> Gather<T, C?, K, R>.notNull(): Gather<T, C, K, R> {
    // Keep only the gathered values that are not null.
    val withoutNulls = where { value -> value != null }
    // The filter above excludes nulls, so the value type parameter is narrowed from `C?` to `C`.
    return withoutNulls as Gather<T, C, K, R>
}
237237

238238
/**
239-
* Explode values.
239+
* Explodes [List] values in the columns previously selected by [gather].
240240
*
241-
* A special case of [where].
241+
* If not all values are lists (for example, if one column contains `Double` values and
242+
* another contains `List<Double>`), only the list values will be exploded — non-list values remain unchanged.
242243
*
243-
* It's an intermediate step; returns a new [Gather] with filtered value columns.
244+
* After explosion, operations like [where], [notNull], and [mapValues] are applied to individual list elements
245+
* rather than to the lists themselves. To enable this, the resulting type should be explicitly specified using [cast].
244246
*
245-
* For more information: {@include [DocumentationUrls.Gather]}
247+
* This is an intermediate step; returns a new [Gather] with exploded values.
246248
*
247-
* See [Grammar].
249+
* For more information, see: {@include [DocumentationUrls.Gather]}
250+
*
251+
* See also: [Grammar].
248252
*
249253
* ### Example
250254
* ```kotlin
251255
* // Gather `resultA` and `resultB` columns into a single "value" column,
252256
* // with the "series" column containing a key.
253-
* // If these columns contain nullable `Double` values, `notNull` filters out nulls,
254-
* // allowing subsequent transformations like `mapValues` to treat values as non-null `Double`.
257+
* // Assuming `resultA` contains `Double` values and
258
* // `resultB` contains `List<Double>` values,
259+
* // `explodeLists` will apply only to values from `resultB`,
260+
* // resulting in all gathered values being of type `Double`.
255261
* df.gather { resultA and resultB }
256-
* .notNull()
262+
* .explodeLists()
263+
* .cast<Double>()
257264
* .mapValues { (it + 0.5).toFloat() }
258265
* .into("series", "value")
259266
* ```
260-
* @return A new [Gather] instance with only non-null values retained.
267+
* @return A new [Gather] instance with exploded list values.
261268
*/
262269
@Interpretable("GatherExplodeLists")
263270
public fun <T, C, K, R> Gather<T, C, K, R>.explodeLists(): Gather<T, C, K, R> =
@@ -271,6 +278,31 @@ public fun <T, C, K, R> Gather<T, C, K, R>.explodeLists(): Gather<T, C, K, R> =
271278
explode = true,
272279
)
273280

281+
/**
282+
* Explodes [List] values in the columns previously selected by [gather].
283+
*
284+
* After explosion, operations like [where], [notNull], and [mapValues] are applied to individual list elements
285+
* instead of the lists themselves.
286+
*
287+
* This is an intermediate step; returns a new [Gather] with exploded values.
288+
*
289+
* For more information, see: {@include [DocumentationUrls.Gather]}
290+
*
291+
* See also: [Grammar].
292+
*
293+
* ### Example
294+
* ```kotlin
295+
* // Gather `resultA` and `resultB` columns into a single "value" column,
296+
* // with the "series" column containing a key.
297+
* // Assuming `resultA` and `resultB` contain `List<Double>` values,
298+
* // `explodeLists` will produce individual `Double` elements.
299+
* df.gather { resultA and resultB }
300+
* .explodeLists()
301+
* .mapValues { (it + 0.5).toFloat() }
302+
* .into("series", "value")
303+
* ```
304+
* @return A new [Gather] instance with exploded list values.
305+
*/
274306
@JvmName("explodeListsTyped")
275307
@Interpretable("GatherExplodeLists")
276308
public fun <T, C, K, R> Gather<T, List<C>, K, R>.explodeLists(): Gather<T, C, K, R> =
@@ -284,6 +316,27 @@ public fun <T, C, K, R> Gather<T, List<C>, K, R>.explodeLists(): Gather<T, C, K,
284316
explode = true,
285317
) as Gather<T, C, K, R>
286318

319+
/**
320+
* Applies a [transform] to the gathering keys —
321+
* that is, the names of the columns previously selected by [gather].
322+
*
323+
* This is an intermediate step; returns a new [Gather] with transformed keys.
324+
*
325+
* For more information, see: {@include [DocumentationUrls.Gather]}
326+
*
327+
* See also: [Grammar].
328+
*
329+
* ### Example
330+
* ```kotlin
331+
* // Gather `resultA` and `resultB` columns into a single "value" column,
332+
* // with the "series" column containing a key derived from the last letter
333+
* // of each original column name (i.e., 'A' or 'B').
334+
* df.gather { resultA and resultB }
335+
* .mapKeys { it.last() }
336+
* .into("series", "value")
337+
* ```
338+
* @return A new [Gather] instance with transformed keys.
339+
*/
287340
@Interpretable("GatherMap")
288341
public inline fun <T, C, reified K, R> Gather<T, C, *, R>.mapKeys(
289342
noinline transform: (String) -> K,
@@ -298,6 +351,27 @@ public inline fun <T, C, reified K, R> Gather<T, C, *, R>.mapKeys(
298351
explode = explode,
299352
)
300353

354+
/**
355+
* Applies a [transform] to the values from the columns previously selected by [gather].
356+
*
357+
* This is an intermediate step; returns a new [Gather] with transformed values.
358+
*
359+
* For more information, see: {@include [DocumentationUrls.Gather]}
360+
*
361+
* See also: [Grammar].
362+
*
363+
* ### Example
364+
* ```kotlin
365+
* // Gather `resultA` and `resultB` columns into a single "value" column,
366+
* // with the "series" column containing a key.
367+
* // Assuming `resultA` and `resultB` contain `Double` values,
368+
* // `mapValues` transforms each value using the provided expression.
369+
* df.gather { resultA and resultB }
370+
* .mapValues { (it + 0.5).toFloat() }
371+
* .into("series", "value")
372+
* ```
373+
* @return A new [Gather] instance with transformed values.
374+
*/
301375
@Interpretable("GatherMap")
302376
public fun <T, C, K, R> Gather<T, C, K, *>.mapValues(transform: (C) -> R): Gather<T, C, K, R> =
303377
Gather(
@@ -310,6 +384,30 @@ public fun <T, C, K, R> Gather<T, C, K, *>.mapValues(transform: (C) -> R): Gathe
310384
explode = explode,
311385
)
312386

387+
388+
/**
389+
* An intermediate class used in the [gather] operation.
390+
*
391+
* This class itself does not perform the reshaping — it serves as a transitional step
392+
* before specifying how to structure the gathered data.
393+
* It must be followed by one of the reshaping methods to produce a new [DataFrame] with the transformed layout.
394+
*
395+
* Use the following methods to complete the gathering:
396+
* - [into] – reshapes into both key and value columns.
397+
* - [keysInto] – reshapes into a single key column.
398+
* - [valuesInto] – reshapes into a single value column.
399+
*
400+
* You can also configure the transformation using:
401+
* - [where] / [notNull] – to filter gathered values.
402+
* - [explodeLists] – to flatten list values.
403+
* - [mapKeys] – to transform the generated keys.
404+
* - [mapValues] – to transform the gathered values.
405+
* - [cast] – to specify the resulting value type.
406+
*
407+
* This operation is the reverse of [pivot].
408+
*
409+
* See [Grammar][GatherDocs.Grammar] for more details.
410+
*/
313411
public class Gather<T, C, K, R>(
314412
@PublishedApi
315413
internal val df: DataFrame<T>,
@@ -326,6 +424,35 @@ public class Gather<T, C, K, R>(
326424
@PublishedApi
327425
internal val explode: Boolean = false,
328426
) {
427+
/**
428+
* Casts the type of values in the columns previously selected by [gather]
429+
* without modifying the values themselves.
430+
*
431+
* This is useful when the type cannot be automatically inferred and needs to be explicitly specified
432+
* for further [Gather] operations such as [where][Gather.where], [notNull][Gather.notNull],
433+
* or [mapValues][Gather.mapValues].
434+
* It does not affect the actual content of the values —
435+
* only the type used for compile-time safety and transformation configuration.
436+
*
437+
* This is an intermediate step; returns a new [Gather] instance with an updated value type parameter.
438+
*
439+
* For more information, see: {@include [DocumentationUrls.Gather]}
440+
*
441+
* See also: [Grammar].
442+
*
443+
* ### Example
444+
* ```kotlin
445+
* // Gather all subcolumns in the "results" column group into a single "value" column,
446+
* // with the "series" column containing a key.
447+
* // After `cast`, values are treated as `Float` in `where` and `mapValues`.
448
* df.gather { results.cols() }
449
* .cast<Float>()
450
* .where { it > 0.05 }
451+
* .mapValues { (it + 0.5f).toDouble() }
452+
* .into("series", "value")
453+
* ```
454+
* @return A new [Gather] instance with the specified value type.
455+
*/
329456
@Interpretable("GatherChangeType")
330457
public fun <P> cast(): Gather<T, P, K, P> {
331458
// TODO: introduce GatherWithTransform to avoid this error
@@ -336,6 +463,35 @@ public class Gather<T, C, K, R>(
336463

337464
// region into
338465

466+
/**
467+
* Reshapes the columns previously selected by [gather] into two new columns:
468+
* [keyColumn], containing the original column names, and [valueColumn], containing the corresponding cell values.
469+
*
470+
* Returns a new [DataFrame] with the reshaped structure.
471+
* The original gathered columns are removed from the result,
472+
* while all other columns remain unchanged —
473+
* except that their values are duplicated for each generated key-value pair.
474+
*
475+
* The resulting keys and values can be adjusted using [mapKeys] and [mapValues], respectively.
476+
*
477+
* For more information, see: {@include [DocumentationUrls.Gather]}
478+
*
479+
* See also: [Grammar].
480+
*
481+
* ### Example
482+
* ```kotlin
483+
* // Gather `resultA` and `resultB` columns into a single "value" column,
484+
* // with the "series" column containing a key derived from the last letter
485+
* // of the original column names (i.e., 'A' or 'B').
486+
* df.gather { resultA and resultB }
487+
* .mapKeys { it.last() }
488+
* .into("series", "value")
489+
* ```
490+
*
491+
* @param keyColumn The name of the column to store keys (original column names by default).
492+
* @param valueColumn The name of the column to store gathered values.
493+
* @return A new [DataFrame] with reshaped columns.
494+
*/
339495
@Refine
340496
@Interpretable("GatherInto")
341497
public fun <T, C, K, R> Gather<T, C, K, R>.into(keyColumn: String, valueColumn: String): DataFrame<T> =
@@ -357,6 +513,32 @@ public fun <T, C, K, R> Gather<T, C, K, R>.into(keyColumn: KProperty<K>, valueCo
357513

358514
// region keysInto
359515

516+
/**
517+
* Reshapes the columns previously selected by [gather] into a new [keyColumn],
518+
* containing the original column names. The value column is omitted.
519+
*
520+
* Returns a new [DataFrame] with the reshaped structure.
521+
* The original gathered columns are removed from the result,
522+
* while all other columns remain unchanged —
523+
* except that their values are duplicated for each generated key.
524+
*
525+
* Resulting key values can be adjusted using [mapKeys].
526+
*
527+
* For more information, see: {@include [DocumentationUrls.Gather]}
528+
*
529+
* See also: [Grammar].
530+
*
531+
* ### Example
532+
* ```kotlin
533+
* // Gather names of all columns containing "series" in their name
534+
* // into a single "seriesType" column, omitting the value column.
535+
* df.gather { cols { it.name().contains("series") } }
536+
* .keysInto("seriesType")
537+
* ```
538+
*
539+
* @param keyColumn The name of the column to store keys (original column names by default).
540+
* @return A new [DataFrame] with reshaped columns.
541+
*/
360542
@Refine
361543
@Interpretable("GatherKeysInto")
362544
public fun <T, C, K, R> Gather<T, C, K, R>.keysInto(keyColumn: String): DataFrame<T> {
    // Only a key-column name is supplied; `null` is passed in place of a value-column name.
    return gatherImpl(keyColumn, null)
}
@@ -375,6 +557,32 @@ public fun <T, C, K, R> Gather<T, C, K, R>.keysInto(keyColumn: KProperty<K>): Da
375557

376558
// region valuesInto
377559

560+
/**
561+
* Reshapes the columns previously selected by [gather] into a new [valueColumn],
562+
* containing the original column values. The key column is omitted.
563+
*
564+
* Returns a new [DataFrame] with the reshaped structure.
565+
* The original gathered columns are removed from the result,
566+
* while all other columns remain unchanged —
567+
* except that their values are duplicated for each generated value.
568+
*
569+
* Resulting values can be adjusted using [mapValues].
570+
*
571+
* For more information, see: {@include [DocumentationUrls.Gather]}
572+
*
573+
* See also: [Grammar].
574+
*
575+
* ### Example
576+
* ```kotlin
577+
* // Gather values of all `String` columns
578+
* // into a single "tag" column, omitting the key column.
579+
* df.gather { colsOf<String>() }
580+
* .valuesInto("tag")
581+
* ```
582+
*
583+
* @param valueColumn The name of the column to store gathered values.
584+
* @return A new [DataFrame] with reshaped columns.
585+
*/
378586
@Refine
379587
@Interpretable("GatherValuesInto")
380588
public fun <T, C, K, R> Gather<T, C, K, R>.valuesInto(valueColumn: String): DataFrame<T> {
    // Only a value-column name is supplied; `null` is passed in place of a key-column name.
    return gatherImpl(null, valueColumn)
}

0 commit comments

Comments
 (0)