@@ -6,18 +6,132 @@ import org.jetbrains.kotlinx.dataframe.RowValueFilter
6
6
import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
7
7
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
8
8
import org.jetbrains.kotlinx.dataframe.annotations.Refine
9
+ import org.jetbrains.kotlinx.dataframe.api.GatherDocs.Grammar
10
+ import org.jetbrains.kotlinx.dataframe.api.Update.UPDATE_OPERATION
11
+ import org.jetbrains.kotlinx.dataframe.api.notNull
12
+ import org.jetbrains.kotlinx.dataframe.api.where
9
13
import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor
10
14
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
11
15
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
12
16
import org.jetbrains.kotlinx.dataframe.impl.api.gatherImpl
13
17
import org.jetbrains.kotlinx.dataframe.impl.columnName
14
18
import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
19
+ import org.jetbrains.kotlinx.dataframe.documentation.*
15
20
import kotlin.reflect.KProperty
16
21
import kotlin.reflect.KType
17
22
import kotlin.reflect.typeOf
18
23
19
24
// region gather
20
25
26
+ /* *
27
+ * Reshapes the [DataFrame] by gathering specified [\columns] into two columns: keys and values
28
+ * (or into just one of them).
29
+ *
30
+ * By default, the "key" column contains the names of the gathered columns,
31
+ * and the "value" column holds the corresponding cell values.
32
+ * The original columns selected for gathering are removed from the result,
33
+ * while all other columns remain unchanged —
34
+ * except that their values are duplicated for each generated key-value pair.
35
+ *
36
+ * This function does not perform the reshaping immediately — it returns
37
+ * a [Gather] object, which serves as an intermediate step.
38
+ * Before applying the final transformation, you may:
39
+ * - filter values ([where][Gather.where], [notNull][Gather.notNull]);
40
+ * - explode list-columns ([explodeLists][Gather.explodeLists]);
41
+ * - transform keys ([mapKeys][Gather.mapKeys]) or values ([mapValues][Gather.mapValues]);
42
+ * - cast the value type ([cast][Gather.cast]).
43
+ *
44
+ * Finally, reshape the DataFrame using one of the following methods:
45
+ * - [into][Gather.into]
46
+ * - [keysInto][Gather.keysInto]
47
+ * - [valuesInto][Gather.valuesInto]
48
+ *
49
+ * Each of these methods returns a new reshaped [DataFrame].
50
+ *
51
+ * This operation is the reverse of [pivot].
52
+ *
53
+ * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
54
+ *
55
+ * For more information: {@include [DocumentationUrls.Gather]}
56
+ *
57
+ * See [Grammar].
58
+ */
59
+ internal interface GatherDocs {
60
+
61
+ /* *
62
+ * ## {@get [GATHER_OPERATION]} Operation Grammar
63
+ * {@include [LineBreak]}
64
+ * {@include [DslGrammarLink]}
65
+ * {@include [LineBreak]}
66
+ *
67
+ * **[`gather`][gather]****` { `**`columnsSelector: `[`ColumnsSelector`][ColumnsSelector]**` }`**
68
+ *
69
+ * {@include [Indent]}
70
+ * `\[ `__`.`__[**`explodeLists`**][Gather.explodeLists]**`() `**`]`
71
+ *
72
+ * {@include [Indent]}
73
+ * `\[ `__`.`__[**`cast`**][Gather.cast]**`<T>() `**`]`
74
+ *
75
+ * {@include [Indent]}
76
+ * `\[ `__`.`__[**`notNull`**][Gather.notNull]**`() `**`]`
77
+ *
78
+ * {@include [Indent]}
79
+ * `\[ `__`.`__[**`where`**][Gather.where]**` { `**`filter: `[`RowValueFilter`][RowValueFilter]**` } `**`]`
80
+ *
81
+ * {@include [Indent]}
82
+ * `\[ `__`.`__[**`mapKeys`**][Gather.mapKeys]**` { `**`transform: (`[`String`][String]**`) -> K } `**`]`
83
+ *
84
+ * {@include [Indent]}
85
+ * `\[ `__`.`__[**`mapValues`**][Gather.mapValues]**` { `**`transform: (`**`C`**`) -> R`**` } `**`]`
86
+ *
87
+ * {@include [Indent]}
88
+ * __`.`__[**`into`**][Gather.into]**`(`**`keyColumn: `[`String`][String]**`, `**`valueColumn: `[`String`][String]**`)`**
89
+ *
90
+ * {@include [Indent]}
91
+ * `| `__`.`__[**`keysInto`**][Gather.keysInto]**`(`**`keyColumn: `[`String`][String]**`)`**
92
+ *
93
+ * {@include [Indent]}
94
+ * `| `__`.`__[**`valuesInto`**][Gather.valuesInto]**`(`**`valueColumn: `[`String`][String]**`)`**
95
+ * {@set [GATHER_OPERATION] [**`gather`**][gather]}{@comment The default name of the `gather` operation function.}
96
+ */
97
+ interface Grammar
98
+
99
+ /*
100
+ * This argument provides the (clickable) name of the gather function.
101
+ * Note: If clickable, make sure to [alias][your type].
102
+ */
103
+ @Suppress(" ClassName" )
104
+ @ExcludeFromSources
105
+ interface GATHER_OPERATION
106
+ }
107
+
108
+ /* * {@set [SelectingColumns.OPERATION] [gather][gather]} */
109
+ @ExcludeFromSources
110
+ private interface SetGatherOperationArg
111
+
112
+ /* *
113
+ * {@include [GatherDocs]}
114
+ * ### This Gather Overload
115
+ */
116
+ @ExcludeFromSources
117
+ private interface CommonGatherDocs
118
+
119
+ /* *
120
+ * @include [CommonGatherDocs]
121
+ * @include [SelectingColumns.Dsl] {@include [SetGatherOperationArg]}
122
+ * ### Examples:
123
+ * ```kotlin
124
+ * // Gather `resultA` and `resultB` columns into a single "value" column,
125
+ * // with the "series" column containing a key derived from the last letter
126
+ * // of the corresponding original column name (i.e., 'A' or 'B').
127
+ * df.gather { resultA and resultB }.mapKeys { it.last() }.into("series", "value")
128
+ *
129
+ * // Gather values of all `String` columns (at any depth)
130
+ * // into a single "tag" column, omitting the key column.
131
+ * df.gather { colsOf<String>() }.valuesInto("tag")
132
+ * ```
133
+ * @param [selector] The [Columns Selector][ColumnsSelector] used to select the columns of this [DataFrame] to gather.
134
+ */
21
135
@Interpretable(" Gather0" )
22
136
public fun <T , C > DataFrame<T>.gather (selector : ColumnsSelector <T , C >): Gather <T , C , String , C > =
23
137
Gather (
@@ -29,6 +143,15 @@ public fun <T, C> DataFrame<T>.gather(selector: ColumnsSelector<T, C>): Gather<T
29
143
valueTransform = null ,
30
144
)
31
145
146
+ /* *
147
+ * @include [CommonGatherDocs]
148
+ * @include [SelectingColumns.ColumnNames] {@include [SetGatherOperationArg]}
149
+ * ### Example:
150
+ * ```kotlin
151
+ * df.gather("resultA", "resultB").mapKeys { it.last() }.into("series", "value")
152
+ * ```
153
+ * @param [columns] The [Column Names][String] used to select the columns of this [DataFrame] to gather.
154
+ */
32
155
public fun <T > DataFrame<T>.gather (vararg columns : String ): Gather <T , Any ?, String , Any ?> =
33
156
gather { columns.toColumnSet() }
34
157
@@ -44,6 +167,34 @@ public fun <T, C> DataFrame<T>.gather(vararg columns: KProperty<C>): Gather<T, C
44
167
45
168
// endregion
46
169
170
+ /* *
171
+ * Filters values in columns previously selected by [gather] using a [filter][RowValueFilter].
172
+ *
173
+ * [RowValueFilter] provides each value as a lambda argument, allowing you
174
+ * to filter rows using a Boolean condition.
175
+ *
176
+ * It's an intermediate step; returns a new [Gather] with filtered value columns.
177
+ *
178
+ * For more information: {@include [DocumentationUrls.Gather]}
179
+ *
180
+ * See [Grammar].
181
+ *
182
+ * ### Examples
183
+ * ```kotlin
184
+ * // Gather `resultA` and `resultB` columns into a single "value" column,
185
+ * // with the "series" column containing a key.
186
+ * // Only values greater than or equal to `pValue` are included.
187
+ * df.gather { resultA and resultB }.where { it >= pValue }.into("series", "value")
188
+ *
189
+ * // Gather values of all `String` columns (at any nesting level)
190
+ * // into a single "tag" column, omitting the key column.
191
+ * // Only non-empty strings are included.
192
+ * df.gather { colsOf<String>() }.where { it.isNotEmpty() }.valuesInto("tag")
193
+ * ```
194
+ *
195
+ * @param filter The [RowValueFilter] used to specify the filtering condition for gathered values.
196
+ * @return A new [Gather] with the filtered rows.
197
+ */
47
198
@Interpretable(" GatherWhere" )
48
199
public fun <T , C , K , R > Gather <T , C , K , R >.where (filter : RowValueFilter <T , C >): Gather <T , C , K , R > =
49
200
Gather (
@@ -56,9 +207,58 @@ public fun <T, C, K, R> Gather<T, C, K, R>.where(filter: RowValueFilter<T, C>):
56
207
explode = explode,
57
208
)
58
209
210
+ /* *
211
+ * Filters out `null` values from the columns previously selected by [gather],
212
+ * keeping only non-null entries.
213
+ *
214
+ * A special case of [where].
215
+ *
216
+ * It's an intermediate step; returns a new [Gather] with filtered value columns.
217
+ *
218
+ * For more information: {@include [DocumentationUrls.Gather]}
219
+ *
220
+ * See [Grammar].
221
+ *
222
+ * ### Example
223
+ * ```kotlin
224
+ * // Gather `resultA` and `resultB` columns into a single "value" column,
225
+ * // with the "series" column containing a key.
226
+ * // If these columns contain nullable `Double` values, `notNull` filters out nulls,
227
+ * // allowing subsequent transformations like `mapValues` to treat values as non-null `Double`.
228
+ * df.gather { resultA and resultB }
229
+ * .notNull()
230
+ * .mapValues { (it + 0.5).toFloat() }
231
+ * .into("series", "value")
232
+ * ```
233
+ * @return A new [Gather] instance with only non-null values retained.
234
+ */
59
235
@Interpretable(" GatherChangeType" )
60
236
// Delegates to `where { it != null }`; the trailing cast only narrows the declared value type from `C?` to `C`.
public fun <T , C , K , R > Gather <T , C ?, K , R >.notNull (): Gather <T , C , K , R > = where { it != null } as Gather <T , C , K , R >
61
237
238
+ /* *
239
+ * Explodes [List] values in the columns previously selected by [gather] into their individual elements.
240
+ *
241
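+ * Subsequent [where][Gather.where], [notNull][Gather.notNull], [cast][Gather.cast] and [mapValues][Gather.mapValues] steps are applied to list elements instead of the lists themselves.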
242
+ *
243
+ * It's an intermediate step; returns a new [Gather] in which list values will be exploded.
244
+ *
245
+ * For more information: {@include [DocumentationUrls.Gather]}
246
+ *
247
+ * See [Grammar].
248
+ *
249
+ * ### Example
250
+ * ```kotlin
251
+ * // Gather `resultA` and `resultB` columns into a single "value" column,
252
+ * // with the "series" column containing a key.
253
+ * // If these columns contain `List<Double>` values, `explodeLists` makes every list element
253
+ * // a separate gathered value, so the resulting "value" column holds `Double`s instead of lists.
254
+ * df.gather { resultA and resultB }
255
+ * .explodeLists()
256
+ * .cast<Double>()
258
+ * .into("series", "value")
259
+ * ```
260
+ * @return A new [Gather] instance in which list values are exploded into their elements.
261
+ */
62
262
@Interpretable(" GatherExplodeLists" )
63
263
public fun <T , C , K , R > Gather <T , C , K , R >.explodeLists (): Gather <T , C , K , R > =
64
264
Gather (
0 commit comments