1
1
package csv
2
2
3
+ // TODO: error, throw error always? no decode value with gap etc? -d csv from_csv
4
+ // TODO: header row field count mismatch error; the csv reader takes care of this check at the moment. Can use FieldsPerRecord = -1
5
+ // TODO: row object keys mismatch writer
6
+ // TODO: lazy quotes?
7
+ // TODO: comment in writer? string elements?
8
+ // TODO: to_csv objects
9
+ // TODO: to_csv opts help
10
+ // TODO: Go map iteration order is random, so headers are currently sorted
11
+ // TODO: option aliases?
12
+ // TODO: snake_case option?
13
+
3
14
import (
4
15
"bytes"
5
16
"embed"
6
- "encoding/csv"
7
17
"errors"
8
18
"fmt"
9
19
"io"
20
+ "sort"
10
21
11
22
"github.com/wader/fq/format"
23
+ "github.com/wader/fq/internal/csvex"
12
24
"github.com/wader/fq/internal/gojqex"
13
25
"github.com/wader/fq/pkg/bitio"
14
26
"github.com/wader/fq/pkg/decode"
@@ -28,8 +40,11 @@ func init() {
28
40
ProbeOrder : format .ProbeOrderTextFuzzy ,
29
41
DecodeFn : decodeCSV ,
30
42
DefaultInArg : format.CSV_In {
31
- Comma : "," ,
32
- Comment : "#" ,
43
+ Delimiter : "," ,
44
+ Comment : "" ,
45
+ QuoteChar : `"` ,
46
+ Header : true ,
47
+ SkipInitialSpace : false ,
33
48
},
34
49
Functions : []string {"_todisplay" },
35
50
})
@@ -41,29 +56,59 @@ func decodeCSV(d *decode.D) any {
41
56
var ci format.CSV_In
42
57
d .ArgAs (& ci )
43
58
44
- var rvs []any
45
59
br := d .RawLen (d .Len ())
46
- r := csv .NewReader (bitio .NewIOReader (br ))
47
- r .TrimLeadingSpace = true
60
+ r := csvex .NewReader (bitio .NewIOReader (br ))
48
61
r .LazyQuotes = true
49
- if ci .Comma != "" {
62
+ if ci .Delimiter != "" {
63
+ r .Comma = rune (ci .Delimiter [0 ])
64
+ } else if ci .Comma != "" {
50
65
r .Comma = rune (ci .Comma [0 ])
51
66
}
52
67
if ci .Comment != "" {
53
68
r .Comment = rune (ci .Comment [0 ])
69
+ } else {
70
+ r .Comment = 0
71
+ }
72
+ if ci .QuoteChar != "" {
73
+ r .Quote = rune (ci .QuoteChar [0 ])
74
+ } else {
75
+ r .Quote = '"'
54
76
}
77
+ r .TrimLeadingSpace = ci .SkipInitialSpace
78
+
79
+ row := 1
80
+ var rvs []any
81
+
82
+ var headers []string
55
83
for {
56
84
r , err := r .Read ()
57
85
if errors .Is (err , io .EOF ) {
58
86
break
59
87
} else if err != nil {
60
88
return err
61
89
}
62
- var vs []any
63
- for _ , s := range r {
64
- vs = append (vs , s )
90
+
91
+ if ci .Header {
92
+ if headers == nil {
93
+ // TODO: duplicate headers?
94
+ headers = append (headers , r ... )
95
+ } else {
96
+ obj := map [string ]any {}
97
+ for i , s := range r {
98
+ h := headers [i ]
99
+ obj [h ] = s
100
+ }
101
+ rvs = append (rvs , obj )
102
+ }
103
+ } else {
104
+ var vs []any
105
+ for _ , s := range r {
106
+ vs = append (vs , s )
107
+ }
108
+ rvs = append (rvs , vs )
65
109
}
66
- rvs = append (rvs , vs )
110
+
111
+ row ++
67
112
}
68
113
69
114
d .Value .V = & scalar.Any {Actual : rvs }
@@ -73,35 +118,108 @@ func decodeCSV(d *decode.D) any {
73
118
}
74
119
75
120
// ToCSVOpts holds the options accepted by toCSV.
type ToCSVOpts struct {
	// Comma is an alias for Delimiter; toCSV consults it only when
	// Delimiter is empty.
	Comma string
	// Delimiter is the field separator. toCSV uses only its first byte.
	Delimiter string
	// QuoteChar is the quote character. toCSV uses only its first byte and
	// falls back to '"' when the option is empty.
	QuoteChar string
	// Header is part of the option set.
	// NOTE(review): the visible part of toCSV never reads this field; object
	// rows always emit a header row — confirm the intended use.
	Header bool
}
78
126
79
127
func toCSV (_ * interp.Interp , c []any , opts ToCSVOpts ) any {
80
128
b := & bytes.Buffer {}
81
- w := csv .NewWriter (b )
82
- if opts .Comma != "" {
129
+ w := csvex .NewWriter (b )
130
+ if opts .Delimiter != "" {
131
+ w .Comma = rune (opts .Delimiter [0 ])
132
+ } else if opts .Comma != "" {
83
133
w .Comma = rune (opts .Comma [0 ])
84
134
}
135
+ if opts .QuoteChar != "" {
136
+ w .Quote = rune (opts .QuoteChar [0 ])
137
+ } else {
138
+ w .Quote = '"'
139
+ }
140
+
141
+ seenObject := 0
142
+ seenArrays := 0
143
+ var headers []string
144
+
85
145
for _ , row := range c {
86
- rs , ok := gojqex.Cast [[]any ](row )
87
- if ! ok {
88
- return fmt .Errorf ("expected row to be an array, got %s" , gojqex .TypeErrorPreview (row ))
89
- }
90
- vs , ok := gojqex .NormalizeToStrings (rs ).([]any )
91
- if ! ok {
92
- panic ("not array" )
93
- }
94
- var ss []string
95
- for _ , v := range vs {
96
- s , ok := v .(string )
146
+ switch row .(type ) {
147
+ case []any :
148
+ if seenObject > 0 {
149
+ return fmt .Errorf ("mixed row types, expected row to be an object, got %s" , gojqex .TypeErrorPreview (row ))
150
+ }
151
+
152
+ rs , ok := gojqex.Cast [[]any ](row )
97
153
if ! ok {
98
- return fmt .Errorf ("expected row record to be scalars , got %s" , gojqex .TypeErrorPreview (v ))
154
+ return fmt .Errorf ("expected row to be an array , got %s" , gojqex .TypeErrorPreview (row ))
99
155
}
100
- ss = append (ss , s )
101
- }
102
- if err := w .Write (ss ); err != nil {
103
- return err
156
+ vs , ok := gojqex .NormalizeToStrings (rs ).([]any )
157
+ if ! ok {
158
+ panic ("not array" )
159
+ }
160
+ var ss []string
161
+ for _ , v := range vs {
162
+ s , ok := v .(string )
163
+ if ! ok {
164
+ return fmt .Errorf ("expected row record to be scalars, got %s" , gojqex .TypeErrorPreview (v ))
165
+ }
166
+ ss = append (ss , s )
167
+ }
168
+ if err := w .Write (ss ); err != nil {
169
+ return err
170
+ }
171
+
172
+ seenArrays ++
173
+ case map [string ]any :
174
+ if seenArrays > 0 {
175
+ return fmt .Errorf ("mixed row types, expected row to be an array, got %s" , gojqex .TypeErrorPreview (row ))
176
+ }
177
+
178
+ rm , ok := gojqex.Cast [map [string ]any ](row )
179
+ if ! ok {
180
+ return fmt .Errorf ("expected row to be an object, got %s" , gojqex .TypeErrorPreview (row ))
181
+ }
182
+ vm , ok := gojqex .NormalizeToStrings (rm ).(map [string ]any )
183
+ if ! ok {
184
+ panic ("not object" )
185
+ }
186
+
187
+ if headers == nil {
188
+ // TODO: maps are random order in go
189
+ for k := range vm {
190
+ headers = append (headers , k )
191
+ }
192
+ sort .Strings (headers )
193
+
194
+ if err := w .Write (headers ); err != nil {
195
+ return err
196
+ }
197
+ }
198
+
199
+ var ss []string
200
+ keysFound := 0
201
+ for _ , k := range headers {
202
+ s , ok := vm [k ].(string )
203
+ if ! ok {
204
+ return fmt .Errorf ("expected row object to have a %q key, %s" , k , gojqex .TypeErrorPreview (row ))
205
+ }
206
+ ss = append (ss , s )
207
+ keysFound ++
208
+ }
209
+ // TODO: what keys are extra/missing
210
+ if keysFound < len (headers ) {
211
+ return fmt .Errorf ("expected row object has missing keys %s" , gojqex .TypeErrorPreview (row ))
212
+ } else if keysFound > len (headers ) {
213
+ return fmt .Errorf ("expected row object has extra keys %s" , gojqex .TypeErrorPreview (row ))
214
+ }
215
+
216
+ if err := w .Write (ss ); err != nil {
217
+ return err
218
+ }
219
+
220
+ seenObject ++
104
221
}
222
+
105
223
}
106
224
w .Flush ()
107
225
0 commit comments