Skip to content

Commit 12eadf6

Browse files
committed
csv: Add more from/to options
Add header option, on by default. Disable comments by default. Rename comma to delimiter. Add quote_char option. Add skip initial space option. Uses a forked version of std csv to support a custom quote character. See top of csv.go for TODOs.
1 parent a7d54ff commit 12eadf6

File tree

7 files changed

+1608
-32
lines changed

7 files changed

+1608
-32
lines changed

format/csv/csv.go

+148-30
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,26 @@
11
package csv
22

3+
// TODO: error, throw error always? no decode value with gap etc? -d csv from_csv
4+
// TODO: header row field count mismatch error, csv reader takes care of check atm. can use FieldsPerRecord -1
5+
// TODO: row object keys mismatch writer
6+
// TODO: lazy quotes?
7+
// TODO: comment in writer? string elements?
8+
// TODO: to_csv objects
9+
// TODO: to_csv opts help
10+
// TODO: go maps are random order, now sorts headers
11+
// TODO: option aliases?
12+
// TODO: snake_case option?
13+
314
import (
415
"bytes"
516
"embed"
6-
"encoding/csv"
717
"errors"
818
"fmt"
919
"io"
20+
"sort"
1021

1122
"github.com/wader/fq/format"
23+
"github.com/wader/fq/internal/csvex"
1224
"github.com/wader/fq/internal/gojqex"
1325
"github.com/wader/fq/pkg/bitio"
1426
"github.com/wader/fq/pkg/decode"
@@ -28,8 +40,11 @@ func init() {
2840
ProbeOrder: format.ProbeOrderTextFuzzy,
2941
DecodeFn: decodeCSV,
3042
DefaultInArg: format.CSV_In{
31-
Comma: ",",
32-
Comment: "#",
43+
Delimiter: ",",
44+
Comment: "",
45+
QuoteChar: `"`,
46+
Header: true,
47+
SkipInitialSpace: false,
3348
},
3449
Functions: []string{"_todisplay"},
3550
})
@@ -41,29 +56,59 @@ func decodeCSV(d *decode.D) any {
4156
var ci format.CSV_In
4257
d.ArgAs(&ci)
4358

44-
var rvs []any
4559
br := d.RawLen(d.Len())
46-
r := csv.NewReader(bitio.NewIOReader(br))
47-
r.TrimLeadingSpace = true
60+
r := csvex.NewReader(bitio.NewIOReader(br))
4861
r.LazyQuotes = true
49-
if ci.Comma != "" {
62+
if ci.Delimiter != "" {
63+
r.Comma = rune(ci.Delimiter[0])
64+
} else if ci.Comma != "" {
5065
r.Comma = rune(ci.Comma[0])
5166
}
5267
if ci.Comment != "" {
5368
r.Comment = rune(ci.Comment[0])
69+
} else {
70+
r.Comment = 0
71+
}
72+
if ci.QuoteChar != "" {
73+
r.Quote = rune(ci.QuoteChar[0])
74+
} else {
75+
r.Quote = '"'
5476
}
77+
r.TrimLeadingSpace = ci.SkipInitialSpace
78+
79+
row := 1
80+
var rvs []any
81+
82+
var headers []string
5583
for {
5684
r, err := r.Read()
5785
if errors.Is(err, io.EOF) {
5886
break
5987
} else if err != nil {
6088
return err
6189
}
62-
var vs []any
63-
for _, s := range r {
64-
vs = append(vs, s)
90+
91+
if ci.Header {
92+
if headers == nil {
93+
// TODO: duplicate headers?
94+
headers = append(headers, r...)
95+
} else {
96+
obj := map[string]any{}
97+
for i, s := range r {
98+
h := headers[i]
99+
obj[h] = s
100+
}
101+
rvs = append(rvs, obj)
102+
}
103+
} else {
104+
var vs []any
105+
for _, s := range r {
106+
vs = append(vs, s)
107+
}
108+
rvs = append(rvs, vs)
65109
}
66-
rvs = append(rvs, vs)
110+
111+
row++
67112
}
68113

69114
d.Value.V = &scalar.Any{Actual: rvs}
@@ -73,35 +118,108 @@ func decodeCSV(d *decode.D) any {
73118
}
74119

75120
type ToCSVOpts struct {
76-
Comma string
121+
Comma string // alias for Delimiter
122+
Delimiter string
123+
QuoteChar string
124+
Header bool
77125
}
78126

79127
func toCSV(_ *interp.Interp, c []any, opts ToCSVOpts) any {
80128
b := &bytes.Buffer{}
81-
w := csv.NewWriter(b)
82-
if opts.Comma != "" {
129+
w := csvex.NewWriter(b)
130+
if opts.Delimiter != "" {
131+
w.Comma = rune(opts.Delimiter[0])
132+
} else if opts.Comma != "" {
83133
w.Comma = rune(opts.Comma[0])
84134
}
135+
if opts.QuoteChar != "" {
136+
w.Quote = rune(opts.QuoteChar[0])
137+
} else {
138+
w.Quote = '"'
139+
}
140+
141+
seenObject := 0
142+
seenArrays := 0
143+
var headers []string
144+
85145
for _, row := range c {
86-
rs, ok := gojqex.Cast[[]any](row)
87-
if !ok {
88-
return fmt.Errorf("expected row to be an array, got %s", gojqex.TypeErrorPreview(row))
89-
}
90-
vs, ok := gojqex.NormalizeToStrings(rs).([]any)
91-
if !ok {
92-
panic("not array")
93-
}
94-
var ss []string
95-
for _, v := range vs {
96-
s, ok := v.(string)
146+
switch row.(type) {
147+
case []any:
148+
if seenObject > 0 {
149+
return fmt.Errorf("mixed row types, expected row to be an object, got %s", gojqex.TypeErrorPreview(row))
150+
}
151+
152+
rs, ok := gojqex.Cast[[]any](row)
97153
if !ok {
98-
return fmt.Errorf("expected row record to be scalars, got %s", gojqex.TypeErrorPreview(v))
154+
return fmt.Errorf("expected row to be an array, got %s", gojqex.TypeErrorPreview(row))
99155
}
100-
ss = append(ss, s)
101-
}
102-
if err := w.Write(ss); err != nil {
103-
return err
156+
vs, ok := gojqex.NormalizeToStrings(rs).([]any)
157+
if !ok {
158+
panic("not array")
159+
}
160+
var ss []string
161+
for _, v := range vs {
162+
s, ok := v.(string)
163+
if !ok {
164+
return fmt.Errorf("expected row record to be scalars, got %s", gojqex.TypeErrorPreview(v))
165+
}
166+
ss = append(ss, s)
167+
}
168+
if err := w.Write(ss); err != nil {
169+
return err
170+
}
171+
172+
seenArrays++
173+
case map[string]any:
174+
if seenArrays > 0 {
175+
return fmt.Errorf("mixed row types, expected row to be an array, got %s", gojqex.TypeErrorPreview(row))
176+
}
177+
178+
rm, ok := gojqex.Cast[map[string]any](row)
179+
if !ok {
180+
return fmt.Errorf("expected row to be an object, got %s", gojqex.TypeErrorPreview(row))
181+
}
182+
vm, ok := gojqex.NormalizeToStrings(rm).(map[string]any)
183+
if !ok {
184+
panic("not object")
185+
}
186+
187+
if headers == nil {
188+
// TODO: maps are random order in go
189+
for k := range vm {
190+
headers = append(headers, k)
191+
}
192+
sort.Strings(headers)
193+
194+
if err := w.Write(headers); err != nil {
195+
return err
196+
}
197+
}
198+
199+
var ss []string
200+
keysFound := 0
201+
for _, k := range headers {
202+
s, ok := vm[k].(string)
203+
if !ok {
204+
return fmt.Errorf("expected row object to have a %q key, %s", k, gojqex.TypeErrorPreview(row))
205+
}
206+
ss = append(ss, s)
207+
keysFound++
208+
}
209+
// TODO: what keys are extra/missing
210+
if keysFound < len(headers) {
211+
return fmt.Errorf("expected row object has missing keys %s", gojqex.TypeErrorPreview(row))
212+
} else if keysFound > len(headers) {
213+
return fmt.Errorf("expected row object has extra keys %s", gojqex.TypeErrorPreview(row))
214+
}
215+
216+
if err := w.Write(ss); err != nil {
217+
return err
218+
}
219+
220+
seenObject++
104221
}
222+
105223
}
106224
w.Flush()
107225

format/format.go

+6-2
Original file line numberDiff line numberDiff line change
@@ -363,8 +363,12 @@ type HTML_In struct {
363363
}
364364

365365
type CSV_In struct {
366-
Comma string `doc:"Separator character"`
367-
Comment string `doc:"Comment line character"`
366+
Comma string `doc:"Alias for Delimiter"`
367+
Delimiter string `doc:"Field delimiter character"`
368+
Comment string `doc:"Comment line character"`
369+
QuoteChar string `doc:"Quote character"`
370+
Header bool `doc:"Convert to objects based on header"`
371+
SkipInitialSpace bool `doc:"Don't include leading whitespace"`
368372
}
369373

370374
type Bitcoin_Block_In struct {

internal/csvex/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Modified version of the Go standard library encoding/csv Reader/Writer that supports a different quote character

0 commit comments

Comments
 (0)