Skip to content

Commit c316b43

Browse files
committed
cleaned up range parsing, added random row selection support to csvrows
1 parent 1f9bf58 commit c316b43

File tree

7 files changed

+408
-123
lines changed

7 files changed

+408
-123
lines changed

cmds/csvcols/csvcols.go

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,6 @@ import (
3434
"github.com/google/uuid"
3535
)
3636

37-
const (
38-
//FIXME: maxColumns needs to be calculated from the data rather than being a constant
39-
maxColumns = 2048
40-
)
41-
4237
var (
4338
description = `
4439
%s converts a set of command line args into columns output in CSV format.
@@ -207,7 +202,7 @@ func main() {
207202
}
208203

209204
if outputColumns != "" {
210-
columnNos, err := datatools.ParseRange(outputColumns, maxColumns)
205+
columnNos, err := datatools.ParseRange(outputColumns)
211206
cli.ExitOnError(app.Eout, err, quiet)
212207

213208
// NOTE: We need to adjust from humans counting from 1 to counting from zero

cmds/csvrows/csvrows.go

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,8 @@ func main() {
115115

116116
// Application specific options
117117
app.StringVar(&delimiter, "d,delimiter", "", "set delimiter character")
118-
app.StringVar(&outputRows, "row,rows", "", "output specified rows in order (e.g. -row 1,5,2:4))")
119-
app.BoolVar(&skipHeaderRow, "skip-header-row", false, "skip the header row (alias for -row 2:")
118+
app.StringVar(&outputRows, "row,rows", "", "output specified rows in order (e.g. -row 1,5,2-4))")
119+
app.BoolVar(&skipHeaderRow, "skip-header-row", false, "skip the header row (alias for -row 2-")
120120
app.BoolVar(&showHeader, "header", false, "display the header row (alias for '-rows 1')")
121121
app.IntVar(&randomRows, "random", 0, "return N randomly selected rows")
122122

@@ -161,22 +161,15 @@ func main() {
161161
}
162162

163163
if randomRows > 0 {
164-
datatools.CSVRandomRows(app.In, app.Out, app.Eout, showHeader, randomRows, delimiter)
165-
os.Exit(0)
166-
}
167-
168-
if showHeader == true {
169-
outputRows = "1"
170-
}
171-
if len(args) == 0 && outputRows == "" {
172-
outputRows = "1:"
173-
if skipHeaderRow == true {
174-
outputRows = "2:"
164+
if err := datatools.CSVRandomRows(app.In, app.Out, showHeader, randomRows, delimiter); err != nil {
165+
fmt.Fprintf(app.Eout, "%s, %s\n", inputFName, err)
166+
os.Exit(1)
175167
}
168+
os.Exit(0)
176169
}
177170

178171
if outputRows != "" {
179-
rowNos, err := datatools.ParseRange(outputRows, maxRows)
172+
rowNos, err := datatools.ParseRange(outputRows)
180173
cli.ExitOnError(app.Eout, err, quiet)
181174

182175
// NOTE: We need to adjust from humans counting from 1 to counting from zero
@@ -186,10 +179,21 @@ func main() {
186179
rowNos[i] = 0
187180
}
188181
}
189-
datatools.CSVRows(app.In, app.Out, app.Eout, rowNos, delimiter)
182+
if err := datatools.CSVRows(app.In, app.Out, showHeader, rowNos, delimiter); err != nil {
183+
fmt.Fprintf(app.Eout, "%s, %s\n", inputFName, err)
184+
os.Exit(1)
185+
}
186+
os.Exit(0)
187+
}
188+
if inputFName != "" {
189+
if err := datatools.CSVRowsAll(app.In, app.Out, showHeader, delimiter); err != nil {
190+
fmt.Fprintf(app.Eout, "%s, %s\n", inputFName, err)
191+
os.Exit(1)
192+
}
190193
os.Exit(0)
191194
}
192195

196+
// NOTE: If we're not processing an existing CSV source for input we're turning parameters into CSV rows!
193197
if len(delimiter) > 0 && len(args) == 1 {
194198
args = strings.Split(args[0], datatools.NormalizeDelimiter(delimiter))
195199
}

csv.go

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
//
2+
// datatools.go is a package for working with various types of data (e.g. CSV, XLSX, JSON) in support
3+
// of the utilities included in the datatools.go package.
4+
//
5+
// Copyright (c) 2017, Caltech
6+
// All rights not granted herein are expressly reserved by Caltech.
7+
//
8+
// Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
9+
//
10+
// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
11+
//
12+
// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
13+
//
14+
// 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
15+
//
16+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
17+
//
18+
package datatools
19+
20+
import (
21+
"encoding/csv"
22+
"fmt"
23+
"io"
24+
"math/rand"
25+
"time"
26+
)
27+
28+
func selectedRow(rowNo int, record []string, rowNos []int) []string {
29+
if len(rowNos) == 0 {
30+
return record
31+
}
32+
for _, i := range rowNos {
33+
if i == rowNo {
34+
return record
35+
}
36+
}
37+
return nil
38+
}
39+
40+
func shuffleRows(rows [][]string, src rand.Source) {
41+
// Create our random number source
42+
rn := rand.New(src)
43+
for a := len(rows) - 1; a > 0; a-- {
44+
// Pick a random element to swap with
45+
b := rn.Intn(a + 1)
46+
// Swap with a random element
47+
rows[a], rows[b] = rows[b], rows[a]
48+
}
49+
}
50+
51+
// CSVRandomRows reads from in, creates a csv Reader and Writer and randomly selects the rowCount
52+
// number of rows to write out. If showHeader is true the first row is treated as the header: it is excluded from the random row selection
53+
// and will be written to out before the randomized rows.
54+
// rowCount is the number of rows to return independent of the header row.
55+
func CSVRandomRows(in io.Reader, out io.Writer, showHeader bool, rowCount int, delimiter string) error {
56+
var err error
57+
58+
headerRow := []string{}
59+
rows := [][]string{}
60+
r := csv.NewReader(in)
61+
w := csv.NewWriter(out)
62+
if delimiter != "" {
63+
r.Comma = NormalizeDelimiterRune(delimiter)
64+
w.Comma = NormalizeDelimiterRune(delimiter)
65+
}
66+
67+
// read in our rows.
68+
for i := 0; err != io.EOF; i++ {
69+
rec, err := r.Read()
70+
if err == io.EOF {
71+
break
72+
}
73+
if err != nil {
74+
return fmt.Errorf("%s (%T %+v)", err, rec, rec)
75+
}
76+
if i == 0 && showHeader {
77+
headerRow = rec
78+
} else {
79+
rows = append(rows, rec)
80+
}
81+
}
82+
if showHeader && len(headerRow) > 0 {
83+
if err := w.Write(headerRow); err != nil {
84+
return fmt.Errorf("Error writing record to csv: %s (Row %T %+v)", err, headerRow, headerRow)
85+
}
86+
}
87+
88+
// Shuffle the rows, then write out the desired number of rows.
89+
rSrc := rand.NewSource(time.Now().UnixNano())
90+
shuffleRows(rows, rSrc)
91+
92+
// Now render rowCount of the shuffled rows
93+
if rowCount > len(rows) {
94+
rowCount = len(rows)
95+
}
96+
for i := 0; i < rowCount; i++ {
97+
row := rows[i]
98+
if row != nil {
99+
if err := w.Write(row); err != nil {
100+
return fmt.Errorf("Error writing record to csv: %s (Row %T %+v)", err, row, row)
101+
}
102+
}
103+
}
104+
w.Flush()
105+
err = w.Error()
106+
if err != nil {
107+
return fmt.Errorf("%s\n", err)
108+
}
109+
return nil
110+
}
111+
112+
// CSVRows writes the rows whose zero-based row numbers are listed in rowNos to out using the delimiter
113+
func CSVRows(in io.Reader, out io.Writer, showHeader bool, rowNos []int, delimiter string) error {
114+
var err error
115+
116+
r := csv.NewReader(in)
117+
w := csv.NewWriter(out)
118+
if delimiter != "" {
119+
r.Comma = NormalizeDelimiterRune(delimiter)
120+
w.Comma = NormalizeDelimiterRune(delimiter)
121+
}
122+
for i := 0; err != io.EOF; i++ {
123+
rec, err := r.Read()
124+
if err == io.EOF {
125+
break
126+
}
127+
if err != nil {
128+
return fmt.Errorf("%s (%T %+v)", err, rec, rec)
129+
}
130+
if i == 0 && showHeader {
131+
if err = w.Write(rec); err != nil {
132+
return fmt.Errorf("Error writing record to csv: %s (Row %T %+v)", err, rec, rec)
133+
}
134+
} else {
135+
row := selectedRow(i, rec, rowNos)
136+
if row != nil {
137+
if err = w.Write(row); err != nil {
138+
return fmt.Errorf("Error writing record to csv: %s (Row %T %+v)", err, row, row)
139+
}
140+
}
141+
}
142+
}
143+
w.Flush()
144+
err = w.Error()
145+
if err != nil {
146+
return fmt.Errorf("%s\n", err)
147+
}
148+
return nil
149+
}
150+
151+
// CSVRowsAll writes every row after the first to out using the delimiter; the first row is written only if showHeader is true
152+
func CSVRowsAll(in io.Reader, out io.Writer, showHeader bool, delimiter string) error {
153+
var err error
154+
155+
r := csv.NewReader(in)
156+
w := csv.NewWriter(out)
157+
if delimiter != "" {
158+
r.Comma = NormalizeDelimiterRune(delimiter)
159+
w.Comma = NormalizeDelimiterRune(delimiter)
160+
}
161+
for i := 0; err != io.EOF; i++ {
162+
row, err := r.Read()
163+
if err == io.EOF {
164+
break
165+
}
166+
if err != nil {
167+
return fmt.Errorf("%s (%T %+v)", err, row, row)
168+
}
169+
if i == 0 && showHeader {
170+
if err = w.Write(row); err != nil {
171+
return fmt.Errorf("Error writing record to csv: %s (Row %T %+v)", err, row, row)
172+
}
173+
continue
174+
} else if i > 0 {
175+
if err = w.Write(row); err != nil {
176+
return fmt.Errorf("Error writing record to csv: %s (Row %T %+v)", err, row, row)
177+
}
178+
}
179+
}
180+
w.Flush()
181+
err = w.Error()
182+
if err != nil {
183+
return fmt.Errorf("%s\n", err)
184+
}
185+
return nil
186+
}

datatools.go

Lines changed: 0 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ import (
2626
"encoding/json"
2727
"fmt"
2828
"io"
29-
"strconv"
3029
"strings"
3130
"unicode"
3231

@@ -209,47 +208,6 @@ func Levenshtein(src string, target string, insertCost int, deleteCost int, subs
209208
})
210209
}
211210

212-
// ParseRange take a range notation string and convert it into a list of integers
213-
func ParseRange(s string, max int) ([]int, error) {
214-
var err error
215-
216-
nums := []int{}
217-
for _, arg := range strings.Split(s, ",") {
218-
if strings.Contains(arg, ":") {
219-
a, b := 0, max
220-
parts := strings.Split(arg, ":")
221-
if parts[0] != "" {
222-
a, err = strconv.Atoi(strings.TrimSpace(parts[0]))
223-
if err != nil {
224-
return nums, fmt.Errorf("Expected a number for start of range, %q, %s\n", arg, err)
225-
}
226-
}
227-
if parts[1] != "" {
228-
b, err = strconv.Atoi(strings.TrimSpace(parts[1]))
229-
if err != nil {
230-
return nums, fmt.Errorf("Expected a number for end of range, %q, %s\n", arg, err)
231-
}
232-
}
233-
if a <= b {
234-
for i := a; i <= b; i++ {
235-
nums = append(nums, i)
236-
}
237-
} else if a > b {
238-
return nums, fmt.Errorf("%d less than %d, invalid range", b, a)
239-
} else {
240-
nums = append(nums, a)
241-
}
242-
} else {
243-
i, err := strconv.Atoi(strings.TrimSpace(arg))
244-
if err != nil {
245-
return nums, err
246-
}
247-
nums = append(nums, i)
248-
}
249-
}
250-
return nums, nil
251-
}
252-
253211
// EnglishTitle - uses improved capitalization rules for English titles.
254212
// This is based on the approach suggested in the Go language Cookbook:
255213
// http://golangcookbook.com/chapters/strings/title/

datatools_test.go

Lines changed: 0 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -107,63 +107,3 @@ func TestFilter(t *testing.T) {
107107
t.Errorf("expected %t, got %t", expected, result)
108108
}
109109
}
110-
111-
func TestParseRange(t *testing.T) {
112-
src := `1`
113-
expected := []int{1}
114-
result, err := ParseRange(src, 10000)
115-
if err != nil {
116-
t.Errorf("ParseRange failed, %s", err)
117-
t.FailNow()
118-
}
119-
for i, val := range expected {
120-
if i >= len(result) {
121-
t.Errorf("item %d: expected %d, missing element in result d", i, val)
122-
} else {
123-
if result[i] != val {
124-
t.Errorf("item %d: expected %d, got %d", i, val, result[i])
125-
}
126-
}
127-
}
128-
129-
src = `1:3`
130-
expected = []int{1, 2, 3}
131-
result, err = ParseRange(src, 10000)
132-
if err != nil {
133-
t.Errorf("ParseRange failed, %s", err)
134-
t.FailNow()
135-
}
136-
for i, val := range expected {
137-
if i >= len(result) {
138-
t.Errorf("item %d: expected %d, missing element in result d", i, val)
139-
} else {
140-
if result[i] != val {
141-
t.Errorf("item %d: expected %d, got %d", i, val, result[i])
142-
}
143-
}
144-
}
145-
146-
src = `1,4:6,10`
147-
expected = []int{
148-
1,
149-
4,
150-
5,
151-
6,
152-
10,
153-
}
154-
155-
result, err = ParseRange(src, 10000)
156-
if err != nil {
157-
t.Errorf("ParseRange failed, %s", err)
158-
t.FailNow()
159-
}
160-
for i, val := range expected {
161-
if i >= len(result) {
162-
t.Errorf("item %d: expected %d, missing element in result d", i, val)
163-
} else {
164-
if result[i] != val {
165-
t.Errorf("item %d: expected %d, got %d", i, val, result[i])
166-
}
167-
}
168-
}
169-
}

0 commit comments

Comments
 (0)