Skip to content

Commit bc8b129

Browse files
authored
Merge pull request #11 from jf-tech/fixscanner3
switch delim/esc to []byte, reimplement (Byte)IndexWithEsc, Unescape, add ByteUnescape, and more benches
2 parents 191d6ca + 8bff6f8 commit bc8b129

File tree

4 files changed

+355
-185
lines changed

4 files changed

+355
-185
lines changed

ios/scanners.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,28 +29,30 @@ const (
2929
)
3030

3131
// NewScannerByDelim creates a scanner that returns tokens from the source reader separated by a delimiter.
32-
func NewScannerByDelim(r io.Reader, delim string, flags ScannerByDelimFlag) *bufio.Scanner {
32+
func NewScannerByDelim(r io.Reader, delim []byte, flags ScannerByDelimFlag) *bufio.Scanner {
3333
return NewScannerByDelim2(r, delim, nil, flags)
3434
}
3535

3636
// NewScannerByDelim2 creates a scanner that returns tokens from the source reader separated by a delimiter, with
3737
// consideration of the potential presence of an escape sequence.
3838
// Note: the token returned from the scanner is **NOT** unescaped; it keeps its original value.
39-
func NewScannerByDelim2(r io.Reader, delim string, escape *rune, flags ScannerByDelimFlag) *bufio.Scanner {
39+
func NewScannerByDelim2(r io.Reader, delim, escape []byte, flags ScannerByDelimFlag) *bufio.Scanner {
40+
if escape == nil {
41+
return NewScannerByDelim3(r, delim, nil, flags, nil)
42+
}
4043
return NewScannerByDelim3(r, delim, escape, flags, nil)
4144
}
4245

4346
// NewScannerByDelim3 creates a scanner that utilizes given buf to avoid/minimize allocation and returns tokens
4447
// from the source reader separated by a delimiter, with consideration of the potential presence of an escape sequence.
4548
// Note: the token returned from the scanner is **NOT** unescaped; it keeps its original value.
46-
func NewScannerByDelim3(r io.Reader, delim string, escape *rune, flags ScannerByDelimFlag, buf []byte) *bufio.Scanner {
49+
func NewScannerByDelim3(r io.Reader, delim, escape []byte, flags ScannerByDelimFlag, buf []byte) *bufio.Scanner {
4750
flags &= scannerByDelimValidFlags
4851

4952
includeDelimLenInToken := len(delim)
5053
if flags&ScannerByDelimFlagDropDelimInReturn != 0 {
5154
includeDelimLenInToken = 0
5255
}
53-
5456
eofAsDelim := flags&ScannerByDelimFlagEofAsDelim != 0
5557

5658
scanner := bufio.NewScanner(r)
@@ -60,7 +62,7 @@ func NewScannerByDelim3(r io.Reader, delim string, escape *rune, flags ScannerBy
6062
if atEof && len(data) == 0 {
6163
return 0, nil, nil
6264
}
63-
if index := strs.IndexWithEsc(string(data), delim, escape); index >= 0 {
65+
if index := strs.ByteIndexWithEsc(data, delim, escape); index >= 0 {
6466
return index + len(delim), data[:index+includeDelimLenInToken], nil
6567
}
6668
if atEof && eofAsDelim {

ios/scanners_test.go

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6,50 +6,48 @@ import (
66
"testing"
77

88
"github.com/stretchr/testify/assert"
9-
10-
"github.com/jf-tech/go-corelib/strs"
119
)
1210

1311
func TestNewScannerByDelim(t *testing.T) {
1412
for _, test := range []struct {
1513
name string
1614
input io.Reader
17-
delim string
15+
delim []byte
1816
flags ScannerByDelimFlag
1917
expectedTokens []string
2018
}{
2119
{
2220
name: "multi-char delim | eof as delim | drop delim",
2321
input: strings.NewReader("abc#123##efg####???##xyz##"),
24-
delim: "##",
22+
delim: []byte("##"),
2523
flags: ScannerByDelimFlagEofAsDelim | ScannerByDelimFlagDropDelimInReturn,
2624
expectedTokens: []string{"abc#123", "efg", "", "???", "xyz"},
2725
},
2826
{
2927
name: "CR LF delim | eof as delim | include delim",
3028
input: strings.NewReader("\r\n\rabc\r"),
31-
delim: "\r\n",
29+
delim: []byte("\r\n"),
3230
flags: ScannerByDelimFlagEofAsDelim | ScannerByDelimFlagIncludeDelimInReturn,
3331
expectedTokens: []string{"\r\n", "\rabc\r"},
3432
},
3533
{
3634
name: "empty reader",
3735
input: strings.NewReader(""),
38-
delim: "*",
36+
delim: []byte("*"),
3937
flags: ScannerByDelimFlagDefault,
4038
expectedTokens: []string{},
4139
},
4240
{
4341
name: "empty token",
4442
input: strings.NewReader("*"),
45-
delim: "*",
43+
delim: []byte("*"),
4644
flags: ScannerByDelimFlagEofNotAsDelim | ScannerByDelimFlagDropDelimInReturn,
4745
expectedTokens: []string{""},
4846
},
4947
{
5048
name: "trailing newlines",
5149
input: strings.NewReader("*\n"),
52-
delim: "*",
50+
delim: []byte("*"),
5351
flags: ScannerByDelimFlagEofAsDelim | ScannerByDelimFlagIncludeDelimInReturn,
5452
expectedTokens: []string{"*", "\n"},
5553
},
@@ -69,8 +67,8 @@ func TestNewScannerByDelim(t *testing.T) {
6967
func TestNewScannerByDelim2(t *testing.T) {
7068
s := NewScannerByDelim2(
7169
strings.NewReader("abc#123##efg####???##xyz##"),
72-
"##",
73-
strs.RunePtr('?'),
70+
[]byte("##"),
71+
[]byte("?"),
7472
ScannerByDelimFlagEofAsDelim|ScannerByDelimFlagDropDelimInReturn)
7573
var tokens []string
7674
for s.Scan() {
@@ -84,8 +82,8 @@ func TestNewScannerByDelim3(t *testing.T) {
8482
buf := make([]byte, 0, 100)
8583
s := NewScannerByDelim3(
8684
strings.NewReader("abc#123##efg####???##xyz##"),
87-
"##",
88-
strs.RunePtr('?'),
85+
[]byte("##"),
86+
[]byte("?"),
8987
ScannerByDelimFlagEofAsDelim|ScannerByDelimFlagDropDelimInReturn,
9088
buf)
9189
var tokens []string
@@ -96,17 +94,16 @@ func TestNewScannerByDelim3(t *testing.T) {
9694
assert.Equal(t, []string{"abc#123", "efg", "", "???##xyz"}, tokens)
9795
}
9896

99-
// Benchmark shows the benefit of using NewScannerByDelim3 with pre-allocated buf.
100-
// BenchmarkNewScannerByDelim2-8 5000 299188 ns/op 2141712 B/op 996 allocs/op
101-
// BenchmarkNewScannerByDelim3-8 30000 48893 ns/op 208 B/op 3 allocs/op
102-
103-
var benchmarkInput = strings.Repeat("abc#", 1000)
97+
var benchmarkInput = strings.Repeat("abc#", 100000)
98+
var benchmarkDelim = []byte("#")
99+
var benchmarkBuf = make([]byte, 1024)
104100

101+
// BenchmarkNewScannerByDelim2-8 500 2597188 ns/op 4344 B/op 5 allocs/op
105102
func BenchmarkNewScannerByDelim2(b *testing.B) {
106103
for i := 0; i < b.N; i++ {
107104
s := NewScannerByDelim2(
108105
strings.NewReader(benchmarkInput),
109-
"#",
106+
benchmarkDelim,
110107
nil,
111108
ScannerByDelimFlagEofAsDelim|ScannerByDelimFlagDropDelimInReturn)
112109
for s.Scan() {
@@ -121,15 +118,15 @@ func BenchmarkNewScannerByDelim2(b *testing.B) {
121118
}
122119
}
123120

121+
// BenchmarkNewScannerByDelim3-8 500 2585825 ns/op 242 B/op 3 allocs/op
124122
func BenchmarkNewScannerByDelim3(b *testing.B) {
125-
buf := make([]byte, 0, 10)
126123
for i := 0; i < b.N; i++ {
127124
s := NewScannerByDelim3(
128125
strings.NewReader(benchmarkInput),
129-
"#",
126+
benchmarkDelim,
130127
nil,
131128
ScannerByDelimFlagEofAsDelim|ScannerByDelimFlagDropDelimInReturn,
132-
buf)
129+
benchmarkBuf)
133130
for s.Scan() {
134131
token := s.Bytes()
135132
if len(token) != 3 || token[0] != 'a' || token[1] != 'b' || token[2] != 'c' {

0 commit comments

Comments
 (0)