Skip to content
This repository was archived by the owner on Mar 8, 2020. It is now read-only.

Commit 8c08ccf

Browse files
committed
unquoteSingle: switch to simple strings-based impl
Signed-off-by: Alexander Bezzubov <[email protected]>
1 parent ec16186 commit 8c08ccf

File tree

2 files changed

+72
-36
lines changed

2 files changed

+72
-36
lines changed

driver/normalizer/strconv.go

Lines changed: 21 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ func unquoteSingle(s string) (string, error) {
3535
return s, nil
3636
}
3737
}
38-
s = replaceEscapedMaybe(s, '0', '\x00') // treatment of special JS escape seq
38+
s = replaceEscapedMaybe(s, "\\0", "\x00") // treatment of special JS escape seq
3939

4040
var runeTmp [utf8.UTFMax]byte
4141
buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
@@ -62,39 +62,28 @@ func contains(s string, c byte) bool {
6262

6363
// replaceEscapedMaybe returns a copy of s with old replaced by repl, unless old is followed by a digit (0-9).
6464
// It is not part of the stdlib; it handles the special case of the JS "\0" escape sequence.
65-
func replaceEscapedMaybe(s string, old, new rune) string {
66-
var runeTmp [utf8.UTFMax]byte
67-
n := utf8.EncodeRune(runeTmp[:], new)
68-
69-
lastCp := 0
70-
var buf []byte
71-
for i, w := 0, 0; i < len(s); i += w {
72-
r1, w1 := utf8.DecodeRuneInString(s[i:])
73-
w = w1
74-
if r1 == '\\' { // find sequence \\old[^0-9]
75-
r2, w2 := utf8.DecodeRuneInString(s[i+w1:])
76-
if r2 == old {
77-
r3, _ := utf8.DecodeRuneInString(s[i+w1+w2:])
78-
if 0 > r3 || r3 > 9 { // not a number after "\\old"
79-
w += w2
80-
if len(buf) == 0 {
81-
buf = make([]byte, 0, 3*len(s)/2)
82-
}
83-
buf = append(buf, []byte(s[lastCp:i])...)
84-
buf = append(buf, runeTmp[:n]...)
85-
lastCp = i + w
86-
}
87-
}
65+
func replaceEscapedMaybe(s, old, repl string) string {
66+
var out strings.Builder
67+
for s != "" {
68+
pos := strings.Index(s, old)
69+
if pos < 0 {
70+
break
71+
}
72+
out.WriteString(s[:pos])
73+
s = s[pos+len(old):]
74+
r, n := utf8.DecodeRuneInString(s)
75+
s = s[n:]
76+
if r >= '0' && r <= '9' {
77+
out.WriteString(old)
78+
} else {
79+
out.WriteString(repl)
80+
}
81+
if n != 0 {
82+
out.WriteRune(r)
8883
}
8984
}
90-
if lastCp == 0 {
91-
return s
92-
}
93-
94-
if 0 < lastCp && lastCp < len(s) {
95-
return string(append(buf, []byte(s[lastCp:len(s)])...))
96-
}
97-
return string(buf)
85+
out.WriteString(s)
86+
return out.String()
9887
}
9988

10089
const lowerhex = "0123456789abcdef"

driver/normalizer/strconv_test.go

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package normalizer
33
import (
44
"regexp"
55
"testing"
6+
"unicode/utf8"
67

78
"github.com/stretchr/testify/assert"
89
"github.com/stretchr/testify/require"
@@ -70,17 +71,54 @@ func printDebug(t *testing.T, quoted, actual string) {
7071

7172
func BenchmarkReplacingNullEscape_Iterative(b *testing.B) {
7273
b.ReportAllocs()
73-
s := testCasesUnquote[3].quoted
7474
for n := 0; n < b.N; n++ {
75-
replaceEscapedMaybe(s, '0', '\x00')
75+
for _, test := range testCasesUnquote {
76+
replaceEscapedMaybeIter(test.quoted, '0', '\x00')
77+
}
78+
}
79+
}
80+
81+
func replaceEscapedMaybeIter(s string, old, new rune) string {
82+
var runeTmp [utf8.UTFMax]byte
83+
n := utf8.EncodeRune(runeTmp[:], new)
84+
85+
lastCp := 0
86+
var buf []byte
87+
for i, w := 0, 0; i < len(s); i += w {
88+
r1, w1 := utf8.DecodeRuneInString(s[i:])
89+
w = w1
90+
if r1 == '\\' { // find sequence \\old[^0-9]
91+
r2, w2 := utf8.DecodeRuneInString(s[i+w1:])
92+
if r2 == old {
93+
r3, _ := utf8.DecodeRuneInString(s[i+w1+w2:])
94+
if 0 > r3 || r3 > 9 { // not a number after "\\old"
95+
w += w2
96+
if len(buf) == 0 {
97+
buf = make([]byte, 0, 3*len(s)/2)
98+
}
99+
buf = append(buf, []byte(s[lastCp:i])...)
100+
buf = append(buf, runeTmp[:n]...)
101+
lastCp = i + w
102+
}
103+
}
104+
}
105+
}
106+
if lastCp == 0 {
107+
return s
108+
}
109+
110+
if 0 < lastCp && lastCp < len(s) {
111+
return string(append(buf, []byte(s[lastCp:len(s)])...))
76112
}
113+
return string(buf)
77114
}
78115

79116
func BenchmarkReplacingNullEscape_Regexp(b *testing.B) {
80117
b.ReportAllocs()
81-
s := testCasesUnquote[3].quoted
82118
for n := 0; n < b.N; n++ {
83-
replaceEscapedMaybeRegexp(s)
119+
for _, test := range testCasesUnquote {
120+
replaceEscapedMaybeRegexp(test.quoted)
121+
}
84122
}
85123
}
86124

@@ -90,3 +128,12 @@ var re = regexp.MustCompile(`\\0([^0-9]|$)`)
90128
func replaceEscapedMaybeRegexp(s string) string {
91129
return re.ReplaceAllString(s, "\x00$1")
92130
}
131+
132+
func BenchmarkReplacingNullEscape_Simple(b *testing.B) {
133+
b.ReportAllocs()
134+
for n := 0; n < b.N; n++ {
135+
for _, test := range testCasesUnquote {
136+
replaceEscapedMaybe(test.quoted, "\\0", "\x00")
137+
}
138+
}
139+
}

0 commit comments

Comments
 (0)