Skip to content

Commit 6945807

Browse files
authored
Merge pull request #226 from greatroar/noasm
internal/lz4block: Speed up noasm decoder
2 parents 0f7173a + a3839dc commit 6945807

File tree

1 file changed

+23
-34
lines changed

1 file changed

+23
-34
lines changed

internal/lz4block/decode_other.go

+23 -34
Original file line number | Diff line number | Diff line change
@@ -32,33 +32,7 @@ func decodeBlock(dst, src, dict []byte) (ret int) {
3232

3333
// Literals.
3434
if lLen := b >> 4; lLen > 0 {
35-
switch {
36-
case lLen < 0xF && si+16 < uint(len(src)):
37-
// Shortcut 1
38-
// if we have enough room in src and dst, and the literals length
39-
// is small enough (0..14) then copy all 16 bytes, even if not all
40-
// are part of the literals.
41-
copy(dst[di:], src[si:si+16])
42-
si += lLen
43-
di += lLen
44-
if mLen := b & 0xF; mLen < 0xF {
45-
// Shortcut 2
46-
// if the match length (4..18) fits within the literals, then copy
47-
// all 18 bytes, even if not all are part of the literals.
48-
mLen += 4
49-
if offset := u16(src[si:]); mLen <= offset && offset < di {
50-
i := di - offset
51-
// The remaining buffer may not hold 18 bytes.
52-
// See https://github.com/pierrec/lz4/issues/51.
53-
if end := i + 18; end <= uint(len(dst)) {
54-
copy(dst[di:], dst[i:end])
55-
si += 2
56-
di += mLen
57-
continue
58-
}
59-
}
60-
}
61-
case lLen == 0xF:
35+
if lLen == 0xF {
6236
for {
6337
x := uint(src[si])
6438
if lLen += x; int(lLen) < 0 {
@@ -69,30 +43,45 @@ func decodeBlock(dst, src, dict []byte) (ret int) {
6943
break
7044
}
7145
}
72-
fallthrough
73-
default:
46+
}
47+
if lLen <= 16 && si+16 < uint(len(src)) {
48+
// Shortcut 1: if we have enough room in src and dst, and the
49+
// literal length is at most 16, then copy 16 bytes, even if not
50+
// all are part of the literal. The compiler inlines this copy.
51+
copy(dst[di:di+16], src[si:si+16])
52+
} else {
7453
copy(dst[di:di+lLen], src[si:si+lLen])
75-
si += lLen
76-
di += lLen
7754
}
55+
si += lLen
56+
di += lLen
7857
}
7958

59+
// Match.
8060
mLen := b & 0xF
8161
if si == uint(len(src)) && mLen == 0 {
8262
break
8363
} else if si >= uint(len(src)) {
8464
return hasError
8565
}
66+
mLen += minMatch
8667

8768
offset := u16(src[si:])
8869
if offset == 0 {
8970
return hasError
9071
}
9172
si += 2
9273

93-
// Match.
94-
mLen += minMatch
95-
if mLen == minMatch+0xF {
74+
if mLen <= 16 {
75+
// Shortcut 2: if the match length is at most 16 and we're far
76+
// enough from the end of dst, copy 16 bytes unconditionally
77+
// so that the compiler can inline the copy.
78+
if mLen <= offset && offset < di && di+16 <= uint(len(dst)) {
79+
i := di - offset
80+
copy(dst[di:di+16], dst[i:i+16])
81+
di += mLen
82+
continue
83+
}
84+
} else if mLen >= 15+minMatch {
9685
for {
9786
x := uint(src[si])
9887
if mLen += x; int(mLen) < 0 {

0 commit comments

Comments
 (0)