Skip to content

Commit 213913a

Browse files
committed
improve iterate performance
1 parent e77db04 commit 213913a

File tree

6 files changed

+36
-33
lines changed

6 files changed

+36
-33
lines changed

.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
.idea/
2+
.vscode/
3+
.DS_Store

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ Write Ahead Log for LSM or bitcask storage, with block cache.
1212

1313
## Design Overview
1414

15-
![](https://img-blog.csdnimg.cn/3910507c20a04f9190c3664e3657a4b1.png#pic_center)
15+
![wal-logo.png](https://s2.loli.net/2024/05/19/Iz4JML2SokFdrfN.png)
1616

1717
## Format
1818

go.mod

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
module github.com/rosedblabs/wal
22

3-
go 1.19
3+
go 1.21
44

55
require (
6-
github.com/stretchr/testify v1.8.3
6+
github.com/stretchr/testify v1.9.0
77
github.com/valyala/bytebufferpool v1.0.0
88
)
99

go.sum

+2-4
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
11
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
22
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
3-
github.com/hashicorp/golang-lru/v2 v2.0.2 h1:Dwmkdr5Nc/oBiXgJS3CDHNhJtIHkuZ3DZF5twqnfBdU=
4-
github.com/hashicorp/golang-lru/v2 v2.0.2/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
53
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
64
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
7-
github.com/stretchr/testify v1.8.3 h1:RP3t2pwF7cMEbC1dqtB6poj3niw/9gnV4Cjg5oW5gtY=
8-
github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
5+
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
6+
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
97
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
108
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
119
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=

segment.go

+28-22
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,10 @@ import (
44
"encoding/binary"
55
"errors"
66
"fmt"
7+
"github.com/valyala/bytebufferpool"
78
"hash/crc32"
89
"io"
910
"os"
10-
"sync"
11-
12-
"github.com/valyala/bytebufferpool"
1311
)
1412

1513
type ChunkType = byte
@@ -53,7 +51,7 @@ type segment struct {
5351
currentBlockSize uint32
5452
closed bool
5553
header []byte
56-
blockPool sync.Pool
54+
cachedBlock *blockAndHeader
5755
}
5856

5957
// segmentReader is used to iterate all the data from the segment file.
@@ -67,8 +65,9 @@ type segmentReader struct {
6765

6866
// block and chunk header, saved in pool.
6967
type blockAndHeader struct {
70-
block []byte
71-
header []byte
68+
block []byte
69+
header []byte
70+
blockNumber int64
7271
}
7372

7473
// ChunkPosition represents the position of a chunk in a segment file.
@@ -101,23 +100,23 @@ func openSegmentFile(dirPath, extName string, id uint32) (*segment, error) {
101100
panic(fmt.Errorf("seek to the end of segment file %d%s failed: %v", id, extName, err))
102101
}
103102

103+
// init cached block
104+
bh := &blockAndHeader{
105+
block: make([]byte, blockSize),
106+
header: make([]byte, chunkHeaderSize),
107+
blockNumber: -1,
108+
}
109+
104110
return &segment{
105111
id: id,
106112
fd: fd,
107113
header: make([]byte, chunkHeaderSize),
108-
blockPool: sync.Pool{New: newBlockAndHeader},
109114
currentBlockNumber: uint32(offset / blockSize),
110115
currentBlockSize: uint32(offset % blockSize),
116+
cachedBlock: bh,
111117
}, nil
112118
}
113119

114-
func newBlockAndHeader() interface{} {
115-
return &blockAndHeader{
116-
block: make([]byte, blockSize),
117-
header: make([]byte, chunkHeaderSize),
118-
}
119-
}
120-
121120
// NewReader creates a new segment reader.
122121
// You can call Next to get the next chunk data,
123122
// and io.EOF will be returned when there is no data.
@@ -356,6 +355,8 @@ func (seg *segment) writeChunkBuffer(buf *bytebufferpool.ByteBuffer) error {
356355
return err
357356
}
358357

358+
// invalidate the cached block: it cannot be reused after a write.
359+
seg.cachedBlock.blockNumber = -1
359360
return nil
360361
}
361362

@@ -372,13 +373,10 @@ func (seg *segment) readInternal(blockNumber uint32, chunkOffset int64) ([]byte,
372373

373374
var (
374375
result []byte
375-
bh = seg.blockPool.Get().(*blockAndHeader)
376+
bh = seg.cachedBlock
376377
segSize = seg.Size()
377378
nextChunk = &ChunkPosition{SegmentId: seg.id}
378379
)
379-
defer func() {
380-
seg.blockPool.Put(bh)
381-
}()
382380

383381
for {
384382
size := int64(blockSize)
@@ -391,10 +389,18 @@ func (seg *segment) readInternal(blockNumber uint32, chunkOffset int64) ([]byte,
391389
return nil, nil, io.EOF
392390
}
393391

394-
// cache miss, read block from the segment file
395-
_, err := seg.fd.ReadAt(bh.block[0:size], offset)
396-
if err != nil {
397-
return nil, nil, err
392+
// There are two cases in which we must read the block from the file:
393+
// 1. the acquired block is not the cached one
394+
// 2. new writes were appended to the block, and the block
395+
// is still smaller than 32KB, so we must read it again to pick up the new writes.
396+
if seg.cachedBlock.blockNumber != int64(blockNumber) || size != blockSize {
397+
// read block from segment file at the specified offset.
398+
_, err := seg.fd.ReadAt(bh.block[0:size], offset)
399+
if err != nil {
400+
return nil, nil, err
401+
}
402+
// remember the block
403+
bh.blockNumber = int64(blockNumber)
398404
}
399405

400406
// header

wal.go

-4
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,6 @@ var (
3030
//
3131
// The mu sync.RWMutex is used for concurrent access to the WAL data structure,
3232
// ensuring safe access and modification.
33-
//
34-
// The blockCache is an LRU cache used to store recently accessed data blocks,
35-
// improving read performance by reducing disk I/O.
36-
// It is implemented using a lru.Cache structure with keys of type uint64 and values of type []byte.
3733
type WAL struct {
3834
activeSegment *segment // active segment file, used for new incoming writes.
3935
olderSegments map[SegmentID]*segment // older segment files, only used for read.

0 commit comments

Comments
 (0)