Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 81 additions & 70 deletions internal/util/fieldmap.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,19 @@ type FieldNameMap struct {
maxKeyLength int
all []caching.Pair
trie *caching.TrieTree
hash *caching.HashMap
hash map[string]unsafe.Pointer
}

// Set sets the field descriptor for the given key
// NewFieldNameMap creates an empty FieldNameMap whose hash map is
// allocated up front with defaultMapSize as its initial size hint.
func NewFieldNameMap() *FieldNameMap {
	m := new(FieldNameMap)
	m.hash = make(map[string]unsafe.Pointer, defaultMapSize)
	return m
}

// Set sets the field descriptor for the given key.
//
// NOTICE: It sets to the hash map by default. If the user wants to use a trie tree,
// please call Build() after all Set() calls.
func (ft *FieldNameMap) Set(key string, field unsafe.Pointer) (exist bool) {
if len(key) > ft.maxKeyLength {
ft.maxKeyLength = len(key)
Expand All @@ -59,7 +68,7 @@ func (ft FieldNameMap) Get(k string) unsafe.Pointer {
if ft.trie != nil {
return (unsafe.Pointer)(ft.trie.Get(k))
} else if ft.hash != nil {
return (unsafe.Pointer)(ft.hash.Get(k))
return (unsafe.Pointer)(ft.hash[k])
}
return nil
}
Expand All @@ -72,7 +81,7 @@ func (ft FieldNameMap) All() []caching.Pair {
// Size returns the size of the map
func (ft FieldNameMap) Size() int {
if ft.hash != nil {
return ft.hash.Size()
return len(ft.hash)
} else if ft.trie != nil {
return ft.trie.Size()
}
Expand All @@ -81,88 +90,90 @@ func (ft FieldNameMap) Size() int {

// Build builds the map.
// It will try to build a trie tree if the dispersion of keys is high enough (min).
func (ft *FieldNameMap) Build() {
func (ft *FieldNameMap) Build(noTrieTree bool) {
if len(ft.all) == 0 {
return
}

var empty unsafe.Pointer

// collect the distribution statistics for each position:
// - primary slice store the position as its index
// - secondary map used to merge values with same char at the same position
var positionDispersion = make([]map[byte][]int, ft.maxKeyLength)
if !noTrieTree {
// collect the distribution statistics for each position:
// - primary slice store the position as its index
// - secondary map used to merge values with same char at the same position
var positionDispersion = make([]map[byte][]int, ft.maxKeyLength)

for i, v := range ft.all {
for j := ft.maxKeyLength - 1; j >= 0; j-- {
if v.Key == "" {
// empty key, especially store
empty = v.Val
}
// get the char at the position; the default (position beyond the key range) is ASCII 0
var c = byte(0)
if j < len(v.Key) {
c = v.Key[j]
}

if positionDispersion[j] == nil {
positionDispersion[j] = make(map[byte][]int, 16)
}
// record the index i of the value with the same char c at the same position j
positionDispersion[j][c] = append(positionDispersion[j][c], i)
}
}

for i, v := range ft.all {
for j := ft.maxKeyLength - 1; j >= 0; j-- {
if v.Key == "" {
// empty key, especially store
empty = v.Val
// calculate the best position which has the highest dispersion
var idealPos = -1
var min = defaultMaxBucketSize
var count = len(ft.all)

for i := ft.maxKeyLength - 1; i >= 0; i-- {
cd := positionDispersion[i]
l := len(cd)
// calculate the dispersion (average bucket size)
f := float64(count) / float64(l)
if f < min {
min = f
idealPos = i
}
// get the char at the position; the default (position beyond the key range) is ASCII 0
var c = byte(0)
if j < len(v.Key) {
c = v.Key[j]
// 1 means every value is stored in a different bucket, so there is no need to continue calculating
if min == 1 {
break
}
}

if positionDispersion[j] == nil {
positionDispersion[j] = make(map[byte][]int, 16)
if idealPos != -1 {
// find the best position, build a trie tree
ft.hash = nil
ft.trie = &caching.TrieTree{}
// NOTICE: we only use a two-layer tree here, for better performance
ft.trie.Positions = append(ft.trie.Positions, idealPos)
// set all key-values to the trie tree
for _, v := range ft.all {
ft.trie.Set(v.Key, v.Val)
}
// record the index i of the value with the same char c at the same position j
positionDispersion[j][c] = append(positionDispersion[j][c], i)
if empty != nil {
ft.trie.Empty = empty
}
return
}
}

// calculate the best position which has the highest dispersion
var idealPos = -1
var min = defaultMaxBucketSize
var count = len(ft.all)

for i := ft.maxKeyLength - 1; i >= 0; i-- {
cd := positionDispersion[i]
l := len(cd)
// calculate the dispersion (average bucket size)
f := float64(count) / float64(l)
if f < min {
min = f
idealPos = i
}
// 1 means every value is stored in a different bucket, so there is no need to continue calculating
if min == 1 {
break
// no ideal position or force use hash map
ft.trie = nil
ft.hash = make(map[string]unsafe.Pointer, len(ft.all))
// set all key-values to the hash map
for _, v := range ft.all {
// check if the key exists before set, so that a duplicate key keeps its first value
// WARN: if the key exists, the value WON'T be replaced
o := ft.hash[v.Key]
if o == nil {
ft.hash[v.Key] = v.Val
}
}

if idealPos != -1 {
// find the best position, build a trie tree
ft.hash = nil
ft.trie = &caching.TrieTree{}
// NOTICE: we only use a two-layer tree here, for better performance
ft.trie.Positions = append(ft.trie.Positions, idealPos)
// set all key-values to the trie tree
for _, v := range ft.all {
ft.trie.Set(v.Key, v.Val)
}
if empty != nil {
ft.trie.Empty = empty
}

} else {
// no ideal position, build a hash map
ft.trie = nil
ft.hash = caching.NewHashMap(len(ft.all), defaultHashMapLoadFactor)
// set all key-values to the hash map
for _, v := range ft.all {
// caching.HashMap does not support duplicate key, so must check if the key exists before set
// WARN: if the key exists, the value WON'T be replaced
o := ft.hash.Get(v.Key)
if o == nil {
ft.hash.Set(v.Key, v.Val)
}
}
if empty != nil {
ft.hash.Set("", empty)
}
if empty != nil {
ft.hash[""] = empty
}
}

Expand Down
46 changes: 34 additions & 12 deletions internal/util/fieldmap_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,42 @@

package util

import "testing"
import (
"testing"
"unsafe"
)

func TestEmptyFieldMap(t *testing.T) {
// empty test
ids := FieldIDMap{}
if ids.Get(1) != nil {
t.Fatalf("expect nil")
func TestFieldMap(t *testing.T) {
ids := NewFieldNameMap()
v1 := "a"
ids.Set("1", unsafe.Pointer(&v1))
v2 := "b"
ids.Set("2", unsafe.Pointer(&v2))
ids.Set("1", unsafe.Pointer(&v2))
ids.Set("", unsafe.Pointer(&v1))

ids.Build(false)

if ids.Get("1") != unsafe.Pointer(&v2) {
t.Fatalf("expect 1")
}
names := FieldNameMap{}
if names.Get("a") != nil {
t.Fatalf("expect nil")
if ids.Get("2") != unsafe.Pointer(&v2) {
t.Fatalf("expect 1")
}
names.Build()
if names.Get("a") != nil {
t.Fatalf("expect nil")
if ids.Get("") != unsafe.Pointer(&v1) {
t.Fatalf("expect 1")
}

ids = NewFieldNameMap()
ids.Set("", unsafe.Pointer(&v1))
ids.Set("1", unsafe.Pointer(&v2))
ids.Build(true)

if ids.Get("") != unsafe.Pointer(&v1) {
t.Fatalf("expect 1")
}
if ids.Get("1") != unsafe.Pointer(&v2) {
t.Fatalf("expect 2")
}

}
6 changes: 5 additions & 1 deletion proto/idl.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ type Options struct {
ParseFunctionMode meta.ParseFunctionMode // not implemented.

EnableProtoBase bool // not implemented.

// ForceHashMapAsFieldNameMap forces a hash map to be used as the underlying field name map.
// By default we try to use a trie tree as the field name map, which is usually faster than a Go map but consumes more memory.
ForceHashMapAsFieldNameMap bool
}

// NewDefaultOptions creates a default Options.
Expand Down Expand Up @@ -258,7 +262,7 @@ func parseMessage(ctx context.Context, msgDesc *desc.MessageDescriptor, cache co
md.names.Set(name, unsafe.Pointer(fieldDesc))
md.names.Set(jsonName, unsafe.Pointer(fieldDesc))
}
md.names.Build()
md.names.Build(opts.ForceHashMapAsFieldNameMap)

return ty, nil
}
10 changes: 7 additions & 3 deletions thrift/idl.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,10 @@ type Options struct {

// ApiBodyFastPath indicates `api.body` will change alias-name of root field, which can avoid search http-body on them
ApiBodyFastPath bool

// ForceHashMapAsFieldNameMap forces a hash map to be used as the underlying field name map.
// By default we try to use a trie tree as the field name map, which is usually faster than a Go map but consumes more memory.
ForceHashMapAsFieldNameMap bool
}

// NewDefaultOptions creates a default Options.
Expand Down Expand Up @@ -479,7 +483,7 @@ func parseRequest(ctx context.Context, isStreaming bool, fn *parser.Function, tr
}
wrappedTyDsc.Struct().ids.Set(int32(reqAst.ID), unsafe.Pointer(reqField))
wrappedTyDsc.Struct().names.Set(reqAst.Name, unsafe.Pointer(reqField))
wrappedTyDsc.Struct().names.Build()
wrappedTyDsc.Struct().names.Build(opts.ForceHashMapAsFieldNameMap)
return wrappedTyDsc, hasRequestBase, nil
}

Expand Down Expand Up @@ -528,7 +532,7 @@ func parseResponse(ctx context.Context, isStreaming bool, fn *parser.Function, t
wrappedResp.Struct().ids.Set(int32(exp.ID), unsafe.Pointer(exceptionField))
wrappedResp.Struct().names.Set(exp.Name, unsafe.Pointer(exceptionField))
}
wrappedResp.Struct().names.Build()
wrappedResp.Struct().names.Build(opts.ForceHashMapAsFieldNameMap)
return wrappedResp, nil
}

Expand Down Expand Up @@ -758,7 +762,7 @@ func parseType(ctx context.Context, t *parser.Type, tree *parser.Thrift, cache c

}
// build field name map
ty.Struct().names.Build()
ty.Struct().names.Build(opts.ForceHashMapAsFieldNameMap)
return ty, nil
}
}
Expand Down