diff --git a/internal/util/fieldmap.go b/internal/util/fieldmap.go index 1cc80b2d..00ae202a 100644 --- a/internal/util/fieldmap.go +++ b/internal/util/fieldmap.go @@ -35,10 +35,19 @@ type FieldNameMap struct { maxKeyLength int all []caching.Pair trie *caching.TrieTree - hash *caching.HashMap + hash map[string]unsafe.Pointer } -// Set sets the field descriptor for the given key +func NewFieldNameMap() *FieldNameMap { + return &FieldNameMap{ + hash: make(map[string]unsafe.Pointer, defaultMapSize), + } +} + +// Set sets the field descriptor for the given key. +// +// NOTICE: It sets to the hash map by default. If the user wants to use a trie tree, +// please call Build() after all Set() calls. func (ft *FieldNameMap) Set(key string, field unsafe.Pointer) (exist bool) { if len(key) > ft.maxKeyLength { ft.maxKeyLength = len(key) @@ -59,7 +68,7 @@ func (ft FieldNameMap) Get(k string) unsafe.Pointer { if ft.trie != nil { return (unsafe.Pointer)(ft.trie.Get(k)) } else if ft.hash != nil { - return (unsafe.Pointer)(ft.hash.Get(k)) + return (unsafe.Pointer)(ft.hash[k]) } return nil } @@ -72,7 +81,7 @@ func (ft FieldNameMap) All() []caching.Pair { // Size returns the size of the map func (ft FieldNameMap) Size() int { if ft.hash != nil { - return ft.hash.Size() + return len(ft.hash) } else if ft.trie != nil { return ft.trie.Size() } @@ -81,88 +90,90 @@ func (ft FieldNameMap) Size() int { // Build builds the map. // It will try to build a trie tree if the dispersion of keys is higher enough (min). 
-func (ft *FieldNameMap) Build() { +func (ft *FieldNameMap) Build(noTrieTree bool) { if len(ft.all) == 0 { return } var empty unsafe.Pointer - // statistics the distrubution for each position: - // - primary slice store the position as its index - // - secondary map used to merge values with same char at the same position - var positionDispersion = make([]map[byte][]int, ft.maxKeyLength) + if !noTrieTree { + // collect the distribution statistics for each position: + // - primary slice stores the position as its index + // - secondary map is used to merge values with the same char at the same position + var positionDispersion = make([]map[byte][]int, ft.maxKeyLength) + + for i, v := range ft.all { + for j := ft.maxKeyLength - 1; j >= 0; j-- { + if v.Key == "" { + // empty key, stored specially + empty = v.Val + } + // get the char at the position, default (position beyond the key range) is ASCII 0 + var c = byte(0) + if j < len(v.Key) { + c = v.Key[j] + } + + if positionDispersion[j] == nil { + positionDispersion[j] = make(map[byte][]int, 16) + } + // record the index i of the value with the same char c at the same position j + positionDispersion[j][c] = append(positionDispersion[j][c], i) + } + } - for i, v := range ft.all { - for j := ft.maxKeyLength - 1; j >= 0; j-- { - if v.Key == "" { - // empty key, especially store - empty = v.Val + // calculate the best position which has the highest dispersion + var idealPos = -1 + var min = defaultMaxBucketSize + var count = len(ft.all) + + for i := ft.maxKeyLength - 1; i >= 0; i-- { + cd := positionDispersion[i] + l := len(cd) + // calculate the dispersion (average bucket size) + f := float64(count) / float64(l) + if f < min { + min = f + idealPos = i } - // get the char at the position, defualt (position beyonds key range) is ASCII 0 - var c = byte(0) - if j < len(v.Key) { - c = v.Key[j] + // 1 means every value is stored in its own bucket, no need to continue calculating + if min == 1 { + break } + } - if positionDispersion[j] == nil { - 
positionDispersion[j] = make(map[byte][]int, 16) + if idealPos != -1 { + // found the best position, build a trie tree + ft.hash = nil + ft.trie = &caching.TrieTree{} + // NOTICE: we only use a two-layer tree here, for better performance + ft.trie.Positions = append(ft.trie.Positions, idealPos) + // set all key-values to the trie tree + for _, v := range ft.all { + ft.trie.Set(v.Key, v.Val) } - // recoder the index i of the value with same char c at the same position j - positionDispersion[j][c] = append(positionDispersion[j][c], i) + if empty != nil { + ft.trie.Empty = empty + } + return } } - // calculate the best position which has the highest dispersion - var idealPos = -1 - var min = defaultMaxBucketSize - var count = len(ft.all) - - for i := ft.maxKeyLength - 1; i >= 0; i-- { - cd := positionDispersion[i] - l := len(cd) - // calculate the dispersion (average bucket size) - f := float64(count) / float64(l) - if f < min { - min = f - idealPos = i - } - // 1 means all the value store in different bucket, no need to continue calulating - if min == 1 { - break + // no ideal position, or the hash map is forced + ft.trie = nil + ft.hash = make(map[string]unsafe.Pointer, len(ft.all)) + // set all key-values to the hash map + for _, v := range ft.all { + // keep the first non-nil value stored for a key, so check the existing entry before set + // WARN: if the key already has a non-nil value, the value WON'T be replaced + o := ft.hash[v.Key] + if o == nil { + ft.hash[v.Key] = v.Val } } - - if idealPos != -1 { - // find the best position, build a trie tree - ft.hash = nil - ft.trie = &caching.TrieTree{} - // NOTICE: we only use a two-layer tree here, for better performance - ft.trie.Positions = append(ft.trie.Positions, idealPos) - // set all key-values to the trie tree - for _, v := range ft.all { - ft.trie.Set(v.Key, v.Val) - } - if empty != nil { - ft.trie.Empty = empty - } - - } else { - // no ideal position, build a hash map - ft.trie = nil - ft.hash = caching.NewHashMap(len(ft.all), 
defaultHashMapLoadFactor) - // set all key-values to the trie tree - for _, v := range ft.all { - // caching.HashMap does not support duplicate key, so must check if the key exists before set - // WARN: if the key exists, the value WON'T be replaced - o := ft.hash.Get(v.Key) - if o == nil { - ft.hash.Set(v.Key, v.Val) - } - } - if empty != nil { - ft.hash.Set("", empty) - } + if empty != nil { + ft.hash[""] = empty } } diff --git a/internal/util/fieldmap_test.go b/internal/util/fieldmap_test.go index aafb7dd0..0e7ea6bb 100644 --- a/internal/util/fieldmap_test.go +++ b/internal/util/fieldmap_test.go @@ -16,20 +16,42 @@ package util -import "testing" +import ( + "testing" + "unsafe" +) -func TestEmptyFieldMap(t *testing.T) { - // empty test - ids := FieldIDMap{} - if ids.Get(1) != nil { - t.Fatalf("expect nil") +func TestFieldMap(t *testing.T) { + ids := NewFieldNameMap() + v1 := "a" + ids.Set("1", unsafe.Pointer(&v1)) + v2 := "b" + ids.Set("2", unsafe.Pointer(&v2)) + ids.Set("1", unsafe.Pointer(&v2)) + ids.Set("", unsafe.Pointer(&v1)) + + ids.Build(false) + + if ids.Get("1") != unsafe.Pointer(&v2) { + t.Fatalf("expect 1") } - names := FieldNameMap{} - if names.Get("a") != nil { - t.Fatalf("expect nil") + if ids.Get("2") != unsafe.Pointer(&v2) { + t.Fatalf("expect 1") } - names.Build() - if names.Get("a") != nil { - t.Fatalf("expect nil") + if ids.Get("") != unsafe.Pointer(&v1) { + t.Fatalf("expect 1") } + + ids = NewFieldNameMap() + ids.Set("", unsafe.Pointer(&v1)) + ids.Set("1", unsafe.Pointer(&v2)) + ids.Build(true) + + if ids.Get("") != unsafe.Pointer(&v1) { + t.Fatalf("expect 1") + } + if ids.Get("1") != unsafe.Pointer(&v2) { + t.Fatalf("expect 2") + } + } diff --git a/proto/idl.go b/proto/idl.go index ff626f05..a646391c 100644 --- a/proto/idl.go +++ b/proto/idl.go @@ -48,6 +48,10 @@ type Options struct { ParseFunctionMode meta.ParseFunctionMode // not implemented. EnableProtoBase bool // not implemented. 
+ + // ForceHashMapAsFieldNameMap indicates to use a hash map as the underlying field name map. + // By default we try to use a trie tree as the field name map, which is usually faster than a Go map but consumes more memory. + ForceHashMapAsFieldNameMap bool } // NewDefaultOptions creates a default Options. @@ -258,7 +262,7 @@ func parseMessage(ctx context.Context, msgDesc *desc.MessageDescriptor, cache co md.names.Set(name, unsafe.Pointer(fieldDesc)) md.names.Set(jsonName, unsafe.Pointer(fieldDesc)) } - md.names.Build() + md.names.Build(opts.ForceHashMapAsFieldNameMap) return ty, nil } diff --git a/thrift/idl.go b/thrift/idl.go index 9d773b0a..02434488 100644 --- a/thrift/idl.go +++ b/thrift/idl.go @@ -100,6 +100,10 @@ type Options struct { // ApiBodyFastPath indicates `api.body` will change alias-name of root field, which can avoid search http-body on them ApiBodyFastPath bool + + // ForceHashMapAsFieldNameMap indicates to use a hash map as the underlying field name map. + // By default we try to use a trie tree as the field name map, which is usually faster than a Go map but consumes more memory. + ForceHashMapAsFieldNameMap bool } // NewDefaultOptions creates a default Options. 
@@ -479,7 +483,7 @@ func parseRequest(ctx context.Context, isStreaming bool, fn *parser.Function, tr } wrappedTyDsc.Struct().ids.Set(int32(reqAst.ID), unsafe.Pointer(reqField)) wrappedTyDsc.Struct().names.Set(reqAst.Name, unsafe.Pointer(reqField)) - wrappedTyDsc.Struct().names.Build() + wrappedTyDsc.Struct().names.Build(opts.ForceHashMapAsFieldNameMap) return wrappedTyDsc, hasRequestBase, nil } @@ -528,7 +532,7 @@ func parseResponse(ctx context.Context, isStreaming bool, fn *parser.Function, t wrappedResp.Struct().ids.Set(int32(exp.ID), unsafe.Pointer(exceptionField)) wrappedResp.Struct().names.Set(exp.Name, unsafe.Pointer(exceptionField)) } - wrappedResp.Struct().names.Build() + wrappedResp.Struct().names.Build(opts.ForceHashMapAsFieldNameMap) return wrappedResp, nil } @@ -758,7 +762,7 @@ func parseType(ctx context.Context, t *parser.Type, tree *parser.Thrift, cache c } // buidl field name map - ty.Struct().names.Build() + ty.Struct().names.Build(opts.ForceHashMapAsFieldNameMap) return ty, nil } }