Skip to content

Commit

Permalink
fix: Remove frequently updating metric to avoid mutex contention (#38775)
Browse files Browse the repository at this point in the history

issue: #37630

Reduce the frequency of metric updates to avoid holding the mutex for
long periods.

---------

Signed-off-by: bigsheeper <[email protected]>
  • Loading branch information
bigsheeper authored Jan 24, 2025
1 parent cd068f2 commit 5fb597b
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 18 deletions.
26 changes: 9 additions & 17 deletions internal/datacoord/index_meta.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"github.com/hashicorp/golang-lru/v2/expirable"
"github.com/prometheus/client_golang/prometheus"
"github.com/samber/lo"
"go.uber.org/atomic"
"go.uber.org/zap"
"google.golang.org/protobuf/proto"

Expand Down Expand Up @@ -65,6 +66,8 @@ type indexMeta struct {

// segmentID -> indexID -> segmentIndex
segmentIndexes map[UniqueID]map[UniqueID]*model.SegmentIndex

lastUpdateMetricTime atomic.Time
}

func newIndexTaskStats(s *model.SegmentIndex) *metricsinfo.IndexTaskStats {
Expand Down Expand Up @@ -205,6 +208,10 @@ func (m *indexMeta) updateSegIndexMeta(segIdx *model.SegmentIndex, updateFunc fu
}

func (m *indexMeta) updateIndexTasksMetrics() {
if time.Since(m.lastUpdateMetricTime.Load()) < 120*time.Second {
return
}
defer m.lastUpdateMetricTime.Store(time.Now())
taskMetrics := make(map[UniqueID]map[commonpb.IndexState]int)
for _, segIdx := range m.segmentBuildInfo.List() {
if segIdx.IsDeleted || !m.isIndexExist(segIdx.CollectionID, segIdx.IndexID) {
Expand Down Expand Up @@ -233,6 +240,7 @@ func (m *indexMeta) updateIndexTasksMetrics() {
}
}
}
log.Ctx(m.ctx).Info("update index metric", zap.Int("collectionNum", len(taskMetrics)))
}

func checkParams(fieldIndex *model.Index, req *indexpb.CreateIndexRequest) bool {
Expand Down Expand Up @@ -878,7 +886,7 @@ func (m *indexMeta) GetAllSegIndexes() map[int64]*model.SegmentIndex {
tasks := m.segmentBuildInfo.List()
segIndexes := make(map[int64]*model.SegmentIndex, len(tasks))
for buildID, segIndex := range tasks {
segIndexes[buildID] = model.CloneSegmentIndex(segIndex)
segIndexes[buildID] = segIndex
}
return segIndexes
}
Expand Down Expand Up @@ -975,22 +983,6 @@ func (m *indexMeta) CheckCleanSegmentIndex(buildID UniqueID) (bool, *model.Segme
return true, nil
}

// GetMetasByNodeID collects the segment index entries whose NodeID equals
// nodeID.
//
// It holds the read lock for the whole call, iterates over everything returned
// by m.segmentBuildInfo.List(), skips entries flagged IsDeleted, and appends
// the result of model.CloneSegmentIndex for each match (presumably a copy, so
// callers do not share the stored object — confirm against the model package).
// The returned slice is empty but non-nil when nothing matches.
func (m *indexMeta) GetMetasByNodeID(nodeID UniqueID) []*model.SegmentIndex {
m.RLock()
defer m.RUnlock()

metas := make([]*model.SegmentIndex, 0)
for _, segIndex := range m.segmentBuildInfo.List() {
// Entries marked as deleted are never reported.
if segIndex.IsDeleted {
continue
}
if nodeID == segIndex.NodeID {
// Return the clone rather than the stored pointer.
metas = append(metas, model.CloneSegmentIndex(segIndex))
}
}
return metas
}

func (m *indexMeta) getSegmentsIndexStates(collectionID UniqueID, segmentIDs []UniqueID) map[int64]map[int64]*indexpb.SegmentIndexState {
m.RLock()
defer m.RUnlock()
Expand Down
8 changes: 7 additions & 1 deletion internal/querycoordv2/task/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,9 @@ type taskScheduler struct {
channelTasks *ConcurrentMap[replicaChannelIndex, Task]
processQueue *taskQueue
waitQueue *taskQueue
taskStats *expirable.LRU[UniqueID, Task]

taskStats *expirable.LRU[UniqueID, Task]
lastUpdateMetricTime atomic.Time

// nodeID -> collectionID -> taskDelta
segmentTaskDelta *ExecutingTaskDelta
Expand Down Expand Up @@ -364,6 +366,9 @@ func (scheduler *taskScheduler) Add(task Task) error {
}

func (scheduler *taskScheduler) updateTaskMetrics() {
if time.Since(scheduler.lastUpdateMetricTime.Load()) < 30*time.Second {
return
}
segmentGrowNum, segmentReduceNum, segmentUpdateNum, segmentMoveNum := 0, 0, 0, 0
leaderGrowNum, leaderReduceNum, leaderUpdateNum := 0, 0, 0
channelGrowNum, channelReduceNum, channelMoveNum := 0, 0, 0
Expand Down Expand Up @@ -421,6 +426,7 @@ func (scheduler *taskScheduler) updateTaskMetrics() {
metrics.QueryCoordTaskNum.WithLabelValues(metrics.ChannelGrowTaskLabel).Set(float64(channelGrowNum))
metrics.QueryCoordTaskNum.WithLabelValues(metrics.ChannelReduceTaskLabel).Set(float64(channelReduceNum))
metrics.QueryCoordTaskNum.WithLabelValues(metrics.ChannelMoveTaskLabel).Set(float64(channelMoveNum))
scheduler.lastUpdateMetricTime.Store(time.Now())
}

// check whether the task is valid to add,
Expand Down

0 comments on commit 5fb597b

Please sign in to comment.