Skip to content

Commit 8a4ac8c

Browse files
authored
enhance: expose more metrics data (milvus-io#39456)
issue: milvus-io#36621 milvus-io#39417

1. Adjust the server-side cache size.
2. Add source information for configurations.
3. Add node ID for compaction and indexing tasks.
4. Resolve localhost access issues to fix health check failures for etcd.

Signed-off-by: jaime <yun.zhang@zilliz.com>
1 parent a9e0e0a commit 8a4ac8c

17 files changed

Lines changed: 116 additions & 51 deletions

File tree

deployments/docker/standalone/docker-compose.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ services:
1111
- ETCD_SNAPSHOT_COUNT=50000
1212
volumes:
1313
- ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd
14-
command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd
14+
command: etcd -advertise-client-urls=http://etcd:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd
1515
healthcheck:
1616
test: ["CMD", "etcdctl", "endpoint", "health"]
1717
interval: 30s

internal/datacoord/compaction_task_meta.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ func newCompactionTaskStats(task *datapb.CompactionTask) *metricsinfo.Compaction
5252
ResultSegments: lo.Map(task.ResultSegments, func(t int64, i int) string {
5353
return strconv.FormatInt(t, 10)
5454
}),
55+
NodeID: task.NodeID,
5556
}
5657
}
5758

@@ -70,7 +71,7 @@ func newCompactionTaskMeta(ctx context.Context, catalog metastore.DataCoordCatal
7071
ctx: ctx,
7172
catalog: catalog,
7273
compactionTasks: make(map[int64]map[int64]*datapb.CompactionTask, 0),
73-
taskStats: expirable.NewLRU[UniqueID, *metricsinfo.CompactionTask](32, nil, time.Minute*15),
74+
taskStats: expirable.NewLRU[UniqueID, *metricsinfo.CompactionTask](512, nil, time.Minute*15),
7475
}
7576
if err := csm.reloadFromKV(); err != nil {
7677
return nil, err

internal/datacoord/import_meta.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ type importTasks struct {
5353
func newImportTasks() *importTasks {
5454
return &importTasks{
5555
tasks: make(map[int64]ImportTask),
56-
taskStats: expirable.NewLRU[UniqueID, ImportTask](64, nil, time.Minute*30),
56+
taskStats: expirable.NewLRU[UniqueID, ImportTask](512, nil, time.Minute*30),
5757
}
5858
}
5959

internal/datacoord/index_meta.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ func newIndexTaskStats(s *model.SegmentIndex) *metricsinfo.IndexTaskStats {
8282
IndexVersion: s.IndexVersion,
8383
CreatedUTCTime: typeutil.TimestampToString(s.CreatedUTCTime * 1000),
8484
FinishedUTCTime: typeutil.TimestampToString(s.FinishedUTCTime * 1000),
85+
NodeID: s.NodeID,
8586
}
8687
}
8788

@@ -98,7 +99,7 @@ func newSegmentIndexBuildInfo() *segmentBuildInfo {
9899
// build ID -> segment index
99100
buildID2SegmentIndex: make(map[UniqueID]*model.SegmentIndex),
100101
// build ID -> task stats
101-
taskStats: expirable.NewLRU[UniqueID, *metricsinfo.IndexTaskStats](64, nil, time.Minute*30),
102+
taskStats: expirable.NewLRU[UniqueID, *metricsinfo.IndexTaskStats](1024, nil, time.Minute*30),
102103
}
103104
}
104105

internal/datacoord/job_manager_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ func (s *jobManagerSuite) TestJobManager_triggerStatsTaskLoop() {
104104
allocator: alloc,
105105
tasks: make(map[int64]Task),
106106
meta: mt,
107-
taskStats: expirable.NewLRU[UniqueID, Task](64, nil, time.Minute*5),
107+
taskStats: expirable.NewLRU[UniqueID, Task](512, nil, time.Minute*5),
108108
},
109109
allocator: alloc,
110110
}

internal/datacoord/task_scheduler.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ func newTaskScheduler(
9393
handler: handler,
9494
indexEngineVersionManager: indexEngineVersionManager,
9595
allocator: allocator,
96-
taskStats: expirable.NewLRU[UniqueID, Task](64, nil, time.Minute*15),
96+
taskStats: expirable.NewLRU[UniqueID, Task](512, nil, time.Minute*15),
9797
compactionHandler: compactionHandler,
9898
}
9999
ts.reloadFromMeta()

internal/datacoord/task_stats.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ func (st *statsTask) UpdateVersion(ctx context.Context, nodeID int64, meta *meta
142142
log.Warn("segment is contains by l0 compaction, skip stats", zap.Int64("taskID", st.taskID),
143143
zap.Int64("segmentID", st.segmentID))
144144
st.SetState(indexpb.JobState_JobStateFailed, "segment is contains by l0 compaction")
145-
//reset compacting
145+
// reset compacting
146146
meta.SetSegmentsCompacting(ctx, []UniqueID{st.segmentID}, false)
147147
st.SetStartTime(time.Now())
148148
return fmt.Errorf("segment is contains by l0 compaction")

internal/flushcommon/syncmgr/sync_manager.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ func NewSyncManager(chunkManager storage.ChunkManager) SyncManager {
7272
keyLockDispatcher: dispatcher,
7373
chunkManager: chunkManager,
7474
tasks: typeutil.NewConcurrentMap[string, Task](),
75-
taskStats: expirable.NewLRU[string, Task](16, nil, time.Minute*15),
75+
taskStats: expirable.NewLRU[string, Task](64, nil, time.Minute*15),
7676
}
7777
// setup config update watcher
7878
params.Watch(params.DataNodeCfg.MaxParallelSyncMgrTasks.Key, config.NewHandler("datanode.syncmgr.poolsize", syncMgr.resizeHandler))

internal/proxy/impl.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6753,7 +6753,7 @@ func DeregisterSubLabel(subLabel string) {
67536753
func (node *Proxy) RegisterRestRouter(router gin.IRouter) {
67546754
// Cluster request that executed by proxy
67556755
router.GET(http.ClusterInfoPath, getClusterInfo(node))
6756-
router.GET(http.ClusterConfigsPath, getConfigs(paramtable.Get().GetAll()))
6756+
router.GET(http.ClusterConfigsPath, getConfigs(paramtable.Get().GetConfigsView()))
67576757
router.GET(http.ClusterClientsPath, getConnectedClients)
67586758
router.GET(http.ClusterDependenciesPath, getDependencies)
67596759

internal/querycoordv2/task/scheduler.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ func NewScheduler(ctx context.Context,
284284
channelTasks: NewConcurrentMap[replicaChannelIndex, Task](),
285285
processQueue: newTaskQueue(),
286286
waitQueue: newTaskQueue(),
287-
taskStats: expirable.NewLRU[UniqueID, Task](64, nil, time.Minute*15),
287+
taskStats: expirable.NewLRU[UniqueID, Task](256, nil, time.Minute*15),
288288
segmentTaskDelta: NewExecutingTaskDelta(),
289289
channelTaskDelta: NewExecutingTaskDelta(),
290290
}

0 commit comments

Comments
 (0)