Skip to content

Commit ac0994e

Browse files
committed
Add measurement to check memory usage in benchmark list job
1 parent 21b5881 commit ac0994e

File tree

4 files changed

+225
-1
lines changed

4 files changed

+225
-1
lines changed

clusterloader2/go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ require (
6060
k8s.io/gengo v0.0.0-20230829151522-9cce18d56c01
6161
k8s.io/klog/v2 v2.130.1
6262
k8s.io/kubelet v0.33.0
63+
k8s.io/metrics v0.0.0-00010101000000-000000000000
6364
)
6465

6566
require (

clusterloader2/go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1588,6 +1588,8 @@ k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff h1:/usPimJzUKKu+m+TE36gUy
15881588
k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff/go.mod h1:5jIi+8yX4RIb8wk3XwBo5Pq2ccx4FP10ohkbSKCZoK8=
15891589
k8s.io/kubelet v0.33.0 h1:4pJA2Ge6Rp0kDNV76KH7pTBiaV2T1a1874QHMcubuSU=
15901590
k8s.io/kubelet v0.33.0/go.mod h1:iDnxbJQMy9DUNaML5L/WUlt3uJtNLWh7ZAe0JSp4Yi0=
1591+
k8s.io/metrics v0.33.0 h1:sKe5sC9qb1RakMhs8LWYNuN2ne6OTCWexj8Jos3rO2Y=
1592+
k8s.io/metrics v0.33.0/go.mod h1:XewckTFXmE2AJiP7PT3EXaY7hi7bler3t2ZLyOdQYzU=
15911593
k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
15921594
k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 h1:M3sRQVHv7vB20Xc2ybTt7ODCeFj6JSWYFzOFnYeS6Ro=
15931595
k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package common
18+
19+
import (
20+
"context"
21+
"fmt"
22+
"slices"
23+
"sync"
24+
"time"
25+
26+
"k8s.io/apimachinery/pkg/runtime"
27+
"k8s.io/apimachinery/pkg/runtime/serializer"
28+
"k8s.io/client-go/kubernetes"
29+
clientset "k8s.io/client-go/kubernetes"
30+
"k8s.io/klog/v2"
31+
metricsapi "k8s.io/metrics/pkg/apis/metrics/v1beta1"
32+
"k8s.io/perf-tests/clusterloader2/pkg/errors"
33+
"k8s.io/perf-tests/clusterloader2/pkg/measurement"
34+
"k8s.io/perf-tests/clusterloader2/pkg/util"
35+
)
36+
37+
// containerMemoryUsageName is the name under which this measurement is
// registered and under which its summary is reported.
const containerMemoryUsageName = "ContainerMemoryUsage"
40+
41+
func init() {
42+
if err := measurement.Register(containerMemoryUsageName, createContainerMemoryUsageMeasurement); err != nil {
43+
klog.Fatalf("Cannot register %s: %v", containerMemoryUsageName, err)
44+
}
45+
klog.Infof("[ContainerMemoryUsage] Starting measurement with label selector")
46+
}
47+
48+
func createContainerMemoryUsageMeasurement() measurement.Measurement {
49+
return &containerMemoryUsageMeasurement{}
50+
}
51+
52+
type containerMemoryUsageMeasurement struct {
53+
isRunning bool
54+
summaries []measurement.Summary
55+
clientset *kubernetes.Clientset
56+
pollFrequency time.Duration
57+
threshold float64
58+
59+
stopCh chan struct{}
60+
mu sync.Mutex
61+
samples []float64
62+
lock sync.Mutex
63+
wg sync.WaitGroup
64+
}
65+
66+
func (c *containerMemoryUsageMeasurement) Execute(config *measurement.Config) ([]measurement.Summary, error) {
67+
action, err := util.GetString(config.Params, "action")
68+
if err != nil {
69+
return nil, err
70+
}
71+
72+
c.lock.Lock()
73+
defer c.lock.Unlock()
74+
75+
switch action {
76+
case "start":
77+
return nil, c.start(config)
78+
case "gather":
79+
return c.gather()
80+
default:
81+
return nil, fmt.Errorf("unknown action %v", action)
82+
}
83+
84+
}
85+
86+
// Configure parses configuration and initializes clientset
87+
func (c *containerMemoryUsageMeasurement) initFields(config *measurement.Config) error {
88+
c.isRunning = true
89+
c.stopCh = make(chan struct{})
90+
91+
frequency, err := util.GetDuration(config.Params, "pollFrequency")
92+
if err != nil {
93+
return err
94+
}
95+
c.pollFrequency = frequency
96+
97+
threshold, err := util.GetFloat64OrDefault(config.Params, "threshold", 0)
98+
if err != nil {
99+
return err
100+
}
101+
c.threshold = threshold
102+
103+
return nil
104+
}
105+
106+
func (c *containerMemoryUsageMeasurement) start(config *measurement.Config) error {
107+
klog.V(2).Infof("%v: measurement already running", c)
108+
if c.isRunning {
109+
klog.V(2).Infof("%v: measurement already running", c)
110+
return nil
111+
}
112+
if err := c.initFields(config); err != nil {
113+
return err
114+
}
115+
k8sClient := config.ClusterFramework.GetClientSets().GetClient()
116+
117+
c.wg.Add(1)
118+
go func() {
119+
defer c.wg.Done()
120+
121+
for {
122+
select {
123+
case <-c.stopCh:
124+
return
125+
case <-time.After(c.pollFrequency):
126+
memoryUsage, err := c.getAPIServerMemoryUsage(k8sClient)
127+
if err != nil {
128+
klog.Warningf("ContainerMemoryUsage: read error: %v", err)
129+
continue
130+
}
131+
132+
// convert Memory bytes to GB
133+
memoryInGB := memoryUsage / (1024 * 1024 * 1024)
134+
135+
c.samples = append(c.samples, memoryInGB)
136+
}
137+
}
138+
}()
139+
return nil
140+
}
141+
142+
func (c *containerMemoryUsageMeasurement) gather() ([]measurement.Summary, error) {
143+
klog.V(2).Infof("%v: measurement already running", c)
144+
if !c.isRunning {
145+
return nil, nil
146+
}
147+
148+
// Close this channel to stop the execution
149+
close(c.stopCh)
150+
// Wait for execution goroutine to finish
151+
c.wg.Wait()
152+
c.isRunning = false
153+
154+
klog.V(2).Infof("%s: gathering summaries", containerMemoryUsageName)
155+
maxMemoryUsage := slices.Max(c.samples)
156+
157+
content := fmt.Sprintf(`{"maxMemoryUsage": %d, "threshold": %d}`, maxMemoryUsage, c.threshold)
158+
159+
summary := measurement.CreateSummary(containerMemoryUsageName, "json", content)
160+
klog.V(2).Infof("%d: maxMemoryUsage", maxMemoryUsage)
161+
klog.V(2).Infof("%d: threshold", c.threshold)
162+
if maxMemoryUsage > c.threshold {
163+
err := errors.NewMetricViolationError("ContainerMemoryUsage", fmt.Sprintf("SLO not fulfilled (expected >= %f, got: %f)", c.threshold, maxMemoryUsage))
164+
return nil, err
165+
}
166+
167+
return []measurement.Summary{summary}, nil
168+
169+
}
170+
171+
func (c *containerMemoryUsageMeasurement) getAPIServerMemoryUsage(cl clientset.Interface) (float64, error) {
172+
173+
result, err := cl.CoreV1().RESTClient().
174+
Get().
175+
AbsPath("apis/metrics.k8s.io/v1beta1/namespaces/kube-system/pods").
176+
Param("labelSelector", "component=kube-apiserver").
177+
DoRaw(context.Background())
178+
179+
scheme := runtime.NewScheme()
180+
_ = metricsapi.AddToScheme(scheme) // register metrics API types into scheme
181+
182+
codef := serializer.NewCodecFactory(scheme).UniversalDeserializer()
183+
184+
if err != nil {
185+
return 0, err
186+
}
187+
188+
podMetricsList := &metricsapi.PodMetricsList{}
189+
_, _, err = codef.Decode(result, nil, podMetricsList)
190+
if err != nil {
191+
return 0, fmt.Errorf("failed to decode metrics: %v", err)
192+
}
193+
194+
var memoryUsage float64
195+
196+
for _, podMetric := range podMetricsList.Items {
197+
for _, container := range podMetric.Containers {
198+
if container.Name == "kube-apiserver" {
199+
memoryUsage = container.Usage.Memory().AsApproximateFloat64()
200+
break
201+
}
202+
}
203+
}
204+
return memoryUsage, nil
205+
}
206+
207+
// Dispose stops the polling goroutine if the measurement is still running,
// so that disposing a measurement that was started but never gathered does
// not leak the goroutine. It is a no-op otherwise.
func (c *containerMemoryUsageMeasurement) Dispose() {
	c.lock.Lock()
	defer c.lock.Unlock()

	if !c.isRunning {
		return
	}
	close(c.stopCh)
	c.wg.Wait()
	c.isRunning = false
}
208+
209+
func (c *containerMemoryUsageMeasurement) String() string {
210+
return containerMemoryUsageName
211+
}

clusterloader2/testing/list/modules/measurements.yaml

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
## Input params
44
# Valid actions: "start", "gather"
55
{{$action := .action}}
6+
{{$ENABLE_CONTAINER_MEMORY_USAGE_MEASUREMENT := DefaultParam .CL2_ENABLE_CONTAINER_MEMORY_USAGE_MEASUREMENT true}}
7+
{{$CONTAINER_MEMORY_THRESHOLD := DefaultParam .CL2_CONTAINER_MEMORY_THRESHOLD 1}}
68

79

810
steps:
@@ -17,4 +19,12 @@ steps:
1719
- Identifier: TestMetrics
1820
Method: TestMetrics
1921
Params:
20-
action: {{$action}}
22+
action: {{$action}}
23+
{{if $ENABLE_CONTAINER_MEMORY_USAGE_MEASUREMENT}}
24+
- Identifier: ContainerMemoryUsage
25+
Method: ContainerMemoryUsage
26+
Params:
27+
action: {{$action}}
28+
threshold: {{$CONTAINER_MEMORY_THRESHOLD}}
29+
pollFrequency: 15s
30+
{{end}}

0 commit comments

Comments
 (0)