Skip to content

Commit 43f79d3

Browse files
committed
fix: update activator label and var name.
Signed-off-by: X1aoZEOuO <[email protected]>
1 parent 67d5b73 commit 43f79d3

File tree

7 files changed

+166
-64
lines changed

7 files changed

+166
-64
lines changed

api/core/v1alpha1/model_types.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,10 @@ const (
3535
// Once either of them qualified, we'll expose this as a field in Model.
3636
ModelPreheatAnnoKey = "llmaz.io/model-preheat"
3737

38-
// ModelActivatorAnnotationKey is used to indicate whether the model is activated by the activator.
39-
ModelActivatorAnnoKey = "activator.llmaz.io/playground"
40-
// CachedModelActivatorAnnotationKey is used to cache the activator info of the model.
41-
CachedModelActivatorAnnoKey = "cached.activator.llmaz.io"
38+
// ModelActivatorAnnoKey is used to indicate the model name activated by the activator.
39+
ModelActivatorAnnoKey = "activator.llmaz.io/model-name"
40+
// CachedModelActivatorAnnoKey is used to cache the activator state of the model.
41+
CachedModelActivatorAnnoKey = "activator.llmaz.io/cached-state"
4242

4343
HUGGING_FACE = "Huggingface"
4444
MODEL_SCOPE = "ModelScope"

chart/values.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ controllerManager:
55
- --metrics-bind-address=:8443
66
- --leader-elect
77
- --namespace=llmaz-system
8-
- --enable-serverless
8+
- --enable-service-activator
99
- --pod-ip=$(POD_IP)
1010
containerSecurityContext:
1111
allowPrivilegeEscalation: false

cmd/main.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -64,14 +64,14 @@ func main() {
6464
var enableLeaderElection bool
6565
var probeAddr string
6666
var namespace string
67-
var enableServerless bool
67+
var enableServiceActivator bool
6868
var podIP string
6969

7070
flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
7171
flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
7272
flag.StringVar(&namespace, "namespace", "llmaz-system", "The namespace of the llmaz to deploy")
73-
flag.BoolVar(&enableServerless, "enable-serverless", false, "Enable the serverless feature")
74-
flag.StringVar(&podIP, "pod-ip", "", "The pod IP of the llmaz controller manager")
73+
flag.BoolVar(&enableServiceActivator, "enable-service-activator", false, "Enable the service activator feature. This is an experimental feature.")
74+
flag.StringVar(&podIP, "pod-ip", "", "The pod IP of the llmaz controller manager. Only used when service activator is enabled.")
7575
flag.BoolVar(&enableLeaderElection, "leader-elect", false,
7676
"Enable leader election for controller manager. "+
7777
"Enabling this will ensure there is only one active controller manager.")
@@ -125,7 +125,7 @@ func main() {
125125
// Cert won't be ready until manager starts, so start a goroutine here which
126126
// will block until the cert is ready before setting up the controllers.
127127
// Controllers who register after manager starts will start directly.
128-
go setupControllers(mgr, certsReady, enableServerless, podIP)
128+
go setupControllers(mgr, certsReady, enableServiceActivator, podIP)
129129

130130
//+kubebuilder:scaffold:builder
131131

@@ -145,7 +145,7 @@ func main() {
145145
}
146146
}
147147

148-
func setupControllers(mgr ctrl.Manager, certsReady chan struct{}, enableServerless bool, podIP string) {
148+
func setupControllers(mgr ctrl.Manager, certsReady chan struct{}, enableServiceActivator bool, podIP string) {
149149
// The controllers won't work until the webhooks are operating,
150150
// and the webhook won't work until the certs are all in places.
151151
setupLog.Info("waiting for the cert generation to complete")
@@ -181,7 +181,7 @@ func setupControllers(mgr ctrl.Manager, certsReady chan struct{}, enableServerle
181181
os.Exit(1)
182182
}
183183

184-
if enableServerless {
184+
if enableServiceActivator {
185185
dynamicClient, err := dynamic.NewForConfig(mgr.GetConfig())
186186
if err != nil {
187187
setupLog.Error(err, "unable to create dynamic client")

config/crd/bases/inference.llmaz.io_backendruntimes.yaml

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,12 @@ spec:
388388
- port
389389
type: object
390390
type: object
391+
stopSignal:
392+
description: |-
393+
StopSignal defines which signal will be sent to a container when it is being stopped.
394+
If not specified, the default is defined by the container runtime in use.
395+
StopSignal can only be set for Pods with a non-empty .spec.os.name
396+
type: string
391397
type: object
392398
livenessProbe:
393399
description: |-
@@ -770,7 +776,9 @@ spec:
770776
policies:
771777
description: |-
772778
policies is a list of potential scaling policies which can be used during scaling.
773-
At least one policy must be specified, otherwise the HPAScalingRules will be discarded as invalid
779+
If not set, use the default values:
780+
- For scale up: allow doubling the number of pods, or an absolute change of 4 pods in a 15s window.
781+
- For scale down: allow all pods to be removed in a 15s window.
774782
items:
775783
description: HPAScalingPolicy is a single
776784
policy which must hold true for a specified
@@ -814,6 +822,24 @@ spec:
814822
- For scale down: 300 (i.e. the stabilization window is 300 seconds long).
815823
format: int32
816824
type: integer
825+
tolerance:
826+
anyOf:
827+
- type: integer
828+
- type: string
829+
description: |-
830+
tolerance is the tolerance on the ratio between the current and desired
831+
metric value under which no updates are made to the desired number of
832+
replicas (e.g. 0.01 for 1%). Must be greater than or equal to zero. If not
833+
set, the default cluster-wide tolerance is applied (by default 10%).
834+
835+
For example, if autoscaling is configured with a memory consumption target of 100Mi,
836+
and scale-down and scale-up tolerances of 5% and 1% respectively, scaling will be
837+
triggered when the actual consumption falls below 95Mi or exceeds 101Mi.
838+
839+
This is an alpha field and requires enabling the HPAConfigurableTolerance
840+
feature gate.
841+
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
842+
x-kubernetes-int-or-string: true
817843
type: object
818844
scaleUp:
819845
description: |-
@@ -826,7 +852,9 @@ spec:
826852
policies:
827853
description: |-
828854
policies is a list of potential scaling policies which can be used during scaling.
829-
At least one policy must be specified, otherwise the HPAScalingRules will be discarded as invalid
855+
If not set, use the default values:
856+
- For scale up: allow doubling the number of pods, or an absolute change of 4 pods in a 15s window.
857+
- For scale down: allow all pods to be removed in a 15s window.
830858
items:
831859
description: HPAScalingPolicy is a single
832860
policy which must hold true for a specified
@@ -870,6 +898,24 @@ spec:
870898
- For scale down: 300 (i.e. the stabilization window is 300 seconds long).
871899
format: int32
872900
type: integer
901+
tolerance:
902+
anyOf:
903+
- type: integer
904+
- type: string
905+
description: |-
906+
tolerance is the tolerance on the ratio between the current and desired
907+
metric value under which no updates are made to the desired number of
908+
replicas (e.g. 0.01 for 1%). Must be greater than or equal to zero. If not
909+
set, the default cluster-wide tolerance is applied (by default 10%).
910+
911+
For example, if autoscaling is configured with a memory consumption target of 100Mi,
912+
and scale-down and scale-up tolerances of 5% and 1% respectively, scaling will be
913+
triggered when the actual consumption falls below 95Mi or exceeds 101Mi.
914+
915+
This is an alpha field and requires enabling the HPAConfigurableTolerance
916+
feature gate.
917+
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
918+
x-kubernetes-int-or-string: true
873919
type: object
874920
type: object
875921
metrics:

config/crd/bases/inference.llmaz.io_playgrounds.yaml

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,9 @@ spec:
295295
policies:
296296
description: |-
297297
policies is a list of potential scaling policies which can be used during scaling.
298-
At least one policy must be specified, otherwise the HPAScalingRules will be discarded as invalid
298+
If not set, use the default values:
299+
- For scale up: allow doubling the number of pods, or an absolute change of 4 pods in a 15s window.
300+
- For scale down: allow all pods to be removed in a 15s window.
299301
items:
300302
description: HPAScalingPolicy is a single policy
301303
which must hold true for a specified past
@@ -339,6 +341,24 @@ spec:
339341
- For scale down: 300 (i.e. the stabilization window is 300 seconds long).
340342
format: int32
341343
type: integer
344+
tolerance:
345+
anyOf:
346+
- type: integer
347+
- type: string
348+
description: |-
349+
tolerance is the tolerance on the ratio between the current and desired
350+
metric value under which no updates are made to the desired number of
351+
replicas (e.g. 0.01 for 1%). Must be greater than or equal to zero. If not
352+
set, the default cluster-wide tolerance is applied (by default 10%).
353+
354+
For example, if autoscaling is configured with a memory consumption target of 100Mi,
355+
and scale-down and scale-up tolerances of 5% and 1% respectively, scaling will be
356+
triggered when the actual consumption falls below 95Mi or exceeds 101Mi.
357+
358+
This is an alpha field and requires enabling the HPAConfigurableTolerance
359+
feature gate.
360+
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
361+
x-kubernetes-int-or-string: true
342362
type: object
343363
scaleUp:
344364
description: |-
@@ -351,7 +371,9 @@ spec:
351371
policies:
352372
description: |-
353373
policies is a list of potential scaling policies which can be used during scaling.
354-
At least one policy must be specified, otherwise the HPAScalingRules will be discarded as invalid
374+
If not set, use the default values:
375+
- For scale up: allow doubling the number of pods, or an absolute change of 4 pods in a 15s window.
376+
- For scale down: allow all pods to be removed in a 15s window.
355377
items:
356378
description: HPAScalingPolicy is a single policy
357379
which must hold true for a specified past
@@ -395,6 +417,24 @@ spec:
395417
- For scale down: 300 (i.e. the stabilization window is 300 seconds long).
396418
format: int32
397419
type: integer
420+
tolerance:
421+
anyOf:
422+
- type: integer
423+
- type: string
424+
description: |-
425+
tolerance is the tolerance on the ratio between the current and desired
426+
metric value under which no updates are made to the desired number of
427+
replicas (e.g. 0.01 for 1%). Must be greater than or equal to zero. If not
428+
set, the default cluster-wide tolerance is applied (by default 10%).
429+
430+
For example, if autoscaling is configured with a memory consumption target of 100Mi,
431+
and scale-down and scale-up tolerances of 5% and 1% respectively, scaling will be
432+
triggered when the actual consumption falls below 95Mi or exceeds 101Mi.
433+
434+
This is an alpha field and requires enabling the HPAConfigurableTolerance
435+
feature gate.
436+
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
437+
x-kubernetes-int-or-string: true
398438
type: object
399439
type: object
400440
metrics:

0 commit comments

Comments
 (0)