From b43056f305f660ab41e26b6d36f732f6dc9e1651 Mon Sep 17 00:00:00 2001 From: Aritra Basu Date: Tue, 21 Oct 2025 13:46:43 -0700 Subject: [PATCH] added healthcheck support for agent Signed-off-by: Aritra Basu --- calico-vpp-agent/cmd/calico_vpp_dataplane.go | 75 ++++- calico-vpp-agent/health/health.go | 259 ++++++++++++++++++ calico-vpp-agent/watch_dog/watch_dog.go | 59 ---- config/config.go | 6 + test/healthcheck/test_healthcheck.sh | 101 +++++++ yaml/base/calico-vpp-daemonset.yaml | 27 ++ yaml/generated/calico-vpp-dpdk.yaml | 27 ++ .../calico-vpp-eks-dpdk-multinet.yaml | 27 ++ yaml/generated/calico-vpp-eks-dpdk.yaml | 27 ++ yaml/generated/calico-vpp-eks-multinet.yaml | 27 ++ yaml/generated/calico-vpp-eks.yaml | 27 ++ yaml/generated/calico-vpp-kind-multinet.yaml | 27 ++ yaml/generated/calico-vpp-kind.yaml | 27 ++ yaml/generated/calico-vpp-multinet.yaml | 27 ++ yaml/generated/calico-vpp-nohuge.yaml | 27 ++ yaml/generated/calico-vpp.yaml | 27 ++ 16 files changed, 728 insertions(+), 69 deletions(-) create mode 100644 calico-vpp-agent/health/health.go delete mode 100644 calico-vpp-agent/watch_dog/watch_dog.go create mode 100755 test/healthcheck/test_healthcheck.sh diff --git a/calico-vpp-agent/cmd/calico_vpp_dataplane.go b/calico-vpp-agent/cmd/calico_vpp_dataplane.go index 8e126ddc9..877154ffc 100644 --- a/calico-vpp-agent/cmd/calico_vpp_dataplane.go +++ b/calico-vpp-agent/cmd/calico_vpp_dataplane.go @@ -38,13 +38,12 @@ import ( "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/common" "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/connectivity" "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/felix" + "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/health" "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/prometheus" "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/routing" "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/services" - "github.com/projectcalico/vpp-dataplane/v3/config" - - watchdog "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/watch_dog" "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/watchers" + "github.com/projectcalico/vpp-dataplane/v3/config" ) /* @@ -83,6 +82,15 @@ func main() { log.Fatalf("Error writing pidfile: %v", err) } + /** + * Start health check server + */ + healthServer := health.NewHealthServer( + log.WithFields(logrus.Fields{"component": "health"}), + *config.GetCalicoVppInitialConfig().HealthCheckPort, + ) + Go(healthServer.ServeHealth) + /** * Connect to VPP & wait for it to be up */ @@ -90,12 +98,16 @@ func main() { if err != nil { log.Fatalf("Cannot create VPP client: %v", err) } + healthServer.SetComponentStatus(health.ComponentVPP, true, "VPP connection established") + // Once we have the api connection, we know vpp & vpp-manager are running and the // state is accurately reported. Wait for vpp-manager to finish the config. 
common.VppManagerInfo, err = common.WaitForVppManager() if err != nil { log.Fatalf("Vpp Manager not started: %v", err) } + healthServer.SetComponentStatus(health.ComponentVPPManager, true, "VPP Manager ready") + common.ThePubSub = common.NewPubSub(log.WithFields(logrus.Fields{"component": "pubsub"})) /** @@ -164,15 +176,50 @@ func main() { routingServer.SetBGPConf(bgpConf) serviceServer.SetBGPConf(bgpConf) - watchDog := watchdog.NewWatchDog(log.WithFields(logrus.Fields{"component": "watchDog"}), &t) Go(felixServer.ServeFelix) - felixConfig := watchDog.Wait(felixServer.FelixConfigChan, "Waiting for FelixConfig to be provided by the calico pod") - ourBGPSpec := watchDog.Wait(felixServer.GotOurNodeBGPchan, "Waiting for bgp spec to be provided on node add") - // check if the watchDog timer has issued the t.Kill() which would mean we are dead - if !t.Alive() { - log.Fatal("WatchDog timed out waiting for config from felix. Exiting...") + + /* + * Mark as unhealthy while waiting for Felix config + * Kubernetes startup probe handles pod restart if needed + */ + healthServer.MarkAsUnhealthy("Waiting for Felix configuration") + log.Info("Waiting for Felix configuration...") + + ticker := time.NewTicker(20 * time.Second) + defer ticker.Stop() + + var felixConfig interface{} + var ourBGPSpec interface{} + felixConfigReceived := false + bgpSpecReceived := false + + for !felixConfigReceived || !bgpSpecReceived { + select { + case value := <-felixServer.FelixConfigChan: + felixConfig = value + felixConfigReceived = true + log.Info("FelixConfig received from calico pod") + case value := <-felixServer.GotOurNodeBGPchan: + ourBGPSpec = value + bgpSpecReceived = true + log.Info("BGP spec received from node add") + case <-t.Dying(): + log.Error("Tomb dying while waiting for Felix config") + return + case <-ticker.C: + if !felixConfigReceived { + log.Info("Still waiting for FelixConfig from calico pod...") + } + if !bgpSpecReceived { + log.Info("Still waiting for BGP spec from node add...") + } + } } + healthServer.MarkAsHealthy("Felix configuration received") + healthServer.SetComponentStatus(health.ComponentFelix, true, "Felix config received") + log.Info("Felix configuration received") + if ourBGPSpec != nil { bgpSpec, ok := ourBGPSpec.(*common.LocalNodeSpec) if !ok { @@ -189,7 +236,14 @@ func main() { if *config.GetCalicoVppFeatureGates().MultinetEnabled { Go(netWatcher.WatchNetworks) - watchDog.Wait(netWatcher.InSync, "Waiting for networks to be listed and synced") + log.Info("Waiting for networks to be listed and synced...") + select { + case <-netWatcher.InSync: + log.Info("Networks synced") + case <-t.Dying(): + log.Error("Tomb dying while waiting for networks sync") + return + } } if felixConfig != nil { @@ -218,6 +272,7 @@ func main() { Go(localSIDWatcher.WatchLocalSID) } + healthServer.SetComponentStatus(health.ComponentAgent, true, "Agent ready") log.Infof("Agent started") sigChan := make(chan os.Signal, 2) diff --git a/calico-vpp-agent/health/health.go b/calico-vpp-agent/health/health.go new file mode 100644 index 000000000..ed275ed64 --- /dev/null +++ b/calico-vpp-agent/health/health.go @@ -0,0 +1,259 @@ +// Copyright (C) 2025 Cisco Systems Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package health + +import ( + "encoding/json" + "fmt" + "net" + "net/http" + "sync" + "time" + + "github.com/sirupsen/logrus" + "gopkg.in/tomb.v2" +) + +// HealthStatus represents the current health state +type HealthStatus struct { + Healthy bool `json:"healthy"` + Ready bool `json:"ready"` + Components map[string]ComponentStatus `json:"components"` + Message string `json:"message,omitempty"` + LastUpdate time.Time `json:"lastUpdate"` +} + +// ComponentStatus tracks the status of individual components +type ComponentStatus struct { + Initialized bool `json:"initialized"` + Message string `json:"message,omitempty"` + UpdatedAt time.Time `json:"updatedAt"` +} + +// HealthServer provides HTTP health check endpoints +type HealthServer struct { + log *logrus.Entry + port uint32 + status HealthStatus + statusMutex sync.RWMutex + server *http.Server +} + +const ( + ComponentVPP = "vpp" + ComponentVPPManager = "vpp-manager" + ComponentFelix = "felix" + ComponentAgent = "agent" +) + +// NewHealthServer creates a new health check server +func NewHealthServer(log *logrus.Entry, port uint32) *HealthServer { + return &HealthServer{ + log: log, + port: port, + status: HealthStatus{ + Healthy: true, + Ready: false, + Components: make(map[string]ComponentStatus), + LastUpdate: time.Now(), + }, + } +} + +// SetComponentStatus updates the status of a specific component +func (hs *HealthServer) SetComponentStatus(component string, initialized bool, message string) { + hs.statusMutex.Lock() + defer hs.statusMutex.Unlock() + + hs.status.Components[component] = ComponentStatus{ + Initialized: initialized, + Message: message, + UpdatedAt: time.Now(), + } + hs.status.LastUpdate = time.Now() + + // Update overall readiness + hs.updateReadiness() + + hs.log.WithFields(logrus.Fields{ + "component": component, + "initialized": initialized, + "message": message, + }).Debug("Component status updated") +} + +// updateReadiness determines overall readiness based on component status +func (hs *HealthServer) updateReadiness() { + // Required components for readiness + requiredComponents := []string{ + ComponentVPP, + ComponentVPPManager, + ComponentFelix, + ComponentAgent, + } + + allReady := true + for _, comp := range requiredComponents { + status, exists := hs.status.Components[comp] + if !exists || !status.Initialized { + allReady = false + break + } + } + + hs.status.Ready = allReady + + if allReady { + hs.status.Message = "All components initialized" + } else { + hs.status.Message = "Waiting for components to initialize" + } +} + +// MarkAsHealthy marks the agent as healthy (but not necessarily ready) +func (hs *HealthServer) MarkAsHealthy(message string) { + hs.statusMutex.Lock() + defer hs.statusMutex.Unlock() + + hs.status.Healthy = true + if message != "" { + hs.status.Message = message + } else { + hs.status.Message = "Agent is healthy" + } + hs.status.LastUpdate = time.Now() + + hs.log.WithField("message", message).Info("Agent marked as healthy") +} + +// MarkAsUnhealthy marks the agent as unhealthy +func (hs *HealthServer) MarkAsUnhealthy(reason string) { + hs.statusMutex.Lock() + defer 
hs.statusMutex.Unlock() + + hs.status.Healthy = false + hs.status.Ready = false + hs.status.Message = reason + hs.status.LastUpdate = time.Now() + + hs.log.WithField("reason", reason).Warn("Agent marked as unhealthy") +} + +// GetStatus returns the current health status (thread-safe) +func (hs *HealthServer) GetStatus() HealthStatus { + hs.statusMutex.RLock() + defer hs.statusMutex.RUnlock() + + // Create a copy to avoid race conditions + statusCopy := hs.status + statusCopy.Components = make(map[string]ComponentStatus) + for k, v := range hs.status.Components { + statusCopy.Components[k] = v + } + + return statusCopy +} + +// livenessHandler handles the /liveness endpoint +func (hs *HealthServer) livenessHandler(w http.ResponseWriter, r *http.Request) { + status := hs.GetStatus() + + if status.Healthy { + w.WriteHeader(http.StatusOK) + fmt.Fprint(w, "OK") + } else { + w.WriteHeader(http.StatusServiceUnavailable) + fmt.Fprintf(w, "Unhealthy: %s", status.Message) + } +} + +// readinessHandler handles the /readiness endpoint +func (hs *HealthServer) readinessHandler(w http.ResponseWriter, r *http.Request) { + status := hs.GetStatus() + + if status.Ready { + w.WriteHeader(http.StatusOK) + fmt.Fprint(w, "Ready") + } else { + w.WriteHeader(http.StatusServiceUnavailable) + fmt.Fprintf(w, "Not ready: %s", status.Message) + } +} + +// statusHandler handles the /status endpoint (detailed JSON) +func (hs *HealthServer) statusHandler(w http.ResponseWriter, r *http.Request) { + status := hs.GetStatus() + + w.Header().Set("Content-Type", "application/json") + + httpStatus := http.StatusOK + if !status.Ready { + httpStatus = http.StatusServiceUnavailable + } + w.WriteHeader(httpStatus) + + if err := json.NewEncoder(w).Encode(status); err != nil { + hs.log.WithError(err).Error("Failed to encode status response") + } +} + +func (hs *HealthServer) ServeHealth(t *tomb.Tomb) error { + mux := http.NewServeMux() + mux.HandleFunc("/liveness", hs.livenessHandler) + mux.HandleFunc("/readiness", hs.readinessHandler) + mux.HandleFunc("/status", hs.statusHandler) + + // Create TCP listener for the health server + listener, err := net.Listen("tcp", fmt.Sprintf(":%d", hs.port)) + if err != nil { + // Try with a retry mechanism + for i := 0; i < 3; i++ { + hs.log.Warnf("Failed to bind to port %d, retrying in 5 seconds...", hs.port) + time.Sleep(5 * time.Second) + listener, err = net.Listen("tcp", fmt.Sprintf(":%d", hs.port)) + if err == nil { + break + } + } + if err != nil { + return fmt.Errorf("health server error: %w", err) + } + } + + hs.server = &http.Server{ + Addr: fmt.Sprintf(":%d", hs.port), + Handler: mux, + } + + hs.log.Infof("Starting health check server on port %d", hs.port) + + // Start server with our custom listener + errChan := make(chan error, 1) + go func() { + if err := hs.server.Serve(listener); err != nil && err != http.ErrServerClosed { + errChan <- err + } + }() + + // Wait for tomb to die or server error + select { + case <-t.Dying(): + hs.log.Info("Shutting down health check server") + return hs.server.Close() + case err := <-errChan: + return fmt.Errorf("health server error: %w", err) + } +} diff --git a/calico-vpp-agent/watch_dog/watch_dog.go b/calico-vpp-agent/watch_dog/watch_dog.go deleted file mode 100644 index 507d94f7b..000000000 --- a/calico-vpp-agent/watch_dog/watch_dog.go +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright (C) 2019 Cisco Systems Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -// implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package watchdog - -import ( - "time" - - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - "gopkg.in/tomb.v2" -) - -type WatchDog struct { - log *logrus.Entry - t *tomb.Tomb -} - -func NewWatchDog(log *logrus.Entry, t *tomb.Tomb) *WatchDog { - return &WatchDog{ - log: log, - t: t, - } -} - -func (wd *WatchDog) Wait(myChan chan interface{}, msg string) interface{} { - ticker := time.NewTicker(time.Second * 5) - nbTicks := 0 - defer ticker.Stop() - for { - select { - case value := <-myChan: - return value - case <-wd.t.Dying(): - return nil - case <-ticker.C: - nbTicks++ - if nbTicks >= 30 { - wd.t.Kill(errors.Errorf("Timeout waiting for config from felix")) - } else if nbTicks >= 6 { // Start warning after 6 ticks, i.e. 30sec - wd.log.Warn(msg) - } else { - wd.log.Info(msg) - } - } - } -} diff --git a/config/config.go b/config/config.go index d0d8f72ef..c495c5864 100644 --- a/config/config.go +++ b/config/config.go @@ -464,6 +464,9 @@ type CalicoVppInitialConfigConfigType struct { //out of agent and vppmanager // PrometheusStatsPrefix is the prefix to use for Prometheus metrics // Defaults to "cni.projectcalico.vpp." PrometheusStatsPrefix string `json:"prometheusStatsPrefix"` + // HealthCheckPort is the port on which the health check HTTP server listens + // Defaults to 9090 + HealthCheckPort *uint32 `json:"healthCheckPort"` } func (cfg *CalicoVppInitialConfigConfigType) Validate() (err error) { @@ -489,6 +492,9 @@ func (cfg *CalicoVppInitialConfigConfigType) Validate() (err error) { if cfg.PrometheusStatsPrefix == "" { cfg.PrometheusStatsPrefix = "cni.projectcalico.vpp." } + cfg.HealthCheckPort = DefaultToPtr( + cfg.HealthCheckPort, 9090, + ) return nil } func (cfg *CalicoVppInitialConfigConfigType) GetDefaultGWs() (gws []net.IP, err error) { diff --git a/test/healthcheck/test_healthcheck.sh b/test/healthcheck/test_healthcheck.sh new file mode 100755 index 000000000..78c2ade7c --- /dev/null +++ b/test/healthcheck/test_healthcheck.sh @@ -0,0 +1,101 @@ +#!/bin/bash + +# Test script for Calico VPP agent healthcheck endpoints +# This script can be used to verify the healthcheck implementation + +set -e + +HEALTHCHECK_PORT=${HEALTHCHECK_PORT:-9090} +POD_NAME=${1:-} +KUBECONFIG=${KUBECONFIG:-$HOME/.kube/config} +LOCAL_PORT=${LOCAL_PORT:-19090} + +# Function to check if kubectl is properly configured +check_kubectl_config() { + if ! kubectl --kubeconfig="$KUBECONFIG" get nodes &>/dev/null; then + echo "Error: Cannot connect to Kubernetes cluster. Please check your kubectl configuration." 
+        echo "If running with sudo, try: sudo KUBECONFIG=$KUBECONFIG $0 $POD_NAME"
+        exit 1
+    fi
+}
+
+# Function to clean up port-forward process
+cleanup() {
+    if [ -n "$PORT_FORWARD_PID" ]; then
+        echo "Cleaning up port-forward (PID: $PORT_FORWARD_PID)"
+        kill $PORT_FORWARD_PID 2>/dev/null || true
+    fi
+}
+
+# Set up trap to clean up port-forward on exit
+trap cleanup EXIT
+
+if [ -z "$POD_NAME" ]; then
+    echo "Usage: $0 <pod-name>"
+    echo ""
+    echo "Example:"
+    echo "  $0 calico-vpp-node-xxxxx"
+    echo ""
+    echo "Options:"
+    echo "  HEALTHCHECK_PORT=<port>  Set a different healthcheck port in the container (default: 9090)"
+    echo "  LOCAL_PORT=<port>        Set a different local port for port-forwarding (default: 19090)"
+    echo "  KUBECONFIG=<path>        Set a different kubeconfig path"
+    exit 1
+fi
+
+# Check kubectl configuration
+check_kubectl_config
+
+# Verify pod exists
+if ! kubectl --kubeconfig="$KUBECONFIG" get pod -n calico-vpp-dataplane "$POD_NAME" &>/dev/null; then
+    echo "Error: Pod $POD_NAME not found in namespace calico-vpp-dataplane"
+    echo "Available pods:"
+    kubectl --kubeconfig="$KUBECONFIG" get pods -n calico-vpp-dataplane
+    exit 1
+fi
+
+echo "Testing healthcheck endpoints for pod: $POD_NAME"
+echo "Using healthcheck port: $HEALTHCHECK_PORT"
+echo "Using kubeconfig: $KUBECONFIG"
+echo ""
+
+# Port-forward approach (always use this since container has no curl/wget/nc)
+echo "Setting up port-forward from localhost:$LOCAL_PORT to pod:$HEALTHCHECK_PORT"
+kubectl --kubeconfig="$KUBECONFIG" port-forward -n calico-vpp-dataplane "$POD_NAME" $LOCAL_PORT:$HEALTHCHECK_PORT > /dev/null 2>&1 &
+PORT_FORWARD_PID=$!
+
+# Wait for port-forward to establish
+echo "Waiting for port-forward to establish..."
+sleep 2
+echo ""
+
+# Test if port-forward is working
+if ! curl -s "http://localhost:$LOCAL_PORT/liveness" &>/dev/null; then
+    echo "Error: Port-forward not working. Please check if port $LOCAL_PORT is available."
+ exit 1 +fi + +# Test liveness endpoint +echo "=== Testing /liveness endpoint ===" +curl -s -w "\nHTTP Status: %{http_code}\n" "http://localhost:$LOCAL_PORT/liveness" || true +echo "" + +# Test readiness endpoint +echo "=== Testing /readiness endpoint ===" +curl -s -w "\nHTTP Status: %{http_code}\n" "http://localhost:$LOCAL_PORT/readiness" || true +echo "" + +# Test status endpoint (detailed JSON) +echo "=== Testing /status endpoint (detailed) ===" +curl -s "http://localhost:$LOCAL_PORT/status" | python3 -m json.tool || \ + curl -s "http://localhost:$LOCAL_PORT/status" +echo "" + +# Check Kubernetes probe status +echo "=== Kubernetes Probe Status ===" +kubectl --kubeconfig="$KUBECONFIG" get pod -n calico-vpp-dataplane "$POD_NAME" -o jsonpath='{.status.conditions[?(@.type=="Ready")]}' | python3 -m json.tool || true +echo "" + +echo "=== Pod Status ===" +kubectl --kubeconfig="$KUBECONFIG" get pod -n calico-vpp-dataplane "$POD_NAME" -o wide +echo "" \ No newline at end of file diff --git a/yaml/base/calico-vpp-daemonset.yaml b/yaml/base/calico-vpp-daemonset.yaml index 6a037d037..779a4c86d 100644 --- a/yaml/base/calico-vpp-daemonset.yaml +++ b/yaml/base/calico-vpp-daemonset.yaml @@ -288,6 +288,33 @@ spec: resources: requests: cpu: 250m + startupProbe: + failureThreshold: 10 + httpGet: + path: /liveness + port: 9090 + scheme: HTTP + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 3 + livenessProbe: + failureThreshold: 3 + httpGet: + path: /liveness + port: 9090 + scheme: HTTP + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 3 + readinessProbe: + failureThreshold: 3 + httpGet: + path: /readiness + port: 9090 + scheme: HTTP + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 3 volumeMounts: - mountPath: /var/run/calico name: var-run-calico diff --git a/yaml/generated/calico-vpp-dpdk.yaml b/yaml/generated/calico-vpp-dpdk.yaml index 7a65afac3..fd4e5323d 100644 --- a/yaml/generated/calico-vpp-dpdk.yaml +++ b/yaml/generated/calico-vpp-dpdk.yaml @@ -274,12 +274,39 @@ spec: name: calico-vpp-config image: docker.io/calicovpp/agent:latest imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 3 + httpGet: + path: /liveness + port: 9090 + scheme: HTTP + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 3 name: agent + readinessProbe: + failureThreshold: 3 + httpGet: + path: /readiness + port: 9090 + scheme: HTTP + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 3 resources: requests: cpu: 250m securityContext: privileged: true + startupProbe: + failureThreshold: 10 + httpGet: + path: /liveness + port: 9090 + scheme: HTTP + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 3 volumeMounts: - mountPath: /var/run/calico name: var-run-calico diff --git a/yaml/generated/calico-vpp-eks-dpdk-multinet.yaml b/yaml/generated/calico-vpp-eks-dpdk-multinet.yaml index 6ed70fe86..9df69c061 100644 --- a/yaml/generated/calico-vpp-eks-dpdk-multinet.yaml +++ b/yaml/generated/calico-vpp-eks-dpdk-multinet.yaml @@ -335,12 +335,39 @@ spec: name: calico-vpp-config image: docker.io/calicovpp/agent:latest imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 3 + httpGet: + path: /liveness + port: 9090 + scheme: HTTP + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 3 name: agent + readinessProbe: + failureThreshold: 3 + httpGet: + path: /readiness + port: 9090 + scheme: HTTP + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 3 resources: requests: cpu: 250m securityContext: privileged: true + 
startupProbe: + failureThreshold: 10 + httpGet: + path: /liveness + port: 9090 + scheme: HTTP + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 3 volumeMounts: - mountPath: /var/run/calico name: var-run-calico diff --git a/yaml/generated/calico-vpp-eks-dpdk.yaml b/yaml/generated/calico-vpp-eks-dpdk.yaml index 09a962cbd..b20332468 100644 --- a/yaml/generated/calico-vpp-eks-dpdk.yaml +++ b/yaml/generated/calico-vpp-eks-dpdk.yaml @@ -285,12 +285,39 @@ spec: name: calico-vpp-config image: docker.io/calicovpp/agent:latest imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 3 + httpGet: + path: /liveness + port: 9090 + scheme: HTTP + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 3 name: agent + readinessProbe: + failureThreshold: 3 + httpGet: + path: /readiness + port: 9090 + scheme: HTTP + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 3 resources: requests: cpu: 250m securityContext: privileged: true + startupProbe: + failureThreshold: 10 + httpGet: + path: /liveness + port: 9090 + scheme: HTTP + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 3 volumeMounts: - mountPath: /var/run/calico name: var-run-calico diff --git a/yaml/generated/calico-vpp-eks-multinet.yaml b/yaml/generated/calico-vpp-eks-multinet.yaml index 45d844bfc..b31ec1e00 100644 --- a/yaml/generated/calico-vpp-eks-multinet.yaml +++ b/yaml/generated/calico-vpp-eks-multinet.yaml @@ -333,12 +333,39 @@ spec: name: calico-vpp-config image: docker.io/calicovpp/agent:latest imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 3 + httpGet: + path: /liveness + port: 9090 + scheme: HTTP + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 3 name: agent + readinessProbe: + failureThreshold: 3 + httpGet: + path: /readiness + port: 9090 + scheme: HTTP + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 3 resources: requests: cpu: 250m securityContext: privileged: true + startupProbe: + failureThreshold: 10 + httpGet: + path: /liveness + port: 9090 + scheme: HTTP + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 3 volumeMounts: - mountPath: /var/run/calico name: var-run-calico diff --git a/yaml/generated/calico-vpp-eks.yaml b/yaml/generated/calico-vpp-eks.yaml index ac5cdf552..38545d2b1 100644 --- a/yaml/generated/calico-vpp-eks.yaml +++ b/yaml/generated/calico-vpp-eks.yaml @@ -283,12 +283,39 @@ spec: name: calico-vpp-config image: docker.io/calicovpp/agent:latest imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 3 + httpGet: + path: /liveness + port: 9090 + scheme: HTTP + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 3 name: agent + readinessProbe: + failureThreshold: 3 + httpGet: + path: /readiness + port: 9090 + scheme: HTTP + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 3 resources: requests: cpu: 250m securityContext: privileged: true + startupProbe: + failureThreshold: 10 + httpGet: + path: /liveness + port: 9090 + scheme: HTTP + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 3 volumeMounts: - mountPath: /var/run/calico name: var-run-calico diff --git a/yaml/generated/calico-vpp-kind-multinet.yaml b/yaml/generated/calico-vpp-kind-multinet.yaml index 2051c70e8..761be6429 100644 --- a/yaml/generated/calico-vpp-kind-multinet.yaml +++ b/yaml/generated/calico-vpp-kind-multinet.yaml @@ -286,12 +286,39 @@ spec: name: calico-vpp-config image: docker.io/calicovpp/agent:latest imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 3 + httpGet: + path: 
/liveness + port: 9090 + scheme: HTTP + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 3 name: agent + readinessProbe: + failureThreshold: 3 + httpGet: + path: /readiness + port: 9090 + scheme: HTTP + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 3 resources: requests: cpu: 250m securityContext: privileged: true + startupProbe: + failureThreshold: 10 + httpGet: + path: /liveness + port: 9090 + scheme: HTTP + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 3 volumeMounts: - mountPath: /var/run/calico name: var-run-calico diff --git a/yaml/generated/calico-vpp-kind.yaml b/yaml/generated/calico-vpp-kind.yaml index 88511fbc5..dde1cb46a 100644 --- a/yaml/generated/calico-vpp-kind.yaml +++ b/yaml/generated/calico-vpp-kind.yaml @@ -236,12 +236,39 @@ spec: name: calico-vpp-config image: docker.io/calicovpp/agent:latest imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 3 + httpGet: + path: /liveness + port: 9090 + scheme: HTTP + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 3 name: agent + readinessProbe: + failureThreshold: 3 + httpGet: + path: /readiness + port: 9090 + scheme: HTTP + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 3 resources: requests: cpu: 250m securityContext: privileged: true + startupProbe: + failureThreshold: 10 + httpGet: + path: /liveness + port: 9090 + scheme: HTTP + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 3 volumeMounts: - mountPath: /var/run/calico name: var-run-calico diff --git a/yaml/generated/calico-vpp-multinet.yaml b/yaml/generated/calico-vpp-multinet.yaml index 3065adea4..d904f1369 100644 --- a/yaml/generated/calico-vpp-multinet.yaml +++ b/yaml/generated/calico-vpp-multinet.yaml @@ -283,12 +283,39 @@ spec: name: calico-vpp-config image: docker.io/calicovpp/agent:latest imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 3 + httpGet: + path: /liveness + port: 9090 + scheme: HTTP + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 3 name: agent + readinessProbe: + failureThreshold: 3 + httpGet: + path: /readiness + port: 9090 + scheme: HTTP + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 3 resources: requests: cpu: 250m securityContext: privileged: true + startupProbe: + failureThreshold: 10 + httpGet: + path: /liveness + port: 9090 + scheme: HTTP + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 3 volumeMounts: - mountPath: /var/run/calico name: var-run-calico diff --git a/yaml/generated/calico-vpp-nohuge.yaml b/yaml/generated/calico-vpp-nohuge.yaml index 62f8ad8cb..472f45d22 100644 --- a/yaml/generated/calico-vpp-nohuge.yaml +++ b/yaml/generated/calico-vpp-nohuge.yaml @@ -233,12 +233,39 @@ spec: name: calico-vpp-config image: docker.io/calicovpp/agent:latest imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 3 + httpGet: + path: /liveness + port: 9090 + scheme: HTTP + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 3 name: agent + readinessProbe: + failureThreshold: 3 + httpGet: + path: /readiness + port: 9090 + scheme: HTTP + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 3 resources: requests: cpu: 250m securityContext: privileged: true + startupProbe: + failureThreshold: 10 + httpGet: + path: /liveness + port: 9090 + scheme: HTTP + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 3 volumeMounts: - mountPath: /var/run/calico name: var-run-calico diff --git a/yaml/generated/calico-vpp.yaml b/yaml/generated/calico-vpp.yaml index 
fefd6bbf2..62a5e8149 100644 --- a/yaml/generated/calico-vpp.yaml +++ b/yaml/generated/calico-vpp.yaml @@ -274,12 +274,39 @@ spec: name: calico-vpp-config image: docker.io/calicovpp/agent:latest imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 3 + httpGet: + path: /liveness + port: 9090 + scheme: HTTP + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 3 name: agent + readinessProbe: + failureThreshold: 3 + httpGet: + path: /readiness + port: 9090 + scheme: HTTP + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 3 resources: requests: cpu: 250m securityContext: privileged: true + startupProbe: + failureThreshold: 10 + httpGet: + path: /liveness + port: 9090 + scheme: HTTP + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 3 volumeMounts: - mountPath: /var/run/calico name: var-run-calico
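
A few notes on how the pieces above fit together. The probe values are identical in the base daemonset and in every generated manifest: the startupProbe gives the agent roughly 330 seconds (initialDelaySeconds 30 + failureThreshold 10 x periodSeconds 30) to start answering /liveness before the kubelet restarts the container, which is considerably more generous than the 150-second timeout (30 ticks x 5 s) the deleted watch_dog used to enforce via t.Kill(). Once the startup probe has succeeded, the livenessProbe restarts the container after about 30 seconds of sustained failure (failureThreshold 3 x periodSeconds 10), and the readinessProbe takes the pod out of Service endpoints after about 15 seconds (failureThreshold 3 x periodSeconds 5). The probes hard-code port 9090, which matches the default applied in config.go when healthCheckPort is unset; if healthCheckPort is overridden in the initial configuration (presumably via CALICOVPP_INITIAL_CONFIG like the other CalicoVppInitialConfigConfigType fields, although that wiring is not shown in this patch), the probe ports in the manifests must be kept in sync.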
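
Because readiness is derived purely from SetComponentStatus calls, the new health package can be exercised without a cluster. The following is a minimal in-package test sketch; it is not part of the patch, and the test name and assertions are illustrative only:

package health

import (
	"net/http"
	"net/http/httptest"
	"testing"

	"github.com/sirupsen/logrus"
)

// TestReadinessFollowsComponents checks the 503 -> 200 -> 503 transitions
// exposed by the readiness and liveness handlers.
func TestReadinessFollowsComponents(t *testing.T) {
	hs := NewHealthServer(logrus.NewEntry(logrus.New()), 9090)

	// A freshly created server is healthy but not yet ready.
	rec := httptest.NewRecorder()
	hs.readinessHandler(rec, httptest.NewRequest(http.MethodGet, "/readiness", nil))
	if rec.Code != http.StatusServiceUnavailable {
		t.Fatalf("expected 503 before components initialize, got %d", rec.Code)
	}

	// Mark every component required by updateReadiness() as initialized.
	for _, c := range []string{ComponentVPP, ComponentVPPManager, ComponentFelix, ComponentAgent} {
		hs.SetComponentStatus(c, true, "ok")
	}
	rec = httptest.NewRecorder()
	hs.readinessHandler(rec, httptest.NewRequest(http.MethodGet, "/readiness", nil))
	if rec.Code != http.StatusOK {
		t.Fatalf("expected 200 once all components are initialized, got %d", rec.Code)
	}

	// MarkAsUnhealthy flips both liveness and readiness.
	hs.MarkAsUnhealthy("simulated failure")
	rec = httptest.NewRecorder()
	hs.livenessHandler(rec, httptest.NewRequest(http.MethodGet, "/liveness", nil))
	if rec.Code != http.StatusServiceUnavailable {
		t.Fatalf("expected 503 after MarkAsUnhealthy, got %d", rec.Code)
	}
}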
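
The /status endpoint serializes the exported HealthStatus struct, so external tooling can decode it with the same type. A small client sketch, assuming the health port has been forwarded to localhost:19090 as in test/healthcheck/test_healthcheck.sh (the address and program are illustrative, not part of the patch):

package main

import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"

	"github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/health"
)

func main() {
	// Assumes a port-forward from localhost:19090 to the pod's 9090 is already running.
	resp, err := http.Get("http://localhost:19090/status")
	if err != nil {
		log.Fatalf("cannot reach health server: %v", err)
	}
	defer resp.Body.Close()

	var st health.HealthStatus
	if err := json.NewDecoder(resp.Body).Decode(&st); err != nil {
		log.Fatalf("cannot decode status: %v", err)
	}

	fmt.Printf("healthy=%v ready=%v (%s)\n", st.Healthy, st.Ready, st.Message)
	for name, comp := range st.Components {
		fmt.Printf("  %-12s initialized=%v %s\n", name, comp.Initialized, comp.Message)
	}
}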