|
| 1 | +/* |
| 2 | + * Copyright 2025 The Kubernetes Authors. |
| 3 | + * |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | + * you may not use this file except in compliance with the License. |
| 6 | + * You may obtain a copy of the License at |
| 7 | + * |
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | + * |
| 10 | + * Unless required by applicable law or agreed to in writing, software |
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | + * See the License for the specific language governing permissions and |
| 14 | + * limitations under the License. |
| 15 | + */ |
| 16 | + |
| 17 | +package main |
| 18 | + |
| 19 | +import ( |
| 20 | + "context" |
| 21 | + "fmt" |
| 22 | + "net" |
| 23 | + "net/url" |
| 24 | + "path" |
| 25 | + "strconv" |
| 26 | + "sync" |
| 27 | + |
| 28 | + "google.golang.org/grpc" |
| 29 | + "google.golang.org/grpc/codes" |
| 30 | + "google.golang.org/grpc/credentials/insecure" |
| 31 | + "google.golang.org/grpc/health/grpc_health_v1" |
| 32 | + "google.golang.org/grpc/status" |
| 33 | + "k8s.io/klog/v2" |
| 34 | + drapb "k8s.io/kubelet/pkg/apis/dra/v1beta1" |
| 35 | + registerapi "k8s.io/kubelet/pkg/apis/pluginregistration/v1" |
| 36 | + |
| 37 | + "sigs.k8s.io/dra-example-driver/pkg/consts" |
| 38 | +) |
| 39 | + |
| 40 | +type healthcheck struct { |
| 41 | + grpc_health_v1.UnimplementedHealthServer |
| 42 | + |
| 43 | + server *grpc.Server |
| 44 | + wg sync.WaitGroup |
| 45 | + |
| 46 | + regClient registerapi.RegistrationClient |
| 47 | + draClient drapb.DRAPluginClient |
| 48 | +} |
| 49 | + |
| 50 | +func startHealthcheck(ctx context.Context, config *Config) (*healthcheck, error) { |
| 51 | + log := klog.FromContext(ctx) |
| 52 | + |
| 53 | + port := config.flags.healthcheckPort |
| 54 | + if port < 0 { |
| 55 | + return nil, nil |
| 56 | + } |
| 57 | + |
| 58 | + addr := net.JoinHostPort("", strconv.Itoa(port)) |
| 59 | + lis, err := net.Listen("tcp", addr) |
| 60 | + if err != nil { |
| 61 | + return nil, fmt.Errorf("failed to listen for healthcheck service at %s: %w", addr, err) |
| 62 | + } |
| 63 | + |
| 64 | + regSockPath := (&url.URL{ |
| 65 | + Scheme: "unix", |
| 66 | + // TODO: this needs to adapt when seamless upgrades |
| 67 | + // are enabled and the filename includes a uid. |
| 68 | + Path: path.Join(config.flags.kubeletRegistrarDirectoryPath, consts.DriverName+"-reg.sock"), |
| 69 | + }).String() |
| 70 | + log.Info("connecting to registration socket", "path", regSockPath) |
| 71 | + regConn, err := grpc.NewClient( |
| 72 | + regSockPath, |
| 73 | + grpc.WithTransportCredentials(insecure.NewCredentials()), |
| 74 | + ) |
| 75 | + if err != nil { |
| 76 | + return nil, fmt.Errorf("connect to registration socket: %w", err) |
| 77 | + } |
| 78 | + |
| 79 | + draSockPath := (&url.URL{ |
| 80 | + Scheme: "unix", |
| 81 | + Path: path.Join(config.DriverPluginPath(), "dra.sock"), |
| 82 | + }).String() |
| 83 | + log.Info("connecting to DRA socket", "path", draSockPath) |
| 84 | + draConn, err := grpc.NewClient( |
| 85 | + draSockPath, |
| 86 | + grpc.WithTransportCredentials(insecure.NewCredentials()), |
| 87 | + ) |
| 88 | + if err != nil { |
| 89 | + return nil, fmt.Errorf("connect to DRA socket: %w", err) |
| 90 | + } |
| 91 | + |
| 92 | + server := grpc.NewServer() |
| 93 | + healthcheck := &healthcheck{ |
| 94 | + server: server, |
| 95 | + regClient: registerapi.NewRegistrationClient(regConn), |
| 96 | + draClient: drapb.NewDRAPluginClient(draConn), |
| 97 | + } |
| 98 | + grpc_health_v1.RegisterHealthServer(server, healthcheck) |
| 99 | + |
| 100 | + healthcheck.wg.Add(1) |
| 101 | + go func() { |
| 102 | + defer healthcheck.wg.Done() |
| 103 | + log.Info("starting healthcheck service", "addr", lis.Addr().String()) |
| 104 | + if err := server.Serve(lis); err != nil { |
| 105 | + log.Error(err, "failed to serve healthcheck service", "addr", addr) |
| 106 | + } |
| 107 | + }() |
| 108 | + |
| 109 | + return healthcheck, nil |
| 110 | +} |
| 111 | + |
| 112 | +func (h *healthcheck) Stop(logger klog.Logger) { |
| 113 | + if h.server != nil { |
| 114 | + logger.Info("stopping healthcheck service") |
| 115 | + h.server.GracefulStop() |
| 116 | + } |
| 117 | + h.wg.Wait() |
| 118 | +} |
| 119 | + |
| 120 | +// Check implements [grpc_health_v1.HealthServer]. |
| 121 | +func (h *healthcheck) Check(ctx context.Context, req *grpc_health_v1.HealthCheckRequest) (*grpc_health_v1.HealthCheckResponse, error) { |
| 122 | + log := klog.FromContext(ctx) |
| 123 | + |
| 124 | + knownServices := map[string]struct{}{"": {}, "liveness": {}} |
| 125 | + if _, known := knownServices[req.GetService()]; !known { |
| 126 | + return nil, status.Error(codes.NotFound, "unknown service") |
| 127 | + } |
| 128 | + |
| 129 | + status := &grpc_health_v1.HealthCheckResponse{ |
| 130 | + Status: grpc_health_v1.HealthCheckResponse_NOT_SERVING, |
| 131 | + } |
| 132 | + |
| 133 | + info, err := h.regClient.GetInfo(ctx, ®isterapi.InfoRequest{}) |
| 134 | + if err != nil { |
| 135 | + log.Error(err, "failed to call GetInfo") |
| 136 | + return status, nil |
| 137 | + } |
| 138 | + log.V(5).Info("Successfully invoked GetInfo", "info", info) |
| 139 | + |
| 140 | + _, err = h.draClient.NodePrepareResources(ctx, &drapb.NodePrepareResourcesRequest{}) |
| 141 | + if err != nil { |
| 142 | + log.Error(err, "failed to call NodePrepareResources") |
| 143 | + return status, nil |
| 144 | + } |
| 145 | + log.V(5).Info("Successfully invoked NodePrepareResources") |
| 146 | + |
| 147 | + status.Status = grpc_health_v1.HealthCheckResponse_SERVING |
| 148 | + return status, nil |
| 149 | +} |
0 commit comments