Skip to content

consul: Add consul health checking for all gRPC endpoints #8261

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 8 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions cmd/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,7 @@ type GRPCServerConfig struct {
// These service names must match the service names advertised by gRPC itself,
// which are identical to the names set in our gRPC .proto files prefixed by
// the package names set in those files (e.g. "ca.CertificateAuthority").
Services map[string]GRPCServiceConfig `json:"services" validate:"required,dive,required"`
Services map[string]*GRPCServiceConfig `json:"services" validate:"required,dive,required"`
// MaxConnectionAge specifies how long a connection may live before the server sends a GoAway to the
// client. Because gRPC connections re-resolve DNS after a connection close,
// this controls how long it takes before a client learns about changes to its
Expand All @@ -476,10 +476,10 @@ type GRPCServerConfig struct {

// GRPCServiceConfig contains the information needed to configure a gRPC service.
type GRPCServiceConfig struct {
// PerServiceClientNames is a map of gRPC service names to client certificate
// SANs. The upstream listening server will reject connections from clients
// which do not appear in this list, and the server interceptor will reject
// RPC calls for this service from clients which are not listed here.
// ClientNames is the list of accepted gRPC client certificate SANs.
// Connections from clients not in this list will be rejected by the
// upstream listener, and RPCs from unlisted clients will be denied by the
// server interceptor.
ClientNames []string `json:"clientNames" validate:"min=1,dive,hostname,required"`
}

Expand Down
22 changes: 19 additions & 3 deletions grpc/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@ import (
"github.com/letsencrypt/boulder/cmd"
bcreds "github.com/letsencrypt/boulder/grpc/creds"

// 'grpc/health' is imported for its init function, which causes clients to
// rely on the Health Service for load-balancing.
// 'grpc/internal/resolver/dns' is imported for its init function, which
// registers the SRV resolver.
"google.golang.org/grpc/balancer/roundrobin"

// 'grpc/health' is imported for its init function, which causes clients to
// rely on the Health Service for load-balancing as long as a
// "healthCheckConfig" is specified in the gRPC service config.
_ "google.golang.org/grpc/health"

_ "github.com/letsencrypt/boulder/grpc/internal/resolver/dns"
Expand Down Expand Up @@ -61,7 +63,21 @@ func ClientSetup(c *cmd.GRPCClientConfig, tlsConfig *tls.Config, statsRegistry p
creds := bcreds.NewClientCredentials(tlsConfig.RootCAs, tlsConfig.Certificates, hostOverride)
return grpc.NewClient(
target,
grpc.WithDefaultServiceConfig(fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, roundrobin.Name)),
grpc.WithDefaultServiceConfig(
fmt.Sprintf(
// By setting the service name to an empty string in
// healthCheckConfig, we're instructing the gRPC client to query
// the overall health status of each server. The grpc-go health
// server, as constructed by health.NewServer(), unconditionally
// sets the overall service (e.g. "") status to SERVING. If a
// specific service name were set, the server would need to
// explicitly transition that service to SERVING; otherwise,
// clients would receive a NOT_FOUND status and the connection
// would be marked as unhealthy (TRANSIENT_FAILURE).
`{"healthCheckConfig": {"serviceName": ""},"loadBalancingConfig": [{"%s":{}}]}`,
roundrobin.Name,
),
),
grpc.WithTransportCredentials(creds),
grpc.WithChainUnaryInterceptor(unaryInterceptors...),
grpc.WithChainStreamInterceptor(streamInterceptors...),
Expand Down
17 changes: 17 additions & 0 deletions grpc/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"errors"
"fmt"
"net"
"slices"
"strings"
"time"

Expand Down Expand Up @@ -123,12 +124,21 @@ func (sb *serverBuilder) Build(tlsConfig *tls.Config, statsRegistry prometheus.R
// This is the names which are allowlisted at the server level, plus the union
// of all names which are allowlisted for any individual service.
acceptedSANs := make(map[string]struct{})
var acceptedSANsSlice []string
for _, service := range sb.cfg.Services {
for _, name := range service.ClientNames {
acceptedSANs[name] = struct{}{}
if !slices.Contains(acceptedSANsSlice, name) {
acceptedSANsSlice = append(acceptedSANsSlice, name)
}
}
}

// Ensure that the health service has the same ClientNames as the other
// services, so that health checks can be performed by clients which are
// allowed to connect to the server.
sb.cfg.Services[healthpb.Health_ServiceDesc.ServiceName].ClientNames = acceptedSANsSlice

creds, err := bcreds.NewServerCredentials(tlsConfig, acceptedSANs)
if err != nil {
return nil, err
Expand Down Expand Up @@ -224,8 +234,12 @@ func (sb *serverBuilder) Build(tlsConfig *tls.Config, statsRegistry prometheus.R

// initLongRunningCheck initializes a goroutine which will periodically check
// the health of the provided service and update the health server accordingly.
//
// TODO(#8255): Remove the service parameter and instead rely on transitioning
// the overall health of the server (e.g. "") instead of individual services.
func (sb *serverBuilder) initLongRunningCheck(shutdownCtx context.Context, service string, checkImpl func(context.Context) error) {
// Set the initial health status for the service.
sb.healthSrv.SetServingStatus("", healthpb.HealthCheckResponse_NOT_SERVING)
sb.healthSrv.SetServingStatus(service, healthpb.HealthCheckResponse_NOT_SERVING)

// check is a helper function that checks the health of the service and, if
Expand All @@ -249,10 +263,13 @@ func (sb *serverBuilder) initLongRunningCheck(shutdownCtx context.Context, servi
}

if next != healthpb.HealthCheckResponse_SERVING {
sb.logger.Errf("transitioning overall health from %q to %q, due to: %s", last, next, err)
sb.logger.Errf("transitioning health of %q from %q to %q, due to: %s", service, last, next, err)
} else {
sb.logger.Infof("transitioning overall health from %q to %q", last, next)
sb.logger.Infof("transitioning health of %q from %q to %q", service, last, next)
}
sb.healthSrv.SetServingStatus("", next)
sb.healthSrv.SetServingStatus(service, next)
return next
}
Expand Down
10 changes: 5 additions & 5 deletions grpc/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import (
"google.golang.org/grpc/health"
)

func Test_serverBuilder_initLongRunningCheck(t *testing.T) {
func TestServerBuilderInitLongRunningCheck(t *testing.T) {
t.Parallel()
hs := health.NewServer()
mockLogger := blog.NewMock()
Expand Down Expand Up @@ -41,8 +41,8 @@ func Test_serverBuilder_initLongRunningCheck(t *testing.T) {
// - ~100ms 3rd check failed, SERVING to NOT_SERVING
serving := mockLogger.GetAllMatching(".*\"NOT_SERVING\" to \"SERVING\"")
notServing := mockLogger.GetAllMatching((".*\"SERVING\" to \"NOT_SERVING\""))
test.Assert(t, len(serving) == 1, "expected one serving log line")
test.Assert(t, len(notServing) == 1, "expected one not serving log line")
test.Assert(t, len(serving) == 2, "expected two serving log lines")
test.Assert(t, len(notServing) == 2, "expected two not serving log lines")

mockLogger.Clear()

Expand All @@ -67,6 +67,6 @@ func Test_serverBuilder_initLongRunningCheck(t *testing.T) {
// - ~100ms 3rd check passed, NOT_SERVING to SERVING
serving = mockLogger.GetAllMatching(".*\"NOT_SERVING\" to \"SERVING\"")
notServing = mockLogger.GetAllMatching((".*\"SERVING\" to \"NOT_SERVING\""))
test.Assert(t, len(serving) == 2, "expected two serving log lines")
test.Assert(t, len(notServing) == 1, "expected one not serving log line")
test.Assert(t, len(serving) == 4, "expected four serving log lines")
test.Assert(t, len(notServing) == 2, "expected two not serving log lines")
}
1 change: 1 addition & 0 deletions test/config-next/akamai-purger.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
},
"grpc.health.v1.Health": {
"clientNames": [
"consul.boulder",
"health-checker.boulder"
]
}
Expand Down
1 change: 1 addition & 0 deletions test/config-next/ca.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
},
"grpc.health.v1.Health": {
"clientNames": [
"consul.boulder",
"health-checker.boulder"
]
}
Expand Down
1 change: 1 addition & 0 deletions test/config-next/crl-storer.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
},
"grpc.health.v1.Health": {
"clientNames": [
"consul.boulder",
"health-checker.boulder"
]
}
Expand Down
1 change: 1 addition & 0 deletions test/config-next/email-exporter.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
},
"grpc.health.v1.Health": {
"clientNames": [
"consul.boulder",
"health-checker.boulder"
]
}
Expand Down
1 change: 1 addition & 0 deletions test/config-next/nonce-a.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
},
"grpc.health.v1.Health": {
"clientNames": [
"consul.boulder",
"health-checker.boulder"
]
}
Expand Down
1 change: 1 addition & 0 deletions test/config-next/nonce-b.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
},
"grpc.health.v1.Health": {
"clientNames": [
"consul.boulder",
"health-checker.boulder"
]
}
Expand Down
1 change: 1 addition & 0 deletions test/config-next/publisher.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
},
"grpc.health.v1.Health": {
"clientNames": [
"consul.boulder",
"health-checker.boulder"
]
}
Expand Down
1 change: 1 addition & 0 deletions test/config-next/ra.json
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@
},
"grpc.health.v1.Health": {
"clientNames": [
"consul.boulder",
"health-checker.boulder"
]
}
Expand Down
1 change: 1 addition & 0 deletions test/config-next/remoteva-a.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
},
"grpc.health.v1.Health": {
"clientNames": [
"consul.boulder",
"health-checker.boulder"
]
}
Expand Down
1 change: 1 addition & 0 deletions test/config-next/remoteva-b.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
},
"grpc.health.v1.Health": {
"clientNames": [
"consul.boulder",
"health-checker.boulder"
]
}
Expand Down
1 change: 1 addition & 0 deletions test/config-next/remoteva-c.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
},
"grpc.health.v1.Health": {
"clientNames": [
"consul.boulder",
"health-checker.boulder"
]
}
Expand Down
4 changes: 2 additions & 2 deletions test/config-next/sa.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@
},
"grpc.health.v1.Health": {
"clientNames": [
"health-checker.boulder",
"consul.boulder"
"consul.boulder",
"health-checker.boulder"
]
}
}
Expand Down
1 change: 1 addition & 0 deletions test/config-next/va.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
},
"grpc.health.v1.Health": {
"clientNames": [
"consul.boulder",
"health-checker.boulder"
]
}
Expand Down
1 change: 1 addition & 0 deletions test/config/akamai-purger.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
},
"grpc.health.v1.Health": {
"clientNames": [
"consul.boulder",
"health-checker.boulder"
]
}
Expand Down
1 change: 1 addition & 0 deletions test/config/ca.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
},
"grpc.health.v1.Health": {
"clientNames": [
"consul.boulder",
"health-checker.boulder"
]
}
Expand Down
1 change: 1 addition & 0 deletions test/config/crl-storer.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
},
"grpc.health.v1.Health": {
"clientNames": [
"consul.boulder",
"health-checker.boulder"
]
}
Expand Down
1 change: 1 addition & 0 deletions test/config/email-exporter.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
},
"grpc.health.v1.Health": {
"clientNames": [
"consul.boulder",
"health-checker.boulder"
]
}
Expand Down
1 change: 1 addition & 0 deletions test/config/nonce-a.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
},
"grpc.health.v1.Health": {
"clientNames": [
"consul.boulder",
"health-checker.boulder"
]
}
Expand Down
1 change: 1 addition & 0 deletions test/config/nonce-b.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
},
"grpc.health.v1.Health": {
"clientNames": [
"consul.boulder",
"health-checker.boulder"
]
}
Expand Down
1 change: 1 addition & 0 deletions test/config/publisher.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
},
"grpc.health.v1.Health": {
"clientNames": [
"consul.boulder",
"health-checker.boulder"
]
}
Expand Down
1 change: 1 addition & 0 deletions test/config/ra.json
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@
},
"grpc.health.v1.Health": {
"clientNames": [
"consul.boulder",
"health-checker.boulder"
]
}
Expand Down
1 change: 1 addition & 0 deletions test/config/remoteva-a.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
},
"grpc.health.v1.Health": {
"clientNames": [
"consul.boulder",
"health-checker.boulder"
]
}
Expand Down
1 change: 1 addition & 0 deletions test/config/remoteva-b.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
},
"grpc.health.v1.Health": {
"clientNames": [
"consul.boulder",
"health-checker.boulder"
]
}
Expand Down
1 change: 1 addition & 0 deletions test/config/remoteva-c.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
},
"grpc.health.v1.Health": {
"clientNames": [
"consul.boulder",
"health-checker.boulder"
]
}
Expand Down
4 changes: 2 additions & 2 deletions test/config/sa.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@
},
"grpc.health.v1.Health": {
"clientNames": [
"health-checker.boulder",
"consul.boulder"
"consul.boulder",
"health-checker.boulder"
]
}
}
Expand Down
1 change: 1 addition & 0 deletions test/config/va.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
},
"grpc.health.v1.Health": {
"clientNames": [
"consul.boulder",
"health-checker.boulder"
]
}
Expand Down
Loading
Loading