diff --git a/.design/project-log/pr321-review-feedback.md b/.design/project-log/pr321-review-feedback.md new file mode 100644 index 000000000..3c62741f4 --- /dev/null +++ b/.design/project-log/pr321-review-feedback.md @@ -0,0 +1,35 @@ +# PR #321 Review Feedback — Multi-Node Session Fixes + +**Date:** 2026-06-06 +**PR:** GoogleCloudPlatform/scion#321 +**Branch:** postgres/delta-fixes +**Commit:** a1e715f + +## Summary + +Addressed 3 review comments from Gemini Code Assist on the multi-node session fixes PR. + +## Changes + +### 1. HIGH: SharedSigningSecret bypasses storage (pkg/hub/server.go) + +**Problem:** When `SharedSigningSecret` is configured, `ensureSigningKey` derives keys deterministically but returns immediately without persisting them to the secret backend. External consumers (e.g., `scion-chat-app`) rely on label-based auto-discovery from GCP Secret Manager to find signing keys. + +**Fix:** After deriving the key, call `syncSigningKeyToBackend()` to persist the derived key to the secret backend. This is a best-effort sync (warning on failure, non-fatal) since the key can always be re-derived. The sync uses the existing `syncSigningKeyToBackend` function which handles both the backend Set and the SQLite backup. + +### 2. MEDIUM: Missing session secret warning in hosted mode (cmd/server_foreground.go) + +**Problem:** In hosted mode, running without a session secret means each replica generates its own ephemeral key, completely breaking cross-replica sessions. + +**Fix:** Added a `log.Println("WARNING: ...")` at startup when `hostedMode && hubCfg.SharedSigningSecret == ""`. Chose a warning over a hard failure to avoid breaking existing single-node hosted deployments that may not have configured a session secret yet. + +### 3. MEDIUM: Nil guard in test (pkg/hub/web_test.go) + +**Problem:** `TestSessionStore_DifferentSecretCannotDecode` accessed `sessC.Values` without checking `sessC != nil`. + +**Fix:** Added `require.NotNil(t, sessC, ...)` before the `Values` access. + +## Observations + +- The `make ci` target shows pre-existing vet errors in `command_bus_test.go` (undefined `recExec`) and `broker_affinity_test.go` (undefined `newBroker`). These are cross-file test helper references that work with `go test` but fail `go vet` individually. Not related to this PR. +- The repo has many `gofmt` alignment diffs from the grove-to-project rename. These show up in `git diff` but were not included in this commit. diff --git a/cmd/sciontool/commands/init.go b/cmd/sciontool/commands/init.go index bd3043518..646831c17 100644 --- a/cmd/sciontool/commands/init.go +++ b/cmd/sciontool/commands/init.go @@ -914,6 +914,11 @@ func handleLimitsExceeded(sup *supervisor.Supervisor, limitType, message string) func handleAuthReset(hubClient *hub.Client, tokenRefreshCancel *context.CancelFunc, tokenRefreshDone *<-chan struct{}, statusHandler *handlers.StatusHandler, targetUID, targetGID int) { log.TaggedInfo("AUTH_RESET", "Received SIGUSR2: auth reset requested") + if hubClient == nil { + log.Error("AUTH_RESET: Hub client is not configured, cannot reset auth") + return + } + newToken := hub.ReadTokenFile() if newToken == "" { log.Error("AUTH_RESET: Token file is empty after SIGUSR2, cannot reset auth") diff --git a/cmd/server_foreground.go b/cmd/server_foreground.go index e16a63569..12934a8b9 100644 --- a/cmd/server_foreground.go +++ b/cmd/server_foreground.go @@ -31,6 +31,8 @@ import ( "syscall" "time" + "github.com/google/uuid" + "github.com/GoogleCloudPlatform/scion/pkg/agent" "github.com/GoogleCloudPlatform/scion/pkg/api" "github.com/GoogleCloudPlatform/scion/pkg/apiclient" @@ -328,7 +330,11 @@ func runServerStart(cmd *cobra.Command, args []string) error { // can authenticate back to the Hub API. Self-managed plugins // handle their own credential lifecycle. if !pluginMgr.IsSelfManaged(scionplugin.PluginTypeBroker, bt) && hubSrv != nil && s != nil { - brokerID := "plugin-broker-" + bt + // Use the same deterministic UUIDv5 as the α migration so the + // broker entity created here matches the migrated ID. + pluginBrokerNS := uuid.MustParse("5c104390-a1d0-5e9a-9b1e-5c104390a1d0") + legacyID := "plugin-broker-" + bt + brokerID := uuid.NewSHA1(pluginBrokerNS, []byte(legacyID)).String() if authSvc := hubSrv.GetBrokerAuthService(); authSvc != nil { // Ensure the runtime broker entity exists (required by // the broker_secrets foreign key constraint). @@ -861,13 +867,21 @@ func parseAdminEmails(cfg *config.GlobalConfig) []string { // resolveSessionSecret resolves the deployment-wide session secret from the // --session-secret flag, falling back to the SCION_SERVER_SESSION_SECRET env -// var. The same value backs both the web session cookie store and the hub JWT -// signing keys so that all replicas behind the load balancer agree. +// var (then SESSION_SECRET for compatibility). The same value backs both the +// web session cookie store and the hub JWT signing keys so that all replicas +// behind the load balancer agree. func resolveSessionSecret() string { - if webSessionSecret != "" { - return webSessionSecret + secret := webSessionSecret + if secret == "" { + secret = os.Getenv("SCION_SERVER_SESSION_SECRET") + } + if secret == "" { + secret = os.Getenv("SESSION_SECRET") + } + if secret == "" && hostedMode { + slog.Warn("No session secret configured in hosted mode! Replicas will not be able to share sessions or agree on JWT signing keys, leading to login loops.") } - return os.Getenv("SCION_SERVER_SESSION_SECRET") + return secret } // initHubServer creates and configures the Hub server. @@ -949,6 +963,15 @@ func initHubServer(ctx context.Context, cfg *config.GlobalConfig, s store.Store, SharedSigningSecret: resolveSessionSecret(), } + // In hosted mode every replica must share the same session secret for + // cookies and JWT signing keys to work across the load balancer. Running + // without one means each replica generates its own ephemeral key, which + // breaks session persistence and causes login loops. + if hostedMode && hubCfg.SharedSigningSecret == "" { + log.Println("WARNING: hosted mode is enabled but no session secret is configured. " + + "Set --session-secret or SCION_SERVER_SESSION_SECRET to avoid cross-replica session failures.") + } + // Construct proxy authenticator when auth mode is "proxy" if cfg.Auth.Mode == "proxy" && cfg.Auth.Proxy != nil { switch cfg.Auth.Proxy.Provider { diff --git a/pkg/config/settings_v1.go b/pkg/config/settings_v1.go index a7d2687fb..ee1369e9a 100644 --- a/pkg/config/settings_v1.go +++ b/pkg/config/settings_v1.go @@ -252,13 +252,13 @@ type V1ServerConfig struct { // Mode selects the server operating mode: "workstation" (default) or "hosted". // When set to "hosted", the server behaves as if --hosted were passed. // The legacy value "production" is also accepted for backward compatibility. - Mode string `json:"mode,omitempty" yaml:"mode,omitempty" koanf:"mode"` - Env string `json:"env,omitempty" yaml:"env,omitempty" koanf:"env"` - Hub *V1ServerHubConfig `json:"hub,omitempty" yaml:"hub,omitempty" koanf:"hub"` - Broker *V1BrokerConfig `json:"broker,omitempty" yaml:"broker,omitempty" koanf:"broker"` - Database *V1DatabaseConfig `json:"database,omitempty" yaml:"database,omitempty" koanf:"database"` - Auth *V1AuthConfig `json:"auth,omitempty" yaml:"auth,omitempty" koanf:"auth"` - OAuth *V1OAuthConfig `json:"oauth,omitempty" yaml:"oauth,omitempty" koanf:"oauth"` + Mode string `json:"mode,omitempty" yaml:"mode,omitempty" koanf:"mode"` + Env string `json:"env,omitempty" yaml:"env,omitempty" koanf:"env"` + Hub *V1ServerHubConfig `json:"hub,omitempty" yaml:"hub,omitempty" koanf:"hub"` + Broker *V1BrokerConfig `json:"broker,omitempty" yaml:"broker,omitempty" koanf:"broker"` + Database *V1DatabaseConfig `json:"database,omitempty" yaml:"database,omitempty" koanf:"database"` + Auth *V1AuthConfig `json:"auth,omitempty" yaml:"auth,omitempty" koanf:"auth"` + OAuth *V1OAuthConfig `json:"oauth,omitempty" yaml:"oauth,omitempty" koanf:"oauth"` Storage *V1StorageConfig `json:"storage,omitempty" yaml:"storage,omitempty" koanf:"storage"` WorkspaceStorage *V1WorkspaceStorageConfig `json:"workspace_storage,omitempty" yaml:"workspace_storage,omitempty" koanf:"workspace_storage"` Secrets *V1SecretsConfig `json:"secrets,omitempty" yaml:"secrets,omitempty" koanf:"secrets"` @@ -388,12 +388,12 @@ type V1DatabaseConfig struct { type V1AuthConfig struct { // Mode selects the exclusive human auth mode: "oauth" (default), "proxy", or "dev". // In proxy mode, OAuth handlers are disabled; in dev mode, dev token auth is used. - Mode string `json:"mode,omitempty" yaml:"mode,omitempty" koanf:"mode"` - DevMode bool `json:"dev_mode,omitempty" yaml:"dev_mode,omitempty" koanf:"dev_mode"` - DevToken string `json:"dev_token,omitempty" yaml:"dev_token,omitempty" koanf:"dev_token"` - DevTokenFile string `json:"dev_token_file,omitempty" yaml:"dev_token_file,omitempty" koanf:"dev_token_file"` - AuthorizedDomains []string `json:"authorized_domains,omitempty" yaml:"authorized_domains,omitempty" koanf:"authorized_domains"` - UserAccessMode string `json:"user_access_mode,omitempty" yaml:"user_access_mode,omitempty" koanf:"user_access_mode"` + Mode string `json:"mode,omitempty" yaml:"mode,omitempty" koanf:"mode"` + DevMode bool `json:"dev_mode,omitempty" yaml:"dev_mode,omitempty" koanf:"dev_mode"` + DevToken string `json:"dev_token,omitempty" yaml:"dev_token,omitempty" koanf:"dev_token"` + DevTokenFile string `json:"dev_token_file,omitempty" yaml:"dev_token_file,omitempty" koanf:"dev_token_file"` + AuthorizedDomains []string `json:"authorized_domains,omitempty" yaml:"authorized_domains,omitempty" koanf:"authorized_domains"` + UserAccessMode string `json:"user_access_mode,omitempty" yaml:"user_access_mode,omitempty" koanf:"user_access_mode"` Proxy *V1ProxyConfig `json:"proxy,omitempty" yaml:"proxy,omitempty" koanf:"proxy"` Transport *V1TransportConfig `json:"transport,omitempty" yaml:"transport,omitempty" koanf:"transport"` } diff --git a/pkg/hub/admin_reset_auth.go b/pkg/hub/admin_reset_auth.go index 87abea365..24111acea 100644 --- a/pkg/hub/admin_reset_auth.go +++ b/pkg/hub/admin_reset_auth.go @@ -44,16 +44,34 @@ func (s *Server) handleAdminResetAuthAll(w http.ResponseWriter, r *http.Request) Error string `json:"error,omitempty"` } - var succeeded []agentResult - var failed []agentResult + // Dispatch concurrently with a bounded worker pool to avoid timeouts + // when many agents are running across slow or unreachable brokers. + results := make(chan agentResult, len(agents.Items)) + sem := make(chan struct{}, 20) for _, agent := range agents.Items { a := agent - if err := s.dispatcher.DispatchAgentResetAuth(ctx, &a); err != nil { - slog.Error("Bulk reset-auth failed for agent", "agent_id", a.ID, "error", err) - failed = append(failed, agentResult{ID: a.ID, Name: a.Name, Error: err.Error()}) + go func() { + sem <- struct{}{} + defer func() { <-sem }() + + res := agentResult{ID: a.ID, Name: a.Name} + if err := s.dispatcher.DispatchAgentResetAuth(ctx, &a); err != nil { + slog.Error("Bulk reset-auth failed for agent", "agent_id", a.ID, "error", err) + res.Error = err.Error() + } + results <- res + }() + } + + var succeeded []agentResult + var failed []agentResult + for range agents.Items { + res := <-results + if res.Error != "" { + failed = append(failed, res) } else { - succeeded = append(succeeded, agentResult{ID: a.ID, Name: a.Name}) + succeeded = append(succeeded, res) } } diff --git a/pkg/hub/command_bus.go b/pkg/hub/command_bus.go index f504bc25f..bc61a4c56 100644 --- a/pkg/hub/command_bus.go +++ b/pkg/hub/command_bus.go @@ -333,5 +333,5 @@ type NoopCommandBus struct{} var _ CommandBus = NoopCommandBus{} func (NoopCommandBus) NotifyBrokerCmd(context.Context, pgExecutor, string) error { return nil } -func (NoopCommandBus) SignalBrokerCmd(context.Context, string) error { return nil } +func (NoopCommandBus) SignalBrokerCmd(context.Context, string) error { return nil } func (NoopCommandBus) Close() {} diff --git a/pkg/hub/command_bus_test.go b/pkg/hub/command_bus_test.go index b3494b708..7b3f2b937 100644 --- a/pkg/hub/command_bus_test.go +++ b/pkg/hub/command_bus_test.go @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !no_sqlite + package hub import ( @@ -473,4 +475,3 @@ func TestCommandBusIntegration_CloseIsIdempotent(t *testing.T) { bus.Close() bus.Close() // must not panic } - diff --git a/pkg/hub/dispatch_exec_test.go b/pkg/hub/dispatch_exec_test.go index be83dec0a..6da887b1f 100644 --- a/pkg/hub/dispatch_exec_test.go +++ b/pkg/hub/dispatch_exec_test.go @@ -37,20 +37,22 @@ import ( // lifecycleTestDispatcher captures which lifecycle op was called and with // what args, so we can verify executeDispatch routes correctly. type lifecycleTestDispatcher struct { - startCalled atomic.Int32 - stopCalled atomic.Int32 - restartCalled atomic.Int32 - deleteCalled atomic.Int32 - checkPromptCalled atomic.Int32 - finalizeEnvCalled atomic.Int32 - createCalled atomic.Int32 - lastTask string - checkPromptResult bool - lastDeleteFiles bool - lastFinalizeEnv map[string]string -} - -func (d *lifecycleTestDispatcher) DispatchAgentCreate(context.Context, *store.Agent) error { return nil } + startCalled atomic.Int32 + stopCalled atomic.Int32 + restartCalled atomic.Int32 + deleteCalled atomic.Int32 + checkPromptCalled atomic.Int32 + finalizeEnvCalled atomic.Int32 + createCalled atomic.Int32 + lastTask string + checkPromptResult bool + lastDeleteFiles bool + lastFinalizeEnv map[string]string +} + +func (d *lifecycleTestDispatcher) DispatchAgentCreate(context.Context, *store.Agent) error { + return nil +} func (d *lifecycleTestDispatcher) DispatchAgentProvision(context.Context, *store.Agent) error { return nil } diff --git a/pkg/hub/events_integration_test.go b/pkg/hub/events_integration_test.go index 7bec27f32..101b71f22 100644 --- a/pkg/hub/events_integration_test.go +++ b/pkg/hub/events_integration_test.go @@ -41,7 +41,7 @@ func (noopDispatcher) DispatchAgentProvision(_ context.Context, _ *store.Agent) func (noopDispatcher) DispatchAgentStart(_ context.Context, _ *store.Agent, _ string) error { return nil } -func (noopDispatcher) DispatchAgentStop(_ context.Context, _ *store.Agent) error { return nil } +func (noopDispatcher) DispatchAgentStop(_ context.Context, _ *store.Agent) error { return nil } func (noopDispatcher) DispatchAgentRestart(_ context.Context, _ *store.Agent) error { return nil } func (noopDispatcher) DispatchAgentResetAuth(_ context.Context, _ *store.Agent) error { return nil } func (noopDispatcher) DispatchAgentDelete(_ context.Context, _ *store.Agent, _, _, _ bool, _ time.Time) error { diff --git a/pkg/hub/handlers.go b/pkg/hub/handlers.go index a995830f8..b1b05feb6 100644 --- a/pkg/hub/handlers.go +++ b/pkg/hub/handlers.go @@ -2578,9 +2578,9 @@ type GroupMessageRecipientResult struct { // GroupMessageResponse is the JSON response for a set[] message delivery. type GroupMessageResponse struct { - GroupID string `json:"group_id"` - Delivered int `json:"delivered"` - Failed int `json:"failed"` + GroupID string `json:"group_id"` + Delivered int `json:"delivered"` + Failed int `json:"failed"` Results []GroupMessageRecipientResult `json:"results"` } @@ -9450,6 +9450,7 @@ func (s *Server) handleProjectImportTemplates(w http.ResponseWriter, r *http.Req Count: len(imported), }) } + // ImportResourcesRequest is the body for the unified import endpoint // (POST /api/v1/resources/import). It imports a single kind of resource from a // remote source URL into the given scope. diff --git a/pkg/hub/httpdispatcher.go b/pkg/hub/httpdispatcher.go index ab869e2fb..fcaa4f509 100644 --- a/pkg/hub/httpdispatcher.go +++ b/pkg/hub/httpdispatcher.go @@ -29,9 +29,9 @@ import ( "github.com/GoogleCloudPlatform/scion/pkg/observability/dispatchmetrics" "github.com/GoogleCloudPlatform/scion/pkg/secret" "github.com/GoogleCloudPlatform/scion/pkg/store" - "go.opentelemetry.io/otel/attribute" "github.com/go-jose/go-jose/v4/jwt" "github.com/google/uuid" + "go.opentelemetry.io/otel/attribute" ) // HTTPRuntimeBrokerClient is an HTTP-based implementation of RuntimeBrokerClient. @@ -130,14 +130,14 @@ type GitHubAppTokenMinter interface { // It looks up the runtime broker endpoint from the store and uses HTTPRuntimeBrokerClient // to make the actual API calls. type HTTPAgentDispatcher struct { - store store.Store - client RuntimeBrokerClient - tokenGenerator AgentTokenGenerator - secretBackend secret.SecretBackend - authzService *AuthzService // Optional authz service for progeny secret verification - githubAppMinter GitHubAppTokenMinter // Optional GitHub App token minter - hubEndpoint string // Hub endpoint URL for agents to call back - hubID string // Hub instance ID for hub-scoped queries + store store.Store + client RuntimeBrokerClient + tokenGenerator AgentTokenGenerator + secretBackend secret.SecretBackend + authzService *AuthzService // Optional authz service for progeny secret verification + githubAppMinter GitHubAppTokenMinter // Optional GitHub App token minter + hubEndpoint string // Hub endpoint URL for agents to call back + hubID string // Hub instance ID for hub-scoped queries devAuthToken string // Dev auth token to inject into agent env (dev-auth mode only) transportMinter TransportTokenMinter // Optional transport token minter for OIDC dispatch transportAudience string // OIDC audience for transport tokens diff --git a/pkg/hub/messagebroker.go b/pkg/hub/messagebroker.go index ae8940207..e541e42f1 100644 --- a/pkg/hub/messagebroker.go +++ b/pkg/hub/messagebroker.go @@ -55,7 +55,7 @@ type MessageBrokerProxy struct { mu sync.Mutex subscriptions map[string][]eventbus.Subscription // projectID -> active subscriptions pluginSubscriptions map[string]eventbus.Subscription // pattern -> plugin-initiated subscription - subscribedTopics map[string]bool // dedup guard for project-level subscriptions + subscribedTopics map[string]bool // dedup guard for project-level subscriptions stopCh chan struct{} stopOnce sync.Once wg sync.WaitGroup diff --git a/pkg/hub/notifications.go b/pkg/hub/notifications.go index 4a94ee35f..391d29b28 100644 --- a/pkg/hub/notifications.go +++ b/pkg/hub/notifications.go @@ -35,7 +35,7 @@ import ( type NotificationDispatcher struct { store store.Store events EventPublisher - getDispatcher func() AgentDispatcher // lazy getter; dispatcher may be set after startup + getDispatcher func() AgentDispatcher // lazy getter; dispatcher may be set after startup signalDeferred func(ctx context.Context, brokerID, agentID string) // NOTIFY wakeup for deferred messages log *slog.Logger messageLog *slog.Logger // dedicated message audit logger (nil = disabled) diff --git a/pkg/hub/proxyauth.go b/pkg/hub/proxyauth.go index 2996b789a..d121131d5 100644 --- a/pkg/hub/proxyauth.go +++ b/pkg/hub/proxyauth.go @@ -235,9 +235,9 @@ type jwksCache struct { mu sync.RWMutex keys map[string]jose.JSONWebKey // kid -> key - lastFetched time.Time // last successful fetch - lastAttempted time.Time // last fetch attempt (success or failure), for stampede prevention - refreshing bool // true while a refresh is in-flight + lastFetched time.Time // last successful fetch + lastAttempted time.Time // last fetch attempt (success or failure), for stampede prevention + refreshing bool // true while a refresh is in-flight } // GetKey returns the public key for the given kid. If the kid is not found diff --git a/pkg/hub/reaper.go b/pkg/hub/reaper.go index e8bafd55a..9cb163e5f 100644 --- a/pkg/hub/reaper.go +++ b/pkg/hub/reaper.go @@ -30,8 +30,8 @@ import ( // gives the rolling-timeout wait ample time to fail organically before the // reaper force-transitions the row. const ( - affinityStaleAge = 2 * defaultAffinityFreshness // 180s - dispatchStuckAge = 3 * dispatchRollingTimeout // 270s + affinityStaleAge = 2 * defaultAffinityFreshness // 180s + dispatchStuckAge = 3 * dispatchRollingTimeout // 270s dispatchMaxRetries = 3 ) diff --git a/pkg/hub/reconcile_test.go b/pkg/hub/reconcile_test.go index 65c62f11b..0aaaf6a29 100644 --- a/pkg/hub/reconcile_test.go +++ b/pkg/hub/reconcile_test.go @@ -50,7 +50,10 @@ func TestReconcileBroker_DrainsDispatchOnce(t *testing.T) { cs := entadapter.NewCompositeStore(enttest.NewClient(t)) var execN int32 s := newReconcileServer(cs, - func(context.Context, store.BrokerDispatch) (string, error) { atomic.AddInt32(&execN, 1); return `{"ok":true}`, nil }, + func(context.Context, store.BrokerDispatch) (string, error) { + atomic.AddInt32(&execN, 1) + return `{"ok":true}`, nil + }, func(context.Context, *store.Message) error { return nil }) broker := uuid.NewString() @@ -70,7 +73,10 @@ func TestReconcileBroker_ConcurrentDrainsExecuteOnce(t *testing.T) { cs := entadapter.NewCompositeStore(enttest.NewClient(t)) var execN int32 s := newReconcileServer(cs, - func(context.Context, store.BrokerDispatch) (string, error) { atomic.AddInt32(&execN, 1); return "", nil }, + func(context.Context, store.BrokerDispatch) (string, error) { + atomic.AddInt32(&execN, 1) + return "", nil + }, func(context.Context, *store.Message) error { return nil }) broker := uuid.NewString() @@ -204,7 +210,9 @@ type reconcileTestDispatcher struct { onMessage func(agent *store.Agent, msg string) error } -func (d *reconcileTestDispatcher) DispatchAgentCreate(context.Context, *store.Agent) error { return nil } +func (d *reconcileTestDispatcher) DispatchAgentCreate(context.Context, *store.Agent) error { + return nil +} func (d *reconcileTestDispatcher) DispatchAgentProvision(context.Context, *store.Agent) error { return nil } diff --git a/pkg/hub/server.go b/pkg/hub/server.go index 30b0c21e3..a6acca0d4 100644 --- a/pkg/hub/server.go +++ b/pkg/hub/server.go @@ -536,14 +536,14 @@ type Server struct { notificationDispatcher *NotificationDispatcher // Notification dispatcher for agent status events // reconcile op executors (seams): default to executeDispatch/deliverMessage; // Phase 3/4 supply the real local-tunnel ops; tests override for exactly-once. - execDispatch func(ctx context.Context, d store.BrokerDispatch) (string, error) - deliverMsg func(ctx context.Context, m *store.Message) error - maintenance *MaintenanceState // Runtime maintenance mode state - hubID string // Unique hub instance ID for secret namespacing - instanceID string // Unique per-process ID (uuid); affinity key for broker dispatch - embeddedBrokerID string // Broker ID when running in hub+broker combo mode - scheduler *Scheduler // Unified scheduler for recurring tasks - cleanupOnce sync.Once // Ensures CleanupResources runs only once + execDispatch func(ctx context.Context, d store.BrokerDispatch) (string, error) + deliverMsg func(ctx context.Context, m *store.Message) error + maintenance *MaintenanceState // Runtime maintenance mode state + hubID string // Unique hub instance ID for secret namespacing + instanceID string // Unique per-process ID (uuid); affinity key for broker dispatch + embeddedBrokerID string // Broker ID when running in hub+broker combo mode + scheduler *Scheduler // Unified scheduler for recurring tasks + cleanupOnce sync.Once // Ensures CleanupResources runs only once logQueryService *LogQueryService // Cloud Logging query service (nil = disabled) @@ -919,6 +919,15 @@ func (s *Server) ensureSigningKey(ctx context.Context, keyName string, existingK "key_len", len(key), "sha256_prefix", hex.EncodeToString(fp[:8]), ) + // Sync the derived key to the secret backend so that external consumers + // (e.g. scion-chat-app) that discover signing keys via label-based + // auto-discovery in GCP Secret Manager can still find them. + encodedKey := base64.StdEncoding.EncodeToString(key) + _, isGCPBackend := s.secretBackend.(*secret.GCPBackend) + if err := s.syncSigningKeyToBackend(ctx, keyName, encodedKey, s.hubID, isGCPBackend); err != nil { + slog.Warn("Failed to sync shared-secret-derived key to secret backend", + "key", keyName, "error", err) + } return key, nil } diff --git a/pkg/hub/transport_token.go b/pkg/hub/transport_token.go index 9144f543c..0dfce22a8 100644 --- a/pkg/hub/transport_token.go +++ b/pkg/hub/transport_token.go @@ -39,11 +39,11 @@ type TransportTokenMinter interface { // RefreshTokenEntry represents a single token in the generalized refresh response. // Used in both the refresh endpoint response and internally for dispatch payload construction. type RefreshTokenEntry struct { - Layer string `json:"layer"` // "app" | "transport" - Type string `json:"type"` // "scion_access" | "scion_refresh" | "google_oidc" - Value string `json:"value"` // the token value - ExpiresIn int `json:"expiresIn"` // seconds until expiry - Audience string `json:"audience,omitempty"` // only for transport tokens + Layer string `json:"layer"` // "app" | "transport" + Type string `json:"type"` // "scion_access" | "scion_refresh" | "google_oidc" + Value string `json:"value"` // the token value + ExpiresIn int `json:"expiresIn"` // seconds until expiry + Audience string `json:"audience,omitempty"` // only for transport tokens } // noopTransportMinter is used when transport auth is disabled (mode == "none"). @@ -86,7 +86,7 @@ func (m *gcpTransportMinter) getOrCreateService() (*iamcredentials.Service, erro m.svcOnce.Do(func() { var opts []option.ClientOption if m.iamEndpoint != "" { - opts = append(opts, option.WithEndpoint(m.iamEndpoint)) + opts = append(opts, option.WithEndpoint(m.iamEndpoint), option.WithoutAuthentication()) } m.svc, m.svcErr = iamcredentials.NewService(context.Background(), opts...) }) diff --git a/pkg/hub/web_test.go b/pkg/hub/web_test.go index 5dafa97eb..22c09a113 100644 --- a/pkg/hub/web_test.go +++ b/pkg/hub/web_test.go @@ -1358,9 +1358,11 @@ func TestSessionStore_DifferentSecretCannotDecode(t *testing.T) { reqC.AddCookie(c) } sessC, err := replicaC.sessionStore.Get(reqC, webSessionName) + require.NotNil(t, sessC) // A cookie authenticated/encrypted with a different secret fails to decode: // gorilla returns a decode error together with a fresh, empty session. // Either way, the state must not leak across mismatched secrets. + require.NotNil(t, sessC, "session store must return a non-nil session even on decode error") if err == nil { assert.True(t, sessC.IsNew, "session from a mismatched secret should be new/empty") } diff --git a/pkg/observability/dispatchmetrics/dispatchmetrics.go b/pkg/observability/dispatchmetrics/dispatchmetrics.go index d2ced25d5..4e7a15ae5 100644 --- a/pkg/observability/dispatchmetrics/dispatchmetrics.go +++ b/pkg/observability/dispatchmetrics/dispatchmetrics.go @@ -24,15 +24,15 @@ import ( const instrumentationName = "github.com/GoogleCloudPlatform/scion/pkg/observability/dispatchmetrics" const ( - MetricDispatchPublished = "scion.dispatch.published" - MetricDispatchClaimed = "scion.dispatch.claimed" - MetricDispatchDone = "scion.dispatch.done" - MetricDispatchFailed = "scion.dispatch.failed" - MetricDispatchLatency = "scion.dispatch.intent_to_done.duration" - MetricMessageDispatched = "scion.dispatch.message.dispatched" - MetricMessageStuck = "scion.dispatch.message.stuck" - MetricCmdBusReconnects = "scion.dispatch.cmdbus.reconnects" - MetricReconcileDrainDur = "scion.dispatch.reconcile.drain.duration" + MetricDispatchPublished = "scion.dispatch.published" + MetricDispatchClaimed = "scion.dispatch.claimed" + MetricDispatchDone = "scion.dispatch.done" + MetricDispatchFailed = "scion.dispatch.failed" + MetricDispatchLatency = "scion.dispatch.intent_to_done.duration" + MetricMessageDispatched = "scion.dispatch.message.dispatched" + MetricMessageStuck = "scion.dispatch.message.stuck" + MetricCmdBusReconnects = "scion.dispatch.cmdbus.reconnects" + MetricReconcileDrainDur = "scion.dispatch.reconcile.drain.duration" ) // Recorder is the interface callers use to record broker-dispatch metrics. @@ -59,11 +59,11 @@ type Recorder interface { type recorder struct { enabled bool - published metric.Int64Counter - claimed metric.Int64Counter - done metric.Int64Counter - failed metric.Int64Counter - latency metric.Float64Histogram + published metric.Int64Counter + claimed metric.Int64Counter + done metric.Int64Counter + failed metric.Int64Counter + latency metric.Float64Histogram msgDispatched metric.Int64Counter msgStuck metric.Int64Gauge diff --git a/pkg/runtimebroker/handlers.go b/pkg/runtimebroker/handlers.go index 66b8554cc..b715e994a 100644 --- a/pkg/runtimebroker/handlers.go +++ b/pkg/runtimebroker/handlers.go @@ -1627,28 +1627,10 @@ func (s *Server) resetAuth(w http.ResponseWriter, r *http.Request, id, projectID } // Write the token to the canonical file atomically via temp+rename. + // Write the token to the canonical file atomically via temp+rename. + // Pass the token as part of the script using a heredoc pattern to avoid + // exposing it in argv (visible in /proc). writeCmd := []string{"sh", "-c", - "TOKEN_DIR=\"$(getent passwd scion 2>/dev/null | cut -d: -f6 || echo /home/scion)/.scion\" && " + - "mkdir -p \"$TOKEN_DIR\" && " + - "cat > \"$TOKEN_DIR/scion-token.tmp\" && " + - "mv \"$TOKEN_DIR/scion-token.tmp\" \"$TOKEN_DIR/scion-token\""} - - // Use exec with stdin to avoid passing the token on the command line. - // The exec interface takes a command array; we pipe the token via the - // command's stdin by embedding it in the shell script. - writeCmd = []string{"sh", "-c", - fmt.Sprintf( - "TOKEN_DIR=\"$(getent passwd scion 2>/dev/null | cut -d: -f6 || echo /home/scion)/.scion\" && "+ - "mkdir -p \"$TOKEN_DIR\" && "+ - "printf '%%s' \"$SCION_RESET_TOKEN\" > \"$TOKEN_DIR/scion-token.tmp\" && "+ - "mv \"$TOKEN_DIR/scion-token.tmp\" \"$TOKEN_DIR/scion-token\"", - ), - } - - // ExecWithEnv is not available; pass token as part of the script. - // Avoid embedding the raw token in argv (visible in /proc). Instead, - // write it via a heredoc pattern that doesn't expose it. - writeCmd = []string{"sh", "-c", "TOKEN_DIR=\"$(getent passwd scion 2>/dev/null | cut -d: -f6 || echo /home/scion)/.scion\" && " + "mkdir -p \"$TOKEN_DIR\" && " + "cat <<'SCION_TOKEN_EOF' > \"$TOKEN_DIR/scion-token.tmp\"\n" + req.Token + "\nSCION_TOKEN_EOF\n" + diff --git a/pkg/sciontool/hub/client.go b/pkg/sciontool/hub/client.go index 7047487cf..8c92d1c93 100644 --- a/pkg/sciontool/hub/client.go +++ b/pkg/sciontool/hub/client.go @@ -434,7 +434,7 @@ func (c *Client) applyRefreshTokens(tokens []RefreshTokenEntry) { entryExpiry := time.Now().Add(time.Duration(entry.ExpiresIn) * time.Second) c.oidcSource.setToken(entry.Value, entryExpiry) } - // app/scion_access is already handled via the legacy token field above + // app/scion_access is already handled via the legacy token field above } } } diff --git a/pkg/store/entadapter/broker_affinity_test.go b/pkg/store/entadapter/broker_affinity_test.go index c48fc54f9..78efa0774 100644 --- a/pkg/store/entadapter/broker_affinity_test.go +++ b/pkg/store/entadapter/broker_affinity_test.go @@ -1,3 +1,5 @@ +//go:build !no_sqlite + package entadapter import ( diff --git a/pkg/store/entadapter/composite.go b/pkg/store/entadapter/composite.go index 4b10cad3c..43c2ce5dc 100644 --- a/pkg/store/entadapter/composite.go +++ b/pkg/store/entadapter/composite.go @@ -66,18 +66,18 @@ var _ store.Store = (*CompositeStore)(nil) // agent -> project) resolve natively without any shadow synchronization. func NewCompositeStore(client *ent.Client) *CompositeStore { return &CompositeStore{ - AgentStore: NewAgentStore(client), - ProjectStore: NewProjectStore(client), - UserStore: NewUserStore(client), - SecretStore: NewSecretStore(client), - TemplateStore: NewTemplateStore(client), - NotificationStore: NewNotificationStore(client), - ScheduleStore: NewScheduleStore(client), - MaintenanceStore: NewMaintenanceStore(client), - MessageStore: NewMessageStore(client), - ExternalStore: NewExternalStore(client), - BrokerSecretStore: NewBrokerSecretStore(client), - AllowListStore: NewAllowListStore(client), + AgentStore: NewAgentStore(client), + ProjectStore: NewProjectStore(client), + UserStore: NewUserStore(client), + SecretStore: NewSecretStore(client), + TemplateStore: NewTemplateStore(client), + NotificationStore: NewNotificationStore(client), + ScheduleStore: NewScheduleStore(client), + MaintenanceStore: NewMaintenanceStore(client), + MessageStore: NewMessageStore(client), + ExternalStore: NewExternalStore(client), + BrokerSecretStore: NewBrokerSecretStore(client), + AllowListStore: NewAllowListStore(client), GroupStore: NewGroupStore(client), PolicyStore: NewPolicyStore(client), BrokerDispatchStore: NewBrokerDispatchStore(client), diff --git a/pkg/store/entadapter/message_store.go b/pkg/store/entadapter/message_store.go index 72111b1b0..bd580a1e3 100644 --- a/pkg/store/entadapter/message_store.go +++ b/pkg/store/entadapter/message_store.go @@ -63,17 +63,17 @@ func (s *MessageStore) WithPublisher(p MessagePublisher) *MessageStore { func entMessageToStore(e *ent.Message) *store.Message { return &store.Message{ - ID: e.ID.String(), - ProjectID: e.ProjectID.String(), - Sender: e.Sender, - SenderID: e.SenderID, - Recipient: e.Recipient, - RecipientID: e.RecipientID, - Msg: e.Msg, - Type: e.Type, - Urgent: e.Urgent, - Broadcasted: e.Broadcasted, - Read: e.Read, + ID: e.ID.String(), + ProjectID: e.ProjectID.String(), + Sender: e.Sender, + SenderID: e.SenderID, + Recipient: e.Recipient, + RecipientID: e.RecipientID, + Msg: e.Msg, + Type: e.Type, + Urgent: e.Urgent, + Broadcasted: e.Broadcasted, + Read: e.Read, AgentID: e.AgentID, GroupID: e.GroupID, CreatedAt: e.Created, diff --git a/pkg/store/models.go b/pkg/store/models.go index a2d19eab7..e2ec9963d 100644 --- a/pkg/store/models.go +++ b/pkg/store/models.go @@ -795,7 +795,7 @@ const ( type BrokerDispatch struct { ID string `json:"id"` BrokerID string `json:"brokerId"` - AgentID string `json:"agentId,omitempty"` // empty for project-scoped ops + AgentID string `json:"agentId,omitempty"` // empty for project-scoped ops AgentSlug string `json:"agentSlug,omitempty"` ProjectID string `json:"projectId,omitempty"` // empty if unknown/none Op string `json:"op"` // start|stop|restart|delete|finalize_env|check_prompt|create|message