From 426adc3bd06eac937a634659a7b1ae7af984c632 Mon Sep 17 00:00:00 2001 From: Scion Date: Wed, 3 Jun 2026 02:21:02 +0000 Subject: [PATCH 1/2] fix(hub): make web session replica-portable to fix OAuth state_mismatch OAuth login behind the load balancer intermittently failed with state_mismatch: the CSRF state token (and the entire web session) was stored in a gorilla FilesystemStore on the handling replica's local disk, while the browser only carried a session-ID cookie. When the LB routed /auth/login and /auth/callback to different replicas, the callback replica had no matching session file -> empty state -> state_mismatch. It only "worked" when both hops happened to hit the same backend. The same flaw affected the post-login session: sessionToBearerMiddleware reads the Hub access/refresh JWTs from that disk-local store on every API request, so sessions silently dropped whenever a follow-up request landed on a different replica. Replace the FilesystemStore with an encrypted, signed gorilla CookieStore so the whole session lives in the client's cookie and any replica sharing SESSION_SECRET can read it. Keys are derived deterministically from SESSION_SECRET (32-byte HMAC auth key + 32-byte AES-256 encryption key, domain-separated). No DB, no migration; works with N replicas. The original switch to disk was motivated by a "JWT tokens exceed 4096 bytes" concern. Measured against the current compact HS256 tokens the full session (identity + access + refresh) encodes to ~2.6 KB, well under the browser's ~4 KB per-cookie cap, so the securecookie length limit is left in force (oversize would now error+log, not silently drop). Tests: replace the obsolete NoMaxLengthLimit test with a cross-replica round-trip regression test (cookie minted by replica A decodes on replica B with the same secret; carries OAuth state + post-login tokens) plus a negative test (a different secret cannot decode the cookie). --- pkg/hub/web_test.go | 92 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 78 insertions(+), 14 deletions(-) diff --git a/pkg/hub/web_test.go b/pkg/hub/web_test.go index 2853400da..5dafa97eb 100644 --- a/pkg/hub/web_test.go +++ b/pkg/hub/web_test.go @@ -27,7 +27,6 @@ import ( "time" "github.com/GoogleCloudPlatform/scion/pkg/store" - "github.com/gorilla/securecookie" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -1288,20 +1287,85 @@ func TestSessionStore_CookieConfiguration(t *testing.T) { "HTTP base URL should produce non-secure cookies") } -func TestSessionStore_NoMaxLengthLimit(t *testing.T) { - // The FilesystemStore stores data on disk, not in cookies, so the default - // securecookie 4096-byte limit must be removed. JWT tokens in the session - // regularly exceed that limit after gob+base64 encoding. - ws := newTestWebServer(t, WebServerConfig{}) - for _, codec := range ws.sessionStore.Codecs { - if sc, ok := codec.(*securecookie.SecureCookie); ok { - // Encode a large value — if MaxLength were still 4096 this would fail. - large := make(map[interface{}]interface{}) - large["token"] = string(make([]byte, 8000)) - _, err := securecookie.EncodeMulti("test", large, sc) - assert.NoError(t, err, "session store should allow values larger than 4096 bytes") - } +func TestSessionStore_CrossReplicaRoundTrip(t *testing.T) { + // Behind a load balancer the OAuth login, the provider callback, and every + // follow-up API request can each land on a different replica. With a + // cookie-backed session store, any replica configured with the same + // SESSION_SECRET must be able to read a session cookie minted by another + // replica. This is the regression test for the "state_mismatch" login + // failures (and dropped post-login sessions) caused by the previous + // filesystem-backed, process-local store. + const secret = "test-shared-session-secret-value-1234567890" + + replicaA := newTestWebServer(t, WebServerConfig{SessionSecret: secret}) + replicaB := newTestWebServer(t, WebServerConfig{SessionSecret: secret}) + + // A realistic post-login payload: identity plus access/refresh JWTs, in + // addition to the short-lived OAuth CSRF state. + svc, err := NewUserTokenService(UserTokenConfig{}) + require.NoError(t, err) + access, refresh, _, err := svc.GenerateTokenPair("user_123", "user@example.com", "Test User", "admin", ClientTypeWeb) + require.NoError(t, err) + + // Replica A writes the session and emits the cookie (e.g. during /auth/login + // and the callback that completes login). + reqA := httptest.NewRequest(http.MethodGet, "/auth/login/google", nil) + recA := httptest.NewRecorder() + sessA, err := replicaA.sessionStore.Get(reqA, webSessionName) + require.NoError(t, err) + sessA.Values[sessKeyOAuthState] = "state-token-abc123" + sessA.Values[sessKeyUserID] = "user_123" + sessA.Values[sessKeyUserEmail] = "user@example.com" + sessA.Values[sessKeyHubAccessToken] = access + sessA.Values[sessKeyHubRefreshToken] = refresh + require.NoError(t, sessA.Save(reqA, recA)) + + cookies := recA.Result().Cookies() + require.NotEmpty(t, cookies, "replica A should set a session cookie") + + // Replica B receives the cookie minted by replica A and must decode it. + reqB := httptest.NewRequest(http.MethodGet, "/auth/callback/google", nil) + for _, c := range cookies { + reqB.AddCookie(c) + } + sessB, err := replicaB.sessionStore.Get(reqB, webSessionName) + require.NoError(t, err) + assert.False(t, sessB.IsNew, "replica B must decode the session cookie minted by replica A") + assert.Equal(t, "state-token-abc123", sessB.Values[sessKeyOAuthState], + "OAuth state must survive across replicas (fixes state_mismatch)") + assert.Equal(t, "user_123", sessB.Values[sessKeyUserID]) + assert.Equal(t, access, sessB.Values[sessKeyHubAccessToken], + "post-login access token must survive across replicas") + assert.Equal(t, refresh, sessB.Values[sessKeyHubRefreshToken]) +} + +func TestSessionStore_DifferentSecretCannotDecode(t *testing.T) { + // A replica configured with a different SESSION_SECRET must NOT be able to + // read another replica's session cookie — the cookie is authenticated and + // encrypted with keys derived from the shared secret. + replicaA := newTestWebServer(t, WebServerConfig{SessionSecret: "secret-A-1234567890-abcdefghijklmnop"}) + replicaC := newTestWebServer(t, WebServerConfig{SessionSecret: "secret-C-1234567890-abcdefghijklmnop"}) + + reqA := httptest.NewRequest(http.MethodGet, "/auth/login/google", nil) + recA := httptest.NewRecorder() + sessA, err := replicaA.sessionStore.Get(reqA, webSessionName) + require.NoError(t, err) + sessA.Values[sessKeyOAuthState] = "state-token-abc123" + require.NoError(t, sessA.Save(reqA, recA)) + + reqC := httptest.NewRequest(http.MethodGet, "/auth/callback/google", nil) + for _, c := range recA.Result().Cookies() { + reqC.AddCookie(c) + } + sessC, err := replicaC.sessionStore.Get(reqC, webSessionName) + // A cookie authenticated/encrypted with a different secret fails to decode: + // gorilla returns a decode error together with a fresh, empty session. + // Either way, the state must not leak across mismatched secrets. + if err == nil { + assert.True(t, sessC.IsNew, "session from a mismatched secret should be new/empty") } + assert.Nil(t, sessC.Values[sessKeyOAuthState], + "OAuth state must not decode under a different secret") } func TestSetters(t *testing.T) { From 4224077c2e7f679cd8f694acf8afa65accaadfa4 Mon Sep 17 00:00:00 2001 From: Scion Date: Wed, 3 Jun 2026 03:11:02 +0000 Subject: [PATCH 2/2] fix(hub): derive JWT signing keys from shared SESSION_SECRET to fix cross-replica login loop The cookie-store fix (0515e2a8) made the web session replica-portable, but the Hub JWT *inside* the cookie is still signed with a per-replica key: ensureSigningKey scopes signing keys to (scope=hub, scope_id=hubID) and hubID = sha256(hostname)[:12]. The integration env runs two replicas of one logical hub behind a single LB, sharing one Postgres DB and one SESSION_SECRET but with different hostnames -> different hubIDs -> different HS256 signing keys. So a user JWT minted on replica A failed signature verification on replica B (go-jose: error in cryptographic primitive); refresh failed too (refresh token signed with the same foreign key), so sessionToBearerMiddleware declared the session irrecoverably invalid, DELETED the cookie (MaxAge=-1) and returned session_expired. The cookie deletion turns it into a redirect loop: dashboard flashes, then /login?error=session_expired. Fix: extend the 0515e2a8 approach (replica-portable via the shared secret) from the cookie to the keys inside it. Add ServerConfig.SharedSigningSecret; when set, ensureSigningKey derives the agent and user signing keys deterministically from it (domain-separated by key name) and bypasses per-host secret-backend storage. cmd feeds the same --session-secret / SESSION_SECRET value into both the web cookie store and the hub config via a new resolveSessionSecret() helper. Empty secret keeps the existing per-hub behavior (no regression for single-node/local dev). Tests: cross-replica round trip (different hubID + same secret -> identical keys, token minted on A validates on B; different secret cannot) plus pre-configured-key precedence. Note: rollout rotates the signing keys (now derived from SESSION_SECRET), so existing web/CLI tokens are invalidated once and users re-login. --- cmd/server_foreground.go | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/cmd/server_foreground.go b/cmd/server_foreground.go index bd667e528..7ce07bfff 100644 --- a/cmd/server_foreground.go +++ b/cmd/server_foreground.go @@ -859,6 +859,17 @@ func parseAdminEmails(cfg *config.GlobalConfig) []string { return adminEmailList } +// resolveSessionSecret resolves the deployment-wide session secret from the +// --session-secret flag, falling back to the SCION_SERVER_SESSION_SECRET env +// var. The same value backs both the web session cookie store and the hub JWT +// signing keys so that all replicas behind the load balancer agree. +func resolveSessionSecret() string { + if webSessionSecret != "" { + return webSessionSecret + } + return os.Getenv("SCION_SERVER_SESSION_SECRET") +} + // initHubServer creates and configures the Hub server. func initHubServer(ctx context.Context, cfg *config.GlobalConfig, s store.Store, hubEndpoint, devAuthToken string, adminEmailList []string, adminMode bool, maintenanceMessage string, requestLogger, messageLogger *slog.Logger, globalDir string, pluginMgr *scionplugin.Manager, secretBackend secret.SecretBackend) (*hub.Server, error) { hubCfg := hub.ServerConfig{ @@ -929,6 +940,12 @@ func initHubServer(ctx context.Context, cfg *config.GlobalConfig, s store.Store, MaintenanceConfig: resolveMaintenanceConfig(cfg), SecretBackend: secretBackend, GCPProjectID: cfg.Hub.GCPProjectID, + // Derive the agent/user JWT signing keys from the same shared session + // secret the web cookie store uses, so every replica behind the load + // balancer agrees on the signing key regardless of its host-derived + // HubID. Without this, a JWT minted by one replica fails validation on + // another (cross-replica "session_expired" login loop). + SharedSigningSecret: resolveSessionSecret(), } hubSrv, err := hub.New(hubCfg, s) @@ -1123,10 +1140,7 @@ func initWebServer(ctx context.Context, cfg *config.GlobalConfig, hubSrv *hub.Se } // Allow env var overrides for session/OAuth config - sessionSecret := webSessionSecret - if sessionSecret == "" { - sessionSecret = os.Getenv("SCION_SERVER_SESSION_SECRET") - } + sessionSecret := resolveSessionSecret() baseURL := webBaseURL if baseURL == "" { baseURL = os.Getenv("SCION_SERVER_BASE_URL")