Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 64 additions & 4 deletions pkg/sciontool/metadata/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,16 @@
import (
"bytes"
"context"
"crypto/rand"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"io"
"net"
"net/http"
"os"
"path/filepath"
"strings"
"sync"
"syscall"
Expand Down Expand Up @@ -142,6 +145,9 @@
healthMu sync.Mutex
restartCount int
abandoned bool

shutdownToken string
shutdownTokenPath string
}

// authToken returns the current auth token, preferring the dynamic TokenFunc
Expand Down Expand Up @@ -200,10 +206,6 @@
s.cancel = cancel

addr := fmt.Sprintf("127.0.0.1:%d", s.config.Port)
s.srv = &http.Server{
Addr: addr,
Handler: s.buildMux(),
}

ln, err := net.Listen("tcp", addr)
if err != nil && errors.Is(err, syscall.EADDRINUSE) {
Expand Down Expand Up @@ -241,6 +243,16 @@
return fmt.Errorf("metadata server listen: %w", err)
}

if err := s.ensureShutdownToken(); err != nil {
cancel()
ln.Close()

Check failure on line 248 in pkg/sciontool/metadata/server.go

View workflow job for this annotation

GitHub Actions / golangci-lint

Error return value of `ln.Close` is not checked (errcheck)
return fmt.Errorf("metadata server shutdown token: %w", err)
}
s.srv = &http.Server{
Addr: addr,
Handler: s.buildMux(),
}

// Track this server so a future Start() can forcefully close it.
activeServerMu.Lock()
activeServer = s
Expand Down Expand Up @@ -352,6 +364,11 @@
s.srv.Shutdown(shutdownCtx)
shutdownCancel()
}
if s.shutdownTokenPath != "" {
if err := os.Remove(s.shutdownTokenPath); err != nil && !errors.Is(err, os.ErrNotExist) {
log.Debug("Failed to remove metadata shutdown token file %s: %v", s.shutdownTokenPath, err)
}
}
Comment on lines +367 to +371

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

There is a race condition between a stopping server instance (srv1) and a starting server instance (srv2) reclaiming the port.

When srv1.Stop() is called, s.srv.Shutdown(shutdownCtx) immediately closes the listener. This allows srv2 to successfully bind to the port and write its new token file via ensureShutdownToken(). However, srv1's Stop() continues executing and deletes the token file at s.shutdownTokenPath after srv2 has already written its new token. This leaves srv2 running without a token file, preventing any future instances from shutting it down.

To fix this, delete the token file before calling s.srv.Shutdown(shutdownCtx).


if s.cancel != nil {
s.cancel()
Expand All @@ -369,6 +386,12 @@
return
}
req.Header.Set("Metadata-Flavor", "Google")
token, err := os.ReadFile(shutdownTokenPath(s.config.Port))
if err != nil {
log.Debug("Could not read metadata shutdown token for port %d: %v", s.config.Port, err)
return
}
req.Header.Set("X-Scion-Shutdown-Token", strings.TrimSpace(string(token)))
resp, err := client.Do(req)
if err != nil {
log.Debug("Could not reach existing metadata server for shutdown: %v", err)
Expand All @@ -385,6 +408,10 @@
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
return
}
if s.shutdownToken == "" || r.Header.Get("X-Scion-Shutdown-Token") != s.shutdownToken {
http.Error(w, "Forbidden", http.StatusForbidden)
return
}
Comment on lines +411 to +414

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-medium medium

The shutdown token is compared using standard string comparison (!=), which is not constant-time and can leak token information via timing attacks.

Additionally, we should trim any leading/trailing whitespace from the incoming header token to be robust against trailing newlines (e.g., if the token file was read with a trailing newline).

Please use crypto/subtle.ConstantTimeCompare to compare the tokens securely. Note that you will need to import crypto/subtle.

Suggested change
if s.shutdownToken == "" || r.Header.Get("X-Scion-Shutdown-Token") != s.shutdownToken {
http.Error(w, "Forbidden", http.StatusForbidden)
return
}
inputToken := strings.TrimSpace(r.Header.Get("X-Scion-Shutdown-Token"))
if s.shutdownToken == "" || len(inputToken) != len(s.shutdownToken) || subtle.ConstantTimeCompare([]byte(inputToken), []byte(s.shutdownToken)) != 1 {
http.Error(w, "Forbidden", http.StatusForbidden)
return
}

log.Info("Shutdown requested via /_scion/shutdown, stopping metadata server")
w.WriteHeader(http.StatusOK)
fmt.Fprint(w, "shutting down")
Expand All @@ -394,6 +421,39 @@
}()
}

func shutdownTokenPath(port int) string {
return filepath.Join(os.TempDir(), fmt.Sprintf("scion-metadata-shutdown-%d.token", port))
}
Comment on lines +424 to +426

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-medium medium

Using os.TempDir() directly can expose the application to local Denial of Service (DoS) attacks on multi-user systems. Since /tmp is world-writable, any local user can pre-create the predictable token file (e.g., /tmp/scion-metadata-shutdown-<port>.token) owned by themselves, which will cause os.Remove and the subsequent server startup to fail with Permission denied.

Using os.UserRuntimeDir() (which typically points to /run/user/<uid>) is much more secure as it is only accessible by the current user. We can fall back to os.TempDir() if it is not available.

func shutdownTokenPath(port int) string {
	dir := os.TempDir()
	if runtimeDir, err := os.UserRuntimeDir(); err == nil {
		dir = runtimeDir
	}
	return filepath.Join(dir, fmt.Sprintf("scion-metadata-shutdown-%d.token", port))
}


func (s *Server) ensureShutdownToken() error {
if s.shutdownToken != "" {
return nil
}
tokenBytes := make([]byte, 32)
if _, err := rand.Read(tokenBytes); err != nil {
return err
}
s.shutdownToken = hex.EncodeToString(tokenBytes)
s.shutdownTokenPath = shutdownTokenPath(s.config.Port)
return writeShutdownToken(s.shutdownTokenPath, s.shutdownToken)
}

func writeShutdownToken(path, token string) error {
if err := os.Remove(path); err != nil && !errors.Is(err, os.ErrNotExist) {
return err
}
f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_EXCL|syscall.O_NOFOLLOW, 0600)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

syscall.O_NOFOLLOW is not defined on Windows, which will break compilation of sciontool on Windows platforms.

Furthermore, O_NOFOLLOW is redundant here because os.O_EXCL combined with os.O_CREATE already guarantees that the file creation will fail with EEXIST if the path is a symbolic link (even a dangling one). Removing syscall.O_NOFOLLOW fixes cross-platform compilation while maintaining the same security guarantees.

Suggested change
f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_EXCL|syscall.O_NOFOLLOW, 0600)
f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0600)

if err != nil {
return err
}
defer f.Close()

Check failure on line 449 in pkg/sciontool/metadata/server.go

View workflow job for this annotation

GitHub Actions / golangci-lint

Error return value of `f.Close` is not checked (errcheck)
if _, err := f.WriteString(token + "\n"); err != nil {
_ = os.Remove(path)
return err
}
Comment on lines +450 to +453

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

On Windows, attempting to delete an open file using os.Remove will fail with a sharing violation. To ensure cross-platform compatibility and robustness, explicitly close the file descriptor f before attempting to remove the file on write failure.

Suggested change
if _, err := f.WriteString(token + "\n"); err != nil {
_ = os.Remove(path)
return err
}
if _, err := f.WriteString(token + "\n"); err != nil {
f.Close()
_ = os.Remove(path)
return err
}

return nil
}

func (s *Server) probeHealth() bool {
client := &http.Client{Timeout: healthCheckTimeout}
resp, err := client.Get(fmt.Sprintf("http://127.0.0.1:%d/", s.config.Port))
Expand Down
66 changes: 65 additions & 1 deletion pkg/sciontool/metadata/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
"net"
"net/http"
"net/http/httptest"
"os"
"strings"
"sync"
"sync/atomic"
"testing"
Expand Down Expand Up @@ -762,13 +764,31 @@
t.Fatalf("expected 403 without Metadata-Flavor, got %d", resp.StatusCode)
}

// POST with Metadata-Flavor should succeed and shut down
// POST with Metadata-Flavor but no shutdown token should be rejected
req, _ = http.NewRequest(http.MethodPost, fmt.Sprintf("http://127.0.0.1:%d/_scion/shutdown", port), nil)
req.Header.Set("Metadata-Flavor", "Google")
resp, err = http.DefaultClient.Do(req)
if err != nil {
t.Fatal(err)
}
resp.Body.Close()

Check failure on line 774 in pkg/sciontool/metadata/server_test.go

View workflow job for this annotation

GitHub Actions / golangci-lint

Error return value of `resp.Body.Close` is not checked (errcheck)
if resp.StatusCode != http.StatusForbidden {
t.Fatalf("expected 403 without shutdown token, got %d", resp.StatusCode)
}

token, err := os.ReadFile(shutdownTokenPath(port))
if err != nil {
t.Fatal(err)
}

// POST with Metadata-Flavor and shutdown token should succeed and shut down
req, _ = http.NewRequest(http.MethodPost, fmt.Sprintf("http://127.0.0.1:%d/_scion/shutdown", port), nil)
req.Header.Set("Metadata-Flavor", "Google")
req.Header.Set("X-Scion-Shutdown-Token", strings.TrimSpace(string(token)))
resp, err = http.DefaultClient.Do(req)
if err != nil {
t.Fatal(err)
}
body, _ := io.ReadAll(resp.Body)
resp.Body.Close()
if resp.StatusCode != http.StatusOK {
Expand Down Expand Up @@ -833,3 +853,47 @@
t.Fatalf("expected new-project from replacement server, got %q", body)
}
}

func TestMetadataServer_StartReclaimsPortViaShutdownEndpoint(t *testing.T) {
port := freePort(t)

srv1 := New(Config{
Mode: "block",
Port: port,
ProjectID: "old-project",
})
ctx1, cancel1 := context.WithCancel(context.Background())
defer cancel1()

if err := srv1.Start(ctx1); err != nil {
t.Fatal(err)
}
defer srv1.Stop()
time.Sleep(50 * time.Millisecond)

activeServerMu.Lock()
activeServer = nil
activeServerMu.Unlock()

srv2 := New(Config{
Mode: "block",
Port: port,
ProjectID: "new-project",
})
ctx2, cancel2 := context.WithCancel(context.Background())
defer cancel2()

if err := srv2.Start(ctx2); err != nil {
t.Fatalf("second Start() should reclaim port via shutdown endpoint: %v", err)
}
defer srv2.Stop()
time.Sleep(50 * time.Millisecond)

resp, body := metadataGet(t, port, "/computeMetadata/v1/project/project-id")
if resp.StatusCode != http.StatusOK {
t.Fatalf("expected 200, got %d", resp.StatusCode)
}
if body != "new-project" {
t.Fatalf("expected new-project from replacement server, got %q", body)
}
}
Loading