From 9e8d2e815240fa4250cafe8a00482f97cbd89a2f Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Thu, 12 Mar 2026 20:48:58 -0400 Subject: [PATCH 01/10] Add other backends (HTTP/WebDAV, Globus, S3/blob) to internal Pelican --- docs/parameters.yaml | 52 ++ e2e_fed_tests/s3v2_test.go | 570 ++++++++++++++++++++++ go.mod | 85 +++- go.sum | 203 ++++++-- launchers/origin_serve.go | 36 +- origin/advertise.go | 6 +- origin/globus.go | 37 ++ origin_serve/backend_globus.go | 237 ++++++++++ origin_serve/backend_globus_test.go | 215 +++++++++ origin_serve/backend_https.go | 656 ++++++++++++++++++++++++++ origin_serve/backend_https_test.go | 561 ++++++++++++++++++++++ origin_serve/backend_s3.go | 545 +++++++++++++++++++++ origin_serve/backend_s3_minio_test.go | 309 ++++++++++++ origin_serve/backend_s3_test.go | 622 ++++++++++++++++++++++++ origin_serve/handlers.go | 131 ++++- param/parameters.go | 25 + param/parameters_struct.go | 10 + server_structs/origin.go | 25 +- server_utils/origin.go | 6 + server_utils/origin_globusv2.go | 31 ++ server_utils/origin_httpsv2.go | 31 ++ server_utils/origin_s3.go | 20 + server_utils/origin_s3v2.go | 45 ++ 23 files changed, 4385 insertions(+), 73 deletions(-) create mode 100644 e2e_fed_tests/s3v2_test.go create mode 100644 origin_serve/backend_globus.go create mode 100644 origin_serve/backend_globus_test.go create mode 100644 origin_serve/backend_https.go create mode 100644 origin_serve/backend_https_test.go create mode 100644 origin_serve/backend_s3.go create mode 100644 origin_serve/backend_s3_minio_test.go create mode 100644 origin_serve/backend_s3_test.go create mode 100644 server_utils/origin_globusv2.go create mode 100644 server_utils/origin_httpsv2.go create mode 100644 server_utils/origin_s3v2.go diff --git a/docs/parameters.yaml b/docs/parameters.yaml index f29b7f90ab..938691c419 100644 --- a/docs/parameters.yaml +++ b/docs/parameters.yaml @@ -1651,6 +1651,24 @@ type: string default: path components: ["origin"] --- +name: Origin.ObjectProviderURL +description: |+ + A gocloud.dev/blob-compatible URL that specifies the object store provider and bucket/container to use for the s3v2 storage type. + When set, this takes precedence over Origin.S3ServiceUrl, Origin.S3Region, and Origin.S3UrlStyle, allowing a single parameter to + configure S3, GCS, or Azure Object Storage. + + Examples: + - S3: "s3://my-bucket?region=us-east-1&endpoint=https://s3.us-east-1.amazonaws.com" + - GCS: "gs://my-bucket" + - Azure: "azblob://my-container" + + The Origin.StoragePrefix is still applied on top of this URL. + S3 credentials are still read from Origin.S3AccessKeyfile/S3SecretKeyfile when using s3:// URLs. + GCS and Azure use application default credentials from the environment. +type: string +default: none +components: ["origin"] +--- name: Origin.HttpServiceUrl description: |+ If Origin.StorageType is set to `https`, the service URL is used as the base for requests to the backend. To generate the @@ -1672,6 +1690,40 @@ type: filename default: none components: ["origin"] --- +name: Origin.HttpAuthTokenPassthrough +description: |+ + When set to true for httpsv2 backends, the client's bearer token is forwarded to the upstream HTTP server + instead of using a static token. + This is mutually exclusive with Origin.HttpAuthTokenFile. +type: bool +default: false +components: ["origin"] +--- +name: Origin.HttpAuthOAuth2ClientID +description: |+ + When set for httpsv2 backends, configures OAuth2 client credentials for automatic token management + to the upstream HTTP server. + Requires Origin.HttpAuthOAuth2ClientSecretFile and Origin.HttpAuthOAuth2Issuer to also be set. +type: string +default: none +components: ["origin"] +--- +name: Origin.HttpAuthOAuth2ClientSecretFile +description: |+ + Path to a file containing the OAuth2 client secret for automatic token management to the upstream HTTP server. + The file should contain the secret as plain text (leading/trailing whitespace is trimmed). +type: filename +default: none +components: ["origin"] +--- +name: Origin.HttpAuthOAuth2Issuer +description: |+ + The OAuth2/OIDC issuer URL for automatic token management to the upstream HTTP server. + The token endpoint is discovered automatically via the issuer's `/.well-known/openid-configuration` metadata. +type: string +default: none +components: ["origin"] +--- name: Origin.XRootServiceUrl description: |+ When the origin is configured to export another XRootD storage backend by setting `Origin.StorageType = xroot`, the `XRootServiceUrl` diff --git a/e2e_fed_tests/s3v2_test.go b/e2e_fed_tests/s3v2_test.go new file mode 100644 index 0000000000..00539d4e89 --- /dev/null +++ b/e2e_fed_tests/s3v2_test.go @@ -0,0 +1,570 @@ +//go:build !windows + +/*************************************************************** + * + * Copyright (C) 2026, Pelican Project, Morgridge Institute for Research + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You may + * obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************/ + +package fed_tests + +import ( + "crypto/md5" + "fmt" + "os" + "os/exec" + "path/filepath" + "regexp" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/pelicanplatform/pelican/client" + "github.com/pelicanplatform/pelican/config" + "github.com/pelicanplatform/pelican/fed_test_utils" + "github.com/pelicanplatform/pelican/param" + "github.com/pelicanplatform/pelican/server_utils" + "github.com/pelicanplatform/pelican/test_utils" + "github.com/pelicanplatform/pelican/token" + "github.com/pelicanplatform/pelican/token_scopes" +) + +const s3v2MemOriginConfig = ` +Origin: + StorageType: s3v2 + ObjectProviderURL: "mem://" + Exports: + - FederationPrefix: /test + Capabilities: ["PublicReads", "Writes", "Listings"] +Director: + MinStatResponse: 1 + MaxStatResponse: 1 +` + +// getS3v2Token creates a token with broad read/create/modify scopes for S3v2 tests. +func getS3v2Token(t *testing.T) string { + t.Helper() + issuer, err := config.GetServerIssuerURL() + require.NoError(t, err) + + tokenConfig := token.NewWLCGToken() + tokenConfig.Lifetime = time.Minute + tokenConfig.Issuer = issuer + tokenConfig.Subject = "origin" + tokenConfig.AddAudienceAny() + + readScope, err := token_scopes.Wlcg_Storage_Read.Path("/") + require.NoError(t, err) + createScope, err := token_scopes.Wlcg_Storage_Create.Path("/") + require.NoError(t, err) + modScope, err := token_scopes.Wlcg_Storage_Modify.Path("/") + require.NoError(t, err) + tokenConfig.AddScopes(readScope, createScope, modScope) + + tkn, err := tokenConfig.CreateToken() + require.NoError(t, err) + return tkn +} + +// TestS3v2MemOriginUploadDownload tests the full federation round-trip +// using the in-memory blob backend (mem://). +func TestS3v2MemOriginUploadDownload(t *testing.T) { + t.Cleanup(test_utils.SetupTestLogging(t)) + server_utils.ResetTestState() + defer server_utils.ResetTestState() + + ft := fed_test_utils.NewFedTest(t, s3v2MemOriginConfig) + require.NotNil(t, ft) + require.Greater(t, len(ft.Exports), 0, "Federation should have at least one export") + assert.Equal(t, "/test", ft.Exports[0].FederationPrefix) + + testContent := "Hello from the S3v2 mem:// federation test!" + localTmpDir := t.TempDir() + localFile := filepath.Join(localTmpDir, "test_file.txt") + require.NoError(t, os.WriteFile(localFile, []byte(testContent), 0644)) + + uploadURL := fmt.Sprintf("pelican://%s:%d/test/test_file.txt", + param.Server_Hostname.GetString(), param.Server_WebPort.GetInt()) + + testToken := getS3v2Token(t) + + // Upload + uploadResults, err := client.DoPut(ft.Ctx, localFile, uploadURL, false, client.WithToken(testToken)) + require.NoError(t, err) + require.NotEmpty(t, uploadResults) + assert.Greater(t, uploadResults[0].TransferredBytes, int64(0)) + + // Download + downloadFile := filepath.Join(localTmpDir, "downloaded.txt") + downloadResults, err := client.DoGet(ft.Ctx, uploadURL, downloadFile, false, client.WithToken(testToken)) + require.NoError(t, err) + require.NotEmpty(t, downloadResults) + assert.Equal(t, uploadResults[0].TransferredBytes, downloadResults[0].TransferredBytes) + + // Verify content + got, err := os.ReadFile(downloadFile) + require.NoError(t, err) + assert.Equal(t, testContent, string(got)) +} + +// TestS3v2MemOriginStat tests stat operations against the in-memory backend. +func TestS3v2MemOriginStat(t *testing.T) { + t.Cleanup(test_utils.SetupTestLogging(t)) + server_utils.ResetTestState() + defer server_utils.ResetTestState() + + ft := fed_test_utils.NewFedTest(t, s3v2MemOriginConfig) + require.NotNil(t, ft) + + // Upload a file first (mem backend starts empty) + testContent := []byte("Stat me via the federation") + localTmpDir := t.TempDir() + localFile := filepath.Join(localTmpDir, "stat_test.txt") + require.NoError(t, os.WriteFile(localFile, testContent, 0644)) + + uploadURL := fmt.Sprintf("pelican://%s:%d/test/stat_test.txt", + param.Server_Hostname.GetString(), param.Server_WebPort.GetInt()) + + testToken := getS3v2Token(t) + + _, err := client.DoPut(ft.Ctx, localFile, uploadURL, false, client.WithToken(testToken)) + require.NoError(t, err) + + // Stat the file + statInfo, err := client.DoStat(ft.Ctx, uploadURL, client.WithToken(testToken)) + require.NoError(t, err) + assert.Equal(t, int64(len(testContent)), statInfo.Size) + assert.Equal(t, "/test/stat_test.txt", statInfo.Name) +} + +// TestS3v2MemOriginMultipleFiles tests uploading and downloading multiple files. +func TestS3v2MemOriginMultipleFiles(t *testing.T) { + t.Cleanup(test_utils.SetupTestLogging(t)) + server_utils.ResetTestState() + defer server_utils.ResetTestState() + + ft := fed_test_utils.NewFedTest(t, s3v2MemOriginConfig) + require.NotNil(t, ft) + + testFiles := map[string]string{ + "alpha.txt": "Content alpha", + "beta.txt": "Content beta", + "gamma.txt": "Content gamma", + } + + localTmpDir := t.TempDir() + testToken := getS3v2Token(t) + + // Upload all files + for name, content := range testFiles { + localFile := filepath.Join(localTmpDir, name) + require.NoError(t, os.WriteFile(localFile, []byte(content), 0644)) + + uploadURL := fmt.Sprintf("pelican://%s:%d/test/%s", + param.Server_Hostname.GetString(), param.Server_WebPort.GetInt(), name) + + results, err := client.DoPut(ft.Ctx, localFile, uploadURL, false, client.WithToken(testToken)) + require.NoError(t, err, "Failed to upload %s", name) + require.NotEmpty(t, results) + } + + // Download and verify all files + for name, expected := range testFiles { + downloadURL := fmt.Sprintf("pelican://%s:%d/test/%s", + param.Server_Hostname.GetString(), param.Server_WebPort.GetInt(), name) + downloadFile := filepath.Join(localTmpDir, "dl_"+name) + + results, err := client.DoGet(ft.Ctx, downloadURL, downloadFile, false, client.WithToken(testToken)) + require.NoError(t, err, "Failed to download %s", name) + require.NotEmpty(t, results) + + got, err := os.ReadFile(downloadFile) + require.NoError(t, err) + assert.Equal(t, expected, string(got), "Content mismatch for %s", name) + } +} + +// TestS3v2MemOriginLargeFile tests transferring a 10 MB file through the federation. +func TestS3v2MemOriginLargeFile(t *testing.T) { + t.Cleanup(test_utils.SetupTestLogging(t)) + server_utils.ResetTestState() + defer server_utils.ResetTestState() + + ft := fed_test_utils.NewFedTest(t, s3v2MemOriginConfig) + require.NotNil(t, ft) + + largeContent := make([]byte, 10*1024*1024) + for i := range largeContent { + largeContent[i] = byte(i % 256) + } + originalHash := fmt.Sprintf("%x", md5.Sum(largeContent)) + + localTmpDir := t.TempDir() + localFile := filepath.Join(localTmpDir, "large.bin") + require.NoError(t, os.WriteFile(localFile, largeContent, 0644)) + + uploadURL := fmt.Sprintf("pelican://%s:%d/test/large.bin", + param.Server_Hostname.GetString(), param.Server_WebPort.GetInt()) + + testToken := getS3v2Token(t) + + uploadResults, err := client.DoPut(ft.Ctx, localFile, uploadURL, false, client.WithToken(testToken)) + require.NoError(t, err) + require.NotEmpty(t, uploadResults) + assert.Equal(t, int64(len(largeContent)), uploadResults[0].TransferredBytes) + + downloadFile := filepath.Join(localTmpDir, "large_dl.bin") + downloadResults, err := client.DoGet(ft.Ctx, uploadURL, downloadFile, false, client.WithToken(testToken)) + require.NoError(t, err) + require.NotEmpty(t, downloadResults) + assert.Equal(t, uploadResults[0].TransferredBytes, downloadResults[0].TransferredBytes) + + got, err := os.ReadFile(downloadFile) + require.NoError(t, err) + gotHash := fmt.Sprintf("%x", md5.Sum(got)) + assert.Equal(t, originalHash, gotHash, "Downloaded file hash should match original") +} + +// TestS3v2MemOriginListing tests directory listing through the federation. +func TestS3v2MemOriginListing(t *testing.T) { + t.Cleanup(test_utils.SetupTestLogging(t)) + server_utils.ResetTestState() + defer server_utils.ResetTestState() + + ft := fed_test_utils.NewFedTest(t, s3v2MemOriginConfig) + require.NotNil(t, ft) + + testToken := getS3v2Token(t) + localTmpDir := t.TempDir() + + // Upload several files to populate the mem backend + files := []string{"a.txt", "b.txt", "c.txt"} + for _, name := range files { + localFile := filepath.Join(localTmpDir, name) + require.NoError(t, os.WriteFile(localFile, []byte("content of "+name), 0644)) + + uploadURL := fmt.Sprintf("pelican://%s:%d/test/%s", + param.Server_Hostname.GetString(), param.Server_WebPort.GetInt(), name) + _, err := client.DoPut(ft.Ctx, localFile, uploadURL, false, client.WithToken(testToken)) + require.NoError(t, err, "Failed to upload %s", name) + } + + // List the /test/ directory + listURL := fmt.Sprintf("pelican://%s:%d/test/", + param.Server_Hostname.GetString(), param.Server_WebPort.GetInt()) + + entries, err := client.DoList(ft.Ctx, listURL, client.WithToken(testToken)) + require.NoError(t, err) + require.NotEmpty(t, entries, "Listing should return entries") + + // Check that our uploaded files appear in the listing + nameSet := make(map[string]bool) + for _, e := range entries { + nameSet[e.Name] = true + } + for _, name := range files { + found := false + for key := range nameSet { + if strings.Contains(key, name) { + found = true + break + } + } + assert.True(t, found, "Listing should contain %s", name) + } +} + +// TestS3v2MemOriginOverwrite tests overwriting an existing file. +func TestS3v2MemOriginOverwrite(t *testing.T) { + t.Cleanup(test_utils.SetupTestLogging(t)) + server_utils.ResetTestState() + defer server_utils.ResetTestState() + + ft := fed_test_utils.NewFedTest(t, s3v2MemOriginConfig) + require.NotNil(t, ft) + + testToken := getS3v2Token(t) + localTmpDir := t.TempDir() + + // Enable client-side overwrites so the second PUT doesn't fail with FileAlreadyExists + require.NoError(t, param.Set(param.Client_EnableOverwrites.GetName(), true)) + defer func() { + require.NoError(t, param.Set(param.Client_EnableOverwrites.GetName(), false)) + }() + + uploadURL := fmt.Sprintf("pelican://%s:%d/test/overwrite.txt", + param.Server_Hostname.GetString(), param.Server_WebPort.GetInt()) + + // First upload + localFile := filepath.Join(localTmpDir, "v1.txt") + require.NoError(t, os.WriteFile(localFile, []byte("version 1"), 0644)) + _, err := client.DoPut(ft.Ctx, localFile, uploadURL, false, client.WithToken(testToken)) + require.NoError(t, err) + + // Overwrite with new content + localFile2 := filepath.Join(localTmpDir, "v2.txt") + require.NoError(t, os.WriteFile(localFile2, []byte("version 2"), 0644)) + _, err = client.DoPut(ft.Ctx, localFile2, uploadURL, false, client.WithToken(testToken)) + require.NoError(t, err) + + // Download and verify we get the latest version + downloadFile := filepath.Join(localTmpDir, "downloaded.txt") + _, err = client.DoGet(ft.Ctx, uploadURL, downloadFile, false, client.WithToken(testToken)) + require.NoError(t, err) + + got, err := os.ReadFile(downloadFile) + require.NoError(t, err) + assert.Equal(t, "version 2", string(got)) +} + +// --------------------------------------------------------------------------- +// Minio-backed federation tests +// --------------------------------------------------------------------------- + +// skipIfNoMinio skips the test if the minio binary is not available on PATH. +func skipIfNoMinio(t *testing.T) { + t.Helper() + if _, err := exec.LookPath("minio"); err != nil { + t.Skip("minio not found on PATH; skipping minio-backed test") + } +} + +// startMinioServer launches a minio server bound to 127.0.0.1:0 (OS-assigned port), +// parses the actual listening port from minio's log output, and returns the +// endpoint URL. The server is killed when the test completes. +func startMinioServer(t *testing.T) (endpoint string) { + t.Helper() + skipIfNoMinio(t) + + dataDir := t.TempDir() + + cmd := exec.Command("minio", "server", + "--address", "127.0.0.1:0", + "--console-address", "127.0.0.2:0", + dataDir, + ) + cmd.Env = append(os.Environ(), + "MINIO_ROOT_USER=minioadmin", + "MINIO_ROOT_PASSWORD=minioadmin", + ) + + logPath := filepath.Join(t.TempDir(), "minio.log") + logFile, err := os.Create(logPath) + require.NoError(t, err) + t.Cleanup(func() { logFile.Close() }) + cmd.Stdout = logFile + cmd.Stderr = logFile + + require.NoError(t, cmd.Start(), "failed to start minio") + t.Cleanup(func() { + cmd.Process.Kill() + cmd.Wait() //nolint:errcheck + }) + + // Minio prints a line like: S3-API: http://127.0.0.1:43219 + apiRe := regexp.MustCompile(`S3-API:\s+(https?://\S+)`) + require.Eventually(t, func() bool { + data, err := os.ReadFile(logPath) + if err != nil { + return false + } + if m := apiRe.FindSubmatch(data); m != nil { + endpoint = string(m[1]) + return true + } + return false + }, 30*time.Second, 200*time.Millisecond, "minio never printed an S3-API endpoint") + + // Pre-create the bucket directory on disk. + require.NoError(t, os.Mkdir(filepath.Join(dataDir, "test-bucket"), 0755)) + + return endpoint +} + +// TestS3v2MinioOriginUploadDownload runs a full Pelican federation backed by +// a real MinIO server. It exercises the complete S3v2 data path: director +// redirect → origin HTTP handler → gocloud.dev/blob/s3blob → MinIO. Skipped +// if minio is not installed. +func TestS3v2MinioOriginUploadDownload(t *testing.T) { + skipIfNoMinio(t) + t.Cleanup(test_utils.SetupTestLogging(t)) + server_utils.ResetTestState() + defer server_utils.ResetTestState() + + minioEndpoint := startMinioServer(t) + + // Write credential files for the origin to read. + credDir := t.TempDir() + akFile := filepath.Join(credDir, "access-key") + skFile := filepath.Join(credDir, "secret-key") + require.NoError(t, os.WriteFile(akFile, []byte("minioadmin"), 0600)) + require.NoError(t, os.WriteFile(skFile, []byte("minioadmin"), 0600)) + + // S3 params must be in the YAML config so they survive NewFedTest's + // config.InitServer → viper.MergeConfig flow and are available when + // GetOriginExports() runs. + originConfig := fmt.Sprintf(` +Origin: + StorageType: s3v2 + S3ServiceUrl: %s + S3Region: us-east-1 + S3Bucket: test-bucket + S3AccessKeyfile: %s + S3SecretKeyfile: %s + Exports: + - FederationPrefix: /test + Capabilities: ["PublicReads", "Writes", "Listings"] +Director: + MinStatResponse: 1 + MaxStatResponse: 1 +`, minioEndpoint, akFile, skFile) + + ft := fed_test_utils.NewFedTest(t, originConfig) + require.NotNil(t, ft) + require.Greater(t, len(ft.Exports), 0) + + testToken := getS3v2Token(t) + localTmpDir := t.TempDir() + + t.Run("UploadAndDownload", func(t *testing.T) { + testContent := "Hello from the MinIO-backed federation test!" + localFile := filepath.Join(localTmpDir, "test_file.txt") + require.NoError(t, os.WriteFile(localFile, []byte(testContent), 0644)) + + uploadURL := fmt.Sprintf("pelican://%s:%d/test/test_file.txt", + param.Server_Hostname.GetString(), param.Server_WebPort.GetInt()) + + uploadResults, err := client.DoPut(ft.Ctx, localFile, uploadURL, false, client.WithToken(testToken)) + require.NoError(t, err) + require.NotEmpty(t, uploadResults) + assert.Greater(t, uploadResults[0].TransferredBytes, int64(0)) + + downloadFile := filepath.Join(localTmpDir, "downloaded.txt") + downloadResults, err := client.DoGet(ft.Ctx, uploadURL, downloadFile, false, client.WithToken(testToken)) + require.NoError(t, err) + require.NotEmpty(t, downloadResults) + assert.Equal(t, uploadResults[0].TransferredBytes, downloadResults[0].TransferredBytes) + + got, err := os.ReadFile(downloadFile) + require.NoError(t, err) + assert.Equal(t, testContent, string(got)) + }) + + t.Run("Stat", func(t *testing.T) { + content := []byte("Stat me via the MinIO federation") + localFile := filepath.Join(localTmpDir, "stat_test.txt") + require.NoError(t, os.WriteFile(localFile, content, 0644)) + + uploadURL := fmt.Sprintf("pelican://%s:%d/test/stat_test.txt", + param.Server_Hostname.GetString(), param.Server_WebPort.GetInt()) + _, err := client.DoPut(ft.Ctx, localFile, uploadURL, false, client.WithToken(testToken)) + require.NoError(t, err) + + statInfo, err := client.DoStat(ft.Ctx, uploadURL, client.WithToken(testToken)) + require.NoError(t, err) + assert.Equal(t, int64(len(content)), statInfo.Size) + }) + + t.Run("LargeFile", func(t *testing.T) { + largeContent := make([]byte, 5*1024*1024) // 5 MB + for i := range largeContent { + largeContent[i] = byte(i % 256) + } + originalHash := fmt.Sprintf("%x", md5.Sum(largeContent)) + + localFile := filepath.Join(localTmpDir, "large.bin") + require.NoError(t, os.WriteFile(localFile, largeContent, 0644)) + + uploadURL := fmt.Sprintf("pelican://%s:%d/test/large.bin", + param.Server_Hostname.GetString(), param.Server_WebPort.GetInt()) + + _, err := client.DoPut(ft.Ctx, localFile, uploadURL, false, client.WithToken(testToken)) + require.NoError(t, err) + + downloadFile := filepath.Join(localTmpDir, "large_dl.bin") + _, err = client.DoGet(ft.Ctx, uploadURL, downloadFile, false, client.WithToken(testToken)) + require.NoError(t, err) + + got, err := os.ReadFile(downloadFile) + require.NoError(t, err) + gotHash := fmt.Sprintf("%x", md5.Sum(got)) + assert.Equal(t, originalHash, gotHash) + }) + + t.Run("Listing", func(t *testing.T) { + for _, name := range []string{"list_a.txt", "list_b.txt"} { + localFile := filepath.Join(localTmpDir, name) + require.NoError(t, os.WriteFile(localFile, []byte("content of "+name), 0644)) + + uploadURL := fmt.Sprintf("pelican://%s:%d/test/%s", + param.Server_Hostname.GetString(), param.Server_WebPort.GetInt(), name) + _, err := client.DoPut(ft.Ctx, localFile, uploadURL, false, client.WithToken(testToken)) + require.NoError(t, err) + } + + listURL := fmt.Sprintf("pelican://%s:%d/test/", + param.Server_Hostname.GetString(), param.Server_WebPort.GetInt()) + entries, err := client.DoList(ft.Ctx, listURL, client.WithToken(testToken)) + require.NoError(t, err) + require.NotEmpty(t, entries) + + nameSet := make(map[string]bool) + for _, e := range entries { + nameSet[e.Name] = true + } + for _, name := range []string{"list_a.txt", "list_b.txt"} { + found := false + for key := range nameSet { + if strings.Contains(key, name) { + found = true + break + } + } + assert.True(t, found, "Listing should contain %s", name) + } + }) + + t.Run("Overwrite", func(t *testing.T) { + // Enable client-side overwrites so the second PUT doesn't fail with FileAlreadyExists + require.NoError(t, param.Set(param.Client_EnableOverwrites.GetName(), true)) + defer func() { + require.NoError(t, param.Set(param.Client_EnableOverwrites.GetName(), false)) + }() + + uploadURL := fmt.Sprintf("pelican://%s:%d/test/overwrite_minio.txt", + param.Server_Hostname.GetString(), param.Server_WebPort.GetInt()) + + v1 := filepath.Join(localTmpDir, "v1.txt") + require.NoError(t, os.WriteFile(v1, []byte("version 1"), 0644)) + _, err := client.DoPut(ft.Ctx, v1, uploadURL, false, client.WithToken(testToken)) + require.NoError(t, err) + + v2 := filepath.Join(localTmpDir, "v2.txt") + require.NoError(t, os.WriteFile(v2, []byte("version 2"), 0644)) + _, err = client.DoPut(ft.Ctx, v2, uploadURL, false, client.WithToken(testToken)) + require.NoError(t, err) + + downloadFile := filepath.Join(localTmpDir, "overwrite_dl.txt") + _, err = client.DoGet(ft.Ctx, uploadURL, downloadFile, false, client.WithToken(testToken)) + require.NoError(t, err) + + got, err := os.ReadFile(downloadFile) + require.NoError(t, err) + assert.Equal(t, "version 2", string(got)) + }) +} diff --git a/go.mod b/go.mod index 636b797161..95945c6872 100644 --- a/go.mod +++ b/go.mod @@ -57,11 +57,12 @@ require ( github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a github.com/zsais/go-gin-prometheus v0.1.0 go.uber.org/atomic v1.11.0 + gocloud.dev v0.45.0 golang.org/x/crypto v0.46.0 golang.org/x/mod v0.30.0 golang.org/x/net v0.48.0 golang.org/x/oauth2 v0.34.0 - golang.org/x/sys v0.39.0 + golang.org/x/sys v0.40.0 golang.org/x/term v0.38.0 gopkg.in/yaml.v3 v3.0.1 gorm.io/gorm v1.25.7 @@ -69,19 +70,57 @@ require ( ) require ( + cel.dev/expr v0.25.1 // indirect + cloud.google.com/go v0.123.0 // indirect + cloud.google.com/go/auth v0.17.0 // indirect + cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect + cloud.google.com/go/compute/metadata v0.9.0 // indirect + cloud.google.com/go/iam v1.5.3 // indirect + cloud.google.com/go/monitoring v1.24.3 // indirect + cloud.google.com/go/storage v1.57.2 // indirect + github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.6.3 // indirect + github.com/Azure/go-autorest v14.2.0+incompatible // indirect + github.com/Azure/go-autorest/autorest/to v0.4.1 // indirect + github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.30.0 // indirect + github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.54.0 // indirect + github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.54.0 // indirect github.com/alecthomas/chroma/v2 v2.14.0 // indirect + github.com/aws/aws-sdk-go-v2 v1.41.3 // indirect + github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.6 // indirect + github.com/aws/aws-sdk-go-v2/config v1.32.2 // indirect + github.com/aws/aws-sdk-go-v2/credentials v1.19.11 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.19 // indirect + github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.20.12 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.19 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.19 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 // indirect + github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.20 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.6 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.11 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.19 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.19 // indirect + github.com/aws/aws-sdk-go-v2/service/s3 v1.96.4 // indirect + github.com/aws/aws-sdk-go-v2/service/signin v1.0.7 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.30.12 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.16 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.41.8 // indirect + github.com/aws/smithy-go v1.24.2 // indirect github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect github.com/aymerick/douceur v0.2.0 // indirect github.com/bits-and-blooms/bitset v1.12.0 // indirect github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/charmbracelet/lipgloss v0.12.1 // indirect github.com/charmbracelet/x/ansi v0.1.4 // indirect + github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.6 // indirect github.com/cristalhq/jwt/v4 v4.0.2 // indirect github.com/dgraph-io/ristretto v1.0.0 // indirect github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect github.com/dlclark/regexp2 v1.11.0 // indirect + github.com/envoyproxy/go-control-plane/envoy v1.36.0 // indirect + github.com/envoyproxy/protoc-gen-validate v1.3.0 // indirect github.com/glebarez/go-sqlite v1.21.2 // indirect + github.com/go-jose/go-jose/v4 v4.1.3 // indirect github.com/go-stomp/stomp/v3 v3.0.3 // indirect github.com/go-viper/mapstructure/v2 v2.2.1 // indirect github.com/gobuffalo/pop/v6 v6.1.1 // indirect @@ -89,11 +128,15 @@ require ( github.com/golang-jwt/jwt/v5 v5.3.0 // indirect github.com/golang/mock v1.6.0 // indirect github.com/google/flatbuffers v25.2.10+incompatible // indirect + github.com/google/s2a-go v0.1.9 // indirect + github.com/google/wire v0.7.0 // indirect + github.com/googleapis/enterprise-certificate-proxy v0.3.7 // indirect + github.com/googleapis/gax-go/v2 v2.15.0 // indirect github.com/gorilla/context v1.1.1 // indirect github.com/gorilla/css v1.0.1 // indirect github.com/gorilla/securecookie v1.1.2 // indirect github.com/gorilla/sessions v1.2.1 // indirect - github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.1 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.3 // indirect github.com/hashicorp/go-cleanhttp v0.5.2 // indirect github.com/hashicorp/go-retryablehttp v0.7.7 // indirect github.com/jinzhu/inflection v1.0.0 // indirect @@ -113,12 +156,14 @@ require ( github.com/ory/go-convenience v0.1.0 // indirect github.com/ory/x v0.0.665 // indirect github.com/oschwald/maxminddb-golang/v2 v2.0.0-beta.9 // indirect + github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect github.com/redis/go-redis/v9 v9.0.2 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/sagikazarmark/locafero v0.6.0 // indirect github.com/seatgeek/logrus-gelf-formatter v0.0.0-20210414080842-5b05eb8ff761 // indirect github.com/sethvargo/go-retry v0.2.4 // indirect github.com/sourcegraph/conc v0.3.0 // indirect + github.com/spiffe/go-spiffe/v2 v2.6.0 // indirect github.com/streadway/amqp v1.0.0 // indirect github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect github.com/yuin/goldmark v1.7.13 // indirect @@ -126,19 +171,25 @@ require ( go.opentelemetry.io/auto/sdk v1.2.1 // indirect go.opentelemetry.io/collector/pdata v1.0.0-rcv0016 // indirect go.opentelemetry.io/collector/semconv v0.87.0 // indirect + go.opentelemetry.io/contrib/detectors/gcp v1.39.0 // indirect + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.46.1 // indirect go.opentelemetry.io/contrib/propagators/b3 v1.21.0 // indirect go.opentelemetry.io/contrib/propagators/jaeger v1.21.1 // indirect go.opentelemetry.io/contrib/samplers/jaegerremote v0.15.1 // indirect go.opentelemetry.io/otel/exporters/jaeger v1.17.0 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.21.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.21.0 // indirect go.opentelemetry.io/otel/exporters/zipkin v1.21.0 // indirect - go.opentelemetry.io/otel/sdk v1.39.0 // indirect - go.opentelemetry.io/proto/otlp v1.0.0 // indirect + go.opentelemetry.io/otel/sdk v1.40.0 // indirect + go.opentelemetry.io/otel/sdk/metric v1.40.0 // indirect + go.opentelemetry.io/proto/otlp v1.9.0 // indirect go.uber.org/multierr v1.11.0 // indirect golang.org/x/tools v0.39.0 // indirect golang.org/x/tools/godoc v0.1.0-deprecated // indirect + golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect + google.golang.org/api v0.256.0 // indirect + google.golang.org/genproto v0.0.0-20251124214823-79d6a2a48846 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 // indirect google.golang.org/grpc v1.79.3 // indirect @@ -146,15 +197,15 @@ require ( ) require ( - github.com/Azure/azure-sdk-for-go/sdk/azcore v1.11.1 // indirect - github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.6.0 // indirect - github.com/Azure/azure-sdk-for-go/sdk/internal v1.8.0 // indirect - github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 // indirect + github.com/Azure/azure-sdk-for-go/sdk/azcore v1.20.0 // indirect + github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.1 // indirect + github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2 // indirect + github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0 // indirect github.com/GehirnInc/crypt v0.0.0-20200316065508-bb7000b8a962 // indirect github.com/VividCortex/ewma v1.2.0 github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d // indirect github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect - github.com/aws/aws-sdk-go v1.45.25 // indirect + github.com/aws/aws-sdk-go v1.55.8 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/bytedance/sonic v1.9.1 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect @@ -166,7 +217,7 @@ require ( github.com/dustin/go-humanize v1.0.1 // indirect github.com/edsrzf/mmap-go v1.1.0 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect - github.com/fsnotify/fsnotify v1.7.0 + github.com/fsnotify/fsnotify v1.9.0 github.com/gabriel-vasile/mimetype v1.4.2 // indirect github.com/gin-contrib/sessions v0.0.5 github.com/gin-contrib/sse v0.1.0 // indirect @@ -217,7 +268,7 @@ require ( github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/prometheus/alertmanager v0.26.0 // indirect - github.com/prometheus/client_model v0.5.0 // indirect + github.com/prometheus/client_model v0.6.2 // indirect github.com/prometheus/common/assets v0.2.0 // indirect github.com/prometheus/common/sigv4 v0.1.0 // indirect github.com/prometheus/exporter-toolkit v0.10.0 // indirect @@ -232,16 +283,16 @@ require ( github.com/twitchyliquid64/golang-asm v0.15.1 // indirect github.com/ugorji/go/codec v1.2.11 // indirect go.mongodb.org/mongo-driver v1.12.0 // indirect - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 // indirect - go.opentelemetry.io/otel v1.39.0 // indirect - go.opentelemetry.io/otel/metric v1.39.0 // indirect - go.opentelemetry.io/otel/trace v1.39.0 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 // indirect + go.opentelemetry.io/otel v1.40.0 // indirect + go.opentelemetry.io/otel/metric v1.40.0 // indirect + go.opentelemetry.io/otel/trace v1.40.0 // indirect go.uber.org/goleak v1.3.0 golang.org/x/arch v0.3.0 // indirect golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 golang.org/x/sync v0.19.0 golang.org/x/text v0.32.0 - golang.org/x/time v0.8.0 + golang.org/x/time v0.14.0 google.golang.org/protobuf v1.36.10 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect kernel.org/pub/linux/libs/security/libcap/psx v1.2.69 // indirect diff --git a/go.sum b/go.sum index ba0e03b45b..0201a9e37f 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,5 @@ +cel.dev/expr v0.25.1 h1:1KrZg61W6TWSxuNZ37Xy49ps13NUovb66QLprthtwi4= +cel.dev/expr v0.25.1/go.mod h1:hrXvqGP6G6gyx8UAHSHJ5RGk//1Oj5nXQ2NI02Nrsg4= cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= @@ -13,14 +15,30 @@ cloud.google.com/go v0.56.0/go.mod h1:jr7tqZxxKOVYizybht9+26Z/gUq7tiRzu+ACVAMbKV cloud.google.com/go v0.57.0/go.mod h1:oXiQ6Rzq3RAkkY7N6t3TcE6jE+CIBBbA36lwQ1JyzZs= cloud.google.com/go v0.62.0/go.mod h1:jmCYTdRCQuc1PHIIJ/maLInMho30T/Y0M4hTdTShOYc= cloud.google.com/go v0.65.0/go.mod h1:O5N8zS7uWy9vkA9vayVHs65eM1ubvY4h553ofrNHObY= +cloud.google.com/go v0.123.0 h1:2NAUJwPR47q+E35uaJeYoNhuNEM9kM8SjgRgdeOJUSE= +cloud.google.com/go v0.123.0/go.mod h1:xBoMV08QcqUGuPW65Qfm1o9Y4zKZBpGS+7bImXLTAZU= +cloud.google.com/go/auth v0.17.0 h1:74yCm7hCj2rUyyAocqnFzsAYXgJhrG26XCFimrc/Kz4= +cloud.google.com/go/auth v0.17.0/go.mod h1:6wv/t5/6rOPAX4fJiRjKkJCvswLwdet7G8+UGXt7nCQ= +cloud.google.com/go/auth/oauth2adapt v0.2.8 h1:keo8NaayQZ6wimpNSmW5OPc283g65QNIiLpZnkHRbnc= +cloud.google.com/go/auth/oauth2adapt v0.2.8/go.mod h1:XQ9y31RkqZCcwJWNSx2Xvric3RrU88hAYYbjDWYDL+c= cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg= cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc= cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ= +cloud.google.com/go/compute/metadata v0.9.0 h1:pDUj4QMoPejqq20dK0Pg2N4yG9zIkYGdBtwLoEkH9Zs= +cloud.google.com/go/compute/metadata v0.9.0/go.mod h1:E0bWwX5wTnLPedCKqk3pJmVgCBSM6qQI1yTBdEb3C10= cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= +cloud.google.com/go/iam v1.5.3 h1:+vMINPiDF2ognBJ97ABAYYwRgsaqxPbQDlMnbHMjolc= +cloud.google.com/go/iam v1.5.3/go.mod h1:MR3v9oLkZCTlaqljW6Eb2d3HGDGK5/bDv93jhfISFvU= +cloud.google.com/go/logging v1.13.1 h1:O7LvmO0kGLaHY/gq8cV7T0dyp6zJhYAOtZPX4TF3QtY= +cloud.google.com/go/logging v1.13.1/go.mod h1:XAQkfkMBxQRjQek96WLPNze7vsOmay9H5PqfsNYDqvw= +cloud.google.com/go/longrunning v0.7.0 h1:FV0+SYF1RIj59gyoWDRi45GiYUMM3K1qO51qoboQT1E= +cloud.google.com/go/longrunning v0.7.0/go.mod h1:ySn2yXmjbK9Ba0zsQqunhDkYi0+9rlXIwnoAf+h+TPY= +cloud.google.com/go/monitoring v1.24.3 h1:dde+gMNc0UhPZD1Azu6at2e79bfdztVDS5lvhOdsgaE= +cloud.google.com/go/monitoring v1.24.3/go.mod h1:nYP6W0tm3N9H/bOw8am7t62YTzZY+zUeQ+Bi6+2eonI= cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw= cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA= @@ -30,26 +48,42 @@ cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0Zeo cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk= cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs= cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0= +cloud.google.com/go/storage v1.57.2 h1:sVlym3cHGYhrp6XZKkKb+92I1V42ks2qKKpB0CF5Mb4= +cloud.google.com/go/storage v1.57.2/go.mod h1:n5ijg4yiRXXpCu0sJTD6k+eMf7GRrJmPyr9YxLXGHOk= +cloud.google.com/go/trace v1.11.7 h1:kDNDX8JkaAG3R2nq1lIdkb7FCSi1rCmsEtKVsty7p+U= +cloud.google.com/go/trace v1.11.7/go.mod h1:TNn9d5V3fQVf6s4SCveVMIBS2LJUqo73GACmq/Tky0s= dario.cat/mergo v1.0.0 h1:AGCNq9Evsj31mOgNPcLyXc+4PNABt905YmuqPYYpBWk= dario.cat/mergo v1.0.0/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= -filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA= -filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= -github.com/Azure/azure-sdk-for-go/sdk/azcore v1.11.1 h1:E+OJmp2tPvt1W+amx48v1eqbjDYsgN+RzP4q16yV5eM= -github.com/Azure/azure-sdk-for-go/sdk/azcore v1.11.1/go.mod h1:a6xsAQUZg+VsS3TJ05SRp524Hs4pZ/AeFSr5ENf0Yjo= -github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.6.0 h1:U2rTu3Ef+7w9FHKIAXM6ZyqF3UOWJZ12zIm8zECAFfg= -github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.6.0/go.mod h1:9kIvujWAA58nmPmWB1m23fyWic1kYZMxD9CxaWn4Qpg= -github.com/Azure/azure-sdk-for-go/sdk/internal v1.8.0 h1:jBQA3cKT4L2rWMpgE7Yt3Hwh2aUj8KXjIGLxjHeYNNo= -github.com/Azure/azure-sdk-for-go/sdk/internal v1.8.0/go.mod h1:4OG6tQ9EOP/MT0NMjDlRzWoVFxfu9rN9B2X+tlSVktg= +filippo.io/edwards25519 v1.1.1 h1:YpjwWWlNmGIDyXOn8zLzqiD+9TyIlPhGFG96P39uBpw= +filippo.io/edwards25519 v1.1.1/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= +github.com/Azure/azure-sdk-for-go/sdk/azcore v1.20.0 h1:JXg2dwJUmPB9JmtVmdEB16APJ7jurfbY5jnfXpJoRMc= +github.com/Azure/azure-sdk-for-go/sdk/azcore v1.20.0/go.mod h1:YD5h/ldMsG0XiIw7PdyNhLxaM317eFh5yNLccNfGdyw= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.1 h1:Hk5QBxZQC1jb2Fwj6mpzme37xbCDdNTxU7O9eb5+LB4= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.1/go.mod h1:IYus9qsFobWIc2YVwe/WPjcnyCkPKtnHAqUYeebc8z0= +github.com/Azure/azure-sdk-for-go/sdk/azidentity/cache v0.3.2 h1:yz1bePFlP5Vws5+8ez6T3HWXPmwOK7Yvq8QxDBD3SKY= +github.com/Azure/azure-sdk-for-go/sdk/azidentity/cache v0.3.2/go.mod h1:Pa9ZNPuoNu/GztvBSKk9J1cDJW6vk/n0zLtV4mgd8N8= +github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2 h1:9iefClla7iYpfYWdzPCRDozdmndjTm8DXdpCzPajMgA= +github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2/go.mod h1:XtLgD3ZD34DAaVIIAyG3objl5DynM3CQ/vMcbBNJZGI= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v4 v4.2.1 h1:UPeCRD+XY7QlaGQte2EVI2iOcWvUYA2XY8w5T/8v0NQ= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v4 v4.2.1/go.mod h1:oGV6NlB0cvi1ZbYRR2UN44QHxWFyGk+iylgD0qaMXjA= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork v1.1.0 h1:QM6sE5k2ZT/vI5BEe0r7mqjsUSnhVBFbOsVkEuaEfiA= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v2 v2.2.1 h1:bWh0Z2rOEDfB/ywv/l0iHN1JgyazE6kW/aIA89+CEK0= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v2 v2.2.1/go.mod h1:Bzf34hhAE9NSxailk8xVeLEZbUjOXcC+GnU1mMKdhLw= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/storage/armstorage v1.8.1 h1:/Zt+cDPnpC3OVDm/JKLOs7M2DKmLRIIp3XIx9pHHiig= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/storage/armstorage v1.8.1/go.mod h1:Ng3urmn6dYe8gnbCMoHHVl5APYz2txho3koEkV2o2HA= +github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.6.3 h1:ZJJNFaQ86GVKQ9ehwqyAFE6pIfyicpuJ8IkVaPBc6/4= +github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.6.3/go.mod h1:URuDvhmATVKqHBH9/0nOiNKk0+YcwfQ3WkK5PqHKxc8= github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0= github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= -github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 h1:XHOnouVk1mxXfQidrMEnLlPk9UMeRtyBTnEFtxkV0kU= -github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2/go.mod h1:wP83P5OoQ5p6ip3ScPr0BAq0BvuPAvacpEuSzyouqAI= +github.com/Azure/go-autorest v14.2.0+incompatible h1:V5VMDjClD3GiElqLWO7mz2MxNAK/vTfRHdAubSIPRgs= +github.com/Azure/go-autorest v14.2.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24= +github.com/Azure/go-autorest/autorest/to v0.4.1 h1:CxNHBqdzTr7rLtdrtb5CMjJcDut+WNGCVv7OmS5+lTc= +github.com/Azure/go-autorest/autorest/to v0.4.1/go.mod h1:EtaofgU4zmtvn1zT2ARsjRFdq9vXx0YWtmElwL+GZ9M= +github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1 h1:WJTmL004Abzc5wDB5VtZG2PJk5ndYDgVacGqfirKxjM= +github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1/go.mod h1:tCcJZ0uHAmvjsVYzEFivsRTN00oz5BEsRgQHu5JZ9WE= +github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0 h1:XRzhVemXdgvJqCH0sFfrBUTnUJSBrBf7++ypk+twtRs= +github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0/go.mod h1:HKpQxkWaGLJ+D/5H8QRpyQXA1eKjxkFlOMwck5+33Jk= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/ClickHouse/ch-go v0.58.2 h1:jSm2szHbT9MCAB1rJ3WuCJqmGLi5UTjlNu+f530UTS0= @@ -58,6 +92,14 @@ github.com/ClickHouse/clickhouse-go/v2 v2.17.1 h1:ZCmAYWpu75IyEi7+Yrs/uaAjiCGY5w github.com/ClickHouse/clickhouse-go/v2 v2.17.1/go.mod h1:rkGTvFDTLqLIm0ma+13xmcCfr/08Gvs7KmFt1tgiWHQ= github.com/GehirnInc/crypt v0.0.0-20200316065508-bb7000b8a962 h1:KeNholpO2xKjgaaSyd+DyQRrsQjhbSeS7qe4nEw8aQw= github.com/GehirnInc/crypt v0.0.0-20200316065508-bb7000b8a962/go.mod h1:kC29dT1vFpj7py2OvG1khBdQpo3kInWP+6QipLbdngo= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.30.0 h1:sBEjpZlNHzK1voKq9695PJSX2o5NEXl7/OL3coiIY0c= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.30.0/go.mod h1:P4WPRUkOhJC13W//jWpyfJNDAIpvRbAUIYLX/4jtlE0= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.54.0 h1:lhhYARPUu3LmHysQ/igznQphfzynnqI3D75oUyw1HXk= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.54.0/go.mod h1:l9rva3ApbBpEJxSNYnwT9N4CDLrWgtq3u8736C5hyJw= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.54.0 h1:xfK3bbi6F2RDtaZFtUdKO3osOBIhNb+xTs8lFW6yx9o= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.54.0/go.mod h1:vB2GH9GAYYJTO3mEn8oYwzEdhlayZIdQz6zdzgUIRvA= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.54.0 h1:s0WlVbf9qpvkh1c/uDAPElam0WrL7fHRIidgZJ7UqZI= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.54.0/go.mod h1:Mf6O40IAyB9zR/1J8nGDDPirZQQPbYJni8Yisy7NTMc= github.com/JGLTechnologies/gin-rate-limit v1.5.4 h1:1hIaXIdGM9MZFZlXgjWJLpxaK0WHEa5MeloK49nmQsc= github.com/JGLTechnologies/gin-rate-limit v1.5.4/go.mod h1:mGEhNzlHEg/Tk+KH/mKylZLTfDjACnx7MVYaAlj07eU= github.com/Masterminds/semver/v3 v3.1.1/go.mod h1:VPu/7SZ7ePZ3QOrcuXROw5FAcLl4a0cBrbBpGY/8hQs= @@ -98,8 +140,48 @@ github.com/asaskevich/govalidator v0.0.0-20200907205600-7a23bdc65eef/go.mod h1:W github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3dyBCFEj5IhUbnKptjxatkF07cF2ak3yi77so= github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw= github.com/aws/aws-sdk-go v1.38.35/go.mod h1:hcU610XS61/+aQV88ixoOzUoG7v3b31pl2zKMmprdro= -github.com/aws/aws-sdk-go v1.45.25 h1:c4fLlh5sLdK2DCRTY1z0hyuJZU4ygxX8m1FswL6/nF4= -github.com/aws/aws-sdk-go v1.45.25/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= +github.com/aws/aws-sdk-go v1.55.8 h1:JRmEUbU52aJQZ2AjX4q4Wu7t4uZjOu71uyNmaWlUkJQ= +github.com/aws/aws-sdk-go v1.55.8/go.mod h1:ZkViS9AqA6otK+JBBNH2++sx1sgxrPKcSzPPvQkUtXk= +github.com/aws/aws-sdk-go-v2 v1.41.3 h1:4kQ/fa22KjDt13QCy1+bYADvdgcxpfH18f0zP542kZA= +github.com/aws/aws-sdk-go-v2 v1.41.3/go.mod h1:mwsPRE8ceUUpiTgF7QmQIJ7lgsKUPQOUl3o72QBrE1o= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.6 h1:N4lRUXZpZ1KVEUn6hxtco/1d2lgYhNn1fHkkl8WhlyQ= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.6/go.mod h1:lyw7GFp3qENLh7kwzf7iMzAxDn+NzjXEAGjKS2UOKqI= +github.com/aws/aws-sdk-go-v2/config v1.32.2 h1:4liUsdEpUUPZs5WVapsJLx5NPmQhQdez7nYFcovrytk= +github.com/aws/aws-sdk-go-v2/config v1.32.2/go.mod h1:l0hs06IFz1eCT+jTacU/qZtC33nvcnLADAPL/XyrkZI= +github.com/aws/aws-sdk-go-v2/credentials v1.19.11 h1:NdV8cwCcAXrCWyxArt58BrvZJ9pZ9Fhf9w6Uh5W3Uyc= +github.com/aws/aws-sdk-go-v2/credentials v1.19.11/go.mod h1:30yY2zqkMPdrvxBqzI9xQCM+WrlrZKSOpSJEsylVU+8= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.19 h1:INUvJxmhdEbVulJYHI061k4TVuS3jzzthNvjqvVvTKM= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.19/go.mod h1:FpZN2QISLdEBWkayloda+sZjVJL+e9Gl0k1SyTgcswU= +github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.20.12 h1:Zy6Tme1AA13kX8x3CnkHx5cqdGWGaj/anwOiWGnA0Xo= +github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.20.12/go.mod h1:ql4uXYKoTM9WUAUSmthY4AtPVrlTBZOvnBJTiCUdPxI= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.19 h1:/sECfyq2JTifMI2JPyZ4bdRN77zJmr6SrS1eL3augIA= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.19/go.mod h1:dMf8A5oAqr9/oxOfLkC/c2LU/uMcALP0Rgn2BD5LWn0= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.19 h1:AWeJMk33GTBf6J20XJe6qZoRSJo0WfUhsMdUKhoODXE= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.19/go.mod h1:+GWrYoaAsV7/4pNHpwh1kiNLXkKaSoppxQq9lbH8Ejw= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 h1:WKuaxf++XKWlHWu9ECbMlha8WOEGm0OUEZqm4K/Gcfk= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4/go.mod h1:ZWy7j6v1vWGmPReu0iSGvRiise4YI5SkR3OHKTZ6Wuc= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.20 h1:qi3e/dmpdONhj1RyIZdi6DKKpDXS5Lb8ftr3p7cyHJc= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.20/go.mod h1:V1K+TeJVD5JOk3D9e5tsX2KUdL7BlB+FV6cBhdobN8c= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.6 h1:XAq62tBTJP/85lFD5oqOOe7YYgWxY9LvWq8plyDvDVg= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.6/go.mod h1:x0nZssQ3qZSnIcePWLvcoFisRXJzcTVvYpAAdYX8+GI= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.11 h1:BYf7XNsJMzl4mObARUBUib+j2tf0U//JAAtTnYqvqCw= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.11/go.mod h1:aEUS4WrNk/+FxkBZZa7tVgp4pGH+kFGW40Y8rCPqt5g= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.19 h1:X1Tow7suZk9UCJHE1Iw9GMZJJl0dAnKXXP1NaSDHwmw= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.19/go.mod h1:/rARO8psX+4sfjUQXp5LLifjUt8DuATZ31WptNJTyQA= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.19 h1:JnQeStZvPHFHeyky/7LbMlyQjUa+jIBj36OlWm0pzIk= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.19/go.mod h1:HGyasyHvYdFQeJhvDHfH7HXkHh57htcJGKDZ+7z+I24= +github.com/aws/aws-sdk-go-v2/service/s3 v1.96.4 h1:4ExZyubQ6LQQVuF2Qp9OsfEvsTdAWh5Gfwf6PgIdLdk= +github.com/aws/aws-sdk-go-v2/service/s3 v1.96.4/go.mod h1:NF3JcMGOiARAss1ld3WGORCw71+4ExDD2cbbdKS5PpA= +github.com/aws/aws-sdk-go-v2/service/signin v1.0.7 h1:Y2cAXlClHsXkkOvWZFXATr34b0hxxloeQu/pAZz2row= +github.com/aws/aws-sdk-go-v2/service/signin v1.0.7/go.mod h1:idzZ7gmDeqeNrSPkdbtMp9qWMgcBwykA7P7Rzh5DXVU= +github.com/aws/aws-sdk-go-v2/service/sso v1.30.12 h1:iSsvB9EtQ09YrsmIc44Heqlx5ByGErqhPK1ZQLppias= +github.com/aws/aws-sdk-go-v2/service/sso v1.30.12/go.mod h1:fEWYKTRGoZNl8tZ77i61/ccwOMJdGxwOhWCkp6TXAr0= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.16 h1:EnUdUqRP1CNzt2DkV67tJx6XDN4xlfBFm+bzeNOQVb0= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.16/go.mod h1:Jic/xv0Rq/pFNCh3WwpH4BEqdbSAl+IyHro8LbibHD8= +github.com/aws/aws-sdk-go-v2/service/sts v1.41.8 h1:XQTQTF75vnug2TXS8m7CVJfC2nniYPZnO1D4Np761Oo= +github.com/aws/aws-sdk-go-v2/service/sts v1.41.8/go.mod h1:Xgx+PR1NUOjNmQY+tRMnouRp83JRM8pRMw/vCaVhPkI= +github.com/aws/smithy-go v1.24.2 h1:FzA3bu/nt/vDvmnkg+R8Xl46gmzEDam6mZ1hzmwXFng= +github.com/aws/smithy-go v1.24.2/go.mod h1:YE2RhdIuDbA5E5bTdciG9KrW3+TiEONeUWCqxX9i1Fc= github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k= github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8= github.com/aymanbagabas/go-udiff v0.2.0 h1:TK0fH4MteXUDspT88n8CKzvK0X9O2xu9yQjWpi6yML8= @@ -209,8 +291,11 @@ github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymF github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= github.com/envoyproxy/go-control-plane v0.14.0 h1:hbG2kr4RuFj222B6+7T83thSPqLjwBIfQawTkC++2HA= +github.com/envoyproxy/go-control-plane v0.14.0/go.mod h1:NcS5X47pLl/hfqxU70yPwL9ZMkUlwlKxtAohpi2wBEU= github.com/envoyproxy/go-control-plane/envoy v1.36.0 h1:yg/JjO5E7ubRyKX3m07GF3reDNEnfOboJ0QySbH736g= github.com/envoyproxy/go-control-plane/envoy v1.36.0/go.mod h1:ty89S1YCCVruQAm9OtKeEkQLTb+Lkz0k8v9W0Oxsv98= +github.com/envoyproxy/go-control-plane/ratelimit v0.1.0 h1:/G9QYbddjL25KvtKTv3an9lx6VBE2cnb8wp1vEGNYGI= +github.com/envoyproxy/go-control-plane/ratelimit v0.1.0/go.mod h1:Wk+tMFAFbCXaJPzVVHnPgRKdUdwW/KdbRt94AzgRee4= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/envoyproxy/protoc-gen-validate v1.3.0 h1:TvGH1wof4H33rezVKWSpqKz5NXWg5VPuZ0uONDT6eb4= github.com/envoyproxy/protoc-gen-validate v1.3.0/go.mod h1:HvYl7zwPa5mffgyeTUHA9zHIH36nmrm7oCbo4YKoSWA= @@ -222,8 +307,8 @@ github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2 github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= -github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= -github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= +github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= +github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU= github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA= github.com/gin-contrib/sessions v0.0.5 h1:CATtfHmLMQrMNpJRgzjWXD7worTh7g7ritsQfmF+0jE= @@ -247,6 +332,8 @@ github.com/go-ini/ini v1.67.0 h1:z6ZrTEZqSWOTyH2FlglNbNgARyHG8oLW9gMELqKr06A= github.com/go-ini/ini v1.67.0/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8= github.com/go-jose/go-jose/v3 v3.0.3 h1:fFKWeig/irsp7XD2zBxvnmA/XaRWp5V3CBsZXJF7G7k= github.com/go-jose/go-jose/v3 v3.0.3/go.mod h1:5b+7YgP7ZICgJDBdfjZaIt+H/9L9T/YQrVfLAMboGkQ= +github.com/go-jose/go-jose/v4 v4.1.3 h1:CVLmWDhDVRa6Mi/IgCgaopNosCaHz7zrMeF9MlZRkrs= +github.com/go-jose/go-jose/v4 v4.1.3/go.mod h1:x4oUasVrzR7071A4TnHLGSPpNOm2a21K9Kf04k1rs08= github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= @@ -311,8 +398,8 @@ github.com/go-resty/resty/v2 v2.7.0 h1:me+K9p3uhSmXtrBZ4k9jcEAfJmuC8IivWHwaLZwPr github.com/go-resty/resty/v2 v2.7.0/go.mod h1:9PWDzw47qPphMRFfhsyk0NnSgvluHcljSMVIq3w7q0I= github.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= github.com/go-sql-driver/mysql v1.7.0/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI= -github.com/go-sql-driver/mysql v1.8.1 h1:LedoTUt/eveggdHS9qUFC1EFSa8bU2+1pZjSRpvNJ1Y= -github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg= +github.com/go-sql-driver/mysql v1.9.3 h1:U/N249h2WzJ3Ukj8SowVFjdtZKfu9vlLZxjPXV1aweo= +github.com/go-sql-driver/mysql v1.9.3/go.mod h1:qn46aNg1333BRMNU69Lq93t8du/dwxI64Gl8i5p1WMU= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/go-stomp/stomp/v3 v3.0.3 h1:7YQGJCDMkbA05Rw8dS00LxwU1mhzEHS69gMlPjMZGDk= github.com/go-stomp/stomp/v3 v3.0.3/go.mod h1:jTrybHBK20jPdM9iyh65m6GusX6aMf7atfEFZ1nIcgc= @@ -433,11 +520,18 @@ github.com/google/go-p11-kit v0.4.0 h1:2HCRptPun8gkfOJH6u8goMjCcGESuJpMx2ugozLti github.com/google/go-p11-kit v0.4.0/go.mod h1:tg3TK33e/pkQUoXAun7q1zD5VwmYcN20sPaMl4l3hJo= github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8= github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU= +github.com/google/go-replayers/grpcreplay v1.3.0 h1:1Keyy0m1sIpqstQmgz307zhiJ1pV4uIlFds5weTmxbo= +github.com/google/go-replayers/grpcreplay v1.3.0/go.mod h1:v6NgKtkijC0d3e3RW8il6Sy5sqRVUwoQa4mHOGEy8DI= +github.com/google/go-replayers/httpreplay v1.2.0 h1:VM1wEyyjaoU53BwrOnaf9VhAyQQEEioJvFYxYcLRKzk= +github.com/google/go-replayers/httpreplay v1.2.0/go.mod h1:WahEFFZZ7a1P4VM1qEeHy+tME4bwyqPcwWbNlUI1Mcg= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/martian v2.1.0+incompatible h1:/CP5g8u/VJHijgedC/Legn3BAbAaWPgecwXBIDzw5no= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= +github.com/google/martian/v3 v3.3.3 h1:DIhPTQrbPkgs2yJYdXU/eNACCG5DVQjySNRNlflZ9Fc= +github.com/google/martian/v3 v3.3.3/go.mod h1:iEPrYcgCF7jA9OtScMFQyAlZZ4YXTKEtJ1E6RWzmBA0= github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= @@ -448,13 +542,21 @@ github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hf github.com/google/pprof v0.0.0-20230926050212-f7f687d19a98 h1:pUa4ghanp6q4IJHwE9RwLgmVFfReJN+KbQ8ExNEUUoQ= github.com/google/pprof v0.0.0-20230926050212-f7f687d19a98/go.mod h1:czg5+yv1E0ZGTi6S6vVK1mke0fV+FaUhNGcd6VRS9Ik= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= +github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0= +github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/wire v0.7.0 h1:JxUKI6+CVBgCO2WToKy/nQk0sS+amI9z9EjVmdaocj4= +github.com/google/wire v0.7.0/go.mod h1:n6YbUQD9cPKTnHXEBN2DXlOp/mVADhVErcMFb0v3J18= +github.com/googleapis/enterprise-certificate-proxy v0.3.7 h1:zrn2Ee/nWmHulBx5sAVrGgAa0f2/R35S4DJwfFaUPFQ= +github.com/googleapis/enterprise-certificate-proxy v0.3.7/go.mod h1:MkHOF77EYAE7qfSuSS9PU6g4Nt4e11cnsDUowfwewLA= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= +github.com/googleapis/gax-go/v2 v2.15.0 h1:SyjDc1mGgZU5LncH8gimWo9lW1DtIfPibOG81vgd/bo= +github.com/googleapis/gax-go/v2 v2.15.0/go.mod h1:zVVkkxAQHa1RQpg9z2AUCMnKhi0Qld9rcmyfL1OZhoc= github.com/gophercloud/gophercloud v1.7.0 h1:fyJGKh0LBvIZKLvBWvQdIgkaV5yTM3Jh9EYUh+UNCAs= github.com/gophercloud/gophercloud v1.7.0/go.mod h1:aAVqcocTSXh2vYFZ1JTvx4EQmfgzxRcNupUfxZbBNDM= github.com/gorilla/context v1.1.1 h1:AWwleXJkX/nhcU9bZSnZoi3h/qGYqQAGhq6zZe/aQW8= @@ -473,8 +575,8 @@ github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWm github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/grafana/regexp v0.0.0-20221122212121-6b5c0a4cb7fd h1:PpuIBO5P3e9hpqBD0O/HjhShYuM6XE0i/lbE6J94kww= github.com/grafana/regexp v0.0.0-20221122212121-6b5c0a4cb7fd/go.mod h1:M5qHK+eWfAv8VR/265dIuEpL3fNfeC21tXXp9itM24A= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.1 h1:6UKoz5ujsI55KNpsJH3UwCq3T8kKbZwNZBNPuTTje8U= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.1/go.mod h1:YvJ2f6MplWDhfxiUC3KpyTy76kYUZA4W3pTv/wdKQ9Y= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.3 h1:NmZ1PKzSTQbuGHw9DGPFomqkkLWMC+vZCkfs+FHv1Vg= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.3/go.mod h1:zQrxl1YP88HQlA6i9c63DSVPFklWpGX4OWAc9bFuaH4= github.com/gwatts/gin-adapter v1.0.0 h1:TsmmhYTR79/RMTsfYJ2IQvI1F5KZ3ZFJxuQSYEOpyIA= github.com/gwatts/gin-adapter v1.0.0/go.mod h1:44AEV+938HsS0mjfXtBDCUZS9vONlF2gwvh8wu4sRYc= github.com/hashicorp/consul/api v1.25.1 h1:CqrdhYzc8XZuPnhIYZWH45toM0LB9ZeYr/gvpLVI3PE= @@ -605,6 +707,8 @@ github.com/karrick/godirwalk v1.8.0/go.mod h1:H5KPZjojv4lE+QYImBI8xVtrBRgYrIVsaR github.com/karrick/godirwalk v1.10.3/go.mod h1:RoGL9dQei4vP9ilrpETWE8CLOZ1kiN0LhBygSwrAsHA= github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs= github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8= +github.com/keybase/go-keychain v0.0.1 h1:way+bWYa6lDppZoZcgMbYsvC7GxljxrskdNInRtuthU= +github.com/keybase/go-keychain v0.0.1/go.mod h1:PdEILRW3i9D8JcdM+FmY6RwkHGnhHxXwkPPMeUgOK1k= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= @@ -812,8 +916,8 @@ github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1: github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.5.0 h1:VQw1hfvPvk3Uv6Qf29VrPF32JB6rtbgI6cYPYQjL0Qw= -github.com/prometheus/client_model v0.5.0/go.mod h1:dTiFglRmd66nLR9Pv9f0mZi7B7fk5Pm3gvsjB5tr+kI= +github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= +github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= @@ -897,6 +1001,8 @@ github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY= github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/viper v1.20.0-alpha.3 h1:1VDfj7RrKJfC7ci2Zs3YkO+lAgGEmZZ19aZe20Lyn7Q= github.com/spf13/viper v1.20.0-alpha.3/go.mod h1:CGBZzv0c9fOUASm6rfus4wdeIjR/04NOLq1P4KRhX3k= +github.com/spiffe/go-spiffe/v2 v2.6.0 h1:l+DolpxNWYgruGQVV0xsfeya3CsC7m8iBzDnMpsbLuo= +github.com/spiffe/go-spiffe/v2 v2.6.0/go.mod h1:gm2SeUoMZEtpnzPNs2Csc0D/gX33k1xIx7lEzqblHEs= github.com/streadway/amqp v1.0.0 h1:kuuDrUJFZL1QYL9hUNuCxNObNzB0bV/ZG5jV3RWAQgo= github.com/streadway/amqp v1.0.0/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -1003,36 +1109,42 @@ go.opentelemetry.io/collector/pdata v1.0.0-rcv0016 h1:qCPXSQCoD3qeWFb1RuIks8fw9A go.opentelemetry.io/collector/pdata v1.0.0-rcv0016/go.mod h1:OdN0alYOlYhHXu6BDlGehrZWgtBuiDsz/rlNeJeXiNg= go.opentelemetry.io/collector/semconv v0.87.0 h1:BsG1jdLLRCBRlvUujk4QA86af7r/ZXnizczQpEs/gg8= go.opentelemetry.io/collector/semconv v0.87.0/go.mod h1:j/8THcqVxFna1FpvA2zYIsUperEtOaRaqoLYIN4doWw= +go.opentelemetry.io/contrib/detectors/gcp v1.39.0 h1:kWRNZMsfBHZ+uHjiH4y7Etn2FK26LAGkNFw7RHv1DhE= +go.opentelemetry.io/contrib/detectors/gcp v1.39.0/go.mod h1:t/OGqzHBa5v6RHZwrDBJ2OirWc+4q/w2fTbLZwAKjTk= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0 h1:YH4g8lQroajqUwWbq/tr2QX1JFmEXaDLgG+ew9bLMWo= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0/go.mod h1:fvPi2qXDqFs8M4B4fmJhE92TyQs9Ydjlg3RvfUp+NbQ= go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.46.1 h1:gbhw/u49SS3gkPWiYweQNJGm/uJN5GkI/FrosxSHT7A= go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.46.1/go.mod h1:GnOaBaFQ2we3b9AGWJpsBa7v1S5RlQzlC3O7dRMxZhM= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 h1:TT4fX+nBOA/+LUkobKGW1ydGcn+G3vRw9+g5HwCphpk= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0/go.mod h1:L7UH0GbB0p47T4Rri3uHjbpCFYrVrwc1I25QhNPiGK8= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 h1:RbKq8BG0FI8OiXhBfcRtqqHcZcka+gU3cskNuf05R18= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0/go.mod h1:h06DGIukJOevXaj/xrNjhi/2098RZzcLTbc0jDAUbsg= go.opentelemetry.io/contrib/propagators/b3 v1.21.0 h1:uGdgDPNzwQWRwCXJgw/7h29JaRqcq9B87Iv4hJDKAZw= go.opentelemetry.io/contrib/propagators/b3 v1.21.0/go.mod h1:D9GQXvVGT2pzyTfp1QBOnD1rzKEWzKjjwu5q2mslCUI= go.opentelemetry.io/contrib/propagators/jaeger v1.21.1 h1:f4beMGDKiVzg9IcX7/VuWVy+oGdjx3dNJ72YehmtY5k= go.opentelemetry.io/contrib/propagators/jaeger v1.21.1/go.mod h1:U9jhkEl8d1LL+QXY7q3kneJWJugiN3kZJV2OWz3hkBY= go.opentelemetry.io/contrib/samplers/jaegerremote v0.15.1 h1:Qb+5A+JbIjXwO7l4HkRUhgIn4Bzz0GNS2q+qdmSx+0c= go.opentelemetry.io/contrib/samplers/jaegerremote v0.15.1/go.mod h1:G4vNCm7fRk0kjZ6pGNLo5SpLxAUvOfSrcaegnT8TPck= -go.opentelemetry.io/otel v1.39.0 h1:8yPrr/S0ND9QEfTfdP9V+SiwT4E0G7Y5MO7p85nis48= -go.opentelemetry.io/otel v1.39.0/go.mod h1:kLlFTywNWrFyEdH0oj2xK0bFYZtHRYUdv1NklR/tgc8= +go.opentelemetry.io/otel v1.40.0 h1:oA5YeOcpRTXq6NN7frwmwFR0Cn3RhTVZvXsP4duvCms= +go.opentelemetry.io/otel v1.40.0/go.mod h1:IMb+uXZUKkMXdPddhwAHm6UfOwJyh4ct1ybIlV14J0g= go.opentelemetry.io/otel/exporters/jaeger v1.17.0 h1:D7UpUy2Xc2wsi1Ras6V40q806WM07rqoCWzXu7Sqy+4= go.opentelemetry.io/otel/exporters/jaeger v1.17.0/go.mod h1:nPCqOnEH9rNLKqH/+rrUjiMzHJdV1BlpKcTwRTyKkKI= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.21.0 h1:cl5P5/GIfFh4t6xyruOgJP5QiA1pw4fYYdv6nc6CBWw= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.21.0/go.mod h1:zgBdWWAu7oEEMC06MMKc5NLbA/1YDXV1sMpSqEeLQLg= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 h1:GqRJVj7UmLjCVyVJ3ZFLdPRmhDUp2zFmQe3RHIOsw24= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0/go.mod h1:ri3aaHSmCTVYu2AWv44YMauwAQc0aqI9gHKIcSbI1pU= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.21.0 h1:digkEZCJWobwBqMwC0cwCq8/wkkRy/OowZg5OArWZrM= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.21.0/go.mod h1:/OpE/y70qVkndM0TrxT4KBoN3RsFZP0QaofcfYrj76I= +go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.37.0 h1:6VjV6Et+1Hd2iLZEPtdV7vie80Yyqf7oikJLjQ/myi0= +go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.37.0/go.mod h1:u8hcp8ji5gaM/RfcOo8z9NMnf1pVLfVY7lBY2VOGuUU= go.opentelemetry.io/otel/exporters/zipkin v1.21.0 h1:D+Gv6lSfrFBWmQYyxKjDd0Zuld9SRXpIrEsKZvE4DO4= go.opentelemetry.io/otel/exporters/zipkin v1.21.0/go.mod h1:83oMKR6DzmHisFOW3I+yIMGZUTjxiWaiBI8M8+TU5zE= -go.opentelemetry.io/otel/metric v1.39.0 h1:d1UzonvEZriVfpNKEVmHXbdf909uGTOQjA0HF0Ls5Q0= -go.opentelemetry.io/otel/metric v1.39.0/go.mod h1:jrZSWL33sD7bBxg1xjrqyDjnuzTUB0x1nBERXd7Ftcs= -go.opentelemetry.io/otel/sdk v1.39.0 h1:nMLYcjVsvdui1B/4FRkwjzoRVsMK8uL/cj0OyhKzt18= -go.opentelemetry.io/otel/sdk v1.39.0/go.mod h1:vDojkC4/jsTJsE+kh+LXYQlbL8CgrEcwmt1ENZszdJE= -go.opentelemetry.io/otel/sdk/metric v1.39.0 h1:cXMVVFVgsIf2YL6QkRF4Urbr/aMInf+2WKg+sEJTtB8= -go.opentelemetry.io/otel/sdk/metric v1.39.0/go.mod h1:xq9HEVH7qeX69/JnwEfp6fVq5wosJsY1mt4lLfYdVew= -go.opentelemetry.io/otel/trace v1.39.0 h1:2d2vfpEDmCJ5zVYz7ijaJdOF59xLomrvj7bjt6/qCJI= -go.opentelemetry.io/otel/trace v1.39.0/go.mod h1:88w4/PnZSazkGzz/w84VHpQafiU4EtqqlVdxWy+rNOA= -go.opentelemetry.io/proto/otlp v1.0.0 h1:T0TX0tmXU8a3CbNXzEKGeU5mIVOdf0oykP+u2lIVU/I= -go.opentelemetry.io/proto/otlp v1.0.0/go.mod h1:Sy6pihPLfYHkr3NkUbEhGHFhINUSI/v80hjKIs5JXpM= +go.opentelemetry.io/otel/metric v1.40.0 h1:rcZe317KPftE2rstWIBitCdVp89A2HqjkxR3c11+p9g= +go.opentelemetry.io/otel/metric v1.40.0/go.mod h1:ib/crwQH7N3r5kfiBZQbwrTge743UDc7DTFVZrrXnqc= +go.opentelemetry.io/otel/sdk v1.40.0 h1:KHW/jUzgo6wsPh9At46+h4upjtccTmuZCFAc9OJ71f8= +go.opentelemetry.io/otel/sdk v1.40.0/go.mod h1:Ph7EFdYvxq72Y8Li9q8KebuYUr2KoeyHx0DRMKrYBUE= +go.opentelemetry.io/otel/sdk/metric v1.40.0 h1:mtmdVqgQkeRxHgRv4qhyJduP3fYJRMX4AtAlbuWdCYw= +go.opentelemetry.io/otel/sdk/metric v1.40.0/go.mod h1:4Z2bGMf0KSK3uRjlczMOeMhKU2rhUqdWNoKcYrtcBPg= +go.opentelemetry.io/otel/trace v1.40.0 h1:WA4etStDttCSYuhwvEa8OP8I5EWu24lkOzp+ZYblVjw= +go.opentelemetry.io/otel/trace v1.40.0/go.mod h1:zeAhriXecNGP/s2SEG3+Y8X9ujcJOTqQ5RgdEJcawiA= +go.opentelemetry.io/proto/otlp v1.9.0 h1:l706jCMITVouPOqEnii2fIAuO3IVGBRPV5ICjceRb/A= +go.opentelemetry.io/proto/otlp v1.9.0/go.mod h1:xE+Cx5E/eEHw+ISFkwPLwCZefwVjY+pqKg1qcK03+/4= go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.5.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= @@ -1050,6 +1162,8 @@ go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9E go.uber.org/zap v1.9.1/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= go.uber.org/zap v1.13.0/go.mod h1:zwrFLgMcdUuIBviXEYEH1YKNaOBnKXsx2IPda5bBwHM= +gocloud.dev v0.45.0 h1:WknIK8IbRdmynDvara3Q7G6wQhmEiOGwpgJufbM39sY= +gocloud.dev v0.45.0/go.mod h1:0kXKmkCLG6d31N7NyLZWzt7jDSQura9zD/mWgiB6THI= golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k= golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= @@ -1149,7 +1263,6 @@ golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qx golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.0.0-20220826154423-83b083e8dc8b/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk= golang.org/x/net v0.0.0-20221002022538-bcab6841153b/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk= -golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= @@ -1239,15 +1352,14 @@ golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= -golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ= +golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/telemetry v0.0.0-20251111182119-bc8e575c7b54 h1:E2/AqCUMZGgd73TQkxUMcMla25GB9i/5HOdLr+uH7Vo= golang.org/x/telemetry v0.0.0-20251111182119-bc8e575c7b54/go.mod h1:hKdjCMrbv9skySur+Nek8Hd0uJ0GuxJIoIX2payrIdQ= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.0.0-20220722155259-a9ba230a4035/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY= golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= @@ -1264,7 +1376,6 @@ golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= -golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= @@ -1273,8 +1384,8 @@ golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.8.0 h1:9i3RxcPv3PZnitoVGMPDKZSq1xW1gK1Xy3ArNOGZfEg= -golang.org/x/time v0.8.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI= +golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= @@ -1340,6 +1451,8 @@ golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da h1:noIWHXmPHxILtqtCOPIhSt0ABwskkZKjD3bXGnZGpNY= +golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= @@ -1358,6 +1471,8 @@ google.golang.org/api v0.24.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0M google.golang.org/api v0.28.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= google.golang.org/api v0.29.0/go.mod h1:Lcubydp8VUV7KeIHD9z2Bys/sm/vGKnG1UHuDBSrHWM= google.golang.org/api v0.30.0/go.mod h1:QGmEvQ87FHZNiUVJkT14jQNYJ4ZJjdRF23ZXz5138Fc= +google.golang.org/api v0.256.0 h1:u6Khm8+F9sxbCTYNoBHg6/Hwv0N/i+V94MvkOSor6oI= +google.golang.org/api v0.256.0/go.mod h1:KIgPhksXADEKJlnEoRa9qAII4rXcy40vfI8HRqcU964= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -1393,6 +1508,8 @@ google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7Fc google.golang.org/genproto v0.0.0-20200729003335-053ba62fc06f/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20200804131852-c06518451d9c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20251124214823-79d6a2a48846 h1:dDbsTLIK7EzwUq36kCSAsk0slouq/S0tWHeeGi97cD8= +google.golang.org/genproto v0.0.0-20251124214823-79d6a2a48846/go.mod h1:PP0g88Dz3C7hRAfbQCQggeWAXjuqGsNPLE4s7jh0RGU= google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217 h1:fCvbg86sFXwdrl5LgVcTEvNC+2txB5mgROGmRL5mrls= google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217/go.mod h1:+rXWjjaukWZun3mLfjmVnQi18E1AsFbDN9QdJ5YXLto= google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 h1:gRkg/vSppuSQoDjxyiGfN4Upv/h/DQmIR10ZU8dh4Ww= diff --git a/launchers/origin_serve.go b/launchers/origin_serve.go index fbd23eb2ce..4028bdbe57 100644 --- a/launchers/origin_serve.go +++ b/launchers/origin_serve.go @@ -56,7 +56,11 @@ func OriginServe(ctx context.Context, engine *gin.Engine, egrp *errgroup.Group, // Determine if we should use XRootD or native HTTP server storageType := param.Origin_StorageType.GetString() - useXRootD := storageType != string(server_structs.OriginStoragePosixv2) && storageType != string(server_structs.OriginStorageSSH) + useXRootD := storageType != string(server_structs.OriginStoragePosixv2) && + storageType != string(server_structs.OriginStorageSSH) && + storageType != string(server_structs.OriginStorageS3v2) && + storageType != string(server_structs.OriginStorageHTTPSv2) && + storageType != string(server_structs.OriginStorageGlobusv2) if useXRootD { metrics.SetComponentHealthStatus(metrics.OriginCache_XRootD, metrics.StatusWarning, "XRootD is initializing") @@ -97,6 +101,13 @@ func OriginServe(ctx context.Context, engine *gin.Engine, egrp *errgroup.Group, return nil, errors.Wrap(err, "failed to initialize Globus backend") } origin.LaunchGlobusTokenRefresh(ctx, egrp) + } else if param.Origin_StorageType.GetString() == string(server_structs.OriginStorageGlobusv2) { + // For native Globus v2, we still use the existing Globus init for OAuth + // and DB management, but we don't persist tokens to disk. + if err := origin.InitGlobusBackend(originExports); err != nil { + return nil, errors.Wrap(err, "failed to initialize Globus v2 backend") + } + origin.LaunchGlobusTokenRefresh(ctx, egrp) } concLimit := param.Origin_Concurrency.GetInt() @@ -225,7 +236,11 @@ func OriginServeFinish(ctx context.Context, egrp *errgroup.Group, engine *gin.En // Handle POSIXv2 and SSH-specific initialization now that the web server is running storageType := param.Origin_StorageType.GetString() - useXRootD := storageType != string(server_structs.OriginStoragePosixv2) && storageType != string(server_structs.OriginStorageSSH) + useXRootD := storageType != string(server_structs.OriginStoragePosixv2) && + storageType != string(server_structs.OriginStorageSSH) && + storageType != string(server_structs.OriginStorageS3v2) && + storageType != string(server_structs.OriginStorageHTTPSv2) && + storageType != string(server_structs.OriginStorageGlobusv2) if !useXRootD { // For SSH backend, initialize the SSH connection before setting up handlers if storageType == string(server_structs.OriginStorageSSH) { @@ -262,6 +277,23 @@ func OriginServeFinish(ctx context.Context, egrp *errgroup.Group, engine *gin.En return errors.Wrap(err, "failed to initialize origin_serve handlers") } + // For Globus v2, activate backends with tokens from the Globus init + if storageType == string(server_structs.OriginStorageGlobusv2) { + collections, err := origin.GetActivatedGlobusCollections() + if err != nil { + log.Warningf("Failed to get activated Globus collections: %v", err) + } else { + gBackends := origin_serve.GetGlobusBackends() + for _, col := range collections { + if gb, ok := gBackends[col.CollectionID]; ok { + gb.Activate(col.CollectionToken, col.TransferToken, col.HTTPSServer) + log.Infof("Activated Globus v2 backend for collection %s", col.CollectionID) + } + } + } + origin_serve.LaunchGlobusv2TokenRefresh(ctx, egrp) + } + directorEnabled := modules.IsEnabled(server_structs.DirectorType) if err := origin_serve.RegisterHandlers(engine, directorEnabled); err != nil { return errors.Wrap(err, "failed to register origin_serve handlers") diff --git a/origin/advertise.go b/origin/advertise.go index 292270fe79..f8951b837f 100644 --- a/origin/advertise.go +++ b/origin/advertise.go @@ -180,13 +180,15 @@ func (server *OriginServer) CreateAdvertisement(name, id, originUrlStr, originWe // Get the overall health status as reported by the origin. status := metrics.GetHealthStatus().OverallStatus - // For POSIXv2 and SSH origins co-located with a director, DataURL (which becomes + // For native (non-XRootD) origins co-located with a director, DataURL (which becomes // ServerAd.URL) should have the /api/v1.0/origin/data prefix so the director redirects // to the right endpoint. When the origin is standalone, older clients cannot handle // non-empty resource paths, so we advertise the base URL. // WebURL stays as the base server URL for web browser access. dataUrlToAdvertise := originUrlStr - if (ost == server_structs.OriginStoragePosixv2 || ost == server_structs.OriginStorageSSH) && config.IsServerEnabled(server_structs.DirectorType) { + if (ost == server_structs.OriginStoragePosixv2 || ost == server_structs.OriginStorageSSH || + ost == server_structs.OriginStorageS3v2 || ost == server_structs.OriginStorageHTTPSv2 || + ost == server_structs.OriginStorageGlobusv2) && config.IsServerEnabled(server_structs.DirectorType) { if parsedUrl, err := url.Parse(originUrlStr); err == nil { parsedUrl.Path = "/api/v1.0/origin/data" dataUrlToAdvertise = parsedUrl.String() diff --git a/origin/globus.go b/origin/globus.go index bc5a7d798a..53c7701ce0 100644 --- a/origin/globus.go +++ b/origin/globus.go @@ -371,6 +371,43 @@ func GetGlobusExportsValues(activeOnly bool) []globusExport { return exps } +// GlobusCollectionInfo holds exported Globus collection data for use by +// native (non-XRootD) backends. +type GlobusCollectionInfo struct { + CollectionID string + HTTPSServer string + CollectionToken *oauth2.Token + TransferToken *oauth2.Token + OAuth2Config *oauth2.Config +} + +// GetActivatedGlobusCollections returns info about all activated Globus collections, +// suitable for initializing native Globus v2 backends. +func GetActivatedGlobusCollections() ([]GlobusCollectionInfo, error) { + globusExportsMutex.RLock() + defer globusExportsMutex.RUnlock() + + authCfg, err := GetGlobusOAuthCfg() + if err != nil { + return nil, fmt.Errorf("failed to get Globus OAuth config: %w", err) + } + + var result []GlobusCollectionInfo + for cid, exp := range globusExports { + if exp.Status != GlobusActivated { + continue + } + result = append(result, GlobusCollectionInfo{ + CollectionID: cid, + HTTPSServer: exp.HttpsServer, + CollectionToken: exp.Token, + TransferToken: exp.TransferToken, + OAuth2Config: authCfg, + }) + } + return result, nil +} + // Parse the OriginExport to add Globus status for each export for frontend RESTful API func originExportToGlobusExport(exps []server_utils.OriginExport) ([]globusExportUI, error) { globusExportsMutex.RLock() diff --git a/origin_serve/backend_globus.go b/origin_serve/backend_globus.go new file mode 100644 index 0000000000..2df268fea6 --- /dev/null +++ b/origin_serve/backend_globus.go @@ -0,0 +1,237 @@ +/*************************************************************** + * + * Copyright (C) 2026, Pelican Project, Morgridge Institute for Research + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You may + * obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************/ + +package origin_serve + +import ( + "context" + "fmt" + "net/http" + "sync" + "time" + + log "github.com/sirupsen/logrus" + "golang.org/x/net/webdav" + "golang.org/x/oauth2" + "golang.org/x/sync/errgroup" + + "github.com/pelicanplatform/pelican/server_utils" +) + +// --------------------------------------------------------------------------- +// globusBackend — OriginBackend for Globus v2 (via HTTPS + Globus tokens) +// --------------------------------------------------------------------------- + +// GlobusBackendActivator provides methods for activating and refreshing +// a Globus v2 backend. It is the exported interface that external packages +// (e.g. launchers) use to manage Globus backends. +type GlobusBackendActivator interface { + // Activate marks the collection as activated with the given tokens. + Activate(collectionToken, transferToken *oauth2.Token, httpsServer string) + // RefreshTokens refreshes both the collection and transfer tokens. + RefreshTokens() error + // IsActivated returns whether the Globus collection has been activated. + IsActivated() bool +} + +// globusBackend wraps an httpsBackend with Globus-specific token management. +// The HTTPS filesystem reads files from the Globus collection's HTTPS endpoint, +// using an OAuth2 access token obtained through the Globus auth flow. +// +// Unlike the XRootD Globus backend, tokens are managed in memory only — no +// disk persistence is needed because the origin_serve infrastructure does +// not need to share tokens with an XRootD process. +type globusBackend struct { + inner *httpsBackend + + // Globus-specific token management + collectionID string + mu sync.RWMutex + collectionToken *oauth2.Token + transferToken *oauth2.Token + oauth2Cfg *oauth2.Config + httpsServer string // Collection HTTPS endpoint + activated bool +} + +// GlobusBackendConfig holds the parameters needed to construct a Globus backend. +type GlobusBackendConfig struct { + // CollectionID is the Globus collection UUID + CollectionID string + // HTTPSServer is the HTTPS URL for the collection (e.g. https://g-12345.data.globus.org) + HTTPSServer string + // StoragePrefix is the path prefix within the collection + StoragePrefix string + // OAuth2Config for refreshing tokens + OAuth2Config *oauth2.Config + // CollectionToken is the initial collection access token + CollectionToken *oauth2.Token + // TransferToken is the initial transfer access token + TransferToken *oauth2.Token +} + +// NewGlobusBackend creates a new native Globus backend. +func NewGlobusBackend(cfg GlobusBackendConfig) *globusBackend { + inner := newHTTPSBackend(HTTPSBackendOptions{ + ServiceURL: cfg.HTTPSServer, + StoragePrefix: cfg.StoragePrefix, + TokenMode: HTTPSTokenOAuth2, + OAuth2Config: cfg.OAuth2Config, + OAuth2Token: cfg.CollectionToken, + }) + + return &globusBackend{ + inner: inner, + collectionID: cfg.CollectionID, + collectionToken: cfg.CollectionToken, + transferToken: cfg.TransferToken, + oauth2Cfg: cfg.OAuth2Config, + httpsServer: cfg.HTTPSServer, + activated: cfg.CollectionToken != nil, + } +} + +func (b *globusBackend) CheckAvailability() error { + b.mu.RLock() + defer b.mu.RUnlock() + if !b.activated { + return &globusUnavailableError{ + collectionID: b.collectionID, + msg: "Globus collection not activated", + } + } + return nil +} + +func (b *globusBackend) FileSystem() webdav.FileSystem { return b.inner.FileSystem() } + +func (b *globusBackend) Checksummer() server_utils.OriginChecksummer { + return nil // Globus doesn't support local checksums +} + +// IsActivated returns whether the Globus collection has been activated. +func (b *globusBackend) IsActivated() bool { + b.mu.RLock() + defer b.mu.RUnlock() + return b.activated +} + +// Activate marks the collection as activated with the given tokens. +func (b *globusBackend) Activate(collectionToken, transferToken *oauth2.Token, httpsServer string) { + b.mu.Lock() + defer b.mu.Unlock() + b.collectionToken = collectionToken + b.transferToken = transferToken + b.httpsServer = httpsServer + b.activated = true + + // Update the inner HTTPS backend's OAuth2 token + b.inner.SetOAuth2Token(collectionToken) +} + +// RefreshTokens refreshes both the collection and transfer tokens. +func (b *globusBackend) RefreshTokens() error { + b.mu.Lock() + defer b.mu.Unlock() + + if !b.activated || b.oauth2Cfg == nil { + return nil + } + + // Refresh collection token + if b.collectionToken != nil && b.collectionToken.Expiry.Before(time.Now().Add(10*time.Minute)) { + ts := b.oauth2Cfg.TokenSource(nil, b.collectionToken) + newTok, err := ts.Token() + if err != nil { + log.Warningf("Failed to refresh Globus collection token for %s: %v", b.collectionID, err) + b.activated = false + return fmt.Errorf("failed to refresh collection token: %w", err) + } + b.collectionToken = newTok + b.inner.SetOAuth2Token(newTok) + log.Debugf("Refreshed Globus collection token for %s", b.collectionID) + } + + // Refresh transfer token + if b.transferToken != nil && b.transferToken.Expiry.Before(time.Now().Add(10*time.Minute)) { + ts := b.oauth2Cfg.TokenSource(nil, b.transferToken) + newTok, err := ts.Token() + if err != nil { + log.Warningf("Failed to refresh Globus transfer token for %s: %v", b.collectionID, err) + return fmt.Errorf("failed to refresh transfer token: %w", err) + } + b.transferToken = newTok + log.Debugf("Refreshed Globus transfer token for %s", b.collectionID) + } + + return nil +} + +// --------------------------------------------------------------------------- +// globusUnavailableError — HTTP 503 when collection is not activated +// --------------------------------------------------------------------------- + +type globusUnavailableError struct { + collectionID string + msg string +} + +func (e *globusUnavailableError) Error() string { + return fmt.Sprintf("Globus collection %s: %s", e.collectionID, e.msg) +} + +func (e *globusUnavailableError) HTTPStatusCode() int { + return http.StatusServiceUnavailable +} + +// GetGlobusBackends returns the map of Globus v2 backends keyed by collection ID. +// This is used by the launcher to activate backends after Globus OAuth is initialized. +func GetGlobusBackends() map[string]GlobusBackendActivator { + result := make(map[string]GlobusBackendActivator, len(globusBackends)) + for k, v := range globusBackends { + result[k] = v + } + return result +} + +// LaunchGlobusv2TokenRefresh starts a periodic goroutine (every 5 min) that +// refreshes the OAuth2 tokens for all activated Globus v2 backends. +// Unlike the XRootD Globus backend, tokens are kept in memory only. +func LaunchGlobusv2TokenRefresh(ctx context.Context, egrp *errgroup.Group) { + if len(globusBackends) == 0 { + return + } + log.Info("Launching periodic Globus v2 token refresh") + egrp.Go(func() error { + ticker := time.NewTicker(5 * time.Minute) + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + log.Info("Globus v2 token refresh stopped") + return nil + case <-ticker.C: + for cid, gb := range globusBackends { + if err := gb.RefreshTokens(); err != nil { + log.Errorf("Failed to refresh Globus v2 tokens for collection %s: %v", cid, err) + } + } + } + } + }) +} diff --git a/origin_serve/backend_globus_test.go b/origin_serve/backend_globus_test.go new file mode 100644 index 0000000000..3a9930b62c --- /dev/null +++ b/origin_serve/backend_globus_test.go @@ -0,0 +1,215 @@ +/*************************************************************** + * + * Copyright (C) 2026, Pelican Project, Morgridge Institute for Research + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You may + * obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************/ + +package origin_serve + +import ( + "net/http" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "golang.org/x/oauth2" +) + +// --------------------------------------------------------------------------- +// NewGlobusBackend construction +// --------------------------------------------------------------------------- + +func TestNewGlobusBackend_NotActivated(t *testing.T) { + gb := NewGlobusBackend(GlobusBackendConfig{ + CollectionID: "coll-id-123", + HTTPSServer: "https://g-12345.data.globus.org", + StoragePrefix: "/mydata", + }) + + assert.False(t, gb.IsActivated()) + assert.NotNil(t, gb.FileSystem()) + assert.Nil(t, gb.Checksummer()) +} + +func TestNewGlobusBackend_WithTokens(t *testing.T) { + tok := &oauth2.Token{ + AccessToken: "collection-access-token", + RefreshToken: "collection-refresh-token", + Expiry: time.Now().Add(1 * time.Hour), + } + gb := NewGlobusBackend(GlobusBackendConfig{ + CollectionID: "coll-id-456", + HTTPSServer: "https://g-99999.data.globus.org", + StoragePrefix: "/prefix", + CollectionToken: tok, + TransferToken: &oauth2.Token{AccessToken: "transfer-tok"}, + }) + + assert.True(t, gb.IsActivated()) +} + +// --------------------------------------------------------------------------- +// CheckAvailability +// --------------------------------------------------------------------------- + +func TestGlobusBackend_CheckAvailability_NotActivated(t *testing.T) { + gb := NewGlobusBackend(GlobusBackendConfig{ + CollectionID: "coll-x", + HTTPSServer: "https://g-x.data.globus.org", + }) + + err := gb.CheckAvailability() + require.Error(t, err) + assert.Contains(t, err.Error(), "not activated") + + // Should return 503 + if httpErr, ok := err.(*globusUnavailableError); ok { + assert.Equal(t, http.StatusServiceUnavailable, httpErr.HTTPStatusCode()) + } +} + +func TestGlobusBackend_CheckAvailability_Activated(t *testing.T) { + gb := NewGlobusBackend(GlobusBackendConfig{ + CollectionID: "coll-y", + HTTPSServer: "https://g-y.data.globus.org", + CollectionToken: &oauth2.Token{ + AccessToken: "valid-tok", + Expiry: time.Now().Add(1 * time.Hour), + }, + }) + + // When activated, CheckAvailability() succeeds (no upstream probe needed) + require.NoError(t, gb.CheckAvailability()) +} + +// --------------------------------------------------------------------------- +// Activate +// --------------------------------------------------------------------------- + +func TestGlobusBackend_Activate(t *testing.T) { + gb := NewGlobusBackend(GlobusBackendConfig{ + CollectionID: "coll-activate", + HTTPSServer: "https://g-a.data.globus.org", + }) + assert.False(t, gb.IsActivated()) + + collTok := &oauth2.Token{ + AccessToken: "coll-tok", + RefreshToken: "coll-refresh", + Expiry: time.Now().Add(1 * time.Hour), + } + transTok := &oauth2.Token{ + AccessToken: "trans-tok", + Expiry: time.Now().Add(1 * time.Hour), + } + + gb.Activate(collTok, transTok, "https://g-new.data.globus.org") + assert.True(t, gb.IsActivated()) + require.NoError(t, gb.CheckAvailability()) +} + +// --------------------------------------------------------------------------- +// RefreshTokens +// --------------------------------------------------------------------------- + +func TestGlobusBackend_RefreshTokens_NotActivated(t *testing.T) { + gb := NewGlobusBackend(GlobusBackendConfig{ + CollectionID: "coll-norefresh", + HTTPSServer: "https://g-nr.data.globus.org", + }) + // RefreshTokens on a non-activated backend should be a no-op + require.NoError(t, gb.RefreshTokens()) +} + +func TestGlobusBackend_RefreshTokens_NotExpiring(t *testing.T) { + gb := NewGlobusBackend(GlobusBackendConfig{ + CollectionID: "coll-fresh", + HTTPSServer: "https://g-f.data.globus.org", + CollectionToken: &oauth2.Token{ + AccessToken: "fresh-col", + RefreshToken: "refresh-col", + Expiry: time.Now().Add(2 * time.Hour), // not near expiry + }, + TransferToken: &oauth2.Token{ + AccessToken: "fresh-trans", + RefreshToken: "refresh-trans", + Expiry: time.Now().Add(2 * time.Hour), + }, + OAuth2Config: &oauth2.Config{ + ClientID: "client-id", + ClientSecret: "client-secret", + Endpoint: oauth2.Endpoint{ + TokenURL: "https://auth.globus.org/v2/oauth2/token", + }, + }, + }) + + // Tokens far from expiry — RefreshTokens should be a no-op + require.NoError(t, gb.RefreshTokens()) +} + +// --------------------------------------------------------------------------- +// globusUnavailableError +// --------------------------------------------------------------------------- + +func TestGlobusUnavailableError(t *testing.T) { + err := &globusUnavailableError{ + collectionID: "abc-123", + msg: "not ready", + } + assert.Equal(t, "Globus collection abc-123: not ready", err.Error()) + assert.Equal(t, http.StatusServiceUnavailable, err.HTTPStatusCode()) +} + +// --------------------------------------------------------------------------- +// GetGlobusBackends +// --------------------------------------------------------------------------- + +func TestGetGlobusBackends_Empty(t *testing.T) { + // Save and restore original + origMap := globusBackends + defer func() { globusBackends = origMap }() + + globusBackends = nil + result := GetGlobusBackends() + assert.Empty(t, result) +} + +func TestGetGlobusBackends_WithEntries(t *testing.T) { + origMap := globusBackends + defer func() { globusBackends = origMap }() + + gb1 := NewGlobusBackend(GlobusBackendConfig{CollectionID: "c1", HTTPSServer: "https://g1.data.globus.org"}) + gb2 := NewGlobusBackend(GlobusBackendConfig{CollectionID: "c2", HTTPSServer: "https://g2.data.globus.org"}) + + globusBackends = map[string]*globusBackend{ + "c1": gb1, + "c2": gb2, + } + + result := GetGlobusBackends() + assert.Len(t, result, 2) + assert.NotNil(t, result["c1"]) + assert.NotNil(t, result["c2"]) +} + +// --------------------------------------------------------------------------- +// GlobusBackendActivator interface conformance +// --------------------------------------------------------------------------- + +func TestGlobusBackend_ImplementsActivator(t *testing.T) { + var _ GlobusBackendActivator = (*globusBackend)(nil) +} diff --git a/origin_serve/backend_https.go b/origin_serve/backend_https.go new file mode 100644 index 0000000000..1fe8f20384 --- /dev/null +++ b/origin_serve/backend_https.go @@ -0,0 +1,656 @@ +/*************************************************************** + * + * Copyright (C) 2026, Pelican Project, Morgridge Institute for Research + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You may + * obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************/ + +package origin_serve + +import ( + "context" + "fmt" + "io" + "net/http" + "os" + "path" + "strings" + "sync" + "sync/atomic" + "time" + + log "github.com/sirupsen/logrus" + gowebdav "github.com/studio-b12/gowebdav" + "golang.org/x/net/webdav" + "golang.org/x/oauth2" + + "github.com/pelicanplatform/pelican/server_utils" +) + +// --------------------------------------------------------------------------- +// HTTPSTokenMode controls how the backend authenticates to the upstream server. +// --------------------------------------------------------------------------- + +type HTTPSTokenMode int + +const ( + // HTTPSTokenNone — no token is sent to the backend. + HTTPSTokenNone HTTPSTokenMode = iota + // HTTPSTokenStatic — a static bearer token read from a file is sent. + HTTPSTokenStatic + // HTTPSTokenPassthrough — the client-supplied token is forwarded. + HTTPSTokenPassthrough + // HTTPSTokenOAuth2 — an OAuth2 access token is acquired and refreshed automatically. + HTTPSTokenOAuth2 +) + +// --------------------------------------------------------------------------- +// BackendMode — whether the upstream speaks WebDAV or plain HTTP. +// Determined by an OPTIONS probe at startup. +// --------------------------------------------------------------------------- + +type BackendMode int + +const ( + BackendModeUnknown BackendMode = iota + BackendModeWebDAV + BackendModeHTTP +) + +// --------------------------------------------------------------------------- +// httpsBackend — OriginBackend for HTTPS/WebDAV upstream storage +// --------------------------------------------------------------------------- + +type httpsBackend struct { + fs *httpsFileSystem +} + +// HTTPSBackendOptions groups the parameters needed to construct an HTTPS backend. +type HTTPSBackendOptions struct { + ServiceURL string + StoragePrefix string + TokenMode HTTPSTokenMode + // For static tokens: + StaticTokenFile string + // For OAuth2 tokens: + OAuth2Config *oauth2.Config + OAuth2Token *oauth2.Token // initial token (with refresh_token) +} + +func newHTTPSBackend(opts HTTPSBackendOptions) *httpsBackend { + fs := &httpsFileSystem{ + serviceURL: strings.TrimSuffix(opts.ServiceURL, "/"), + storagePrefix: opts.StoragePrefix, + tokenMode: opts.TokenMode, + staticTokenFile: opts.StaticTokenFile, + httpClient: &http.Client{Timeout: 60 * time.Second}, + } + if opts.OAuth2Config != nil && opts.OAuth2Token != nil { + fs.oauth2Cfg = opts.OAuth2Config + fs.oauth2Tok = opts.OAuth2Token + } + return &httpsBackend{fs: fs} +} + +// CheckAvailability probes the upstream to determine whether it speaks WebDAV or +// plain HTTP by issuing an OPTIONS request and inspecting the Allow / DAV headers. +func (b *httpsBackend) CheckAvailability() error { + return b.fs.probeBackendMode() +} + +func (b *httpsBackend) FileSystem() webdav.FileSystem { return b.fs } +func (b *httpsBackend) Checksummer() server_utils.OriginChecksummer { + return nil // Remote HTTPS backends don't support local checksums +} + +// SetOAuth2Token allows external callers (e.g. Globus init) to update the +// managed OAuth2 token at runtime. +func (b *httpsBackend) SetOAuth2Token(tok *oauth2.Token) { + if b.fs != nil { + b.fs.oauthMu.Lock() + defer b.fs.oauthMu.Unlock() + b.fs.oauth2Tok = tok + } +} + +// BackendMode returns the detected mode (WebDAV or HTTP). +func (b *httpsBackend) BackendMode() BackendMode { + return b.fs.backendMode +} + +// --------------------------------------------------------------------------- +// httpsFileSystem — implements webdav.FileSystem backed by an upstream HTTPS server. +// When the backend supports WebDAV, directory operations use the gowebdav library. +// When it is plain HTTP, we fall back to simple verbs (GET, PUT, DELETE, HEAD). +// --------------------------------------------------------------------------- + +type httpsFileSystem struct { + serviceURL string + storagePrefix string + tokenMode HTTPSTokenMode + staticTokenFile string + backendMode BackendMode + + // OAuth2 token management (in-memory only — no disk persistence) + oauth2Cfg *oauth2.Config + oauth2Tok *oauth2.Token + oauthMu sync.Mutex // protects oauth2Tok + + httpClient *http.Client +} + +// probeBackendMode issues an OPTIONS request against the upstream root and +// inspects the Allow and DAV headers to decide between WebDAV and plain HTTP. +func (fs *httpsFileSystem) probeBackendMode() error { + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) + defer cancel() + + resp, err := fs.doRequest(ctx, "OPTIONS", fs.serviceURL+"/", nil, nil) + if err != nil { + // If OPTIONS fails outright, assume plain HTTP. + fs.backendMode = BackendModeHTTP + log.Infof("HTTPS backend at %s: OPTIONS probe failed (%v); assuming plain HTTP", fs.serviceURL, err) + return nil + } + defer resp.Body.Close() + + allow := resp.Header.Get("Allow") + dav := resp.Header.Get("DAV") + + if strings.Contains(allow, "PROPFIND") || dav != "" { + fs.backendMode = BackendModeWebDAV + log.Infof("HTTPS backend at %s detected as WebDAV (Allow=%q, DAV=%q)", fs.serviceURL, allow, dav) + } else { + fs.backendMode = BackendModeHTTP + log.Infof("HTTPS backend at %s detected as plain HTTP (Allow=%q)", fs.serviceURL, allow) + } + return nil +} + +// davPath constructs the path that the gowebdav client expects (relative to the +// service URL root). It prepends the configured storagePrefix. +func (fs *httpsFileSystem) davPath(name string) string { + name = strings.TrimPrefix(name, "/") + prefix := strings.TrimPrefix(fs.storagePrefix, "/") + if prefix != "" { + return "/" + prefix + "/" + name + } + return "/" + name +} + +// getDavClient returns a gowebdav.Client configured with the appropriate bearer +// token for the current request context. A fresh client is created per call so +// that the passthrough token is always correct even under concurrent requests. +func (fs *httpsFileSystem) getDavClient(ctx context.Context) *gowebdav.Client { + token := fs.getToken(ctx) + var client *gowebdav.Client + if token != "" { + auth := &simpleBearerAuth{token: token} + client = gowebdav.NewAuthClient(fs.serviceURL, auth) + } else { + client = gowebdav.NewClient(fs.serviceURL, "", "") + } + if fs.httpClient.Transport != nil { + client.SetTransport(fs.httpClient.Transport) + } + return client +} + +// upstreamURL returns the full URL for the given path on the upstream server. +func (fs *httpsFileSystem) upstreamURL(name string) string { + name = strings.TrimPrefix(name, "/") + prefix := strings.TrimPrefix(fs.storagePrefix, "/") + if prefix != "" { + return fs.serviceURL + "/" + prefix + "/" + name + } + return fs.serviceURL + "/" + name +} + +// getToken returns the bearer token to use for the upstream request. +func (fs *httpsFileSystem) getToken(ctx context.Context) string { + switch fs.tokenMode { + case HTTPSTokenStatic: + return fs.readStaticToken() + case HTTPSTokenPassthrough: + return tokenFromContext(ctx) + case HTTPSTokenOAuth2: + return fs.getOAuth2Token(ctx) + default: + return "" + } +} + +func (fs *httpsFileSystem) readStaticToken() string { + if fs.staticTokenFile == "" { + return "" + } + data, err := os.ReadFile(fs.staticTokenFile) + if err != nil { + log.Debugf("Failed to read HTTPS auth token file %s: %v", fs.staticTokenFile, err) + return "" + } + return strings.TrimSpace(string(data)) +} + +func (fs *httpsFileSystem) getOAuth2Token(ctx context.Context) string { + fs.oauthMu.Lock() + defer fs.oauthMu.Unlock() + + if fs.oauth2Cfg == nil || fs.oauth2Tok == nil { + return "" + } + + ts := fs.oauth2Cfg.TokenSource(ctx, fs.oauth2Tok) + tok, err := ts.Token() + if err != nil { + log.Warningf("Failed to refresh OAuth2 token for HTTPS backend: %v", err) + return fs.oauth2Tok.AccessToken + } + fs.oauth2Tok = tok + return tok.AccessToken +} + +// doRequest creates and executes an HTTP request to the upstream server. +func (fs *httpsFileSystem) doRequest(ctx context.Context, method, urlStr string, body io.Reader, extraHeaders map[string]string) (*http.Response, error) { + req, err := http.NewRequestWithContext(ctx, method, urlStr, body) + if err != nil { + return nil, err + } + if token := fs.getToken(ctx); token != "" { + req.Header.Set("Authorization", "Bearer "+token) + } + for k, v := range extraHeaders { + req.Header.Set(k, v) + } + if ph := server_utils.PelicanHeadersFromContext(ctx); ph != nil { + if ph.JobId != "" { + req.Header.Set("X-Pelican-JobId", ph.JobId) + } + if ph.Timeout != "" { + req.Header.Set("X-Pelican-Timeout", ph.Timeout) + } + } + return fs.httpClient.Do(req) +} + +// --------------------------------------------------------------------------- +// webdav.FileSystem method implementations +// --------------------------------------------------------------------------- + +// Mkdir implements webdav.FileSystem. +func (fs *httpsFileSystem) Mkdir(ctx context.Context, name string, perm os.FileMode) error { + if fs.backendMode == BackendModeWebDAV { + client := fs.getDavClient(ctx) + return client.Mkdir(fs.davPath(name), perm) + } + return fmt.Errorf("mkdir not supported on HTTP-only backend") +} + +// OpenFile implements webdav.FileSystem. +func (fs *httpsFileSystem) OpenFile(ctx context.Context, name string, flag int, perm os.FileMode) (webdav.File, error) { + // For write operations, return a writer that PUTs on Close. + if flag&(os.O_WRONLY|os.O_RDWR|os.O_CREATE|os.O_TRUNC) != 0 { + return newHTTPSWriteFile(ctx, fs, name), nil + } + + // In WebDAV mode, check whether the path is a directory first. + if fs.backendMode == BackendModeWebDAV { + client := fs.getDavClient(ctx) + davP := fs.davPath(name) + info, err := client.Stat(davP) + if err == nil && info.IsDir() { + children, dirErr := client.ReadDir(davP) + if dirErr != nil { + return nil, dirErr + } + return &httpsReadDirFile{name: name, entries: children}, nil + } + if err != nil && gowebdav.IsErrNotFound(err) { + return nil, os.ErrNotExist + } + // Either it's a regular file or Stat failed for a non-404 reason — fall + // through to GET. + } + + urlStr := fs.upstreamURL(name) + resp, err := fs.doRequest(ctx, http.MethodGet, urlStr, nil, nil) + if err != nil { + return nil, err + } + + if resp.StatusCode == http.StatusNotFound { + resp.Body.Close() + return nil, os.ErrNotExist + } + if resp.StatusCode != http.StatusOK { + resp.Body.Close() + return nil, fmt.Errorf("https get failed with status %d", resp.StatusCode) + } + + return &httpsReadFile{ + name: name, + body: resp.Body, + contentLength: resp.ContentLength, + lastModified: parseHTTPDate(resp.Header.Get("Last-Modified")), + }, nil +} + +// RemoveAll implements webdav.FileSystem. +func (fs *httpsFileSystem) RemoveAll(ctx context.Context, name string) error { + if fs.backendMode == BackendModeWebDAV { + client := fs.getDavClient(ctx) + return client.RemoveAll(fs.davPath(name)) + } + // HTTP-only: plain DELETE. + urlStr := fs.upstreamURL(name) + resp, err := fs.doRequest(ctx, http.MethodDelete, urlStr, nil, nil) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode == http.StatusOK || resp.StatusCode == http.StatusNoContent || resp.StatusCode == http.StatusNotFound { + return nil + } + return fmt.Errorf("https delete failed with status %d", resp.StatusCode) +} + +// Rename implements webdav.FileSystem. +func (fs *httpsFileSystem) Rename(ctx context.Context, oldName, newName string) error { + if fs.backendMode == BackendModeWebDAV { + client := fs.getDavClient(ctx) + return client.Rename(fs.davPath(oldName), fs.davPath(newName), true) + } + return fmt.Errorf("rename not supported on HTTP-only backend") +} + +// Stat implements webdav.FileSystem. +func (fs *httpsFileSystem) Stat(ctx context.Context, name string) (os.FileInfo, error) { + if fs.backendMode == BackendModeWebDAV { + client := fs.getDavClient(ctx) + info, err := client.Stat(fs.davPath(name)) + if err != nil { + if gowebdav.IsErrNotFound(err) { + return nil, os.ErrNotExist + } + return nil, err + } + return info, nil + } + + // HTTP-only: use HEAD. + urlStr := fs.upstreamURL(name) + resp, err := fs.doRequest(ctx, http.MethodHead, urlStr, nil, nil) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + if resp.StatusCode == http.StatusNotFound { + return nil, os.ErrNotExist + } + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("https head failed with status %d", resp.StatusCode) + } + + return &httpsFileInfo{ + name: path.Base(name), + size: resp.ContentLength, + modTime: parseHTTPDate(resp.Header.Get("Last-Modified")), + isDir: false, + }, nil +} + +// --------------------------------------------------------------------------- +// Token passthrough context key +// --------------------------------------------------------------------------- + +type clientTokenKey struct{} + +// WithClientToken stores the client's bearer token in the context +// so that the HTTPS backend can forward it to the upstream server. +func WithClientToken(ctx context.Context, token string) context.Context { + return context.WithValue(ctx, clientTokenKey{}, token) +} + +func tokenFromContext(ctx context.Context) string { + if tok, ok := ctx.Value(clientTokenKey{}).(string); ok { + return tok + } + return "" +} + +// --------------------------------------------------------------------------- +// simpleBearerAuth — implements gowebdav.Authorizer for a fixed bearer token. +// A fresh instance is created per request via getDavClient. +// --------------------------------------------------------------------------- + +type simpleBearerAuth struct { + token string +} + +type simpleBearerAuthenticator struct { + token string +} + +func (a *simpleBearerAuth) NewAuthenticator(body io.Reader) (gowebdav.Authenticator, io.Reader) { + return &simpleBearerAuthenticator{token: a.token}, body +} + +func (a *simpleBearerAuth) AddAuthenticator(_ string, _ gowebdav.AuthFactory) {} + +func (auth *simpleBearerAuthenticator) Authorize(_ *http.Client, rq *http.Request, _ string) error { + if auth.token != "" { + rq.Header.Set("Authorization", "Bearer "+auth.token) + } + return nil +} + +func (auth *simpleBearerAuthenticator) Verify(_ *http.Client, _ *http.Response, _ string) (bool, error) { + return false, nil +} + +func (auth *simpleBearerAuthenticator) Close() error { return nil } + +func (auth *simpleBearerAuthenticator) Clone() gowebdav.Authenticator { + return &simpleBearerAuthenticator{token: auth.token} +} + +// --------------------------------------------------------------------------- +// httpsFileInfo — implements os.FileInfo (used only in HTTP-only mode). +// In WebDAV mode the gowebdav library returns its own FileInfo. +// --------------------------------------------------------------------------- + +type httpsFileInfo struct { + name string + size int64 + modTime time.Time + isDir bool +} + +func (fi *httpsFileInfo) Name() string { return fi.name } +func (fi *httpsFileInfo) Size() int64 { return fi.size } +func (fi *httpsFileInfo) Mode() os.FileMode { return 0444 } +func (fi *httpsFileInfo) ModTime() time.Time { + if fi.modTime.IsZero() { + return time.Now() + } + return fi.modTime +} +func (fi *httpsFileInfo) IsDir() bool { return fi.isDir } +func (fi *httpsFileInfo) Sys() interface{} { return nil } + +// --------------------------------------------------------------------------- +// httpsReadFile — read-only file backed by an HTTPS GET response. +// Uses atomic offset for concurrent safety. +// --------------------------------------------------------------------------- + +type httpsReadFile struct { + name string + body io.ReadCloser + contentLength int64 + lastModified time.Time + offset atomic.Int64 +} + +func (f *httpsReadFile) Read(p []byte) (int, error) { + n, err := f.body.Read(p) + f.offset.Add(int64(n)) + return n, err +} + +func (f *httpsReadFile) Seek(offset int64, whence int) (int64, error) { + var newOff int64 + switch whence { + case io.SeekStart: + newOff = offset + case io.SeekCurrent: + newOff = f.offset.Load() + offset + case io.SeekEnd: + newOff = f.contentLength + offset + } + f.offset.Store(newOff) + return newOff, nil +} + +func (f *httpsReadFile) Close() error { return f.body.Close() } + +func (f *httpsReadFile) Write(_ []byte) (int, error) { + return 0, fmt.Errorf("write not supported on read file") +} + +func (f *httpsReadFile) Readdir(_ int) ([]os.FileInfo, error) { + return nil, fmt.Errorf("readdir not supported on file") +} + +func (f *httpsReadFile) Stat() (os.FileInfo, error) { + return &httpsFileInfo{ + name: path.Base(f.name), + size: f.contentLength, + modTime: f.lastModified, + isDir: false, + }, nil +} + +// --------------------------------------------------------------------------- +// httpsWriteFile — write file that PUTs to the upstream server on Close. +// Uses a mutex to protect concurrent appends to the buffer. +// --------------------------------------------------------------------------- + +type httpsWriteFile struct { + ctx context.Context + fs *httpsFileSystem + name string + mu sync.Mutex + buf []byte +} + +func newHTTPSWriteFile(ctx context.Context, fs *httpsFileSystem, name string) *httpsWriteFile { + return &httpsWriteFile{ctx: ctx, fs: fs, name: name} +} + +func (f *httpsWriteFile) Write(p []byte) (int, error) { + f.mu.Lock() + defer f.mu.Unlock() + f.buf = append(f.buf, p...) + return len(p), nil +} + +func (f *httpsWriteFile) Close() error { + f.mu.Lock() + data := make([]byte, len(f.buf)) + copy(data, f.buf) + f.mu.Unlock() + + urlStr := f.fs.upstreamURL(f.name) + body := strings.NewReader(string(data)) + + resp, err := f.fs.doRequest(f.ctx, http.MethodPut, urlStr, body, map[string]string{ + "Content-Length": fmt.Sprintf("%d", len(data)), + }) + if err != nil { + return err + } + defer resp.Body.Close() + + if resp.StatusCode == http.StatusOK || resp.StatusCode == http.StatusCreated || resp.StatusCode == http.StatusNoContent { + return nil + } + respBody, _ := io.ReadAll(resp.Body) + log.Debugf("HTTPS PUT response (%d): %s", resp.StatusCode, string(respBody)) + return fmt.Errorf("https put failed with status %d", resp.StatusCode) +} + +func (f *httpsWriteFile) Read(_ []byte) (int, error) { + return 0, fmt.Errorf("read not supported on write file") +} + +func (f *httpsWriteFile) Seek(_ int64, _ int) (int64, error) { + return 0, fmt.Errorf("seek not supported on write file") +} + +func (f *httpsWriteFile) Readdir(_ int) ([]os.FileInfo, error) { + return nil, fmt.Errorf("readdir not supported on write file") +} + +func (f *httpsWriteFile) Stat() (os.FileInfo, error) { + f.mu.Lock() + n := len(f.buf) + f.mu.Unlock() + return &httpsFileInfo{ + name: path.Base(f.name), + size: int64(n), + isDir: false, + }, nil +} + +// --------------------------------------------------------------------------- +// httpsReadDirFile — directory listing returned from OpenFile for collections. +// --------------------------------------------------------------------------- + +type httpsReadDirFile struct { + name string + entries []os.FileInfo +} + +func (f *httpsReadDirFile) Read(_ []byte) (int, error) { + return 0, fmt.Errorf("read not supported on directory") +} + +func (f *httpsReadDirFile) Seek(_ int64, _ int) (int64, error) { + return 0, fmt.Errorf("seek not supported on directory") +} + +func (f *httpsReadDirFile) Close() error { return nil } + +func (f *httpsReadDirFile) Write(_ []byte) (int, error) { + return 0, fmt.Errorf("write not supported on directory") +} + +func (f *httpsReadDirFile) Readdir(count int) ([]os.FileInfo, error) { + if count <= 0 || count > len(f.entries) { + result := f.entries + f.entries = nil + return result, nil + } + result := f.entries[:count] + f.entries = f.entries[count:] + return result, nil +} + +func (f *httpsReadDirFile) Stat() (os.FileInfo, error) { + return &httpsFileInfo{ + name: path.Base(f.name), + isDir: true, + }, nil +} diff --git a/origin_serve/backend_https_test.go b/origin_serve/backend_https_test.go new file mode 100644 index 0000000000..1e5fdfefe8 --- /dev/null +++ b/origin_serve/backend_https_test.go @@ -0,0 +1,561 @@ +/*************************************************************** + * + * Copyright (C) 2026, Pelican Project, Morgridge Institute for Research + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You may + * obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************/ + +package origin_serve + +import ( + "context" + "fmt" + "io" + "net/http" + "net/http/httptest" + "os" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// --------------------------------------------------------------------------- +// HTTPSTokenMode / BackendMode constants +// --------------------------------------------------------------------------- + +func TestHTTPSTokenModeConstants(t *testing.T) { + assert.Equal(t, HTTPSTokenMode(0), HTTPSTokenNone) + assert.Equal(t, HTTPSTokenMode(1), HTTPSTokenStatic) + assert.Equal(t, HTTPSTokenMode(2), HTTPSTokenPassthrough) + assert.Equal(t, HTTPSTokenMode(3), HTTPSTokenOAuth2) +} + +func TestBackendModeConstants(t *testing.T) { + assert.Equal(t, BackendMode(0), BackendModeUnknown) + assert.Equal(t, BackendMode(1), BackendModeWebDAV) + assert.Equal(t, BackendMode(2), BackendModeHTTP) +} + +// --------------------------------------------------------------------------- +// Token passthrough context +// --------------------------------------------------------------------------- + +func TestWithClientToken(t *testing.T) { + ctx := context.Background() + assert.Empty(t, tokenFromContext(ctx)) + + ctx = WithClientToken(ctx, "my-token-123") + assert.Equal(t, "my-token-123", tokenFromContext(ctx)) +} + +// --------------------------------------------------------------------------- +// simpleBearerAuth +// --------------------------------------------------------------------------- + +func TestSimpleBearerAuth(t *testing.T) { + auth := &simpleBearerAuth{token: "tok123"} + authenticator, body := auth.NewAuthenticator(nil) + assert.Nil(t, body) + assert.NotNil(t, authenticator) + + sba := authenticator.(*simpleBearerAuthenticator) + assert.Equal(t, "tok123", sba.token) + + // Authorize should set the header + req := httptest.NewRequest(http.MethodGet, "/test", nil) + err := sba.Authorize(nil, req, "") + require.NoError(t, err) + assert.Equal(t, "Bearer tok123", req.Header.Get("Authorization")) + + // Clone should return an equivalent authenticator + cloned := sba.Clone() + assert.IsType(t, &simpleBearerAuthenticator{}, cloned) + assert.Equal(t, "tok123", cloned.(*simpleBearerAuthenticator).token) + + // Close should succeed + assert.NoError(t, sba.Close()) + + // Verify always returns false + ok, err := sba.Verify(nil, nil, "") + require.NoError(t, err) + assert.False(t, ok) +} + +func TestSimpleBearerAuth_EmptyToken(t *testing.T) { + auth := &simpleBearerAuth{token: ""} + authenticator, _ := auth.NewAuthenticator(nil) + sba := authenticator.(*simpleBearerAuthenticator) + + req := httptest.NewRequest(http.MethodGet, "/test", nil) + err := sba.Authorize(nil, req, "") + require.NoError(t, err) + // Should not set Authorization header with empty token + assert.Empty(t, req.Header.Get("Authorization")) +} + +// --------------------------------------------------------------------------- +// httpsFileInfo +// --------------------------------------------------------------------------- + +func TestHTTPSFileInfo(t *testing.T) { + fi := &httpsFileInfo{name: "test.txt", size: 100, isDir: false} + assert.Equal(t, "test.txt", fi.Name()) + assert.Equal(t, int64(100), fi.Size()) + assert.Equal(t, os.FileMode(0444), fi.Mode()) + assert.False(t, fi.IsDir()) + assert.Nil(t, fi.Sys()) + assert.False(t, fi.ModTime().IsZero()) // zero modtime gets replaced + + fiDir := &httpsFileInfo{name: "dir", isDir: true} + assert.True(t, fiDir.IsDir()) +} + +// --------------------------------------------------------------------------- +// httpsReadDirFile +// --------------------------------------------------------------------------- + +func TestHTTPSReadDirFile(t *testing.T) { + entries := []os.FileInfo{ + &httpsFileInfo{name: "a.txt"}, + &httpsFileInfo{name: "b.txt"}, + } + df := &httpsReadDirFile{name: "/listing", entries: entries} + + // Stat + info, err := df.Stat() + require.NoError(t, err) + assert.True(t, info.IsDir()) + assert.Equal(t, "listing", info.Name()) + + // Readdir + result, err := df.Readdir(-1) + require.NoError(t, err) + assert.Len(t, result, 2) + + // Unsupported operations + _, err = df.Read(nil) + assert.Error(t, err) + _, err = df.Seek(0, 0) + assert.Error(t, err) + _, err = df.Write(nil) + assert.Error(t, err) + require.NoError(t, df.Close()) +} + +func TestHTTPSReadDirFile_Partial(t *testing.T) { + entries := []os.FileInfo{ + &httpsFileInfo{name: "a.txt"}, + &httpsFileInfo{name: "b.txt"}, + &httpsFileInfo{name: "c.txt"}, + } + df := &httpsReadDirFile{name: "/dir", entries: entries} + + result, err := df.Readdir(2) + require.NoError(t, err) + assert.Len(t, result, 2) + + result2, err := df.Readdir(-1) + require.NoError(t, err) + assert.Len(t, result2, 1) +} + +// --------------------------------------------------------------------------- +// httpsWriteFile — unit tests +// --------------------------------------------------------------------------- + +func TestHTTPSWriteFile_UnsupportedOps(t *testing.T) { + wf := &httpsWriteFile{name: "/test"} + _, err := wf.Read(nil) + assert.Error(t, err) + _, err = wf.Seek(0, 0) + assert.Error(t, err) + _, err = wf.Readdir(-1) + assert.Error(t, err) +} + +func TestHTTPSWriteFile_Stat(t *testing.T) { + wf := &httpsWriteFile{name: "/test.txt"} + wf.buf = []byte("hello") + info, err := wf.Stat() + require.NoError(t, err) + assert.Equal(t, "test.txt", info.Name()) + assert.Equal(t, int64(5), info.Size()) +} + +// --------------------------------------------------------------------------- +// httpsReadFile — unit tests +// --------------------------------------------------------------------------- + +func TestHTTPSReadFile_Seek(t *testing.T) { + body := io.NopCloser(strings.NewReader("0123456789")) + rf := &httpsReadFile{ + name: "/test.bin", + body: body, + contentLength: 10, + } + defer rf.Close() + + // SeekStart + pos, err := rf.Seek(5, io.SeekStart) + require.NoError(t, err) + assert.Equal(t, int64(5), pos) + + // SeekCurrent + pos, err = rf.Seek(2, io.SeekCurrent) + require.NoError(t, err) + assert.Equal(t, int64(7), pos) + + // SeekEnd + pos, err = rf.Seek(-3, io.SeekEnd) + require.NoError(t, err) + assert.Equal(t, int64(7), pos) +} + +func TestHTTPSReadFile_Stat(t *testing.T) { + body := io.NopCloser(strings.NewReader("content")) + rf := &httpsReadFile{ + name: "/data/file.txt", + body: body, + contentLength: 7, + } + defer rf.Close() + + info, err := rf.Stat() + require.NoError(t, err) + assert.Equal(t, "file.txt", info.Name()) + assert.Equal(t, int64(7), info.Size()) + assert.False(t, info.IsDir()) +} + +func TestHTTPSReadFile_UnsupportedOps(t *testing.T) { + body := io.NopCloser(strings.NewReader("x")) + rf := &httpsReadFile{name: "/x", body: body} + defer rf.Close() + + _, err := rf.Write(nil) + assert.Error(t, err) + _, err = rf.Readdir(-1) + assert.Error(t, err) +} + +// --------------------------------------------------------------------------- +// Static token reading +// --------------------------------------------------------------------------- + +func TestHTTPSFileSystem_ReadStaticToken(t *testing.T) { + t.Run("FromFile", func(t *testing.T) { + dir := t.TempDir() + tokFile := dir + "/token" + require.NoError(t, os.WriteFile(tokFile, []byte(" mytoken \n"), 0600)) + + fs := &httpsFileSystem{staticTokenFile: tokFile, tokenMode: HTTPSTokenStatic} + assert.Equal(t, "mytoken", fs.readStaticToken()) + }) + + t.Run("MissingFile", func(t *testing.T) { + fs := &httpsFileSystem{staticTokenFile: "/nonexistent", tokenMode: HTTPSTokenStatic} + assert.Empty(t, fs.readStaticToken()) + }) + + t.Run("EmptyPath", func(t *testing.T) { + fs := &httpsFileSystem{tokenMode: HTTPSTokenStatic} + assert.Empty(t, fs.readStaticToken()) + }) +} + +// --------------------------------------------------------------------------- +// Integration test: HTTPS backend w/ a mock HTTP server +// --------------------------------------------------------------------------- + +func TestHTTPSBackend_PlainHTTP_Integration(t *testing.T) { + // Set up a simple in-memory file store served over HTTP + store := map[string][]byte{ + "/data/hello.txt": []byte("Hello, World!"), + } + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.Method { + case http.MethodOptions: + w.Header().Set("Allow", "GET, PUT, DELETE, HEAD, OPTIONS") + w.WriteHeader(http.StatusOK) + case http.MethodHead: + data, ok := store[r.URL.Path] + if !ok { + w.WriteHeader(http.StatusNotFound) + return + } + w.Header().Set("Content-Length", fmt.Sprintf("%d", len(data))) + w.WriteHeader(http.StatusOK) + case http.MethodGet: + data, ok := store[r.URL.Path] + if !ok { + w.WriteHeader(http.StatusNotFound) + return + } + w.Header().Set("Content-Length", fmt.Sprintf("%d", len(data))) + w.Write(data) + case http.MethodPut: + body, _ := io.ReadAll(r.Body) + store[r.URL.Path] = body + w.WriteHeader(http.StatusCreated) + case http.MethodDelete: + delete(store, r.URL.Path) + w.WriteHeader(http.StatusNoContent) + default: + w.WriteHeader(http.StatusMethodNotAllowed) + } + })) + defer server.Close() + + backend := newHTTPSBackend(HTTPSBackendOptions{ + ServiceURL: server.URL, + TokenMode: HTTPSTokenNone, + }) + + // Probe → should detect plain HTTP (no PROPFIND in Allow) + require.NoError(t, backend.CheckAvailability()) + assert.Equal(t, BackendModeHTTP, backend.BackendMode()) + + ctx := context.Background() + fs := backend.FileSystem() + + // Stat existing file + info, err := fs.Stat(ctx, "/data/hello.txt") + require.NoError(t, err) + assert.Equal(t, int64(13), info.Size()) + + // Stat non-existent file + _, err = fs.Stat(ctx, "/nope.txt") + assert.ErrorIs(t, err, os.ErrNotExist) + + // Read existing file + rf, err := fs.OpenFile(ctx, "/data/hello.txt", os.O_RDONLY, 0) + require.NoError(t, err) + data, err := io.ReadAll(rf) + require.NoError(t, err) + assert.Equal(t, "Hello, World!", string(data)) + rf.Close() + + // Write a new file + wf, err := fs.OpenFile(ctx, "/data/new.txt", os.O_CREATE|os.O_WRONLY, 0644) + require.NoError(t, err) + _, err = wf.Write([]byte("new content")) + require.NoError(t, err) + require.NoError(t, wf.Close()) + + // Verify it was stored + assert.Equal(t, []byte("new content"), store["/data/new.txt"]) + + // Delete + require.NoError(t, fs.RemoveAll(ctx, "/data/new.txt")) + _, exists := store["/data/new.txt"] + assert.False(t, exists) + + // Read non-existent should return ErrNotExist + _, err = fs.OpenFile(ctx, "/gone.txt", os.O_RDONLY, 0) + assert.ErrorIs(t, err, os.ErrNotExist) +} + +func TestHTTPSBackend_TokenPassthrough(t *testing.T) { + var receivedAuth string + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + receivedAuth = r.Header.Get("Authorization") + if r.Method == http.MethodOptions { + w.Header().Set("Allow", "GET, HEAD, OPTIONS") + w.WriteHeader(http.StatusOK) + return + } + w.Header().Set("Content-Length", "5") + w.Write([]byte("hello")) + })) + defer server.Close() + + backend := newHTTPSBackend(HTTPSBackendOptions{ + ServiceURL: server.URL, + TokenMode: HTTPSTokenPassthrough, + }) + require.NoError(t, backend.CheckAvailability()) + + ctx := WithClientToken(context.Background(), "client-bearer-xyz") + rf, err := backend.FileSystem().OpenFile(ctx, "/test.txt", os.O_RDONLY, 0) + require.NoError(t, err) + defer rf.Close() + + assert.Equal(t, "Bearer client-bearer-xyz", receivedAuth) +} + +func TestHTTPSBackend_StaticToken(t *testing.T) { + dir := t.TempDir() + tokFile := dir + "/token" + require.NoError(t, os.WriteFile(tokFile, []byte("static-tok"), 0600)) + + var receivedAuth string + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + receivedAuth = r.Header.Get("Authorization") + if r.Method == http.MethodOptions { + w.Header().Set("Allow", "GET, HEAD, OPTIONS") + w.WriteHeader(http.StatusOK) + return + } + w.Header().Set("Content-Length", "2") + w.Write([]byte("ok")) + })) + defer server.Close() + + backend := newHTTPSBackend(HTTPSBackendOptions{ + ServiceURL: server.URL, + TokenMode: HTTPSTokenStatic, + StaticTokenFile: tokFile, + }) + require.NoError(t, backend.CheckAvailability()) + + rf, err := backend.FileSystem().OpenFile(context.Background(), "/file.txt", os.O_RDONLY, 0) + require.NoError(t, err) + defer rf.Close() + + assert.Equal(t, "Bearer static-tok", receivedAuth) +} + +func TestHTTPSBackend_WebDAVProbe(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method == http.MethodOptions { + w.Header().Set("Allow", "GET, PUT, DELETE, HEAD, OPTIONS, PROPFIND, MKCOL, MOVE, COPY") + w.Header().Set("DAV", "1, 2") + w.WriteHeader(http.StatusOK) + return + } + w.WriteHeader(http.StatusMethodNotAllowed) + })) + defer server.Close() + + backend := newHTTPSBackend(HTTPSBackendOptions{ + ServiceURL: server.URL, + TokenMode: HTTPSTokenNone, + }) + require.NoError(t, backend.CheckAvailability()) + assert.Equal(t, BackendModeWebDAV, backend.BackendMode()) +} + +func TestHTTPSBackend_OPTIONSFails(t *testing.T) { + // server that immediately closes so OPTIONS fails + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method == http.MethodOptions { + // Force a connection close to trigger an error + hj, ok := w.(http.Hijacker) + if ok { + conn, _, _ := hj.Hijack() + conn.Close() + return + } + } + w.WriteHeader(http.StatusOK) + })) + defer server.Close() + + backend := newHTTPSBackend(HTTPSBackendOptions{ + ServiceURL: server.URL, + TokenMode: HTTPSTokenNone, + }) + // Should not error — just defaults to HTTP mode + require.NoError(t, backend.CheckAvailability()) + assert.Equal(t, BackendModeHTTP, backend.BackendMode()) +} + +func TestHTTPSBackend_NoChecksummer(t *testing.T) { + backend := newHTTPSBackend(HTTPSBackendOptions{ + ServiceURL: "https://example.com", + }) + assert.Nil(t, backend.Checksummer()) +} + +// --------------------------------------------------------------------------- +// davPath and upstreamURL tests +// --------------------------------------------------------------------------- + +func TestHTTPSFileSystem_DavPath(t *testing.T) { + fs := &httpsFileSystem{serviceURL: "https://example.com", storagePrefix: ""} + assert.Equal(t, "/foo.txt", fs.davPath("/foo.txt")) + assert.Equal(t, "/foo.txt", fs.davPath("foo.txt")) + + fs2 := &httpsFileSystem{serviceURL: "https://example.com", storagePrefix: "/prefix"} + assert.Equal(t, "/prefix/bar.txt", fs2.davPath("/bar.txt")) +} + +func TestHTTPSFileSystem_UpstreamURL(t *testing.T) { + fs := &httpsFileSystem{serviceURL: "https://example.com", storagePrefix: ""} + assert.Equal(t, "https://example.com/foo.txt", fs.upstreamURL("/foo.txt")) + + fs2 := &httpsFileSystem{serviceURL: "https://example.com", storagePrefix: "/pfx"} + assert.Equal(t, "https://example.com/pfx/bar.txt", fs2.upstreamURL("/bar.txt")) +} + +// --------------------------------------------------------------------------- +// getToken dispatch tests +// --------------------------------------------------------------------------- + +func TestHTTPSFileSystem_GetToken(t *testing.T) { + t.Run("None", func(t *testing.T) { + fs := &httpsFileSystem{tokenMode: HTTPSTokenNone} + assert.Empty(t, fs.getToken(context.Background())) + }) + + t.Run("Passthrough", func(t *testing.T) { + fs := &httpsFileSystem{tokenMode: HTTPSTokenPassthrough} + ctx := WithClientToken(context.Background(), "pass-tok") + assert.Equal(t, "pass-tok", fs.getToken(ctx)) + }) + + t.Run("PassthroughEmpty", func(t *testing.T) { + fs := &httpsFileSystem{tokenMode: HTTPSTokenPassthrough} + assert.Empty(t, fs.getToken(context.Background())) + }) + + t.Run("OAuthNoConfig", func(t *testing.T) { + fs := &httpsFileSystem{tokenMode: HTTPSTokenOAuth2} + assert.Empty(t, fs.getToken(context.Background())) + }) +} + +// --------------------------------------------------------------------------- +// HTTP-only backend: Mkdir and Rename should return errors +// --------------------------------------------------------------------------- + +func TestHTTPSBackend_HTTPOnly_UnsupportedOps(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method == http.MethodOptions { + w.Header().Set("Allow", "GET, PUT, DELETE, HEAD, OPTIONS") + w.WriteHeader(http.StatusOK) + return + } + w.WriteHeader(http.StatusOK) + })) + defer server.Close() + + backend := newHTTPSBackend(HTTPSBackendOptions{ + ServiceURL: server.URL, + TokenMode: HTTPSTokenNone, + }) + require.NoError(t, backend.CheckAvailability()) + + ctx := context.Background() + err := backend.FileSystem().Mkdir(ctx, "/newdir", 0755) + assert.Error(t, err) + assert.Contains(t, err.Error(), "mkdir not supported") + + err = backend.FileSystem().Rename(ctx, "/a", "/b") + assert.Error(t, err) + assert.Contains(t, err.Error(), "rename not supported") +} diff --git a/origin_serve/backend_s3.go b/origin_serve/backend_s3.go new file mode 100644 index 0000000000..9258b8473b --- /dev/null +++ b/origin_serve/backend_s3.go @@ -0,0 +1,545 @@ +/*************************************************************** + * + * Copyright (C) 2026, Pelican Project, Morgridge Institute for Research + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You may + * obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************/ + +package origin_serve + +import ( + "context" + "fmt" + "io" + "net/url" + "os" + "path" + "strings" + "sync" + "sync/atomic" + "time" + + log "github.com/sirupsen/logrus" + "gocloud.dev/blob" + _ "gocloud.dev/blob/azureblob" // register azblob:// URL opener + _ "gocloud.dev/blob/gcsblob" // register gs:// URL opener + _ "gocloud.dev/blob/memblob" // register mem:// URL opener (useful for testing) + _ "gocloud.dev/blob/s3blob" // register s3:// URL opener + "gocloud.dev/gcerrors" + "golang.org/x/net/webdav" + + "github.com/pelicanplatform/pelican/server_utils" +) + +// --------------------------------------------------------------------------- +// blobBackend implements server_utils.OriginBackend using gocloud.dev/blob. +// Supports S3, GCS (gs://), and Azure (azblob://) via driver imports. +// --------------------------------------------------------------------------- + +type blobBackend struct { + bucket *blob.Bucket + fs *blobFileSystem +} + +// BlobBackendOptions groups the parameters needed to construct a blob backend. +// There are two ways to open a bucket: +// 1. Set BlobURL to a gocloud.dev URL (e.g. "s3://bucket", "gs://bucket", "azblob://container"). +// 2. Set the S3-specific fields (ServiceURL, Region, Bucket, etc.) for backwards-compatible S3 config. +// If BlobURL is set it takes precedence. +type BlobBackendOptions struct { + // Generic gocloud.dev/blob URL — takes precedence over the S3-specific fields. + BlobURL string + + // S3-specific fields (used only when BlobURL is empty). + ServiceURL string // e.g. "https://s3.us-east-1.amazonaws.com" + Region string + Bucket string + AccessKey string + SecretKey string + URLStyle string // "path" or "virtual" (default: "path") + + // Common fields. + StoragePrefix string // optional key prefix within the bucket/container +} + +// buildS3BlobURL constructs an s3:// gocloud URL from the backward-compatible +// S3-specific fields in BlobBackendOptions. +func buildS3BlobURL(opts BlobBackendOptions) (string, error) { + if opts.Bucket == "" { + return "", fmt.Errorf("S3 bucket name is required when BlobURL is not set") + } + + u := &url.URL{ + Scheme: "s3", + Host: opts.Bucket, + } + q := u.Query() + + if opts.Region != "" { + q.Set("region", opts.Region) + } + + if opts.ServiceURL != "" { + q.Set("endpoint", opts.ServiceURL) + } + + urlStyle := strings.ToLower(opts.URLStyle) + if urlStyle != "virtual" { + q.Set("use_path_style", "true") + } + + u.RawQuery = q.Encode() + return u.String(), nil +} + +// newBlobBackend opens a gocloud.dev/blob bucket according to opts and returns +// a blobBackend. +func newBlobBackend(opts BlobBackendOptions) (*blobBackend, error) { + var ( + bucket *blob.Bucket + err error + ) + + blobURL := opts.BlobURL + if blobURL == "" { + // Build an s3:// URL from the backward-compatible S3-specific fields. + blobURL, err = buildS3BlobURL(opts) + if err != nil { + return nil, err + } + } + + // If per-export S3 credentials were provided, set them in the environment + // so the gocloud AWS credential chain picks them up. + if opts.AccessKey != "" && opts.SecretKey != "" { + os.Setenv("AWS_ACCESS_KEY_ID", opts.AccessKey) + os.Setenv("AWS_SECRET_ACCESS_KEY", opts.SecretKey) + } else if strings.HasPrefix(blobURL, "s3://") { + // No credentials supplied — request anonymous access unless the env + // already has credentials configured. + if os.Getenv("AWS_ACCESS_KEY_ID") == "" { + // Append anonymous=true so the SDK doesn't try IAM, etc. + if strings.Contains(blobURL, "?") { + blobURL += "&anonymous=true" + } else { + blobURL += "?anonymous=true" + } + } + } + + log.Infof("Opening blob bucket via URL: %s", blobURL) + bucket, err = blob.OpenBucket(context.Background(), blobURL) + if err != nil { + return nil, fmt.Errorf("failed to open blob bucket from URL %q: %w", blobURL, err) + } + + // If a storagePrefix is configured, scope all operations to it. + prefix := strings.TrimPrefix(opts.StoragePrefix, "/") + if prefix != "" { + prefix = strings.TrimSuffix(prefix, "/") + "/" + bucket = blob.PrefixedBucket(bucket, prefix) + } + + fs := &blobFileSystem{bucket: bucket} + return &blobBackend{bucket: bucket, fs: fs}, nil +} + +func (b *blobBackend) CheckAvailability() error { + ok, err := b.bucket.IsAccessible(context.Background()) + if err != nil { + return fmt.Errorf("blob bucket accessibility check failed: %w", err) + } + if !ok { + return fmt.Errorf("blob bucket is not accessible") + } + return nil +} + +func (b *blobBackend) FileSystem() webdav.FileSystem { return b.fs } +func (b *blobBackend) Checksummer() server_utils.OriginChecksummer { + return nil // Cloud blob backends don't support xattr-based checksums +} + +// Close cleans up the underlying bucket handle. +func (b *blobBackend) Close() error { + return b.bucket.Close() +} + +// --------------------------------------------------------------------------- +// blobFileSystem — implements webdav.FileSystem backed by gocloud.dev/blob. +// --------------------------------------------------------------------------- + +type blobFileSystem struct { + bucket *blob.Bucket +} + +// blobKey normalises a webdav path ("/foo/bar") to a blob key ("foo/bar"). +func blobKey(name string) string { + return strings.TrimPrefix(name, "/") +} + +// Mkdir implements webdav.FileSystem. +// Blob stores don't have real directories; we create a zero-byte marker. +func (fs *blobFileSystem) Mkdir(ctx context.Context, name string, _ os.FileMode) error { + key := blobKey(name) + if !strings.HasSuffix(key, "/") { + key += "/" + } + return fs.bucket.WriteAll(ctx, key, nil, nil) +} + +// OpenFile implements webdav.FileSystem. +func (fs *blobFileSystem) OpenFile(ctx context.Context, name string, flag int, _ os.FileMode) (webdav.File, error) { + key := blobKey(name) + + // Write mode — return a writer that uploads on Close. + if flag&(os.O_WRONLY|os.O_RDWR|os.O_CREATE|os.O_TRUNC) != 0 { + return newBlobWriteFile(ctx, fs.bucket, key, name), nil + } + + // Check if this is a "directory" by listing with prefix. + dirPrefix := key + if dirPrefix != "" && !strings.HasSuffix(dirPrefix, "/") { + dirPrefix += "/" + } + entries, err := fs.listDir(ctx, dirPrefix) + if err == nil && len(entries) > 0 { + return &blobDirFile{name: name, entries: entries}, nil + } + + // Read mode — open via blob.NewReader (supports seek). + reader, err := fs.bucket.NewReader(ctx, key, nil) + if err != nil { + if isNotFound(err) { + return nil, os.ErrNotExist + } + return nil, fmt.Errorf("blob read %q: %w", key, err) + } + + return &blobReadFile{ + name: name, + reader: reader, + size: reader.Size(), + mod: reader.ModTime(), + }, nil +} + +// RemoveAll implements webdav.FileSystem. +func (fs *blobFileSystem) RemoveAll(ctx context.Context, name string) error { + key := blobKey(name) + + // Try deleting as a plain object first. + err := fs.bucket.Delete(ctx, key) + if err != nil && !isNotFound(err) { + return err + } + + // Also try the directory marker. + _ = fs.bucket.Delete(ctx, key+"/") + return nil +} + +// Rename implements webdav.FileSystem. +func (fs *blobFileSystem) Rename(ctx context.Context, oldName, newName string) error { + oldKey := blobKey(oldName) + newKey := blobKey(newName) + + if err := fs.bucket.Copy(ctx, newKey, oldKey, nil); err != nil { + return fmt.Errorf("blob copy %q -> %q: %w", oldKey, newKey, err) + } + return fs.bucket.Delete(ctx, oldKey) +} + +// Stat implements webdav.FileSystem. +func (fs *blobFileSystem) Stat(ctx context.Context, name string) (os.FileInfo, error) { + key := blobKey(name) + + attrs, err := fs.bucket.Attributes(ctx, key) + if err == nil { + return &blobFileInfo{ + name: path.Base(name), + size: attrs.Size, + mod: attrs.ModTime, + }, nil + } + + // Not found as an object — check if it's a directory prefix. + dirPrefix := key + if dirPrefix != "" && !strings.HasSuffix(dirPrefix, "/") { + dirPrefix += "/" + } + iter := fs.bucket.List(&blob.ListOptions{Prefix: dirPrefix, Delimiter: "/"}) + obj, iterErr := iter.Next(ctx) + if iterErr == nil && obj != nil { + return &blobFileInfo{name: path.Base(name), isDir: true}, nil + } + + if isNotFound(err) { + return nil, os.ErrNotExist + } + return nil, err +} + +// listDir lists immediate children under prefix (with "/" delimiter). +func (fs *blobFileSystem) listDir(ctx context.Context, prefix string) ([]os.FileInfo, error) { + iter := fs.bucket.List(&blob.ListOptions{Prefix: prefix, Delimiter: "/"}) + var entries []os.FileInfo + for { + obj, err := iter.Next(ctx) + if err == io.EOF { + break + } + if err != nil { + return nil, err + } + baseName := strings.TrimPrefix(obj.Key, prefix) + baseName = strings.TrimSuffix(baseName, "/") + if baseName == "" { + continue + } + entries = append(entries, &blobFileInfo{ + name: baseName, + size: obj.Size, + mod: obj.ModTime, + isDir: obj.IsDir, + }) + } + return entries, nil +} + +// isNotFound returns true if the error represents a "not found" condition. +func isNotFound(err error) bool { + if err == nil { + return false + } + return gcerrors.Code(err) == gcerrors.NotFound +} + +// --------------------------------------------------------------------------- +// blobFileInfo — implements os.FileInfo +// --------------------------------------------------------------------------- + +type blobFileInfo struct { + name string + size int64 + mod time.Time + isDir bool +} + +func (fi *blobFileInfo) Name() string { return fi.name } +func (fi *blobFileInfo) Size() int64 { return fi.size } +func (fi *blobFileInfo) Mode() os.FileMode { return 0444 } +func (fi *blobFileInfo) ModTime() time.Time { + if fi.mod.IsZero() { + return time.Now() + } + return fi.mod +} +func (fi *blobFileInfo) IsDir() bool { return fi.isDir } +func (fi *blobFileInfo) Sys() interface{} { return nil } + +// --------------------------------------------------------------------------- +// blobReadFile — read-only file backed by a blob.Reader. +// blob.Reader already supports Read and Seek. +// Uses atomic offset tracking for concurrent safety. +// --------------------------------------------------------------------------- + +type blobReadFile struct { + name string + reader *blob.Reader + size int64 + mod time.Time + offset atomic.Int64 +} + +func (f *blobReadFile) Read(p []byte) (int, error) { + n, err := f.reader.Read(p) + f.offset.Add(int64(n)) + return n, err +} + +func (f *blobReadFile) Seek(offset int64, whence int) (int64, error) { + n, err := f.reader.Seek(offset, whence) + if err == nil { + f.offset.Store(n) + } + return n, err +} + +func (f *blobReadFile) Close() error { return f.reader.Close() } + +func (f *blobReadFile) Write(_ []byte) (int, error) { + return 0, fmt.Errorf("write not supported on read file") +} + +func (f *blobReadFile) Readdir(_ int) ([]os.FileInfo, error) { + return nil, fmt.Errorf("readdir not supported on file") +} + +func (f *blobReadFile) Stat() (os.FileInfo, error) { + return &blobFileInfo{ + name: path.Base(f.name), + size: f.size, + mod: f.mod, + }, nil +} + +// --------------------------------------------------------------------------- +// blobWriteFile — write file backed by blob.Writer. +// Streams writes directly through to the underlying blob store. +// Uses a mutex to protect concurrent writes. +// --------------------------------------------------------------------------- + +type blobWriteFile struct { + ctx context.Context + bucket *blob.Bucket + key string + name string + + mu sync.Mutex + writer *blob.Writer + opened bool + closed bool +} + +func newBlobWriteFile(ctx context.Context, bucket *blob.Bucket, key, name string) *blobWriteFile { + return &blobWriteFile{ctx: ctx, bucket: bucket, key: key, name: name} +} + +// ensureWriter lazily opens the blob.Writer on first Write. +func (f *blobWriteFile) ensureWriter() error { + if f.opened { + return nil + } + w, err := f.bucket.NewWriter(f.ctx, f.key, nil) + if err != nil { + return fmt.Errorf("blob new writer %q: %w", f.key, err) + } + f.writer = w + f.opened = true + return nil +} + +func (f *blobWriteFile) Write(p []byte) (int, error) { + f.mu.Lock() + defer f.mu.Unlock() + if f.closed { + return 0, fmt.Errorf("write to closed file") + } + if err := f.ensureWriter(); err != nil { + return 0, err + } + return f.writer.Write(p) +} + +func (f *blobWriteFile) Close() error { + f.mu.Lock() + defer f.mu.Unlock() + if f.closed { + return nil + } + f.closed = true + if !f.opened { + // Nothing was written; create an empty object. + return f.bucket.WriteAll(f.ctx, f.key, nil, nil) + } + return f.writer.Close() +} + +func (f *blobWriteFile) Read(_ []byte) (int, error) { + return 0, fmt.Errorf("read not supported on write file") +} + +func (f *blobWriteFile) Seek(_ int64, _ int) (int64, error) { + return 0, fmt.Errorf("seek not supported on write file") +} + +func (f *blobWriteFile) Readdir(_ int) ([]os.FileInfo, error) { + return nil, fmt.Errorf("readdir not supported on write file") +} + +func (f *blobWriteFile) Stat() (os.FileInfo, error) { + return &blobFileInfo{ + name: path.Base(f.name), + }, nil +} + +// --------------------------------------------------------------------------- +// blobDirFile — directory representation for blob listings. +// --------------------------------------------------------------------------- + +type blobDirFile struct { + name string + entries []os.FileInfo +} + +func (f *blobDirFile) Read(_ []byte) (int, error) { + return 0, fmt.Errorf("read not supported on directory") +} + +func (f *blobDirFile) Seek(_ int64, _ int) (int64, error) { + return 0, fmt.Errorf("seek not supported on directory") +} + +func (f *blobDirFile) Close() error { return nil } + +func (f *blobDirFile) Write(_ []byte) (int, error) { + return 0, fmt.Errorf("write not supported on directory") +} + +func (f *blobDirFile) Readdir(count int) ([]os.FileInfo, error) { + if count <= 0 || count > len(f.entries) { + result := f.entries + f.entries = nil + return result, nil + } + result := f.entries[:count] + f.entries = f.entries[count:] + return result, nil +} + +func (f *blobDirFile) Stat() (os.FileInfo, error) { + return &blobFileInfo{ + name: path.Base(f.name), + isDir: true, + }, nil +} + +// --------------------------------------------------------------------------- +// S3 credential loading (unchanged — reads key files from disk) +// --------------------------------------------------------------------------- + +func loadS3Credentials(accessKeyFile, secretKeyFile string) (accessKey, secretKey string, err error) { + if accessKeyFile == "" || secretKeyFile == "" { + return "", "", nil + } + akBytes, rErr := os.ReadFile(accessKeyFile) + if rErr != nil { + return "", "", fmt.Errorf("failed to read S3 access key file %s: %w", accessKeyFile, rErr) + } + skBytes, rErr := os.ReadFile(secretKeyFile) + if rErr != nil { + return "", "", fmt.Errorf("failed to read S3 secret key file %s: %w", secretKeyFile, rErr) + } + return strings.TrimSpace(string(akBytes)), strings.TrimSpace(string(skBytes)), nil +} + +// parseHTTPDate parses an HTTP-Date header value. +func parseHTTPDate(s string) time.Time { + t, err := time.Parse(time.RFC1123, s) + if err != nil { + return time.Time{} + } + return t +} diff --git a/origin_serve/backend_s3_minio_test.go b/origin_serve/backend_s3_minio_test.go new file mode 100644 index 0000000000..ac7213a1e5 --- /dev/null +++ b/origin_serve/backend_s3_minio_test.go @@ -0,0 +1,309 @@ +//go:build !windows + +/*************************************************************** + * + * Copyright (C) 2026, Pelican Project, Morgridge Institute for Research + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You may + * obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************/ + +package origin_serve + +import ( + "context" + "io" + "os" + "os/exec" + "path/filepath" + "regexp" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// skipIfNoMinio skips the test if the minio binary is not available on PATH. +func skipIfNoMinio(t *testing.T) { + t.Helper() + if _, err := exec.LookPath("minio"); err != nil { + t.Skip("minio not found on PATH; skipping S3 integration test") + } +} + +// startMinio launches a minio server bound to 127.0.0.1:0 (OS-assigned port), +// parses the actual listening port from minio's log output, and returns the +// endpoint URL, access key, and secret key. The server is killed when the test +// completes. +// +// This avoids the TOCTOU race inherent in picking a free port first and then +// passing it to minio. +func startMinio(t *testing.T) (endpoint, accessKey, secretKey string) { + t.Helper() + skipIfNoMinio(t) + + dataDir := t.TempDir() + + accessKey = "minioadmin" + secretKey = "minioadmin" + + cmd := exec.Command("minio", "server", + "--address", "127.0.0.1:0", + "--console-address", "127.0.0.2:0", + dataDir, + ) + cmd.Env = append(os.Environ(), + "MINIO_ROOT_USER="+accessKey, + "MINIO_ROOT_PASSWORD="+secretKey, + ) + + // Capture stdout so we can parse the "S3-API:" line for the real port. + // Minio writes its banner to stderr, so merge stderr into stdout. + logPath := filepath.Join(t.TempDir(), "minio.log") + logFile, err := os.Create(logPath) + require.NoError(t, err) + t.Cleanup(func() { logFile.Close() }) + cmd.Stdout = logFile + cmd.Stderr = logFile + + require.NoError(t, cmd.Start(), "failed to start minio") + t.Cleanup(func() { + cmd.Process.Kill() + cmd.Wait() //nolint:errcheck + }) + + // Minio prints a line like: + // S3-API: http://127.0.0.1:43219 + // Poll the log file until we find it (with a 30-second deadline). + apiRe := regexp.MustCompile(`S3-API:\s+(https?://\S+)`) + require.Eventually(t, func() bool { + data, err := os.ReadFile(logPath) + if err != nil { + return false + } + if m := apiRe.FindSubmatch(data); m != nil { + endpoint = string(m[1]) + return true + } + return false + }, 30*time.Second, 200*time.Millisecond, "minio never printed an S3-API endpoint") + + // Pre-create the bucket directory on disk so it's available immediately. + bucketDir := filepath.Join(dataDir, "test-bucket") + require.NoError(t, os.Mkdir(bucketDir, 0755)) + + return endpoint, accessKey, secretKey +} + +// --------------------------------------------------------------------------- +// TestBlobBackend_MinioS3 — full integration test using a real minio server. +// Tests the complete S3 flow: build URL, open bucket, write, read, stat, +// rename, delete, directory listing. +// Skipped if minio is not installed. +// --------------------------------------------------------------------------- + +func TestBlobBackend_MinioS3(t *testing.T) { + skipIfNoMinio(t) + + endpoint, accessKey, secretKey := startMinio(t) + + backend, err := newBlobBackend(BlobBackendOptions{ + ServiceURL: endpoint, + Region: "us-east-1", + Bucket: "test-bucket", + AccessKey: accessKey, + SecretKey: secretKey, + URLStyle: "path", + }) + require.NoError(t, err) + defer backend.Close() + + ctx := context.Background() + + t.Run("CheckAvailability", func(t *testing.T) { + require.NoError(t, backend.CheckAvailability()) + }) + + t.Run("WriteAndRead", func(t *testing.T) { + content := []byte("Hello from MinIO integration test!") + + wf, err := backend.FileSystem().OpenFile(ctx, "/greeting.txt", os.O_CREATE|os.O_WRONLY, 0644) + require.NoError(t, err) + n, err := wf.Write(content) + require.NoError(t, err) + assert.Equal(t, len(content), n) + require.NoError(t, wf.Close()) + + rf, err := backend.FileSystem().OpenFile(ctx, "/greeting.txt", os.O_RDONLY, 0) + require.NoError(t, err) + got, err := io.ReadAll(rf) + require.NoError(t, err) + assert.Equal(t, content, got) + rf.Close() + }) + + t.Run("Stat", func(t *testing.T) { + // Write an object directly + wf, err := backend.FileSystem().OpenFile(ctx, "/statfile.bin", os.O_CREATE|os.O_WRONLY, 0644) + require.NoError(t, err) + _, err = wf.Write([]byte("0123456789")) + require.NoError(t, err) + require.NoError(t, wf.Close()) + + info, err := backend.FileSystem().Stat(ctx, "/statfile.bin") + require.NoError(t, err) + assert.Equal(t, int64(10), info.Size()) + assert.Equal(t, "statfile.bin", info.Name()) + assert.False(t, info.IsDir()) + }) + + t.Run("StatNonExistent", func(t *testing.T) { + _, err := backend.FileSystem().Stat(ctx, "/nonexistent.txt") + assert.ErrorIs(t, err, os.ErrNotExist) + }) + + t.Run("ReadNonExistent", func(t *testing.T) { + _, err := backend.FileSystem().OpenFile(ctx, "/does-not-exist.txt", os.O_RDONLY, 0) + assert.ErrorIs(t, err, os.ErrNotExist) + }) + + t.Run("Rename", func(t *testing.T) { + wf, err := backend.FileSystem().OpenFile(ctx, "/rename-src.txt", os.O_CREATE|os.O_WRONLY, 0644) + require.NoError(t, err) + _, err = wf.Write([]byte("rename me")) + require.NoError(t, err) + require.NoError(t, wf.Close()) + + require.NoError(t, backend.FileSystem().Rename(ctx, "/rename-src.txt", "/rename-dst.txt")) + + _, err = backend.FileSystem().Stat(ctx, "/rename-src.txt") + assert.ErrorIs(t, err, os.ErrNotExist) + + info, err := backend.FileSystem().Stat(ctx, "/rename-dst.txt") + require.NoError(t, err) + assert.Equal(t, int64(9), info.Size()) + }) + + t.Run("RemoveAll", func(t *testing.T) { + wf, err := backend.FileSystem().OpenFile(ctx, "/delete-me.txt", os.O_CREATE|os.O_WRONLY, 0644) + require.NoError(t, err) + _, err = wf.Write([]byte("gone")) + require.NoError(t, err) + require.NoError(t, wf.Close()) + + require.NoError(t, backend.FileSystem().RemoveAll(ctx, "/delete-me.txt")) + + _, err = backend.FileSystem().Stat(ctx, "/delete-me.txt") + assert.ErrorIs(t, err, os.ErrNotExist) + }) + + t.Run("DirectoryListing", func(t *testing.T) { + // Write multiple objects under a "directory" + for _, name := range []string{"/listing/a.txt", "/listing/b.txt", "/listing/c.txt"} { + wf, err := backend.FileSystem().OpenFile(ctx, name, os.O_CREATE|os.O_WRONLY, 0644) + require.NoError(t, err) + _, err = wf.Write([]byte(name)) + require.NoError(t, err) + require.NoError(t, wf.Close()) + } + + f, err := backend.FileSystem().OpenFile(ctx, "/listing", os.O_RDONLY, 0) + require.NoError(t, err) + defer f.Close() + + entries, err := f.Readdir(-1) + require.NoError(t, err) + assert.Len(t, entries, 3) + + names := make(map[string]bool) + for _, e := range entries { + names[e.Name()] = true + } + assert.True(t, names["a.txt"]) + assert.True(t, names["b.txt"]) + assert.True(t, names["c.txt"]) + }) + + t.Run("SeekOnRead", func(t *testing.T) { + content := []byte("0123456789ABCDEF") + wf, err := backend.FileSystem().OpenFile(ctx, "/seekable.bin", os.O_CREATE|os.O_WRONLY, 0644) + require.NoError(t, err) + _, err = wf.Write(content) + require.NoError(t, err) + require.NoError(t, wf.Close()) + + rf, err := backend.FileSystem().OpenFile(ctx, "/seekable.bin", os.O_RDONLY, 0) + require.NoError(t, err) + defer rf.Close() + + pos, err := rf.Seek(10, io.SeekStart) + require.NoError(t, err) + assert.Equal(t, int64(10), pos) + + buf := make([]byte, 6) + n, err := rf.Read(buf) + // Read may return io.EOF along with the final data — that's valid + if err != nil { + assert.ErrorIs(t, err, io.EOF) + } + assert.Equal(t, 6, n) + assert.Equal(t, "ABCDEF", string(buf)) + }) + + t.Run("WriteEmptyObject", func(t *testing.T) { + wf, err := backend.FileSystem().OpenFile(ctx, "/empty.txt", os.O_CREATE|os.O_WRONLY, 0644) + require.NoError(t, err) + require.NoError(t, wf.Close()) + + info, err := backend.FileSystem().Stat(ctx, "/empty.txt") + require.NoError(t, err) + assert.Equal(t, int64(0), info.Size()) + }) + + t.Run("StoragePrefix", func(t *testing.T) { + prefixedBackend, err := newBlobBackend(BlobBackendOptions{ + ServiceURL: endpoint, + Region: "us-east-1", + Bucket: "test-bucket", + AccessKey: accessKey, + SecretKey: secretKey, + URLStyle: "path", + StoragePrefix: "/prefixed", + }) + require.NoError(t, err) + defer prefixedBackend.Close() + + wf, err := prefixedBackend.FileSystem().OpenFile(ctx, "/scoped.txt", os.O_CREATE|os.O_WRONLY, 0644) + require.NoError(t, err) + _, err = wf.Write([]byte("scoped content")) + require.NoError(t, err) + require.NoError(t, wf.Close()) + + // Read back via the prefixed backend + rf, err := prefixedBackend.FileSystem().OpenFile(ctx, "/scoped.txt", os.O_RDONLY, 0) + require.NoError(t, err) + got, err := io.ReadAll(rf) + require.NoError(t, err) + assert.Equal(t, "scoped content", string(got)) + rf.Close() + + // The un-prefixed backend should see it at /prefixed/scoped.txt + rf2, err := backend.FileSystem().OpenFile(ctx, "/prefixed/scoped.txt", os.O_RDONLY, 0) + require.NoError(t, err) + got2, err := io.ReadAll(rf2) + require.NoError(t, err) + assert.Equal(t, "scoped content", string(got2)) + rf2.Close() + }) +} diff --git a/origin_serve/backend_s3_test.go b/origin_serve/backend_s3_test.go new file mode 100644 index 0000000000..929e1ee891 --- /dev/null +++ b/origin_serve/backend_s3_test.go @@ -0,0 +1,622 @@ +/*************************************************************** + * + * Copyright (C) 2026, Pelican Project, Morgridge Institute for Research + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You may + * obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************/ + +package origin_serve + +import ( + "context" + "io" + "os" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "gocloud.dev/blob" + "gocloud.dev/blob/memblob" +) + +// --------------------------------------------------------------------------- +// buildS3BlobURL unit tests +// --------------------------------------------------------------------------- + +func TestBuildS3BlobURL(t *testing.T) { + t.Run("FullOptions", func(t *testing.T) { + url, err := buildS3BlobURL(BlobBackendOptions{ + Bucket: "my-bucket", + Region: "us-west-2", + ServiceURL: "https://s3.example.com", + URLStyle: "path", + }) + require.NoError(t, err) + assert.Contains(t, url, "s3://my-bucket") + assert.Contains(t, url, "region=us-west-2") + assert.Contains(t, url, "endpoint=") + assert.Contains(t, url, "use_path_style=true") + }) + + t.Run("VirtualHostStyle", func(t *testing.T) { + url, err := buildS3BlobURL(BlobBackendOptions{ + Bucket: "my-bucket", + Region: "eu-central-1", + URLStyle: "virtual", + }) + require.NoError(t, err) + assert.Contains(t, url, "s3://my-bucket") + assert.NotContains(t, url, "use_path_style") + }) + + t.Run("DefaultStyleIsPath", func(t *testing.T) { + url, err := buildS3BlobURL(BlobBackendOptions{ + Bucket: "my-bucket", + }) + require.NoError(t, err) + assert.Contains(t, url, "use_path_style=true") + }) + + t.Run("MissingBucket", func(t *testing.T) { + _, err := buildS3BlobURL(BlobBackendOptions{ + Region: "us-east-1", + }) + require.Error(t, err) + assert.Contains(t, err.Error(), "bucket name is required") + }) + + t.Run("MinimalOptions", func(t *testing.T) { + url, err := buildS3BlobURL(BlobBackendOptions{ + Bucket: "test", + }) + require.NoError(t, err) + assert.Equal(t, "s3://test?use_path_style=true", url) + }) +} + +// --------------------------------------------------------------------------- +// blobKey unit tests +// --------------------------------------------------------------------------- + +func TestBlobKey(t *testing.T) { + assert.Equal(t, "foo/bar", blobKey("/foo/bar")) + assert.Equal(t, "foo", blobKey("/foo")) + assert.Equal(t, "foo", blobKey("foo")) + assert.Equal(t, "", blobKey("/")) + assert.Equal(t, "", blobKey("")) +} + +// --------------------------------------------------------------------------- +// loadS3Credentials unit tests +// --------------------------------------------------------------------------- + +func TestLoadS3Credentials(t *testing.T) { + t.Run("EmptyPaths", func(t *testing.T) { + ak, sk, err := loadS3Credentials("", "") + require.NoError(t, err) + assert.Empty(t, ak) + assert.Empty(t, sk) + }) + + t.Run("ValidFiles", func(t *testing.T) { + dir := t.TempDir() + akFile := dir + "/access_key" + skFile := dir + "/secret_key" + require.NoError(t, os.WriteFile(akFile, []byte(" AKID123 \n"), 0600)) + require.NoError(t, os.WriteFile(skFile, []byte(" SECRET456 \n"), 0600)) + + ak, sk, err := loadS3Credentials(akFile, skFile) + require.NoError(t, err) + assert.Equal(t, "AKID123", ak) + assert.Equal(t, "SECRET456", sk) + }) + + t.Run("MissingAccessKeyFile", func(t *testing.T) { + dir := t.TempDir() + skFile := dir + "/secret_key" + require.NoError(t, os.WriteFile(skFile, []byte("SECRET"), 0600)) + + _, _, err := loadS3Credentials(dir+"/nonexistent", skFile) + require.Error(t, err) + assert.Contains(t, err.Error(), "access key file") + }) + + t.Run("MissingSecretKeyFile", func(t *testing.T) { + dir := t.TempDir() + akFile := dir + "/access_key" + require.NoError(t, os.WriteFile(akFile, []byte("AKID"), 0600)) + + _, _, err := loadS3Credentials(akFile, dir+"/nonexistent") + require.Error(t, err) + assert.Contains(t, err.Error(), "secret key file") + }) +} + +// --------------------------------------------------------------------------- +// parseHTTPDate unit tests +// --------------------------------------------------------------------------- + +func TestParseHTTPDate(t *testing.T) { + t.Run("Valid", func(t *testing.T) { + ts := parseHTTPDate("Mon, 02 Jan 2006 15:04:05 GMT") + assert.Equal(t, 2006, ts.Year()) + assert.Equal(t, 2, ts.Day()) + }) + + t.Run("Invalid", func(t *testing.T) { + ts := parseHTTPDate("not a date") + assert.True(t, ts.IsZero()) + }) + + t.Run("Empty", func(t *testing.T) { + ts := parseHTTPDate("") + assert.True(t, ts.IsZero()) + }) +} + +// --------------------------------------------------------------------------- +// blobFileSystem using memblob — comprehensive integration tests +// --------------------------------------------------------------------------- + +func newMemBlobFS(t *testing.T) (*blobFileSystem, *blob.Bucket) { + t.Helper() + bucket := memblob.OpenBucket(nil) + t.Cleanup(func() { bucket.Close() }) + return &blobFileSystem{bucket: bucket}, bucket +} + +func TestBlobFileSystem_WriteAndRead(t *testing.T) { + fs, _ := newMemBlobFS(t) + ctx := context.Background() + content := []byte("hello, blob world!") + + // Write a file + wf, err := fs.OpenFile(ctx, "/data/greeting.txt", os.O_CREATE|os.O_WRONLY, 0644) + require.NoError(t, err) + n, err := wf.Write(content) + require.NoError(t, err) + assert.Equal(t, len(content), n) + require.NoError(t, wf.Close()) + + // Read it back + rf, err := fs.OpenFile(ctx, "/data/greeting.txt", os.O_RDONLY, 0) + require.NoError(t, err) + defer rf.Close() + + got, err := io.ReadAll(rf) + require.NoError(t, err) + assert.Equal(t, content, got) +} + +func TestBlobFileSystem_WriteEmpty(t *testing.T) { + fs, bucket := newMemBlobFS(t) + ctx := context.Background() + + wf, err := fs.OpenFile(ctx, "/empty.txt", os.O_CREATE|os.O_WRONLY, 0644) + require.NoError(t, err) + // Close without writing — should create an empty object + require.NoError(t, wf.Close()) + + exists, err := bucket.Exists(ctx, "empty.txt") + require.NoError(t, err) + assert.True(t, exists) +} + +func TestBlobFileSystem_ReadNonExistent(t *testing.T) { + fs, _ := newMemBlobFS(t) + ctx := context.Background() + + _, err := fs.OpenFile(ctx, "/does/not/exist.txt", os.O_RDONLY, 0) + assert.ErrorIs(t, err, os.ErrNotExist) +} + +func TestBlobFileSystem_Stat(t *testing.T) { + fs, bucket := newMemBlobFS(t) + ctx := context.Background() + + // Write an object + require.NoError(t, bucket.WriteAll(ctx, "doc/readme.md", []byte("# README"), nil)) + + t.Run("ObjectExists", func(t *testing.T) { + info, err := fs.Stat(ctx, "/doc/readme.md") + require.NoError(t, err) + assert.Equal(t, "readme.md", info.Name()) + assert.Equal(t, int64(8), info.Size()) + assert.False(t, info.IsDir()) + }) + + t.Run("DirectoryPrefix", func(t *testing.T) { + info, err := fs.Stat(ctx, "/doc") + require.NoError(t, err) + assert.Equal(t, "doc", info.Name()) + assert.True(t, info.IsDir()) + }) + + t.Run("NonExistent", func(t *testing.T) { + _, err := fs.Stat(ctx, "/nope") + assert.ErrorIs(t, err, os.ErrNotExist) + }) +} + +func TestBlobFileSystem_Mkdir(t *testing.T) { + fs, bucket := newMemBlobFS(t) + ctx := context.Background() + + require.NoError(t, fs.Mkdir(ctx, "/mydir", 0755)) + + // Directory marker object should exist + exists, err := bucket.Exists(ctx, "mydir/") + require.NoError(t, err) + assert.True(t, exists) + + // Stat should report as directory + info, err := fs.Stat(ctx, "/mydir") + require.NoError(t, err) + assert.True(t, info.IsDir()) +} + +func TestBlobFileSystem_RemoveAll(t *testing.T) { + fs, bucket := newMemBlobFS(t) + ctx := context.Background() + + require.NoError(t, bucket.WriteAll(ctx, "removeme.txt", []byte("gone"), nil)) + require.NoError(t, fs.RemoveAll(ctx, "/removeme.txt")) + + exists, err := bucket.Exists(ctx, "removeme.txt") + require.NoError(t, err) + assert.False(t, exists) +} + +func TestBlobFileSystem_RemoveAllDirectory(t *testing.T) { + fs, bucket := newMemBlobFS(t) + ctx := context.Background() + + // Create a dir marker and a file "inside" it + require.NoError(t, bucket.WriteAll(ctx, "dir/", nil, nil)) + require.NoError(t, bucket.WriteAll(ctx, "dir/file.txt", []byte("data"), nil)) + + // RemoveAll only removes the object itself + directory marker, not children + require.NoError(t, fs.RemoveAll(ctx, "/dir")) + + // Directory marker should be gone + exists, err := bucket.Exists(ctx, "dir/") + require.NoError(t, err) + assert.False(t, exists) +} + +func TestBlobFileSystem_Rename(t *testing.T) { + fs, bucket := newMemBlobFS(t) + ctx := context.Background() + + require.NoError(t, bucket.WriteAll(ctx, "old.txt", []byte("content"), nil)) + require.NoError(t, fs.Rename(ctx, "/old.txt", "/new.txt")) + + // Old should not exist + exists, err := bucket.Exists(ctx, "old.txt") + require.NoError(t, err) + assert.False(t, exists) + + // New should exist with same content + data, err := bucket.ReadAll(ctx, "new.txt") + require.NoError(t, err) + assert.Equal(t, []byte("content"), data) +} + +func TestBlobFileSystem_DirectoryListing(t *testing.T) { + fs, bucket := newMemBlobFS(t) + ctx := context.Background() + + // Create files under a prefix + require.NoError(t, bucket.WriteAll(ctx, "listing/a.txt", []byte("a"), nil)) + require.NoError(t, bucket.WriteAll(ctx, "listing/b.txt", []byte("bb"), nil)) + require.NoError(t, bucket.WriteAll(ctx, "listing/sub/c.txt", []byte("ccc"), nil)) + + // Open the directory + f, err := fs.OpenFile(ctx, "/listing", os.O_RDONLY, 0) + require.NoError(t, err) + defer f.Close() + + entries, err := f.Readdir(-1) + require.NoError(t, err) + assert.Len(t, entries, 3) // a.txt, b.txt, sub/ + + names := make(map[string]bool) + for _, e := range entries { + names[e.Name()] = true + } + assert.True(t, names["a.txt"]) + assert.True(t, names["b.txt"]) + assert.True(t, names["sub"]) +} + +func TestBlobFileSystem_SeekOnRead(t *testing.T) { + fs, bucket := newMemBlobFS(t) + ctx := context.Background() + + data := []byte("0123456789ABCDEF") + require.NoError(t, bucket.WriteAll(ctx, "seekable.bin", data, nil)) + + f, err := fs.OpenFile(ctx, "/seekable.bin", os.O_RDONLY, 0) + require.NoError(t, err) + defer f.Close() + + // Seek to offset 10 + pos, err := f.Seek(10, io.SeekStart) + require.NoError(t, err) + assert.Equal(t, int64(10), pos) + + buf := make([]byte, 6) + n, err := f.Read(buf) + require.NoError(t, err) + assert.Equal(t, 6, n) + assert.Equal(t, "ABCDEF", string(buf)) +} + +// --------------------------------------------------------------------------- +// blobReadFile and blobWriteFile unit tests +// --------------------------------------------------------------------------- + +func TestBlobWriteFile_DoubleClose(t *testing.T) { + bucket := memblob.OpenBucket(nil) + defer bucket.Close() + + wf := newBlobWriteFile(context.Background(), bucket, "double.txt", "/double.txt") + _, err := wf.Write([]byte("data")) + require.NoError(t, err) + require.NoError(t, wf.Close()) + // Second close should be a no-op + require.NoError(t, wf.Close()) +} + +func TestBlobWriteFile_WriteAfterClose(t *testing.T) { + bucket := memblob.OpenBucket(nil) + defer bucket.Close() + + wf := newBlobWriteFile(context.Background(), bucket, "closed.txt", "/closed.txt") + require.NoError(t, wf.Close()) + + _, err := wf.Write([]byte("too late")) + assert.Error(t, err) +} + +func TestBlobWriteFile_Stat(t *testing.T) { + bucket := memblob.OpenBucket(nil) + defer bucket.Close() + + wf := newBlobWriteFile(context.Background(), bucket, "stat.txt", "/stat.txt") + info, err := wf.Stat() + require.NoError(t, err) + assert.Equal(t, "stat.txt", info.Name()) +} + +func TestBlobReadFile_Stat(t *testing.T) { + bucket := memblob.OpenBucket(nil) + defer bucket.Close() + ctx := context.Background() + + require.NoError(t, bucket.WriteAll(ctx, "info.txt", []byte("hello"), nil)) + reader, err := bucket.NewReader(ctx, "info.txt", nil) + require.NoError(t, err) + + rf := &blobReadFile{ + name: "/info.txt", + reader: reader, + size: reader.Size(), + mod: reader.ModTime(), + } + defer rf.Close() + + info, err := rf.Stat() + require.NoError(t, err) + assert.Equal(t, "info.txt", info.Name()) + assert.Equal(t, int64(5), info.Size()) +} + +func TestBlobReadFile_WriteNotSupported(t *testing.T) { + bucket := memblob.OpenBucket(nil) + defer bucket.Close() + ctx := context.Background() + + require.NoError(t, bucket.WriteAll(ctx, "ro.txt", []byte("x"), nil)) + reader, err := bucket.NewReader(ctx, "ro.txt", nil) + require.NoError(t, err) + + rf := &blobReadFile{name: "/ro.txt", reader: reader} + defer rf.Close() + + _, werr := rf.Write([]byte("nope")) + assert.Error(t, werr) +} + +// --------------------------------------------------------------------------- +// blobDirFile unit tests +// --------------------------------------------------------------------------- + +func TestBlobDirFile_Readdir(t *testing.T) { + entries := []os.FileInfo{ + &blobFileInfo{name: "a.txt", size: 10}, + &blobFileInfo{name: "b.txt", size: 20}, + &blobFileInfo{name: "c.txt", size: 30}, + } + + t.Run("ReadAll", func(t *testing.T) { + df := &blobDirFile{name: "/test", entries: append([]os.FileInfo{}, entries...)} + result, err := df.Readdir(-1) + require.NoError(t, err) + assert.Len(t, result, 3) + }) + + t.Run("ReadPartial", func(t *testing.T) { + df := &blobDirFile{name: "/test", entries: append([]os.FileInfo{}, entries...)} + result, err := df.Readdir(2) + require.NoError(t, err) + assert.Len(t, result, 2) + assert.Equal(t, "a.txt", result[0].Name()) + + // Read remaining + result2, err := df.Readdir(-1) + require.NoError(t, err) + assert.Len(t, result2, 1) + assert.Equal(t, "c.txt", result2[0].Name()) + }) + + t.Run("Stat", func(t *testing.T) { + df := &blobDirFile{name: "/somedir"} + info, err := df.Stat() + require.NoError(t, err) + assert.True(t, info.IsDir()) + assert.Equal(t, "somedir", info.Name()) + }) + + t.Run("UnsupportedOps", func(t *testing.T) { + df := &blobDirFile{name: "/dir"} + _, err := df.Read(nil) + assert.Error(t, err) + _, err = df.Seek(0, 0) + assert.Error(t, err) + _, err = df.Write(nil) + assert.Error(t, err) + }) +} + +// --------------------------------------------------------------------------- +// blobFileInfo unit tests +// --------------------------------------------------------------------------- + +func TestBlobFileInfo(t *testing.T) { + fi := &blobFileInfo{name: "test.txt", size: 42, isDir: false} + assert.Equal(t, "test.txt", fi.Name()) + assert.Equal(t, int64(42), fi.Size()) + assert.Equal(t, os.FileMode(0444), fi.Mode()) + assert.False(t, fi.IsDir()) + assert.Nil(t, fi.Sys()) + + // Zero ModTime gets replaced with something non-zero + assert.False(t, fi.ModTime().IsZero()) + + fiDir := &blobFileInfo{name: "subdir", isDir: true} + assert.True(t, fiDir.IsDir()) +} + +// --------------------------------------------------------------------------- +// blobBackend top-level tests (using memblob via URL) +// --------------------------------------------------------------------------- + +func TestBlobBackend_MemURL(t *testing.T) { + // The "mem://" URL scheme is registered by memblob's init(). + // We import it via the test build only. + backend, err := newBlobBackend(BlobBackendOptions{ + BlobURL: "mem://", + }) + require.NoError(t, err) + defer backend.Close() + + // Accessibility + require.NoError(t, backend.CheckAvailability()) + + // FileSystem should be non-nil + assert.NotNil(t, backend.FileSystem()) + + // Checksummer should be nil for blob backends + assert.Nil(t, backend.Checksummer()) +} + +func TestBlobBackend_WithStoragePrefix(t *testing.T) { + backend, err := newBlobBackend(BlobBackendOptions{ + BlobURL: "mem://", + StoragePrefix: "/myprefix", + }) + require.NoError(t, err) + defer backend.Close() + + ctx := context.Background() + + // Write via the filesystem (will be prefixed) + wf, err := backend.FileSystem().OpenFile(ctx, "/file.txt", os.O_CREATE|os.O_WRONLY, 0644) + require.NoError(t, err) + _, err = wf.Write([]byte("prefixed content")) + require.NoError(t, err) + require.NoError(t, wf.Close()) + + // Read back + rf, err := backend.FileSystem().OpenFile(ctx, "/file.txt", os.O_RDONLY, 0) + require.NoError(t, err) + defer rf.Close() + data, err := io.ReadAll(rf) + require.NoError(t, err) + assert.Equal(t, "prefixed content", string(data)) +} + +// --------------------------------------------------------------------------- +// isNotFound unit tests +// --------------------------------------------------------------------------- + +func TestIsNotFound(t *testing.T) { + assert.False(t, isNotFound(nil)) + // gcerrors-based check is tested transitively via OpenFile/Stat on missing keys +} + +// --------------------------------------------------------------------------- +// Full round-trip integration test: write, stat, read, seek, rename, delete +// --------------------------------------------------------------------------- + +func TestBlobFileSystem_FullRoundTrip(t *testing.T) { + fs, _ := newMemBlobFS(t) + ctx := context.Background() + + // 1. Write + wf, err := fs.OpenFile(ctx, "/trip/data.bin", os.O_CREATE|os.O_WRONLY, 0644) + require.NoError(t, err) + _, err = wf.Write([]byte("ABCDEFGHIJ")) + require.NoError(t, err) + require.NoError(t, wf.Close()) + + // 2. Stat + info, err := fs.Stat(ctx, "/trip/data.bin") + require.NoError(t, err) + assert.Equal(t, int64(10), info.Size()) + assert.False(t, info.IsDir()) + + // 3. Read + rf, err := fs.OpenFile(ctx, "/trip/data.bin", os.O_RDONLY, 0) + require.NoError(t, err) + + buf := make([]byte, 5) + n, err := rf.Read(buf) + require.NoError(t, err) + assert.Equal(t, 5, n) + assert.Equal(t, "ABCDE", string(buf)) + + // 4. Seek back to start and re-read + pos, err := rf.Seek(0, io.SeekStart) + require.NoError(t, err) + assert.Equal(t, int64(0), pos) + buf2, err := io.ReadAll(rf) + require.NoError(t, err) + assert.Equal(t, "ABCDEFGHIJ", string(buf2)) + rf.Close() + + // 5. Rename + require.NoError(t, fs.Rename(ctx, "/trip/data.bin", "/trip/renamed.bin")) + _, err = fs.Stat(ctx, "/trip/data.bin") + assert.ErrorIs(t, err, os.ErrNotExist) + info, err = fs.Stat(ctx, "/trip/renamed.bin") + require.NoError(t, err) + assert.Equal(t, int64(10), info.Size()) + + // 6. RemoveAll + require.NoError(t, fs.RemoveAll(ctx, "/trip/renamed.bin")) + _, err = fs.Stat(ctx, "/trip/renamed.bin") + assert.ErrorIs(t, err, os.ErrNotExist) +} diff --git a/origin_serve/handlers.go b/origin_serve/handlers.go index cc58d32e15..f6bde21d93 100644 --- a/origin_serve/handlers.go +++ b/origin_serve/handlers.go @@ -37,6 +37,7 @@ import ( log "github.com/sirupsen/logrus" "github.com/spf13/afero" "golang.org/x/net/webdav" + "golang.org/x/oauth2" "github.com/pelicanplatform/pelican/config" "github.com/pelicanplatform/pelican/identity" @@ -55,6 +56,9 @@ var ( exportPrefixMap map[string]string // Maps federation prefix to storage prefix copyEnabledPrefixes map[string]bool // Set of federation prefixes that have the Copies capability handlersRegistered bool // Tracks whether handlers have been registered + + // globusBackends stores Globus v2 backends for token refresh management. + globusBackends map[string]*globusBackend ) const ( @@ -191,6 +195,7 @@ func ResetHandlers() { exportPrefixMap = nil copyEnabledPrefixes = nil handlersRegistered = false + globusBackends = nil } // extractTokens extracts bearer tokens from the request @@ -411,6 +416,14 @@ func authMiddleware() gin.HandlerFunc { } else if authorizedContext != nil { c.Request = c.Request.WithContext(authorizedContext) } + + // For HTTPS passthrough backends, stash the client token in context + // so the backend filesystem can forward it to the upstream server. + if len(tokens) > 0 { + ctx := WithClientToken(c.Request.Context(), tokens[0]) + c.Request = c.Request.WithContext(ctx) + } + c.Next() } } @@ -562,8 +575,9 @@ func InitializeHandlers(ctx context.Context, exports []server_utils.OriginExport backends = make(map[string]server_utils.OriginBackend) webdavHandlers = make(map[string]*webdav.Handler) - exportPrefixMap = make(map[string]string) + exportPrefixMap = make(map[string]string) // Initialize the global map copyEnabledPrefixes = make(map[string]bool) + globusBackends = make(map[string]*globusBackend) // Get optional rate limit for testing readRateLimit := param.Origin_TransferRateLimit.GetByteRate() @@ -592,6 +606,121 @@ func InitializeHandlers(ctx context.Context, exports []server_utils.OriginExport return fmt.Errorf("failed to create SSH backend for %s: %w", export.FederationPrefix, err) } backend = sshBackend + + case server_structs.OriginStorageS3v2: + // Native blob backend (S3, GCS, Azure) via gocloud.dev/blob + blobURL := param.Origin_ObjectProviderURL.GetString() + + var blobOpts BlobBackendOptions + if blobURL != "" { + blobOpts = BlobBackendOptions{ + BlobURL: blobURL, + StoragePrefix: export.StoragePrefix, + } + } else { + accessKey, secretKey, err := loadS3Credentials(export.S3AccessKeyfile, export.S3SecretKeyfile) + if err != nil { + return fmt.Errorf("failed to load S3 credentials for %s: %w", export.FederationPrefix, err) + } + s3ServiceURL := param.Origin_S3ServiceUrl.GetString() + s3Region := param.Origin_S3Region.GetString() + s3UrlStyle := param.Origin_S3UrlStyle.GetString() + if s3UrlStyle == "" { + s3UrlStyle = "path" + } + blobOpts = BlobBackendOptions{ + ServiceURL: s3ServiceURL, + Region: s3Region, + Bucket: export.S3Bucket, + AccessKey: accessKey, + SecretKey: secretKey, + URLStyle: s3UrlStyle, + StoragePrefix: export.StoragePrefix, + } + } + blobBe, blobErr := newBlobBackend(blobOpts) + if blobErr != nil { + return fmt.Errorf("failed to create blob backend for %s: %w", export.FederationPrefix, blobErr) + } + backend = blobBe + if blobURL != "" { + log.Infof("Initialized blob backend for %s (url: %s)", export.FederationPrefix, blobURL) + } else { + log.Infof("Initialized native S3 backend for %s (bucket: %s, region: %s)", export.FederationPrefix, export.S3Bucket, param.Origin_S3Region.GetString()) + } + + case server_structs.OriginStorageHTTPSv2: + // Native HTTPS/WebDAV backend (no XRootD) + httpServiceURL := param.Origin_HttpServiceUrl.GetString() + tokenMode := HTTPSTokenNone + staticTokenFile := "" + var oauth2Cfg *oauth2.Config + var oauth2Tok *oauth2.Token + + // Determine token mode + if param.Origin_HttpAuthTokenPassthrough.GetBool() { + tokenMode = HTTPSTokenPassthrough + } else if param.Origin_HttpAuthOAuth2ClientID.GetString() != "" { + tokenMode = HTTPSTokenOAuth2 + + secretFile := param.Origin_HttpAuthOAuth2ClientSecretFile.GetString() + if secretFile == "" { + return fmt.Errorf("Origin.HttpAuthOAuth2ClientSecretFile must be set when Origin.HttpAuthOAuth2ClientID is configured") + } + secretBytes, rErr := os.ReadFile(secretFile) + if rErr != nil { + return fmt.Errorf("failed to read OAuth2 client secret file %s: %w", secretFile, rErr) + } + + issuerUrl := param.Origin_HttpAuthOAuth2Issuer.GetString() + if issuerUrl == "" { + return fmt.Errorf("Origin.HttpAuthOAuth2Issuer must be set when Origin.HttpAuthOAuth2ClientID is configured") + } + issuerMeta, dErr := config.GetIssuerMetadata(issuerUrl) + if dErr != nil { + return fmt.Errorf("failed to discover OAuth2 metadata from issuer %s: %w", issuerUrl, dErr) + } + if issuerMeta.TokenURL == "" { + return fmt.Errorf("OAuth2 issuer %s did not advertise a token_endpoint", issuerUrl) + } + + oauth2Cfg = &oauth2.Config{ + ClientID: param.Origin_HttpAuthOAuth2ClientID.GetString(), + ClientSecret: strings.TrimSpace(string(secretBytes)), + Endpoint: oauth2.Endpoint{ + TokenURL: issuerMeta.TokenURL, + }, + } + // For client_credentials flow, create an initial token that will be refreshed + oauth2Tok = &oauth2.Token{} + } else if param.Origin_HttpAuthTokenFile.GetString() != "" { + tokenMode = HTTPSTokenStatic + staticTokenFile = param.Origin_HttpAuthTokenFile.GetString() + } + + backend = newHTTPSBackend(HTTPSBackendOptions{ + ServiceURL: httpServiceURL, + StoragePrefix: export.StoragePrefix, + TokenMode: tokenMode, + StaticTokenFile: staticTokenFile, + OAuth2Config: oauth2Cfg, + OAuth2Token: oauth2Tok, + }) + log.Infof("Initialized native HTTPS backend for %s (upstream: %s, token mode: %d)", export.FederationPrefix, httpServiceURL, tokenMode) + + case server_structs.OriginStorageGlobusv2: + // Native Globus backend (no XRootD) + // The Globus backend is initialized in two phases: + // 1. Here we create the backend structure + // 2. In OriginServe, Globus tokens are loaded and the backend is activated + gb := NewGlobusBackend(GlobusBackendConfig{ + CollectionID: export.GlobusCollectionID, + StoragePrefix: export.StoragePrefix, + }) + backend = gb + globusBackends[export.GlobusCollectionID] = gb + log.Infof("Initialized native Globus backend for %s (collection: %s)", export.FederationPrefix, export.GlobusCollectionID) + default: // Use local filesystem (POSIXv2) // Create a filesystem for this export with auto-directory creation diff --git a/param/parameters.go b/param/parameters.go index 17bc25c297..b64c14469e 100644 --- a/param/parameters.go +++ b/param/parameters.go @@ -355,7 +355,11 @@ var runtimeConfigurableMap = map[string]bool{ "Origin.GlobusCollectionName": false, "Origin.GlobusConfigLocation": false, "Origin.GlobusTransferTokenFile": false, + "Origin.HttpAuthOAuth2ClientID": false, + "Origin.HttpAuthOAuth2ClientSecretFile": false, + "Origin.HttpAuthOAuth2Issuer": false, "Origin.HttpAuthTokenFile": false, + "Origin.HttpAuthTokenPassthrough": false, "Origin.HttpServiceUrl": false, "Origin.IssuerMode": false, "Origin.Mode": false, @@ -364,6 +368,7 @@ var runtimeConfigurableMap = map[string]bool{ "Origin.MultiuserUmask": false, "Origin.MultiuserVarlinkSocketPath": false, "Origin.NamespacePrefix": false, + "Origin.ObjectProviderURL": false, "Origin.Port": false, "Origin.RunLocation": false, "Origin.S3AccessKeyfile": false, @@ -658,12 +663,16 @@ var stringAccessors = map[string]func(*Config) string{ "Origin.GlobusCollectionName": func(c *Config) string { return c.Origin.GlobusCollectionName }, "Origin.GlobusConfigLocation": func(c *Config) string { return c.Origin.GlobusConfigLocation }, "Origin.GlobusTransferTokenFile": func(c *Config) string { return c.Origin.GlobusTransferTokenFile }, + "Origin.HttpAuthOAuth2ClientID": func(c *Config) string { return c.Origin.HttpAuthOAuth2ClientID }, + "Origin.HttpAuthOAuth2ClientSecretFile": func(c *Config) string { return c.Origin.HttpAuthOAuth2ClientSecretFile }, + "Origin.HttpAuthOAuth2Issuer": func(c *Config) string { return c.Origin.HttpAuthOAuth2Issuer }, "Origin.HttpAuthTokenFile": func(c *Config) string { return c.Origin.HttpAuthTokenFile }, "Origin.HttpServiceUrl": func(c *Config) string { return c.Origin.HttpServiceUrl }, "Origin.IssuerMode": func(c *Config) string { return c.Origin.IssuerMode }, "Origin.Mode": func(c *Config) string { return c.Origin.Mode }, "Origin.MultiuserVarlinkSocketPath": func(c *Config) string { return c.Origin.MultiuserVarlinkSocketPath }, "Origin.NamespacePrefix": func(c *Config) string { return c.Origin.NamespacePrefix }, + "Origin.ObjectProviderURL": func(c *Config) string { return c.Origin.ObjectProviderURL }, "Origin.RunLocation": func(c *Config) string { return c.Origin.RunLocation }, "Origin.S3AccessKeyfile": func(c *Config) string { return c.Origin.S3AccessKeyfile }, "Origin.S3Bucket": func(c *Config) string { return c.Origin.S3Bucket }, @@ -1011,6 +1020,7 @@ var boolAccessors = map[string]func(*Config) bool{ "Origin.EnableVoms": func(c *Config) bool { return c.Origin.EnableVoms }, "Origin.EnableWrite": func(c *Config) bool { return c.Origin.EnableWrite }, "Origin.EnableWrites": func(c *Config) bool { return c.Origin.EnableWrites }, + "Origin.HttpAuthTokenPassthrough": func(c *Config) bool { return c.Origin.HttpAuthTokenPassthrough }, "Origin.Multiuser": func(c *Config) bool { return c.Origin.Multiuser }, "Origin.SSH.AutoAddHostKey": func(c *Config) bool { return c.Origin.SSH.AutoAddHostKey }, "Origin.SSH.TunnelCallback": func(c *Config) bool { return c.Origin.SSH.TunnelCallback }, @@ -1486,7 +1496,11 @@ var allParameterNames = []string{ "Origin.GlobusCollectionName", "Origin.GlobusConfigLocation", "Origin.GlobusTransferTokenFile", + "Origin.HttpAuthOAuth2ClientID", + "Origin.HttpAuthOAuth2ClientSecretFile", + "Origin.HttpAuthOAuth2Issuer", "Origin.HttpAuthTokenFile", + "Origin.HttpAuthTokenPassthrough", "Origin.HttpServiceUrl", "Origin.IssuerMode", "Origin.Mode", @@ -1495,6 +1509,7 @@ var allParameterNames = []string{ "Origin.MultiuserUmask", "Origin.MultiuserVarlinkSocketPath", "Origin.NamespacePrefix", + "Origin.ObjectProviderURL", "Origin.Port", "Origin.RunLocation", "Origin.S3AccessKeyfile", @@ -1762,12 +1777,16 @@ var ( Origin_GlobusCollectionName = StringParam{"Origin.GlobusCollectionName"} Origin_GlobusConfigLocation = StringParam{"Origin.GlobusConfigLocation"} Origin_GlobusTransferTokenFile = StringParam{"Origin.GlobusTransferTokenFile"} + Origin_HttpAuthOAuth2ClientID = StringParam{"Origin.HttpAuthOAuth2ClientID"} + Origin_HttpAuthOAuth2ClientSecretFile = StringParam{"Origin.HttpAuthOAuth2ClientSecretFile"} + Origin_HttpAuthOAuth2Issuer = StringParam{"Origin.HttpAuthOAuth2Issuer"} Origin_HttpAuthTokenFile = StringParam{"Origin.HttpAuthTokenFile"} Origin_HttpServiceUrl = StringParam{"Origin.HttpServiceUrl"} Origin_IssuerMode = StringParam{"Origin.IssuerMode"} Origin_Mode = StringParam{"Origin.Mode"} Origin_MultiuserVarlinkSocketPath = StringParam{"Origin.MultiuserVarlinkSocketPath"} Origin_NamespacePrefix = StringParam{"Origin.NamespacePrefix"} + Origin_ObjectProviderURL = StringParam{"Origin.ObjectProviderURL"} Origin_RunLocation = StringParam{"Origin.RunLocation"} Origin_S3AccessKeyfile = StringParam{"Origin.S3AccessKeyfile"} Origin_S3Bucket = StringParam{"Origin.S3Bucket"} @@ -1994,6 +2013,7 @@ var ( Origin_EnableVoms = BoolParam{"Origin.EnableVoms"} Origin_EnableWrite = BoolParam{"Origin.EnableWrite"} Origin_EnableWrites = BoolParam{"Origin.EnableWrites"} + Origin_HttpAuthTokenPassthrough = BoolParam{"Origin.HttpAuthTokenPassthrough"} Origin_Multiuser = BoolParam{"Origin.Multiuser"} Origin_SSH_AutoAddHostKey = BoolParam{"Origin.SSH.AutoAddHostKey"} Origin_SSH_TunnelCallback = BoolParam{"Origin.SSH.TunnelCallback"} @@ -2226,12 +2246,16 @@ func init() { "Origin.GlobusCollectionName": Origin_GlobusCollectionName, "Origin.GlobusConfigLocation": Origin_GlobusConfigLocation, "Origin.GlobusTransferTokenFile": Origin_GlobusTransferTokenFile, + "Origin.HttpAuthOAuth2ClientID": Origin_HttpAuthOAuth2ClientID, + "Origin.HttpAuthOAuth2ClientSecretFile": Origin_HttpAuthOAuth2ClientSecretFile, + "Origin.HttpAuthOAuth2Issuer": Origin_HttpAuthOAuth2Issuer, "Origin.HttpAuthTokenFile": Origin_HttpAuthTokenFile, "Origin.HttpServiceUrl": Origin_HttpServiceUrl, "Origin.IssuerMode": Origin_IssuerMode, "Origin.Mode": Origin_Mode, "Origin.MultiuserVarlinkSocketPath": Origin_MultiuserVarlinkSocketPath, "Origin.NamespacePrefix": Origin_NamespacePrefix, + "Origin.ObjectProviderURL": Origin_ObjectProviderURL, "Origin.RunLocation": Origin_RunLocation, "Origin.S3AccessKeyfile": Origin_S3AccessKeyfile, "Origin.S3Bucket": Origin_S3Bucket, @@ -2446,6 +2470,7 @@ func init() { "Origin.EnableVoms": Origin_EnableVoms, "Origin.EnableWrite": Origin_EnableWrite, "Origin.EnableWrites": Origin_EnableWrites, + "Origin.HttpAuthTokenPassthrough": Origin_HttpAuthTokenPassthrough, "Origin.Multiuser": Origin_Multiuser, "Origin.SSH.AutoAddHostKey": Origin_SSH_AutoAddHostKey, "Origin.SSH.TunnelCallback": Origin_SSH_TunnelCallback, diff --git a/param/parameters_struct.go b/param/parameters_struct.go index 03e8a087a8..a93e9c323e 100644 --- a/param/parameters_struct.go +++ b/param/parameters_struct.go @@ -313,7 +313,11 @@ type Config struct { GlobusCollectionName string `mapstructure:"globuscollectionname" yaml:"GlobusCollectionName"` GlobusConfigLocation string `mapstructure:"globusconfiglocation" yaml:"GlobusConfigLocation"` GlobusTransferTokenFile string `mapstructure:"globustransfertokenfile" yaml:"GlobusTransferTokenFile"` + HttpAuthOAuth2ClientID string `mapstructure:"httpauthoauth2clientid" yaml:"HttpAuthOAuth2ClientID"` + HttpAuthOAuth2ClientSecretFile string `mapstructure:"httpauthoauth2clientsecretfile" yaml:"HttpAuthOAuth2ClientSecretFile"` + HttpAuthOAuth2Issuer string `mapstructure:"httpauthoauth2issuer" yaml:"HttpAuthOAuth2Issuer"` HttpAuthTokenFile string `mapstructure:"httpauthtokenfile" yaml:"HttpAuthTokenFile"` + HttpAuthTokenPassthrough bool `mapstructure:"httpauthtokenpassthrough" yaml:"HttpAuthTokenPassthrough"` HttpServiceUrl string `mapstructure:"httpserviceurl" yaml:"HttpServiceUrl"` IssuerMode string `mapstructure:"issuermode" yaml:"IssuerMode"` Mode string `mapstructure:"mode" yaml:"Mode"` @@ -322,6 +326,7 @@ type Config struct { MultiuserUmask int `mapstructure:"multiuserumask" yaml:"MultiuserUmask"` MultiuserVarlinkSocketPath string `mapstructure:"multiuservarlinksocketpath" yaml:"MultiuserVarlinkSocketPath"` NamespacePrefix string `mapstructure:"namespaceprefix" yaml:"NamespacePrefix"` + ObjectProviderURL string `mapstructure:"objectproviderurl" yaml:"ObjectProviderURL"` Port int `mapstructure:"port" yaml:"Port"` RunLocation string `mapstructure:"runlocation" yaml:"RunLocation"` S3AccessKeyfile string `mapstructure:"s3accesskeyfile" yaml:"S3AccessKeyfile"` @@ -799,7 +804,11 @@ type configWithType struct { GlobusCollectionName struct { Type string; Value string } GlobusConfigLocation struct { Type string; Value string } GlobusTransferTokenFile struct { Type string; Value string } + HttpAuthOAuth2ClientID struct { Type string; Value string } + HttpAuthOAuth2ClientSecretFile struct { Type string; Value string } + HttpAuthOAuth2Issuer struct { Type string; Value string } HttpAuthTokenFile struct { Type string; Value string } + HttpAuthTokenPassthrough struct { Type string; Value bool } HttpServiceUrl struct { Type string; Value string } IssuerMode struct { Type string; Value string } Mode struct { Type string; Value string } @@ -808,6 +817,7 @@ type configWithType struct { MultiuserUmask struct { Type string; Value int } MultiuserVarlinkSocketPath struct { Type string; Value string } NamespacePrefix struct { Type string; Value string } + ObjectProviderURL struct { Type string; Value string } Port struct { Type string; Value int } RunLocation struct { Type string; Value string } S3AccessKeyfile struct { Type string; Value string } diff --git a/server_structs/origin.go b/server_structs/origin.go index 5a5aac89ae..6d815e2aa9 100644 --- a/server_structs/origin.go +++ b/server_structs/origin.go @@ -25,13 +25,16 @@ type ( ) const ( - OriginStoragePosix OriginStorageType = "posix" - OriginStoragePosixv2 OriginStorageType = "posixv2" - OriginStorageSSH OriginStorageType = "ssh" - OriginStorageS3 OriginStorageType = "s3" - OriginStorageHTTPS OriginStorageType = "https" - OriginStorageGlobus OriginStorageType = "globus" - OriginStorageXRoot OriginStorageType = "xroot" // Not meant to be extensible, but facilitates legacy OSDF --> Pelican transition + OriginStoragePosix OriginStorageType = "posix" + OriginStoragePosixv2 OriginStorageType = "posixv2" + OriginStorageSSH OriginStorageType = "ssh" + OriginStorageS3 OriginStorageType = "s3" + OriginStorageHTTPS OriginStorageType = "https" + OriginStorageGlobus OriginStorageType = "globus" + OriginStorageS3v2 OriginStorageType = "s3v2" // Native S3 backend (no XRootD) + OriginStorageHTTPSv2 OriginStorageType = "httpsv2" // Native HTTPS/WebDAV backend (no XRootD) + OriginStorageGlobusv2 OriginStorageType = "globusv2" // Native Globus backend (no XRootD) + OriginStorageXRoot OriginStorageType = "xroot" // Not meant to be extensible, but facilitates legacy OSDF --> Pelican transition ) var ( @@ -72,8 +75,14 @@ func ParseOriginStorageType(storageType string) (ost OriginStorageType, err erro ost = OriginStorageXRoot case string(OriginStorageGlobus): ost = OriginStorageGlobus + case string(OriginStorageS3v2): + ost = OriginStorageS3v2 + case string(OriginStorageHTTPSv2): + ost = OriginStorageHTTPSv2 + case string(OriginStorageGlobusv2): + ost = OriginStorageGlobusv2 default: - err = errors.Wrapf(ErrUnknownOriginStorageType, "storage type %s (known types are posix, posixv2, ssh, s3, https, globus, and xroot)", storageType) + err = errors.Wrapf(ErrUnknownOriginStorageType, "storage type %s (known types are posix, posixv2, ssh, s3, s3v2, https, httpsv2, globus, globusv2, and xroot)", storageType) } return } diff --git a/server_utils/origin.go b/server_utils/origin.go index a5ef62247f..fe47744992 100644 --- a/server_utils/origin.go +++ b/server_utils/origin.go @@ -643,6 +643,12 @@ func GetOriginExports() ([]OriginExport, error) { origin = &S3Origin{} case server_structs.OriginStorageGlobus: origin = &GlobusOrigin{} + case server_structs.OriginStorageS3v2: + origin = &S3v2Origin{} + case server_structs.OriginStorageHTTPSv2: + origin = &HTTPSv2Origin{} + case server_structs.OriginStorageGlobusv2: + origin = &Globusv2Origin{} case server_structs.OriginStorageXRoot: origin = &XRootOrigin{} default: diff --git a/server_utils/origin_globusv2.go b/server_utils/origin_globusv2.go new file mode 100644 index 0000000000..774891fbce --- /dev/null +++ b/server_utils/origin_globusv2.go @@ -0,0 +1,31 @@ +/*************************************************************** + * + * Copyright (C) 2026, Pelican Project, Morgridge Institute for Research + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You may + * obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************/ + +package server_utils + +import "github.com/pelicanplatform/pelican/server_structs" + +// Globusv2Origin is the native (non-XRootD) Globus backend. +// It reuses the same configuration and validation as GlobusOrigin. +type Globusv2Origin struct { + GlobusOrigin +} + +func (o *Globusv2Origin) Type(_ Origin) server_structs.OriginStorageType { + return server_structs.OriginStorageGlobusv2 +} diff --git a/server_utils/origin_httpsv2.go b/server_utils/origin_httpsv2.go new file mode 100644 index 0000000000..8bd5fbfb16 --- /dev/null +++ b/server_utils/origin_httpsv2.go @@ -0,0 +1,31 @@ +/*************************************************************** + * + * Copyright (C) 2026, Pelican Project, Morgridge Institute for Research + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You may + * obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************/ + +package server_utils + +import "github.com/pelicanplatform/pelican/server_structs" + +// HTTPSv2Origin is the native (non-XRootD) HTTPS/WebDAV backend. +// It reuses the same configuration and validation as HTTPSOrigin. +type HTTPSv2Origin struct { + HTTPSOrigin +} + +func (o *HTTPSv2Origin) Type(_ Origin) server_structs.OriginStorageType { + return server_structs.OriginStorageHTTPSv2 +} diff --git a/server_utils/origin_s3.go b/server_utils/origin_s3.go index 37392350bb..72ec51514b 100644 --- a/server_utils/origin_s3.go +++ b/server_utils/origin_s3.go @@ -53,6 +53,26 @@ func (o *S3Origin) handleVolumeMountsExtra() error { return nil } +func (o *S3Origin) handleExportsExtra() error { + bucket := param.Origin_S3Bucket.GetString() + akf := param.Origin_S3AccessKeyfile.GetString() + skf := param.Origin_S3SecretKeyfile.GetString() + + for i := range o.Exports { + if o.Exports[i].S3Bucket == "" { + o.Exports[i].S3Bucket = bucket + } + if o.Exports[i].S3AccessKeyfile == "" { + o.Exports[i].S3AccessKeyfile = akf + } + if o.Exports[i].S3SecretKeyfile == "" { + o.Exports[i].S3SecretKeyfile = skf + } + } + + return nil +} + func (o *S3Origin) handleTopLevelExtra() error { if o.Exports == nil { return errors.New("internal error -- discovered nil origin exports while processing top-level Origin.XXX configuration") diff --git a/server_utils/origin_s3v2.go b/server_utils/origin_s3v2.go new file mode 100644 index 0000000000..5d667a9cd6 --- /dev/null +++ b/server_utils/origin_s3v2.go @@ -0,0 +1,45 @@ +/*************************************************************** + * + * Copyright (C) 2026, Pelican Project, Morgridge Institute for Research + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You may + * obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************/ + +package server_utils + +import ( + "github.com/pelicanplatform/pelican/param" + "github.com/pelicanplatform/pelican/server_structs" +) + +// S3v2Origin is the native (non-XRootD) S3 backend. +// It reuses the same configuration and validation as S3Origin, but +// relaxes the validation when Origin.ObjectProviderURL is set (since +// the S3-specific params like S3ServiceUrl are not needed then). +type S3v2Origin struct { + S3Origin +} + +func (o *S3v2Origin) Type(_ Origin) server_structs.OriginStorageType { + return server_structs.OriginStorageS3v2 +} + +func (o *S3v2Origin) validateExtra(e *OriginExport, numExports int) error { + // When ObjectProviderURL is set, the S3-specific fields (S3ServiceUrl, + // S3Bucket, etc.) are not required — the bucket is opened via the URL. + if param.Origin_ObjectProviderURL.GetString() != "" { + return nil + } + return o.S3Origin.validateExtra(e, numExports) +} From 89795ab7bfa299d1e3fb6fe3ba4d565d9bd3e67e Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Sat, 14 Mar 2026 11:32:53 -0500 Subject: [PATCH 02/10] Round of review fixes --- .../{backend_s3.go => backend_blob.go} | 242 ++++++++--- origin_serve/backend_blob_aws_test.go | 410 ++++++++++++++++++ ...nio_test.go => backend_blob_minio_test.go} | 34 +- ...ackend_s3_test.go => backend_blob_test.go} | 277 +++++++++++- origin_serve/backend_globus.go | 17 +- origin_serve/backend_https.go | 206 +++++++-- origin_serve/backend_https_test.go | 362 +++++++++++++++- origin_serve/handlers.go | 8 + 8 files changed, 1415 insertions(+), 141 deletions(-) rename origin_serve/{backend_s3.go => backend_blob.go} (72%) create mode 100644 origin_serve/backend_blob_aws_test.go rename origin_serve/{backend_s3_minio_test.go => backend_blob_minio_test.go} (90%) rename origin_serve/{backend_s3_test.go => backend_blob_test.go} (67%) diff --git a/origin_serve/backend_s3.go b/origin_serve/backend_blob.go similarity index 72% rename from origin_serve/backend_s3.go rename to origin_serve/backend_blob.go index 9258b8473b..2c2d97b264 100644 --- a/origin_serve/backend_s3.go +++ b/origin_serve/backend_blob.go @@ -20,6 +20,7 @@ package origin_serve import ( "context" + "encoding/base64" "fmt" "io" "net/url" @@ -168,7 +169,7 @@ func (b *blobBackend) CheckAvailability() error { func (b *blobBackend) FileSystem() webdav.FileSystem { return b.fs } func (b *blobBackend) Checksummer() server_utils.OriginChecksummer { - return nil // Cloud blob backends don't support xattr-based checksums + return &blobChecksummer{bucket: b.bucket} } // Close cleans up the underlying bucket handle. @@ -176,6 +177,60 @@ func (b *blobBackend) Close() error { return b.bucket.Close() } +// --------------------------------------------------------------------------- +// blobChecksummer — implements OriginChecksummer using blob Attributes. +// Returns MD5 digests (RFC 3230) when the provider supplies them. +// --------------------------------------------------------------------------- + +type blobChecksummer struct { + bucket *blob.Bucket +} + +func (c *blobChecksummer) GetDigests(relativePath, wantDigest string) ([]string, error) { + key := blobKey(relativePath) + attrs, err := c.bucket.Attributes(context.Background(), key) + if err != nil { + // Best-effort: if we can't get attributes, return nothing. + return nil, nil + } + + var digests []string + for _, alg := range strings.Split(wantDigest, ",") { + alg = strings.TrimSpace(strings.ToLower(alg)) + switch alg { + case "md5": + if len(attrs.MD5) > 0 { + digests = append(digests, "md5="+base64.StdEncoding.EncodeToString(attrs.MD5)) + } + } + } + return digests, nil +} + +// --------------------------------------------------------------------------- +// Content-length hint — allows callers (e.g. HTTP handlers) to pass the +// expected upload size to the blob writer via context. This mirrors +// xrootd-s3-http's "oss.asize" mechanism. +// --------------------------------------------------------------------------- + +type blobCtxKey int + +const blobContentLengthKey blobCtxKey = iota + +// ContextWithContentLength returns a child context carrying the expected +// upload size. The blob filesystem's OpenFile will use this to hint the +// underlying writer, enabling single-PUT uploads for small objects. +func ContextWithContentLength(ctx context.Context, size int64) context.Context { + return context.WithValue(ctx, blobContentLengthKey, size) +} + +func contentLengthFromCtx(ctx context.Context) int64 { + if v, ok := ctx.Value(blobContentLengthKey).(int64); ok { + return v + } + return -1 +} + // --------------------------------------------------------------------------- // blobFileSystem — implements webdav.FileSystem backed by gocloud.dev/blob. // --------------------------------------------------------------------------- @@ -193,6 +248,9 @@ func blobKey(name string) string { // Blob stores don't have real directories; we create a zero-byte marker. func (fs *blobFileSystem) Mkdir(ctx context.Context, name string, _ os.FileMode) error { key := blobKey(name) + if key == "" { + return nil // Root always exists. + } if !strings.HasSuffix(key, "/") { key += "/" } @@ -203,19 +261,26 @@ func (fs *blobFileSystem) Mkdir(ctx context.Context, name string, _ os.FileMode) func (fs *blobFileSystem) OpenFile(ctx context.Context, name string, flag int, _ os.FileMode) (webdav.File, error) { key := blobKey(name) - // Write mode — return a writer that uploads on Close. + // Write mode — open a streaming writer immediately so permission + // or connectivity errors surface now, not on the first Write call. if flag&(os.O_WRONLY|os.O_RDWR|os.O_CREATE|os.O_TRUNC) != 0 { - return newBlobWriteFile(ctx, fs.bucket, key, name), nil + wf, err := newBlobWriteFile(ctx, fs.bucket, key, name) + if err != nil { + return nil, err + } + return wf, nil } - // Check if this is a "directory" by listing with prefix. + // Check if this is a "directory" by peeking at a prefix listing. dirPrefix := key if dirPrefix != "" && !strings.HasSuffix(dirPrefix, "/") { dirPrefix += "/" } - entries, err := fs.listDir(ctx, dirPrefix) - if err == nil && len(entries) > 0 { - return &blobDirFile{name: name, entries: entries}, nil + peekIter := fs.bucket.List(&blob.ListOptions{Prefix: dirPrefix, Delimiter: "/"}) + if _, peekErr := peekIter.Next(ctx); peekErr == nil { + // It is a directory — return a lazy dir handle (a fresh iterator + // will be created when Readdir is called). + return &blobDirFile{name: name, bucket: fs.bucket, prefix: dirPrefix}, nil } // Read mode — open via blob.NewReader (supports seek). @@ -271,6 +336,7 @@ func (fs *blobFileSystem) Stat(ctx context.Context, name string) (os.FileInfo, e name: path.Base(name), size: attrs.Size, mod: attrs.ModTime, + etag: attrs.ETag, }, nil } @@ -291,32 +357,7 @@ func (fs *blobFileSystem) Stat(ctx context.Context, name string) (os.FileInfo, e return nil, err } -// listDir lists immediate children under prefix (with "/" delimiter). -func (fs *blobFileSystem) listDir(ctx context.Context, prefix string) ([]os.FileInfo, error) { - iter := fs.bucket.List(&blob.ListOptions{Prefix: prefix, Delimiter: "/"}) - var entries []os.FileInfo - for { - obj, err := iter.Next(ctx) - if err == io.EOF { - break - } - if err != nil { - return nil, err - } - baseName := strings.TrimPrefix(obj.Key, prefix) - baseName = strings.TrimSuffix(baseName, "/") - if baseName == "" { - continue - } - entries = append(entries, &blobFileInfo{ - name: baseName, - size: obj.Size, - mod: obj.ModTime, - isDir: obj.IsDir, - }) - } - return entries, nil -} + // isNotFound returns true if the error represents a "not found" condition. func isNotFound(err error) bool { @@ -335,6 +376,12 @@ type blobFileInfo struct { size int64 mod time.Time isDir bool + etag string +} + +// BlobFileSysInfo is returned by blobFileInfo.Sys() when metadata is available. +type BlobFileSysInfo struct { + ETag string } func (fi *blobFileInfo) Name() string { return fi.name } @@ -346,8 +393,13 @@ func (fi *blobFileInfo) ModTime() time.Time { } return fi.mod } -func (fi *blobFileInfo) IsDir() bool { return fi.isDir } -func (fi *blobFileInfo) Sys() interface{} { return nil } +func (fi *blobFileInfo) IsDir() bool { return fi.isDir } +func (fi *blobFileInfo) Sys() interface{} { + if fi.etag != "" { + return &BlobFileSysInfo{ETag: fi.etag} + } + return nil +} // --------------------------------------------------------------------------- // blobReadFile — read-only file backed by a blob.Reader. @@ -397,38 +449,40 @@ func (f *blobReadFile) Stat() (os.FileInfo, error) { // --------------------------------------------------------------------------- // blobWriteFile — write file backed by blob.Writer. -// Streams writes directly through to the underlying blob store. +// The writer is opened eagerly so that permission and connectivity +// errors are reported at OpenFile time, not deferred to the first Write. +// Data is streamed directly through to the underlying blob store; +// nothing is buffered beyond the driver's internal upload-part buffer. // Uses a mutex to protect concurrent writes. // --------------------------------------------------------------------------- type blobWriteFile struct { - ctx context.Context - bucket *blob.Bucket - key string name string - mu sync.Mutex writer *blob.Writer - opened bool closed bool } -func newBlobWriteFile(ctx context.Context, bucket *blob.Bucket, key, name string) *blobWriteFile { - return &blobWriteFile{ctx: ctx, bucket: bucket, key: key, name: name} -} - -// ensureWriter lazily opens the blob.Writer on first Write. -func (f *blobWriteFile) ensureWriter() error { - if f.opened { - return nil +// newBlobWriteFile opens a blob.Writer immediately. If the context +// carries a content-length hint (see ContextWithContentLength), it is +// used to size the driver's upload buffer — small objects that fit in +// a single part avoid multipart overhead entirely. +func newBlobWriteFile(ctx context.Context, bucket *blob.Bucket, key, name string) (*blobWriteFile, error) { + var opts blob.WriterOptions + if hint := contentLengthFromCtx(ctx); hint > 0 { + // Set the buffer to the exact object size when it is small + // enough for a single-part upload. The S3 driver will issue + // a simple PutObject instead of a multipart sequence. + const maxSinglePart = 5 * 1024 * 1024 * 1024 // 5 GiB S3 single-part limit + if hint <= maxSinglePart { + opts.BufferSize = int(hint) + } } - w, err := f.bucket.NewWriter(f.ctx, f.key, nil) + w, err := bucket.NewWriter(ctx, key, &opts) if err != nil { - return fmt.Errorf("blob new writer %q: %w", f.key, err) + return nil, fmt.Errorf("blob open for write %q: %w", key, err) } - f.writer = w - f.opened = true - return nil + return &blobWriteFile{name: name, writer: w}, nil } func (f *blobWriteFile) Write(p []byte) (int, error) { @@ -437,9 +491,6 @@ func (f *blobWriteFile) Write(p []byte) (int, error) { if f.closed { return 0, fmt.Errorf("write to closed file") } - if err := f.ensureWriter(); err != nil { - return 0, err - } return f.writer.Write(p) } @@ -450,10 +501,8 @@ func (f *blobWriteFile) Close() error { return nil } f.closed = true - if !f.opened { - // Nothing was written; create an empty object. - return f.bucket.WriteAll(f.ctx, f.key, nil, nil) - } + // Close flushes buffered data and finalises the upload. + // If nothing was written the driver creates a zero-byte object. return f.writer.Close() } @@ -476,12 +525,19 @@ func (f *blobWriteFile) Stat() (os.FileInfo, error) { } // --------------------------------------------------------------------------- -// blobDirFile — directory representation for blob listings. +// blobDirFile — lazy directory representation for blob listings. +// Entries are fetched on demand from the blob iterator, avoiding +// pre-buffering an unbounded number of objects. // --------------------------------------------------------------------------- type blobDirFile struct { - name string - entries []os.FileInfo + name string + bucket *blob.Bucket + prefix string + + mu sync.Mutex + iter *blob.ListIterator + done bool } func (f *blobDirFile) Read(_ []byte) (int, error) { @@ -498,15 +554,57 @@ func (f *blobDirFile) Write(_ []byte) (int, error) { return 0, fmt.Errorf("write not supported on directory") } +// Readdir returns directory entries lazily from the underlying blob +// listing iterator. When count <= 0 it returns all remaining entries; +// otherwise it returns up to count entries per call. +// Internally the iterator pages through the provider's native page size +// (typically 1 000 objects for S3) so memory stays bounded even for +// very large directories. func (f *blobDirFile) Readdir(count int) ([]os.FileInfo, error) { - if count <= 0 || count > len(f.entries) { - result := f.entries - f.entries = nil - return result, nil + f.mu.Lock() + defer f.mu.Unlock() + + if f.iter == nil { + f.iter = f.bucket.List(&blob.ListOptions{Prefix: f.prefix, Delimiter: "/"}) + } + + if f.done { + return nil, io.EOF } - result := f.entries[:count] - f.entries = f.entries[count:] - return result, nil + + var entries []os.FileInfo + for { + if count > 0 && len(entries) >= count { + break + } + + obj, err := f.iter.Next(context.Background()) + if err == io.EOF { + f.done = true + break + } + if err != nil { + return entries, err + } + + baseName := strings.TrimPrefix(obj.Key, f.prefix) + baseName = strings.TrimSuffix(baseName, "/") + if baseName == "" { + continue + } + + entries = append(entries, &blobFileInfo{ + name: baseName, + size: obj.Size, + mod: obj.ModTime, + isDir: obj.IsDir, + }) + } + + if len(entries) == 0 && f.done { + return nil, io.EOF + } + return entries, nil } func (f *blobDirFile) Stat() (os.FileInfo, error) { @@ -517,7 +615,7 @@ func (f *blobDirFile) Stat() (os.FileInfo, error) { } // --------------------------------------------------------------------------- -// S3 credential loading (unchanged — reads key files from disk) +// S3 credential loading (reads key files from disk) // --------------------------------------------------------------------------- func loadS3Credentials(accessKeyFile, secretKeyFile string) (accessKey, secretKey string, err error) { diff --git a/origin_serve/backend_blob_aws_test.go b/origin_serve/backend_blob_aws_test.go new file mode 100644 index 0000000000..7694af23b0 --- /dev/null +++ b/origin_serve/backend_blob_aws_test.go @@ -0,0 +1,410 @@ +/*************************************************************** + * + * Copyright (C) 2026, Pelican Project, Morgridge Institute for Research + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You may + * obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************/ + +package origin_serve + +// These tests exercise the native S3 (gocloud.dev/blob) backend against the +// same public AWS S3 bucket used by xrootd-s3-http's s3_tests.cc, ensuring +// parity between the old C++ plugin and the new Go implementation. +// +// The bucket "genome-browser" at s3.us-east-1.amazonaws.com contains the +// UCSC Cell Browser dataset. It is publicly accessible and read-only; +// no credentials are needed. +// +// Target directory: cells/tabula-sapiens +// Known file: cells/tabula-sapiens/cellbrowser.json.bak (672 bytes) +// +// The xrootd-s3-http tests define five fixture configurations: +// +// FileSystemS3VirtualBucket – virtual URL style, bucket = genome-browser +// FileSystemS3VirtualNoBucket – virtual URL style, no bucket (bucket in path) +// FileSystemS3PathBucket – path URL style, bucket = genome-browser +// FileSystemS3PathNoBucket – path URL style, no bucket (bucket in path) +// FileSystemS3PathBucketSlash – path URL style, trailing slash on service URL +// +// We replicate equivalent configurations below, each for Stat and List. +// +// NOTE: These tests make live network requests to AWS S3. They are skipped +// when the PELICAN_TEST_AWS_S3 environment variable is not set to "1". + +import ( + "context" + "io" + "os" + "sort" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func skipIfNoAWS(t *testing.T) { + t.Helper() + if os.Getenv("PELICAN_TEST_AWS_S3") != "1" { + t.Skip("PELICAN_TEST_AWS_S3 not set; skipping live AWS S3 test") + } +} + +// The xrootd-s3-http tests use s3.path_name = /test, which means the namespace +// root is /test and the actual S3 objects live under the key prefix that follows. +// In the Go backend, StoragePrefix serves the same role. + +// -------------------------------------------------------------------------- +// Configuration 1: Virtual-hosted URL style, bucket = genome-browser +// Equivalent to FileSystemS3VirtualBucket in s3_tests.cc +// -------------------------------------------------------------------------- + +func openVirtualBucket(t *testing.T) *blobBackend { + t.Helper() + backend, err := newBlobBackend(BlobBackendOptions{ + ServiceURL: "https://s3.us-east-1.amazonaws.com", + Region: "us-east-1", + Bucket: "genome-browser", + URLStyle: "virtual", + }) + require.NoError(t, err, "failed to open genome-browser bucket (virtual style)") + t.Cleanup(func() { backend.Close() }) + return backend +} + +// -------------------------------------------------------------------------- +// Configuration 2: Path-style URL, bucket = genome-browser +// Equivalent to FileSystemS3PathBucket in s3_tests.cc +// -------------------------------------------------------------------------- + +func openPathBucket(t *testing.T) *blobBackend { + t.Helper() + backend, err := newBlobBackend(BlobBackendOptions{ + ServiceURL: "https://s3.us-east-1.amazonaws.com", + Region: "us-east-1", + Bucket: "genome-browser", + URLStyle: "path", + }) + require.NoError(t, err, "failed to open genome-browser bucket (path style)") + t.Cleanup(func() { backend.Close() }) + return backend +} + +// -------------------------------------------------------------------------- +// Configuration 3: Path-style URL, trailing slash on service URL +// Equivalent to FileSystemS3PathBucketSlash in s3_tests.cc +// -------------------------------------------------------------------------- + +func openPathBucketSlash(t *testing.T) *blobBackend { + t.Helper() + backend, err := newBlobBackend(BlobBackendOptions{ + ServiceURL: "https://s3.us-east-1.amazonaws.com/", + Region: "us-east-1", + Bucket: "genome-browser", + URLStyle: "path", + }) + require.NoError(t, err, "failed to open genome-browser bucket (path+slash)") + t.Cleanup(func() { backend.Close() }) + return backend +} + +// -------------------------------------------------------------------------- +// Configuration 4: Virtual-hosted, no explicit bucket — bucket is in the +// StoragePrefix. +// Equivalent to FileSystemS3VirtualNoBucket in s3_tests.cc. +// NOTE: gocloud.dev requires the bucket name in the URL scheme, so we +// test this by putting the first path component as the StoragePrefix. +// +// In xrootd-s3-http, when no bucket is given, the first path component +// *after* the path_name is the bucket. The stat path used is: +// /test/genome-browser/cells/tabula-sapiens/cellbrowser.json.bak +// which resolves to bucket=genome-browser, key=cells/tabula-sapiens/cellbrowser.json.bak +// +// We can't replicate the exact no-bucket behavior with gocloud (which +// requires a bucket at URL time), so we test the same data via the +// StoragePrefix configuration — a realistic usage pattern. +// -------------------------------------------------------------------------- + +func openVirtualBucketWithPrefix(t *testing.T) *blobBackend { + t.Helper() + backend, err := newBlobBackend(BlobBackendOptions{ + ServiceURL: "https://s3.us-east-1.amazonaws.com", + Region: "us-east-1", + Bucket: "genome-browser", + URLStyle: "virtual", + StoragePrefix: "/cells/tabula-sapiens", + }) + require.NoError(t, err, "failed to open genome-browser bucket (virtual+prefix)") + t.Cleanup(func() { backend.Close() }) + return backend +} + +// ========================================================================== +// Stat tests — match xrootd-s3-http TEST_F(..., Stat) tests +// ========================================================================== + +func TestAWSS3_VirtualBucket_Stat(t *testing.T) { + skipIfNoAWS(t) + backend := openVirtualBucket(t) + + ctx := context.Background() + info, err := backend.FileSystem().Stat(ctx, "/cells/tabula-sapiens/cellbrowser.json.bak") + require.NoError(t, err, "Stat failed on genome-browser bucket (virtual)") + assert.Equal(t, int64(672), info.Size(), "unexpected file size") + assert.False(t, info.IsDir()) + assert.Equal(t, "cellbrowser.json.bak", info.Name()) +} + +func TestAWSS3_PathBucket_Stat(t *testing.T) { + skipIfNoAWS(t) + backend := openPathBucket(t) + + ctx := context.Background() + info, err := backend.FileSystem().Stat(ctx, "/cells/tabula-sapiens/cellbrowser.json.bak") + require.NoError(t, err, "Stat failed on genome-browser bucket (path)") + assert.Equal(t, int64(672), info.Size()) +} + +func TestAWSS3_PathBucketSlash_Stat(t *testing.T) { + skipIfNoAWS(t) + backend := openPathBucketSlash(t) + + ctx := context.Background() + info, err := backend.FileSystem().Stat(ctx, "/cells/tabula-sapiens/cellbrowser.json.bak") + require.NoError(t, err, "Stat failed on genome-browser bucket (path+slash)") + assert.Equal(t, int64(672), info.Size()) +} + +func TestAWSS3_VirtualBucket_StatNotFound(t *testing.T) { + skipIfNoAWS(t) + backend := openVirtualBucket(t) + + ctx := context.Background() + _, err := backend.FileSystem().Stat(ctx, "/cells/tabula-sapiens/does_not_exist.zzz") + assert.ErrorIs(t, err, os.ErrNotExist) +} + +// ========================================================================== +// Stat directory tests — match xrootd-s3-http StatRoot/NestedDir tests +// ========================================================================== + +func TestAWSS3_VirtualBucket_StatDirectory(t *testing.T) { + skipIfNoAWS(t) + backend := openVirtualBucket(t) + + ctx := context.Background() + fs := backend.FileSystem() + + // "cells" should appear as a directory + info, err := fs.Stat(ctx, "/cells") + require.NoError(t, err, "Stat directory /cells failed") + assert.True(t, info.IsDir()) + + // "cells/tabula-sapiens" should appear as a directory + info, err = fs.Stat(ctx, "/cells/tabula-sapiens") + require.NoError(t, err, "Stat directory /cells/tabula-sapiens failed") + assert.True(t, info.IsDir()) +} + +// ========================================================================== +// Stat with StoragePrefix — the prefix scopes us inside the bucket, so +// "cellbrowser.json.bak" should be accessible at the root of the +// prefix namespace. +// ========================================================================== + +func TestAWSS3_VirtualBucket_WithPrefix_Stat(t *testing.T) { + skipIfNoAWS(t) + backend := openVirtualBucketWithPrefix(t) + + ctx := context.Background() + info, err := backend.FileSystem().Stat(ctx, "/cellbrowser.json.bak") + require.NoError(t, err, "Stat with StoragePrefix failed") + assert.Equal(t, int64(672), info.Size()) +} + +// ========================================================================== +// List tests — match xrootd-s3-http TestDirectoryContents helper +// +// The xrootd-s3-http test verifies the exact order and contents of +// cells/tabula-sapiens: +// cellbrowser.json.bak (file, 672 bytes) +// dataset.json (file, 1847 bytes) +// desc.json (file, 1091 bytes) +// all/ (directory) +// by-organ/ (directory) +// func-compart/ (directory) +// +// S3 list output is lexicographic. We sort our results the same way +// and verify file vs directory type plus sizes for files. +// ========================================================================== + +type dirEntry struct { + name string + size int64 + isDir bool +} + +func listDirViaFS(t *testing.T, backend *blobBackend, dirPath string) []dirEntry { + t.Helper() + ctx := context.Background() + f, err := backend.FileSystem().OpenFile(ctx, dirPath, os.O_RDONLY, 0) + require.NoError(t, err, "OpenFile(%s) failed", dirPath) + defer f.Close() + + entries, err := f.Readdir(-1) + require.NoError(t, err, "Readdir(%s) failed", dirPath) + + var result []dirEntry + for _, e := range entries { + result = append(result, dirEntry{ + name: e.Name(), + size: e.Size(), + isDir: e.IsDir(), + }) + } + sort.Slice(result, func(i, j int) bool { return result[i].name < result[j].name }) + return result +} + +func TestAWSS3_VirtualBucket_List(t *testing.T) { + skipIfNoAWS(t) + backend := openVirtualBucket(t) + + entries := listDirViaFS(t, backend, "/cells/tabula-sapiens") + + // Build maps for easier assertion + entryByName := map[string]dirEntry{} + for _, e := range entries { + entryByName[e.name] = e + } + + // Files expected by xrootd-s3-http tests + expectedFiles := map[string]int64{ + "cellbrowser.json.bak": 672, + "dataset.json": 1847, + "desc.json": 1091, + } + for name, size := range expectedFiles { + e, ok := entryByName[name] + require.True(t, ok, "expected file %q not found in listing", name) + assert.False(t, e.isDir, "%q should be a file", name) + assert.Equal(t, size, e.size, "wrong size for %q", name) + } + + // Directories expected by xrootd-s3-http tests + expectedDirs := []string{"all", "by-organ", "func-compart"} + for _, name := range expectedDirs { + e, ok := entryByName[name] + require.True(t, ok, "expected directory %q not found in listing", name) + assert.True(t, e.isDir, "%q should be a directory", name) + } +} + +func TestAWSS3_PathBucket_List(t *testing.T) { + skipIfNoAWS(t) + backend := openPathBucket(t) + + entries := listDirViaFS(t, backend, "/cells/tabula-sapiens") + + entryByName := map[string]dirEntry{} + for _, e := range entries { + entryByName[e.name] = e + } + + e, ok := entryByName["cellbrowser.json.bak"] + require.True(t, ok, "cellbrowser.json.bak not found in path-style listing") + assert.Equal(t, int64(672), e.size) + assert.False(t, e.isDir) +} + +func TestAWSS3_PathBucketSlash_List(t *testing.T) { + skipIfNoAWS(t) + backend := openPathBucketSlash(t) + + entries := listDirViaFS(t, backend, "/cells/tabula-sapiens") + + entryByName := map[string]dirEntry{} + for _, e := range entries { + entryByName[e.name] = e + } + + e, ok := entryByName["cellbrowser.json.bak"] + require.True(t, ok, "cellbrowser.json.bak not found in path+slash listing") + assert.Equal(t, int64(672), e.size) +} + +func TestAWSS3_VirtualBucket_WithPrefix_List(t *testing.T) { + skipIfNoAWS(t) + backend := openVirtualBucketWithPrefix(t) + + // With prefix = /cells/tabula-sapiens, the root "/" should list + // the same contents as /cells/tabula-sapiens without prefix. + entries := listDirViaFS(t, backend, "/") + + entryByName := map[string]dirEntry{} + for _, e := range entries { + entryByName[e.name] = e + } + + e, ok := entryByName["cellbrowser.json.bak"] + require.True(t, ok, "cellbrowser.json.bak not found via prefix listing") + assert.Equal(t, int64(672), e.size) +} + +// ========================================================================== +// Read test — verify we can actually download and read file content +// ========================================================================== + +func TestAWSS3_VirtualBucket_Read(t *testing.T) { + skipIfNoAWS(t) + backend := openVirtualBucket(t) + + ctx := context.Background() + f, err := backend.FileSystem().OpenFile(ctx, "/cells/tabula-sapiens/cellbrowser.json.bak", os.O_RDONLY, 0) + require.NoError(t, err) + defer f.Close() + + data, err := io.ReadAll(f) + require.NoError(t, err) + assert.Equal(t, 672, len(data), "read returned unexpected number of bytes") +} + +func TestAWSS3_PathBucket_Read(t *testing.T) { + skipIfNoAWS(t) + backend := openPathBucket(t) + + ctx := context.Background() + f, err := backend.FileSystem().OpenFile(ctx, "/cells/tabula-sapiens/cellbrowser.json.bak", os.O_RDONLY, 0) + require.NoError(t, err) + defer f.Close() + + data, err := io.ReadAll(f) + require.NoError(t, err) + assert.Equal(t, 672, len(data)) +} + +// ========================================================================== +// Availability test — bucket should be accessible +// ========================================================================== + +func TestAWSS3_VirtualBucket_Availability(t *testing.T) { + skipIfNoAWS(t) + backend := openVirtualBucket(t) + require.NoError(t, backend.CheckAvailability()) +} + +func TestAWSS3_PathBucket_Availability(t *testing.T) { + skipIfNoAWS(t) + backend := openPathBucket(t) + require.NoError(t, backend.CheckAvailability()) +} diff --git a/origin_serve/backend_s3_minio_test.go b/origin_serve/backend_blob_minio_test.go similarity index 90% rename from origin_serve/backend_s3_minio_test.go rename to origin_serve/backend_blob_minio_test.go index ac7213a1e5..132e78000a 100644 --- a/origin_serve/backend_s3_minio_test.go +++ b/origin_serve/backend_blob_minio_test.go @@ -27,6 +27,7 @@ import ( "os/exec" "path/filepath" "regexp" + "sync/atomic" "testing" "time" @@ -60,12 +61,16 @@ func startMinio(t *testing.T) (endpoint, accessKey, secretKey string) { cmd := exec.Command("minio", "server", "--address", "127.0.0.1:0", - "--console-address", "127.0.0.2:0", dataDir, ) cmd.Env = append(os.Environ(), "MINIO_ROOT_USER="+accessKey, "MINIO_ROOT_PASSWORD="+secretKey, + // Disable the web console so we don't need --console-address. + // Using 127.0.0.2 for the console fails on macOS (only 127.0.0.1 + // is configured), and using 127.0.0.1:0 is rejected by minio + // because it matches --address. + "MINIO_BROWSER=off", ) // Capture stdout so we can parse the "S3-API:" line for the real port. @@ -78,16 +83,30 @@ func startMinio(t *testing.T) (endpoint, accessKey, secretKey string) { cmd.Stderr = logFile require.NoError(t, cmd.Start(), "failed to start minio") + + // Monitor for early exit so we fail fast with diagnostics. + var minioDone atomic.Bool + var minioErr error + go func() { + minioErr = cmd.Wait() + minioDone.Store(true) + }() t.Cleanup(func() { cmd.Process.Kill() - cmd.Wait() //nolint:errcheck + for !minioDone.Load() { + time.Sleep(10 * time.Millisecond) + } }) // Minio prints a line like: // S3-API: http://127.0.0.1:43219 // Poll the log file until we find it (with a 30-second deadline). + // Use assert (not require) so we can print minio's log on failure. apiRe := regexp.MustCompile(`S3-API:\s+(https?://\S+)`) - require.Eventually(t, func() bool { + ok := assert.Eventually(t, func() bool { + if minioDone.Load() { + return false + } data, err := os.ReadFile(logPath) if err != nil { return false @@ -97,7 +116,14 @@ func startMinio(t *testing.T) (endpoint, accessKey, secretKey string) { return true } return false - }, 30*time.Second, 200*time.Millisecond, "minio never printed an S3-API endpoint") + }, 30*time.Second, 200*time.Millisecond) + if !ok { + logData, _ := os.ReadFile(logPath) + if minioDone.Load() { + t.Fatalf("minio exited early (err=%v); log output:\n%s", minioErr, logData) + } + t.Fatalf("minio never printed an S3-API endpoint; log output:\n%s", logData) + } // Pre-create the bucket directory on disk so it's available immediately. bucketDir := filepath.Join(dataDir, "test-bucket") diff --git a/origin_serve/backend_s3_test.go b/origin_serve/backend_blob_test.go similarity index 67% rename from origin_serve/backend_s3_test.go rename to origin_serve/backend_blob_test.go index 929e1ee891..500c8d5919 100644 --- a/origin_serve/backend_s3_test.go +++ b/origin_serve/backend_blob_test.go @@ -371,8 +371,9 @@ func TestBlobWriteFile_DoubleClose(t *testing.T) { bucket := memblob.OpenBucket(nil) defer bucket.Close() - wf := newBlobWriteFile(context.Background(), bucket, "double.txt", "/double.txt") - _, err := wf.Write([]byte("data")) + wf, err := newBlobWriteFile(context.Background(), bucket, "double.txt", "/double.txt") + require.NoError(t, err) + _, err = wf.Write([]byte("data")) require.NoError(t, err) require.NoError(t, wf.Close()) // Second close should be a no-op @@ -383,10 +384,11 @@ func TestBlobWriteFile_WriteAfterClose(t *testing.T) { bucket := memblob.OpenBucket(nil) defer bucket.Close() - wf := newBlobWriteFile(context.Background(), bucket, "closed.txt", "/closed.txt") + wf, err := newBlobWriteFile(context.Background(), bucket, "closed.txt", "/closed.txt") + require.NoError(t, err) require.NoError(t, wf.Close()) - _, err := wf.Write([]byte("too late")) + _, err = wf.Write([]byte("too late")) assert.Error(t, err) } @@ -394,7 +396,8 @@ func TestBlobWriteFile_Stat(t *testing.T) { bucket := memblob.OpenBucket(nil) defer bucket.Close() - wf := newBlobWriteFile(context.Background(), bucket, "stat.txt", "/stat.txt") + wf, err := newBlobWriteFile(context.Background(), bucket, "stat.txt", "/stat.txt") + require.NoError(t, err) info, err := wf.Stat() require.NoError(t, err) assert.Equal(t, "stat.txt", info.Name()) @@ -444,31 +447,45 @@ func TestBlobReadFile_WriteNotSupported(t *testing.T) { // --------------------------------------------------------------------------- func TestBlobDirFile_Readdir(t *testing.T) { - entries := []os.FileInfo{ - &blobFileInfo{name: "a.txt", size: 10}, - &blobFileInfo{name: "b.txt", size: 20}, - &blobFileInfo{name: "c.txt", size: 30}, + // Use a real memblob bucket so the lazy iterator works. + newDirFile := func(t *testing.T) *blobDirFile { + t.Helper() + bucket := memblob.OpenBucket(nil) + t.Cleanup(func() { bucket.Close() }) + ctx := context.Background() + require.NoError(t, bucket.WriteAll(ctx, "dir/a.txt", []byte("aaaaaaaaaa"), nil)) + require.NoError(t, bucket.WriteAll(ctx, "dir/b.txt", []byte("bbbbbbbbbbbbbbbbbbbb"), nil)) + require.NoError(t, bucket.WriteAll(ctx, "dir/c.txt", []byte("cccccccccccccccccccccccccccccc"), nil)) + return &blobDirFile{name: "/dir", bucket: bucket, prefix: "dir/"} } t.Run("ReadAll", func(t *testing.T) { - df := &blobDirFile{name: "/test", entries: append([]os.FileInfo{}, entries...)} + df := newDirFile(t) result, err := df.Readdir(-1) require.NoError(t, err) assert.Len(t, result, 3) }) t.Run("ReadPartial", func(t *testing.T) { - df := &blobDirFile{name: "/test", entries: append([]os.FileInfo{}, entries...)} + df := newDirFile(t) result, err := df.Readdir(2) require.NoError(t, err) assert.Len(t, result, 2) - assert.Equal(t, "a.txt", result[0].Name()) // Read remaining result2, err := df.Readdir(-1) require.NoError(t, err) assert.Len(t, result2, 1) - assert.Equal(t, "c.txt", result2[0].Name()) + }) + + t.Run("ReadPastEnd", func(t *testing.T) { + df := newDirFile(t) + _, err := df.Readdir(-1) + require.NoError(t, err) + + // Subsequent call should return io.EOF + _, err = df.Readdir(-1) + assert.ErrorIs(t, err, io.EOF) }) t.Run("Stat", func(t *testing.T) { @@ -507,6 +524,12 @@ func TestBlobFileInfo(t *testing.T) { fiDir := &blobFileInfo{name: "subdir", isDir: true} assert.True(t, fiDir.IsDir()) + + // ETag stored in Sys() + fiETag := &blobFileInfo{name: "etag.txt", etag: "\"abc123\""} + sysInfo, ok := fiETag.Sys().(*BlobFileSysInfo) + require.True(t, ok) + assert.Equal(t, "\"abc123\"", sysInfo.ETag) } // --------------------------------------------------------------------------- @@ -528,8 +551,8 @@ func TestBlobBackend_MemURL(t *testing.T) { // FileSystem should be non-nil assert.NotNil(t, backend.FileSystem()) - // Checksummer should be nil for blob backends - assert.Nil(t, backend.Checksummer()) + // Checksummer should be a blobChecksummer for blob backends + assert.NotNil(t, backend.Checksummer()) } func TestBlobBackend_WithStoragePrefix(t *testing.T) { @@ -620,3 +643,227 @@ func TestBlobFileSystem_FullRoundTrip(t *testing.T) { _, err = fs.Stat(ctx, "/trip/renamed.bin") assert.ErrorIs(t, err, os.ErrNotExist) } + +// --------------------------------------------------------------------------- +// Root directory tests — ensure "/" is handled correctly +// --------------------------------------------------------------------------- + +func TestBlobFileSystem_RootStat(t *testing.T) { + fs, bucket := newMemBlobFS(t) + ctx := context.Background() + + // Even with no objects, root should exist as a directory once + // there is at least one object in the bucket. + require.NoError(t, bucket.WriteAll(ctx, "top.txt", []byte("hello"), nil)) + + info, err := fs.Stat(ctx, "/") + require.NoError(t, err) + assert.True(t, info.IsDir()) +} + +func TestBlobFileSystem_RootList(t *testing.T) { + fs, bucket := newMemBlobFS(t) + ctx := context.Background() + + require.NoError(t, bucket.WriteAll(ctx, "a.txt", []byte("a"), nil)) + require.NoError(t, bucket.WriteAll(ctx, "b.txt", []byte("bb"), nil)) + require.NoError(t, bucket.WriteAll(ctx, "sub/c.txt", []byte("ccc"), nil)) + + f, err := fs.OpenFile(ctx, "/", os.O_RDONLY, 0) + require.NoError(t, err) + defer f.Close() + + entries, err := f.Readdir(-1) + require.NoError(t, err) + assert.Len(t, entries, 3) // a.txt, b.txt, sub/ + + names := make(map[string]bool) + for _, e := range entries { + names[e.Name()] = true + } + assert.True(t, names["a.txt"]) + assert.True(t, names["b.txt"]) + assert.True(t, names["sub"]) +} + +func TestBlobFileSystem_RootMkdir(t *testing.T) { + fs, _ := newMemBlobFS(t) + ctx := context.Background() + + // Mkdir("/") should be a no-op (not create a "/" marker object) + require.NoError(t, fs.Mkdir(ctx, "/", 0755)) +} + +func TestBlobFileSystem_RootObjectReadback(t *testing.T) { + fs, _ := newMemBlobFS(t) + ctx := context.Background() + + // Write a file directly in the root + wf, err := fs.OpenFile(ctx, "/rootfile.txt", os.O_CREATE|os.O_WRONLY, 0644) + require.NoError(t, err) + _, err = wf.Write([]byte("root content")) + require.NoError(t, err) + require.NoError(t, wf.Close()) + + // Read it back + rf, err := fs.OpenFile(ctx, "/rootfile.txt", os.O_RDONLY, 0) + require.NoError(t, err) + defer rf.Close() + data, err := io.ReadAll(rf) + require.NoError(t, err) + assert.Equal(t, "root content", string(data)) + + // Stat it + info, err := fs.Stat(ctx, "/rootfile.txt") + require.NoError(t, err) + assert.Equal(t, int64(12), info.Size()) + assert.False(t, info.IsDir()) + + // Root listing should include it + dir, err := fs.OpenFile(ctx, "/", os.O_RDONLY, 0) + require.NoError(t, err) + defer dir.Close() + entries, err := dir.Readdir(-1) + require.NoError(t, err) + found := false + for _, e := range entries { + if e.Name() == "rootfile.txt" { + found = true + } + } + assert.True(t, found, "rootfile.txt not found in root listing") +} + +// --------------------------------------------------------------------------- +// Checksummer tests +// --------------------------------------------------------------------------- + +func TestBlobChecksummer_GetDigests(t *testing.T) { + bucket := memblob.OpenBucket(nil) + defer bucket.Close() + ctx := context.Background() + + // Write an object (memblob computes MD5 for stored objects). + require.NoError(t, bucket.WriteAll(ctx, "check.txt", []byte("checksum me"), nil)) + + cs := &blobChecksummer{bucket: bucket} + + t.Run("MD5Available", func(t *testing.T) { + digests, err := cs.GetDigests("/check.txt", "md5") + require.NoError(t, err) + // memblob should provide MD5 + if len(digests) > 0 { + assert.Contains(t, digests[0], "md5=") + } + }) + + t.Run("UnsupportedAlgorithm", func(t *testing.T) { + digests, err := cs.GetDigests("/check.txt", "sha512") + require.NoError(t, err) + assert.Empty(t, digests) + }) + + t.Run("NonExistentObject", func(t *testing.T) { + digests, err := cs.GetDigests("/no-such-thing.txt", "md5") + require.NoError(t, err) + assert.Empty(t, digests) + }) + + t.Run("MultipleAlgorithms", func(t *testing.T) { + digests, err := cs.GetDigests("/check.txt", "md5, sha256") + require.NoError(t, err) + // Only md5 is supported; sha256 is ignored + for _, d := range digests { + assert.Contains(t, d, "md5=") + } + }) +} + +// --------------------------------------------------------------------------- +// ETag pass-through via Stat +// --------------------------------------------------------------------------- + +func TestBlobFileSystem_StatETag(t *testing.T) { + // memblob doesn't set ETag, so we just verify the field plumbing works. + fi := &blobFileInfo{name: "e.txt", size: 5, etag: "\"abcdef\""} + sysInfo := fi.Sys().(*BlobFileSysInfo) + assert.Equal(t, "\"abcdef\"", sysInfo.ETag) + + // Without ETag, Sys() returns nil + fi2 := &blobFileInfo{name: "no-etag.txt", size: 5} + assert.Nil(t, fi2.Sys()) +} + +// --------------------------------------------------------------------------- +// Eager writer open tests +// --------------------------------------------------------------------------- + +func TestBlobWriteFile_EagerOpen(t *testing.T) { + fs, _ := newMemBlobFS(t) + ctx := context.Background() + + // OpenFile for write should succeed and return a usable file. + wf, err := fs.OpenFile(ctx, "/eager.txt", os.O_CREATE|os.O_WRONLY, 0644) + require.NoError(t, err) + _, err = wf.Write([]byte("hello")) + require.NoError(t, err) + require.NoError(t, wf.Close()) + + // Verify the data was written. + rf, err := fs.OpenFile(ctx, "/eager.txt", os.O_RDONLY, 0) + require.NoError(t, err) + defer rf.Close() + data, err := io.ReadAll(rf) + require.NoError(t, err) + assert.Equal(t, "hello", string(data)) +} + +func TestBlobWriteFile_EmptyClose(t *testing.T) { + fs, bucket := newMemBlobFS(t) + ctx := context.Background() + + // Open for write then immediately close without writing. + wf, err := fs.OpenFile(ctx, "/empty-eager.txt", os.O_CREATE|os.O_WRONLY, 0644) + require.NoError(t, err) + require.NoError(t, wf.Close()) + + // The zero-byte object should exist. + exists, err := bucket.Exists(ctx, "empty-eager.txt") + require.NoError(t, err) + assert.True(t, exists) +} + +// --------------------------------------------------------------------------- +// Content-length hint tests +// --------------------------------------------------------------------------- + +func TestContentLengthHint_RoundTrip(t *testing.T) { + ctx := context.Background() + + // No hint by default + assert.Equal(t, int64(-1), contentLengthFromCtx(ctx)) + + // Set a hint + ctx = ContextWithContentLength(ctx, 42) + assert.Equal(t, int64(42), contentLengthFromCtx(ctx)) +} + +func TestBlobWriteFile_WithContentLengthHint(t *testing.T) { + fs, _ := newMemBlobFS(t) + ctx := ContextWithContentLength(context.Background(), 12) + + // OpenFile should use the hint and still write correctly. + wf, err := fs.OpenFile(ctx, "/hinted.txt", os.O_CREATE|os.O_WRONLY, 0644) + require.NoError(t, err) + _, err = wf.Write([]byte("hinted write")) + require.NoError(t, err) + require.NoError(t, wf.Close()) + + // Read back + rf, err := fs.OpenFile(context.Background(), "/hinted.txt", os.O_RDONLY, 0) + require.NoError(t, err) + defer rf.Close() + data, err := io.ReadAll(rf) + require.NoError(t, err) + assert.Equal(t, "hinted write", string(data)) +} diff --git a/origin_serve/backend_globus.go b/origin_serve/backend_globus.go index 2df268fea6..aca78165a4 100644 --- a/origin_serve/backend_globus.go +++ b/origin_serve/backend_globus.go @@ -88,11 +88,12 @@ type GlobusBackendConfig struct { // NewGlobusBackend creates a new native Globus backend. func NewGlobusBackend(cfg GlobusBackendConfig) *globusBackend { inner := newHTTPSBackend(HTTPSBackendOptions{ - ServiceURL: cfg.HTTPSServer, - StoragePrefix: cfg.StoragePrefix, - TokenMode: HTTPSTokenOAuth2, - OAuth2Config: cfg.OAuth2Config, - OAuth2Token: cfg.CollectionToken, + ServiceURL: cfg.HTTPSServer, + StoragePrefix: cfg.StoragePrefix, + TokenMode: HTTPSTokenOAuth2, + OAuth2Config: cfg.OAuth2Config, + OAuth2Token: cfg.CollectionToken, + EnableAutoMkdir: true, }) return &globusBackend{ @@ -121,7 +122,10 @@ func (b *globusBackend) CheckAvailability() error { func (b *globusBackend) FileSystem() webdav.FileSystem { return b.inner.FileSystem() } func (b *globusBackend) Checksummer() server_utils.OriginChecksummer { - return nil // Globus doesn't support local checksums + // TODO: Globus collections may provide checksums via the Transfer API + // (GET /endpoint//ls with checksum fields). Investigate whether + // we can surface those through the OriginChecksummer interface. + return nil } // IsActivated returns whether the Globus collection has been activated. @@ -211,7 +215,6 @@ func GetGlobusBackends() map[string]GlobusBackendActivator { // LaunchGlobusv2TokenRefresh starts a periodic goroutine (every 5 min) that // refreshes the OAuth2 tokens for all activated Globus v2 backends. -// Unlike the XRootD Globus backend, tokens are kept in memory only. func LaunchGlobusv2TokenRefresh(ctx context.Context, egrp *errgroup.Group) { if len(globusBackends) == 0 { return diff --git a/origin_serve/backend_https.go b/origin_serve/backend_https.go index 1fe8f20384..e5d863ad1a 100644 --- a/origin_serve/backend_https.go +++ b/origin_serve/backend_https.go @@ -20,6 +20,7 @@ package origin_serve import ( "context" + "errors" "fmt" "io" "net/http" @@ -35,6 +36,7 @@ import ( "golang.org/x/net/webdav" "golang.org/x/oauth2" + "github.com/pelicanplatform/pelican/config" "github.com/pelicanplatform/pelican/server_utils" ) @@ -86,15 +88,24 @@ type HTTPSBackendOptions struct { // For OAuth2 tokens: OAuth2Config *oauth2.Config OAuth2Token *oauth2.Token // initial token (with refresh_token) + // EnableAutoMkdir, when true, causes PUT operations to automatically + // create missing parent directories via WebDAV MKCOL before retrying. + EnableAutoMkdir bool } +// ErrNotSupported is returned when an operation is not supported by the +// backend (e.g. Mkdir on a plain HTTP server). Callers can test for this +// with errors.Is(err, ErrNotSupported). +var ErrNotSupported = errors.New("operation not supported by backend") + func newHTTPSBackend(opts HTTPSBackendOptions) *httpsBackend { fs := &httpsFileSystem{ serviceURL: strings.TrimSuffix(opts.ServiceURL, "/"), storagePrefix: opts.StoragePrefix, tokenMode: opts.TokenMode, staticTokenFile: opts.StaticTokenFile, - httpClient: &http.Client{Timeout: 60 * time.Second}, + httpClient: &http.Client{Transport: config.GetTransport()}, + enableAutoMkdir: opts.EnableAutoMkdir, } if opts.OAuth2Config != nil && opts.OAuth2Token != nil { fs.oauth2Cfg = opts.OAuth2Config @@ -142,12 +153,16 @@ type httpsFileSystem struct { staticTokenFile string backendMode BackendMode - // OAuth2 token management (in-memory only — no disk persistence) + // OAuth2 token management oauth2Cfg *oauth2.Config oauth2Tok *oauth2.Token oauthMu sync.Mutex // protects oauth2Tok httpClient *http.Client + + // enableAutoMkdir, when true, causes PUT operations to automatically + // create missing parent directories on the upstream server. + enableAutoMkdir bool } // probeBackendMode issues an OPTIONS request against the upstream root and @@ -190,17 +205,13 @@ func (fs *httpsFileSystem) davPath(name string) string { } // getDavClient returns a gowebdav.Client configured with the appropriate bearer -// token for the current request context. A fresh client is created per call so -// that the passthrough token is always correct even under concurrent requests. +// token for the current request context. The simpleBearerAuth captures a +// reference to the httpsFileSystem so that every HTTP request made through +// this client calls getToken() afresh — this ensures tokens that expire +// mid-transfer are transparently renewed for long-lived clients. func (fs *httpsFileSystem) getDavClient(ctx context.Context) *gowebdav.Client { - token := fs.getToken(ctx) - var client *gowebdav.Client - if token != "" { - auth := &simpleBearerAuth{token: token} - client = gowebdav.NewAuthClient(fs.serviceURL, auth) - } else { - client = gowebdav.NewClient(fs.serviceURL, "", "") - } + auth := &simpleBearerAuth{tokenFunc: func() string { return fs.getToken(ctx) }} + client := gowebdav.NewAuthClient(fs.serviceURL, auth) if fs.httpClient.Transport != nil { client.SetTransport(fs.httpClient.Transport) } @@ -294,7 +305,67 @@ func (fs *httpsFileSystem) Mkdir(ctx context.Context, name string, perm os.FileM client := fs.getDavClient(ctx) return client.Mkdir(fs.davPath(name), perm) } - return fmt.Errorf("mkdir not supported on HTTP-only backend") + return fmt.Errorf("mkdir: %w", ErrNotSupported) +} + +// ensureParentDirs recursively creates parent directories for the given file +// path. It walks up from the deepest parent toward the root until it finds an +// existing directory, then creates the missing directories back down the path. +// +// This mirrors the approach used by the xrootd-s3-http Globus plugin: probe +// from deepest parent upward (via Stat) until we find one that exists, then +// mkdir each missing component going back down. EEXIST is tolerated to +// handle concurrent writers that may create the same directory between our +// Stat and Mkdir calls. +func (fs *httpsFileSystem) ensureParentDirs(ctx context.Context, name string) error { + if fs.backendMode != BackendModeWebDAV { + return fmt.Errorf("auto-mkdir requires WebDAV backend") + } + + // Build all parent prefixes. For "/a/b/c/file.txt" we get ["/a", "/a/b", "/a/b/c"]. + dir := path.Dir(name) + if dir == "." || dir == "/" || dir == "" { + return nil // no parent directories to create + } + + var prefixes []string + for cur := dir; cur != "." && cur != "/" && cur != ""; cur = path.Dir(cur) { + prefixes = append(prefixes, cur) + } + if len(prefixes) == 0 { + return nil + } + + // prefixes is deepest-first: ["/a/b/c", "/a/b", "/a"]. + // Walk from deepest toward root to find the first existing directory. + firstMissingIdx := 0 + for i := 0; i < len(prefixes); i++ { + _, err := fs.Stat(ctx, prefixes[i]) + if err == nil { + // This prefix exists; everything deeper needs to be created. + firstMissingIdx = i + break + } + if i == len(prefixes)-1 { + // Even the shallowest prefix doesn't exist; create everything. + firstMissingIdx = len(prefixes) + } + } + + // Create from shallowest missing toward deepest. + for i := firstMissingIdx - 1; i >= 0; i-- { + err := fs.Mkdir(ctx, prefixes[i], 0755) + if err != nil { + // Tolerate "already exists" (405 Method Not Allowed in WebDAV) in + // case a concurrent writer created the directory between our Stat + // and Mkdir calls. + if !gowebdav.IsErrCode(err, http.StatusMethodNotAllowed) { + return fmt.Errorf("failed to create directory %q: %w", prefixes[i], err) + } + } + } + + return nil } // OpenFile implements webdav.FileSystem. @@ -371,7 +442,7 @@ func (fs *httpsFileSystem) Rename(ctx context.Context, oldName, newName string) client := fs.getDavClient(ctx) return client.Rename(fs.davPath(oldName), fs.davPath(newName), true) } - return fmt.Errorf("rename not supported on HTTP-only backend") + return fmt.Errorf("rename: %w", ErrNotSupported) } // Stat implements webdav.FileSystem. @@ -408,6 +479,7 @@ func (fs *httpsFileSystem) Stat(ctx context.Context, name string) (os.FileInfo, size: resp.ContentLength, modTime: parseHTTPDate(resp.Header.Get("Last-Modified")), isDir: false, + etag: resp.Header.Get("ETag"), }, nil } @@ -431,27 +503,28 @@ func tokenFromContext(ctx context.Context) string { } // --------------------------------------------------------------------------- -// simpleBearerAuth — implements gowebdav.Authorizer for a fixed bearer token. -// A fresh instance is created per request via getDavClient. +// simpleBearerAuth — implements gowebdav.Authorizer using a token-getter +// function. The function is called for every HTTP request so that expired +// tokens are transparently refreshed without recreating the gowebdav.Client. // --------------------------------------------------------------------------- type simpleBearerAuth struct { - token string + tokenFunc func() string } type simpleBearerAuthenticator struct { - token string + tokenFunc func() string } func (a *simpleBearerAuth) NewAuthenticator(body io.Reader) (gowebdav.Authenticator, io.Reader) { - return &simpleBearerAuthenticator{token: a.token}, body + return &simpleBearerAuthenticator{tokenFunc: a.tokenFunc}, body } func (a *simpleBearerAuth) AddAuthenticator(_ string, _ gowebdav.AuthFactory) {} func (auth *simpleBearerAuthenticator) Authorize(_ *http.Client, rq *http.Request, _ string) error { - if auth.token != "" { - rq.Header.Set("Authorization", "Bearer "+auth.token) + if tok := auth.tokenFunc(); tok != "" { + rq.Header.Set("Authorization", "Bearer "+tok) } return nil } @@ -463,7 +536,7 @@ func (auth *simpleBearerAuthenticator) Verify(_ *http.Client, _ *http.Response, func (auth *simpleBearerAuthenticator) Close() error { return nil } func (auth *simpleBearerAuthenticator) Clone() gowebdav.Authenticator { - return &simpleBearerAuthenticator{token: auth.token} + return &simpleBearerAuthenticator{tokenFunc: auth.tokenFunc} } // --------------------------------------------------------------------------- @@ -471,11 +544,18 @@ func (auth *simpleBearerAuthenticator) Clone() gowebdav.Authenticator { // In WebDAV mode the gowebdav library returns its own FileInfo. // --------------------------------------------------------------------------- +// HTTPSFileSysInfo carries optional metadata (e.g. ETag) from an upstream +// HTTPS/WebDAV server. Returned by httpsFileInfo.Sys() when populated. +type HTTPSFileSysInfo struct { + ETag string +} + type httpsFileInfo struct { name string size int64 modTime time.Time isDir bool + etag string } func (fi *httpsFileInfo) Name() string { return fi.name } @@ -487,8 +567,13 @@ func (fi *httpsFileInfo) ModTime() time.Time { } return fi.modTime } -func (fi *httpsFileInfo) IsDir() bool { return fi.isDir } -func (fi *httpsFileInfo) Sys() interface{} { return nil } +func (fi *httpsFileInfo) IsDir() bool { return fi.isDir } +func (fi *httpsFileInfo) Sys() interface{} { + if fi.etag != "" { + return &HTTPSFileSysInfo{ETag: fi.etag} + } + return nil +} // --------------------------------------------------------------------------- // httpsReadFile — read-only file backed by an HTTPS GET response. @@ -548,11 +633,12 @@ func (f *httpsReadFile) Stat() (os.FileInfo, error) { // --------------------------------------------------------------------------- type httpsWriteFile struct { - ctx context.Context - fs *httpsFileSystem - name string - mu sync.Mutex - buf []byte + ctx context.Context + fs *httpsFileSystem + name string + mu sync.Mutex + buf []byte + offset int64 } func newHTTPSWriteFile(ctx context.Context, fs *httpsFileSystem, name string) *httpsWriteFile { @@ -563,6 +649,7 @@ func (f *httpsWriteFile) Write(p []byte) (int, error) { f.mu.Lock() defer f.mu.Unlock() f.buf = append(f.buf, p...) + f.offset += int64(len(p)) return len(p), nil } @@ -573,11 +660,11 @@ func (f *httpsWriteFile) Close() error { f.mu.Unlock() urlStr := f.fs.upstreamURL(f.name) - body := strings.NewReader(string(data)) - - resp, err := f.fs.doRequest(f.ctx, http.MethodPut, urlStr, body, map[string]string{ + headers := map[string]string{ "Content-Length": fmt.Sprintf("%d", len(data)), - }) + } + + resp, err := f.fs.doRequest(f.ctx, http.MethodPut, urlStr, strings.NewReader(string(data)), headers) if err != nil { return err } @@ -586,7 +673,36 @@ func (f *httpsWriteFile) Close() error { if resp.StatusCode == http.StatusOK || resp.StatusCode == http.StatusCreated || resp.StatusCode == http.StatusNoContent { return nil } + respBody, _ := io.ReadAll(resp.Body) + resp.Body.Close() + + // If auto-mkdir is enabled and the server indicates a missing parent + // directory (409 Conflict in WebDAV, or 404 Not Found), create the + // directory tree and retry the PUT. + if f.fs.enableAutoMkdir && (resp.StatusCode == http.StatusConflict || resp.StatusCode == http.StatusNotFound) { + log.Debugf("HTTPS PUT to %s returned %d; attempting auto-mkdir for parent directories", urlStr, resp.StatusCode) + + if mkdirErr := f.fs.ensureParentDirs(f.ctx, f.name); mkdirErr != nil { + log.Warningf("Auto-mkdir failed for %s: %v", f.name, mkdirErr) + return fmt.Errorf("https put failed with status %d (auto-mkdir also failed: %v)", resp.StatusCode, mkdirErr) + } + + // Retry the PUT after creating parent directories. + retryResp, retryErr := f.fs.doRequest(f.ctx, http.MethodPut, urlStr, strings.NewReader(string(data)), headers) + if retryErr != nil { + return retryErr + } + defer retryResp.Body.Close() + + if retryResp.StatusCode == http.StatusOK || retryResp.StatusCode == http.StatusCreated || retryResp.StatusCode == http.StatusNoContent { + return nil + } + retryBody, _ := io.ReadAll(retryResp.Body) + log.Debugf("HTTPS PUT retry response (%d): %s", retryResp.StatusCode, string(retryBody)) + return fmt.Errorf("https put failed with status %d after auto-mkdir", retryResp.StatusCode) + } + log.Debugf("HTTPS PUT response (%d): %s", resp.StatusCode, string(respBody)) return fmt.Errorf("https put failed with status %d", resp.StatusCode) } @@ -595,8 +711,28 @@ func (f *httpsWriteFile) Read(_ []byte) (int, error) { return 0, fmt.Errorf("read not supported on write file") } -func (f *httpsWriteFile) Seek(_ int64, _ int) (int64, error) { - return 0, fmt.Errorf("seek not supported on write file") +// Seek supports only no-op seeks (seeking to the current offset). +// This satisfies callers like the WebDAV handler that seek to the +// current position to determine the write offset. +func (f *httpsWriteFile) Seek(offset int64, whence int) (int64, error) { + f.mu.Lock() + defer f.mu.Unlock() + var target int64 + switch whence { + case io.SeekStart: + target = offset + case io.SeekCurrent: + target = f.offset + offset + case io.SeekEnd: + // For a write file, "end" is the current write position. + target = f.offset + offset + default: + return 0, fmt.Errorf("httpsWriteFile.Seek: invalid whence %d", whence) + } + if target != f.offset { + return 0, fmt.Errorf("httpsWriteFile.Seek: non-sequential seek not supported") + } + return f.offset, nil } func (f *httpsWriteFile) Readdir(_ int) ([]os.FileInfo, error) { diff --git a/origin_serve/backend_https_test.go b/origin_serve/backend_https_test.go index 1e5fdfefe8..7d08939d7e 100644 --- a/origin_serve/backend_https_test.go +++ b/origin_serve/backend_https_test.go @@ -20,6 +20,7 @@ package origin_serve import ( "context" + "errors" "fmt" "io" "net/http" @@ -66,13 +67,12 @@ func TestWithClientToken(t *testing.T) { // --------------------------------------------------------------------------- func TestSimpleBearerAuth(t *testing.T) { - auth := &simpleBearerAuth{token: "tok123"} + auth := &simpleBearerAuth{tokenFunc: func() string { return "tok123" }} authenticator, body := auth.NewAuthenticator(nil) assert.Nil(t, body) assert.NotNil(t, authenticator) sba := authenticator.(*simpleBearerAuthenticator) - assert.Equal(t, "tok123", sba.token) // Authorize should set the header req := httptest.NewRequest(http.MethodGet, "/test", nil) @@ -83,7 +83,6 @@ func TestSimpleBearerAuth(t *testing.T) { // Clone should return an equivalent authenticator cloned := sba.Clone() assert.IsType(t, &simpleBearerAuthenticator{}, cloned) - assert.Equal(t, "tok123", cloned.(*simpleBearerAuthenticator).token) // Close should succeed assert.NoError(t, sba.Close()) @@ -95,7 +94,7 @@ func TestSimpleBearerAuth(t *testing.T) { } func TestSimpleBearerAuth_EmptyToken(t *testing.T) { - auth := &simpleBearerAuth{token: ""} + auth := &simpleBearerAuth{tokenFunc: func() string { return "" }} authenticator, _ := auth.NewAuthenticator(nil) sba := authenticator.(*simpleBearerAuthenticator) @@ -106,6 +105,26 @@ func TestSimpleBearerAuth_EmptyToken(t *testing.T) { assert.Empty(t, req.Header.Get("Authorization")) } +func TestSimpleBearerAuth_TokenRefresh(t *testing.T) { + // Verify that the tokenFunc is called on each Authorize, so + // a refreshed token is used for subsequent requests. + callCount := 0 + auth := &simpleBearerAuth{tokenFunc: func() string { + callCount++ + return fmt.Sprintf("tok-%d", callCount) + }} + authenticator, _ := auth.NewAuthenticator(nil) + sba := authenticator.(*simpleBearerAuthenticator) + + req1 := httptest.NewRequest(http.MethodGet, "/a", nil) + require.NoError(t, sba.Authorize(nil, req1, "")) + assert.Equal(t, "Bearer tok-1", req1.Header.Get("Authorization")) + + req2 := httptest.NewRequest(http.MethodGet, "/b", nil) + require.NoError(t, sba.Authorize(nil, req2, "")) + assert.Equal(t, "Bearer tok-2", req2.Header.Get("Authorization")) +} + // --------------------------------------------------------------------------- // httpsFileInfo // --------------------------------------------------------------------------- @@ -121,6 +140,13 @@ func TestHTTPSFileInfo(t *testing.T) { fiDir := &httpsFileInfo{name: "dir", isDir: true} assert.True(t, fiDir.IsDir()) + + fiEtag := &httpsFileInfo{name: "e.txt", etag: `"abc123"`} + sys := fiEtag.Sys() + require.NotNil(t, sys) + info, ok := sys.(*HTTPSFileSysInfo) + require.True(t, ok) + assert.Equal(t, `"abc123"`, info.ETag) } // --------------------------------------------------------------------------- @@ -180,12 +206,44 @@ func TestHTTPSWriteFile_UnsupportedOps(t *testing.T) { wf := &httpsWriteFile{name: "/test"} _, err := wf.Read(nil) assert.Error(t, err) - _, err = wf.Seek(0, 0) - assert.Error(t, err) _, err = wf.Readdir(-1) assert.Error(t, err) } +func TestHTTPSWriteFile_NoOpSeek(t *testing.T) { + wf := &httpsWriteFile{name: "/test"} + + // Seek to current offset (0) should succeed + pos, err := wf.Seek(0, io.SeekStart) + require.NoError(t, err) + assert.Equal(t, int64(0), pos) + + pos, err = wf.Seek(0, io.SeekCurrent) + require.NoError(t, err) + assert.Equal(t, int64(0), pos) + + // Write some data + _, _ = wf.Write([]byte("hello")) + + // Seeking to current offset (5) should succeed + pos, err = wf.Seek(5, io.SeekStart) + require.NoError(t, err) + assert.Equal(t, int64(5), pos) + + pos, err = wf.Seek(0, io.SeekCurrent) + require.NoError(t, err) + assert.Equal(t, int64(5), pos) + + pos, err = wf.Seek(0, io.SeekEnd) + require.NoError(t, err) + assert.Equal(t, int64(5), pos) + + // Non-current seek should fail + _, err = wf.Seek(0, io.SeekStart) + assert.Error(t, err) + assert.Contains(t, err.Error(), "non-sequential") +} + func TestHTTPSWriteFile_Stat(t *testing.T) { wf := &httpsWriteFile{name: "/test.txt"} wf.buf = []byte("hello") @@ -481,6 +539,41 @@ func TestHTTPSBackend_NoChecksummer(t *testing.T) { assert.Nil(t, backend.Checksummer()) } +func TestHTTPSBackend_StatETag(t *testing.T) { + // Verify that ETag from HEAD responses is surfaced through Sys(). + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method == http.MethodOptions { + w.Header().Set("Allow", "GET, HEAD, OPTIONS") + w.WriteHeader(http.StatusOK) + return + } + if r.Method == http.MethodHead { + w.Header().Set("ETag", `"abc-etag"`) + w.Header().Set("Content-Length", "42") + w.WriteHeader(http.StatusOK) + return + } + w.WriteHeader(http.StatusOK) + })) + defer server.Close() + + backend := newHTTPSBackend(HTTPSBackendOptions{ + ServiceURL: server.URL, + TokenMode: HTTPSTokenNone, + }) + require.NoError(t, backend.CheckAvailability()) + + info, err := backend.FileSystem().Stat(context.Background(), "/file.txt") + require.NoError(t, err) + assert.Equal(t, int64(42), info.Size()) + + sys := info.Sys() + require.NotNil(t, sys) + hsi, ok := sys.(*HTTPSFileSysInfo) + require.True(t, ok) + assert.Equal(t, `"abc-etag"`, hsi.ETag) +} + // --------------------------------------------------------------------------- // davPath and upstreamURL tests // --------------------------------------------------------------------------- @@ -553,9 +646,262 @@ func TestHTTPSBackend_HTTPOnly_UnsupportedOps(t *testing.T) { ctx := context.Background() err := backend.FileSystem().Mkdir(ctx, "/newdir", 0755) assert.Error(t, err) - assert.Contains(t, err.Error(), "mkdir not supported") + assert.True(t, errors.Is(err, ErrNotSupported)) err = backend.FileSystem().Rename(ctx, "/a", "/b") assert.Error(t, err) - assert.Contains(t, err.Error(), "rename not supported") + assert.True(t, errors.Is(err, ErrNotSupported)) +} + +// --------------------------------------------------------------------------- +// Auto-mkdir tests +// --------------------------------------------------------------------------- + +// mockWebDAVServer creates an httptest.Server that simulates a WebDAV-capable +// server with in-memory storage. It supports MKCOL, PUT, PROPFIND (stat), and +// OPTIONS. PUT to paths whose parent directory hasn't been created via MKCOL +// returns 409 Conflict, matching standard WebDAV semantics. +func mockWebDAVServer() (*httptest.Server, map[string][]byte) { + files := map[string][]byte{} + dirs := map[string]bool{"/": true} + + // normalize strips trailing slashes (except for root "/") + normalize := func(p string) string { + for len(p) > 1 && p[len(p)-1] == '/' { + p = p[:len(p)-1] + } + return p + } + + return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + p := normalize(r.URL.Path) + switch r.Method { + case http.MethodOptions: + w.Header().Set("Allow", "GET, PUT, DELETE, HEAD, OPTIONS, PROPFIND, MKCOL, MOVE, COPY") + w.Header().Set("DAV", "1, 2") + w.WriteHeader(http.StatusOK) + + case "MKCOL": + if dirs[p] { + // Already exists + w.WriteHeader(http.StatusMethodNotAllowed) + return + } + // Check parent exists + parent := p + if idx := strings.LastIndex(parent, "/"); idx >= 0 { + parent = parent[:idx] + if parent == "" { + parent = "/" + } + } + if !dirs[parent] { + w.WriteHeader(http.StatusConflict) + return + } + dirs[p] = true + w.WriteHeader(http.StatusCreated) + + case http.MethodPut: + // Check that parent directory exists + parent := p + if idx := strings.LastIndex(parent, "/"); idx >= 0 { + parent = parent[:idx] + if parent == "" { + parent = "/" + } + } + if !dirs[parent] { + w.WriteHeader(http.StatusConflict) + return + } + body, _ := io.ReadAll(r.Body) + files[p] = body + w.WriteHeader(http.StatusCreated) + + case "PROPFIND": + if dirs[p] { + // Return a minimal multistatus response indicating a directory + w.Header().Set("Content-Type", "application/xml; charset=utf-8") + w.WriteHeader(207) // Multi-Status + fmt.Fprintf(w, ` + + + %s + + + + + HTTP/1.1 200 OK + + +`, p) + return + } + if _, ok := files[p]; ok { + w.Header().Set("Content-Type", "application/xml; charset=utf-8") + w.WriteHeader(207) + fmt.Fprintf(w, ` + + + %s + + + + %d + + HTTP/1.1 200 OK + + +`, p, len(files[p])) + return + } + w.WriteHeader(http.StatusNotFound) + + case http.MethodHead: + if _, ok := files[p]; ok { + w.Header().Set("Content-Length", fmt.Sprintf("%d", len(files[p]))) + w.WriteHeader(http.StatusOK) + return + } + w.WriteHeader(http.StatusNotFound) + + case http.MethodGet: + if data, ok := files[p]; ok { + w.Header().Set("Content-Length", fmt.Sprintf("%d", len(data))) + w.Write(data) + return + } + w.WriteHeader(http.StatusNotFound) + + default: + w.WriteHeader(http.StatusMethodNotAllowed) + } + })), files +} + +func TestHTTPSBackend_AutoMkdir_CreatesParentDirs(t *testing.T) { + server, files := mockWebDAVServer() + defer server.Close() + + backend := newHTTPSBackend(HTTPSBackendOptions{ + ServiceURL: server.URL, + TokenMode: HTTPSTokenNone, + EnableAutoMkdir: true, + }) + require.NoError(t, backend.CheckAvailability()) + assert.Equal(t, BackendModeWebDAV, backend.BackendMode()) + + ctx := context.Background() + fs := backend.FileSystem() + + // PUT a file into a deeply nested path that doesn't exist yet. + // Without auto-mkdir this would fail with 409 Conflict. + wf, err := fs.OpenFile(ctx, "/a/b/c/file.txt", os.O_CREATE|os.O_WRONLY, 0644) + require.NoError(t, err) + _, err = wf.Write([]byte("deep content")) + require.NoError(t, err) + require.NoError(t, wf.Close()) + + // Verify the file was stored + assert.Equal(t, []byte("deep content"), files["/a/b/c/file.txt"]) +} + +func TestHTTPSBackend_AutoMkdir_Disabled(t *testing.T) { + server, _ := mockWebDAVServer() + defer server.Close() + + backend := newHTTPSBackend(HTTPSBackendOptions{ + ServiceURL: server.URL, + TokenMode: HTTPSTokenNone, + EnableAutoMkdir: false, + }) + require.NoError(t, backend.CheckAvailability()) + + ctx := context.Background() + fs := backend.FileSystem() + + // Without auto-mkdir, PUT into a missing directory should fail. + wf, err := fs.OpenFile(ctx, "/x/y/file.txt", os.O_CREATE|os.O_WRONLY, 0644) + require.NoError(t, err) + _, err = wf.Write([]byte("data")) + require.NoError(t, err) + err = wf.Close() + assert.Error(t, err) + assert.Contains(t, err.Error(), "409") +} + +func TestHTTPSBackend_AutoMkdir_ExistingParent(t *testing.T) { + server, files := mockWebDAVServer() + defer server.Close() + + backend := newHTTPSBackend(HTTPSBackendOptions{ + ServiceURL: server.URL, + TokenMode: HTTPSTokenNone, + EnableAutoMkdir: true, + }) + require.NoError(t, backend.CheckAvailability()) + + ctx := context.Background() + fs := backend.FileSystem() + + // First create /existing via MKCOL + require.NoError(t, fs.Mkdir(ctx, "/existing", 0755)) + + // Now PUT under /existing/sub/file.txt — only "sub" needs to be created + wf, err := fs.OpenFile(ctx, "/existing/sub/file.txt", os.O_CREATE|os.O_WRONLY, 0644) + require.NoError(t, err) + _, err = wf.Write([]byte("hello")) + require.NoError(t, err) + require.NoError(t, wf.Close()) + + assert.Equal(t, []byte("hello"), files["/existing/sub/file.txt"]) +} + +func TestHTTPSBackend_AutoMkdir_TopLevelFile(t *testing.T) { + server, files := mockWebDAVServer() + defer server.Close() + + backend := newHTTPSBackend(HTTPSBackendOptions{ + ServiceURL: server.URL, + TokenMode: HTTPSTokenNone, + EnableAutoMkdir: true, + }) + require.NoError(t, backend.CheckAvailability()) + + ctx := context.Background() + fs := backend.FileSystem() + + // PUT at root level should work directly without needing auto-mkdir + wf, err := fs.OpenFile(ctx, "/root-file.txt", os.O_CREATE|os.O_WRONLY, 0644) + require.NoError(t, err) + _, err = wf.Write([]byte("top level")) + require.NoError(t, err) + require.NoError(t, wf.Close()) + + assert.Equal(t, []byte("top level"), files["/root-file.txt"]) +} + +func TestEnsureParentDirs_NoParent(t *testing.T) { + // When the file is at the root, ensureParentDirs should be a no-op. + fs := &httpsFileSystem{ + backendMode: BackendModeWebDAV, + enableAutoMkdir: true, + } + + ctx := context.Background() + assert.NoError(t, fs.ensureParentDirs(ctx, "/file.txt")) + assert.NoError(t, fs.ensureParentDirs(ctx, "file.txt")) +} + +func TestEnsureParentDirs_RequiresWebDAV(t *testing.T) { + fs := &httpsFileSystem{ + backendMode: BackendModeHTTP, + enableAutoMkdir: true, + } + + ctx := context.Background() + err := fs.ensureParentDirs(ctx, "/a/b/file.txt") + assert.Error(t, err) + assert.Contains(t, err.Error(), "auto-mkdir requires WebDAV") } diff --git a/origin_serve/handlers.go b/origin_serve/handlers.go index f6bde21d93..ff274c5302 100644 --- a/origin_serve/handlers.go +++ b/origin_serve/handlers.go @@ -705,6 +705,7 @@ func InitializeHandlers(ctx context.Context, exports []server_utils.OriginExport StaticTokenFile: staticTokenFile, OAuth2Config: oauth2Cfg, OAuth2Token: oauth2Tok, + EnableAutoMkdir: true, }) log.Infof("Initialized native HTTPS backend for %s (upstream: %s, token mode: %d)", export.FederationPrefix, httpServiceURL, tokenMode) @@ -842,6 +843,13 @@ func RegisterHandlers(engine *gin.Engine, directorEnabled bool) error { // that forward requests can propagate them. req := server_utils.StashPelicanHeaders(c.Request) + // For PUT requests, pass the Content-Length as a size hint + // so the blob backend can optimize upload part sizes. + if c.Request.Method == http.MethodPut && c.Request.ContentLength > 0 { + ctx := ContextWithContentLength(req.Context(), c.Request.ContentLength) + req = req.WithContext(ctx) + } + if isTPCRequest(c.Request) { handleCopyTPC(c, backend, prefix) } else if c.Request.Method == http.MethodHead { From 7647598baa9e459a3e2e79583853605b5289f90f Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Sat, 14 Mar 2026 13:16:11 -0500 Subject: [PATCH 03/10] Add a regression test ensuring path.Clean is always present in the authz code --- e2e_fed_tests/s3v2_test.go | 4 +- origin_serve/authz_test.go | 103 ++++++ origin_serve/backend_blob.go | 6 +- origin_serve/backend_blob_minio_test.go | 2 +- origin_serve/backend_globus.go | 4 +- origin_serve/backend_https.go | 4 + origin_serve/backend_https_test.go | 8 +- origin_serve/handlers.go | 32 +- origin_serve/path_traversal_test.go | 403 ++++++++++++++++++++++++ 9 files changed, 546 insertions(+), 20 deletions(-) create mode 100644 origin_serve/path_traversal_test.go diff --git a/e2e_fed_tests/s3v2_test.go b/e2e_fed_tests/s3v2_test.go index 00539d4e89..21bafa959d 100644 --- a/e2e_fed_tests/s3v2_test.go +++ b/e2e_fed_tests/s3v2_test.go @@ -372,8 +372,8 @@ func startMinioServer(t *testing.T) (endpoint string) { require.NoError(t, cmd.Start(), "failed to start minio") t.Cleanup(func() { - cmd.Process.Kill() - cmd.Wait() //nolint:errcheck + cmd.Process.Kill() //nolint:errcheck + cmd.Wait() //nolint:errcheck }) // Minio prints a line like: S3-API: http://127.0.0.1:43219 diff --git a/origin_serve/authz_test.go b/origin_serve/authz_test.go index 0e2def1dcc..d4807bcc60 100644 --- a/origin_serve/authz_test.go +++ b/origin_serve/authz_test.go @@ -371,6 +371,109 @@ func TestPositiveAuthorizationWithRegisteredKey(t *testing.T) { assert.Equal(t, 1, jwksFetchCount, "JWKS should still not be fetched again even for failed authorization") } +// TestAuthorizationWithDirtyPaths verifies that authorizeWithContext succeeds +// even when the requested resource contains un-cleaned path traversal sequences +// (e.g. "..", ".", double slashes). This works because NewResourceScope calls +// path.Clean on the resource. If that call were removed, these tests would fail +// because Contains does a raw string prefix comparison. +func TestAuthorizationWithDirtyPaths(t *testing.T) { + // --- set up a JWKS server so tokens can be fully validated ---------- + key := generateTestKey(t) + pubKey, err := key.PublicKey() + require.NoError(t, err) + require.NoError(t, pubKey.Set(jwk.KeyIDKey, "test-key")) + require.NoError(t, pubKey.Set(jwk.AlgorithmKey, jwa.ES256)) + + jwks := jwk.NewSet() + require.NoError(t, jwks.AddKey(pubKey)) + + mux := http.NewServeMux() + mux.HandleFunc("/jwks", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + data, _ := json.Marshal(jwks) + _, _ = w.Write(data) + }) + mux.HandleFunc("/.well-known/openid-configuration", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + cfg := map[string]interface{}{ + "issuer": "http://" + r.Host, + "jwks_uri": "http://" + r.Host + "/jwks", + } + data, _ := json.Marshal(cfg) + _, _ = w.Write(data) + }) + server := httptest.NewServer(mux) + defer server.Close() + + issuerURL := server.URL + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + egrp := &errgroup.Group{} + + exports := []server_utils.OriginExport{{ + FederationPrefix: "/data", + StoragePrefix: "/tmp/data", + IssuerUrls: []string{issuerURL}, + Capabilities: server_structs.Capabilities{Reads: true}, + }} + require.NoError(t, InitAuthConfig(ctx, egrp, exports)) + + // Token grants storage.read:/ (covers everything under /data) + tok := createTestToken(t, key, issuerURL, "alice", nil, "storage.read:/") + ac := GetAuthConfig() + require.NotNil(t, ac) + + // Sanity: clean path works + _, ok := ac.authorizeWithContext(ctx, token_scopes.Wlcg_Storage_Read, "/data/file.txt", tok) + require.True(t, ok, "clean path must be authorized (sanity check)") + + // Dirty paths that should still resolve to an authorized resource + dirtyPaths := []struct { + name string + resource string + }{ + {"DotDotPartial", "/data/sub/../file.txt"}, + {"DotDotDeep", "/data/a/b/../../file.txt"}, + {"DotCurrent", "/data/./file.txt"}, + {"DoubleSlash", "/data//file.txt"}, + {"TrailingDotDot", "/data/sub/.."}, + } + for _, dp := range dirtyPaths { + t.Run(dp.name, func(t *testing.T) { + _, authorized := ac.authorizeWithContext(ctx, token_scopes.Wlcg_Storage_Read, dp.resource, tok) + assert.True(t, authorized, + "dirty path %q should be authorized after path.Clean normalisation", dp.resource) + }) + } + + // --- negative tests: path.Clean must prevent auth bypass --------------- + // Use a narrowly-scoped token that only grants access to /subdir. + // Without path.Clean, a request containing ".." could trick the prefix + // match into granting access outside the token's scope. + narrowTok := createTestToken(t, key, issuerURL, "alice", nil, "storage.read:/subdir") + + // Sanity: the narrow token works for its intended path + _, ok = ac.authorizeWithContext(ctx, token_scopes.Wlcg_Storage_Read, "/data/subdir/file.txt", narrowTok) + require.True(t, ok, "narrow token must authorize its own subdir") + + bypassPaths := []struct { + name string + resource string + }{ + // After path.Clean these all resolve outside /data/subdir + {"EscapeViaRelative", "/data/subdir/../secret/file.txt"}, + {"EscapeViaDeepRelative", "/data/subdir/a/../../secret/file.txt"}, + {"EscapeToRoot", "/data/subdir/../../etc/passwd"}, + } + for _, bp := range bypassPaths { + t.Run(bp.name, func(t *testing.T) { + _, authorized := ac.authorizeWithContext(ctx, token_scopes.Wlcg_Storage_Read, bp.resource, narrowTok) + assert.False(t, authorized, + "dirty path %q must NOT be authorized — it escapes the token scope after cleaning", bp.resource) + }) + } +} + // TestAuthorizationFailureWithoutUserInfo tests that failed authorization doesn't provide user info func TestAuthorizationFailureWithoutUserInfo(t *testing.T) { // When authorization fails, user info should not be added to context diff --git a/origin_serve/backend_blob.go b/origin_serve/backend_blob.go index 2c2d97b264..92d74f1a95 100644 --- a/origin_serve/backend_blob.go +++ b/origin_serve/backend_blob.go @@ -57,6 +57,7 @@ type blobBackend struct { // There are two ways to open a bucket: // 1. Set BlobURL to a gocloud.dev URL (e.g. "s3://bucket", "gs://bucket", "azblob://container"). // 2. Set the S3-specific fields (ServiceURL, Region, Bucket, etc.) for backwards-compatible S3 config. +// // If BlobURL is set it takes precedence. type BlobBackendOptions struct { // Generic gocloud.dev/blob URL — takes precedence over the S3-specific fields. @@ -240,8 +241,9 @@ type blobFileSystem struct { } // blobKey normalises a webdav path ("/foo/bar") to a blob key ("foo/bar"). +// Also cleans path traversal sequences as defense-in-depth. func blobKey(name string) string { - return strings.TrimPrefix(name, "/") + return strings.TrimPrefix(path.Clean("/"+name), "/") } // Mkdir implements webdav.FileSystem. @@ -357,8 +359,6 @@ func (fs *blobFileSystem) Stat(ctx context.Context, name string) (os.FileInfo, e return nil, err } - - // isNotFound returns true if the error represents a "not found" condition. func isNotFound(err error) bool { if err == nil { diff --git a/origin_serve/backend_blob_minio_test.go b/origin_serve/backend_blob_minio_test.go index 132e78000a..dc779226a8 100644 --- a/origin_serve/backend_blob_minio_test.go +++ b/origin_serve/backend_blob_minio_test.go @@ -92,7 +92,7 @@ func startMinio(t *testing.T) (endpoint, accessKey, secretKey string) { minioDone.Store(true) }() t.Cleanup(func() { - cmd.Process.Kill() + cmd.Process.Kill() //nolint:errcheck for !minioDone.Load() { time.Sleep(10 * time.Millisecond) } diff --git a/origin_serve/backend_globus.go b/origin_serve/backend_globus.go index aca78165a4..5680af43e3 100644 --- a/origin_serve/backend_globus.go +++ b/origin_serve/backend_globus.go @@ -60,8 +60,8 @@ type globusBackend struct { inner *httpsBackend // Globus-specific token management - collectionID string - mu sync.RWMutex + collectionID string + mu sync.RWMutex collectionToken *oauth2.Token transferToken *oauth2.Token oauth2Cfg *oauth2.Config diff --git a/origin_serve/backend_https.go b/origin_serve/backend_https.go index e5d863ad1a..76f8bb54cd 100644 --- a/origin_serve/backend_https.go +++ b/origin_serve/backend_https.go @@ -196,6 +196,8 @@ func (fs *httpsFileSystem) probeBackendMode() error { // davPath constructs the path that the gowebdav client expects (relative to the // service URL root). It prepends the configured storagePrefix. func (fs *httpsFileSystem) davPath(name string) string { + // Clean the path as defense-in-depth against traversal attacks. + name = path.Clean("/" + name) name = strings.TrimPrefix(name, "/") prefix := strings.TrimPrefix(fs.storagePrefix, "/") if prefix != "" { @@ -220,6 +222,8 @@ func (fs *httpsFileSystem) getDavClient(ctx context.Context) *gowebdav.Client { // upstreamURL returns the full URL for the given path on the upstream server. func (fs *httpsFileSystem) upstreamURL(name string) string { + // Clean the path as defense-in-depth against traversal attacks. + name = path.Clean("/" + name) name = strings.TrimPrefix(name, "/") prefix := strings.TrimPrefix(fs.storagePrefix, "/") if prefix != "" { diff --git a/origin_serve/backend_https_test.go b/origin_serve/backend_https_test.go index 7d08939d7e..3171d1a376 100644 --- a/origin_serve/backend_https_test.go +++ b/origin_serve/backend_https_test.go @@ -364,7 +364,7 @@ func TestHTTPSBackend_PlainHTTP_Integration(t *testing.T) { return } w.Header().Set("Content-Length", fmt.Sprintf("%d", len(data))) - w.Write(data) + _, _ = w.Write(data) case http.MethodPut: body, _ := io.ReadAll(r.Body) store[r.URL.Path] = body @@ -437,7 +437,7 @@ func TestHTTPSBackend_TokenPassthrough(t *testing.T) { return } w.Header().Set("Content-Length", "5") - w.Write([]byte("hello")) + _, _ = w.Write([]byte("hello")) })) defer server.Close() @@ -469,7 +469,7 @@ func TestHTTPSBackend_StaticToken(t *testing.T) { return } w.Header().Set("Content-Length", "2") - w.Write([]byte("ok")) + _, _ = w.Write([]byte("ok")) })) defer server.Close() @@ -769,7 +769,7 @@ func mockWebDAVServer() (*httptest.Server, map[string][]byte) { case http.MethodGet: if data, ok := files[p]; ok { w.Header().Set("Content-Length", fmt.Sprintf("%d", len(data))) - w.Write(data) + _, _ = w.Write(data) return } w.WriteHeader(http.StatusNotFound) diff --git a/origin_serve/handlers.go b/origin_serve/handlers.go index ff274c5302..ee41740546 100644 --- a/origin_serve/handlers.go +++ b/origin_serve/handlers.go @@ -282,7 +282,10 @@ func authMiddleware() gin.HandlerFunc { tokens := extractTokens(c.Request) action := getActionFromMethod(c.Request.Method) - resource := c.Request.URL.Path + // Clean the request path to prevent path-traversal attacks + // via URL-encoded dot segments (e.g., %2e%2e). Gin and Go's + // net/http do NOT normalize these before reaching handlers. + resource := path.Clean(c.Request.URL.Path) // Strip the /api/v1.0/origin/data prefix if present // This happens when the director is co-located with the origin // Token scopes are always for the federation prefix (e.g., /test/...), @@ -838,16 +841,29 @@ func RegisterHandlers(engine *gin.Engine, directorEnabled bool) error { // Get the path relative to the export (strip the federation prefix) wildcardPath := c.Param("path") + // Clean the path to prevent traversal attacks via + // URL-encoded dot-dot sequences (%2e%2e). Without + // this, a request like /prefix/%2e%2e/secret reaches + // the backend with ".." intact, potentially escaping + // the storage root. + newPath := path.Clean(wildcardPath) + + // Create a shallow copy of the request and modify its URL + modifiedReq := c.Request.Clone(c.Request.Context()) + modifiedURL := *c.Request.URL + modifiedURL.Path = newPath + modifiedReq.URL = &modifiedURL + // Stash client tracing headers (X-Pelican-JobId, // X-Pelican-Timeout) in the request context so backends // that forward requests can propagate them. - req := server_utils.StashPelicanHeaders(c.Request) + modifiedReq = server_utils.StashPelicanHeaders(modifiedReq) // For PUT requests, pass the Content-Length as a size hint // so the blob backend can optimize upload part sizes. if c.Request.Method == http.MethodPut && c.Request.ContentLength > 0 { - ctx := ContextWithContentLength(req.Context(), c.Request.ContentLength) - req = req.WithContext(ctx) + ctx := ContextWithContentLength(modifiedReq.Context(), c.Request.ContentLength) + modifiedReq = modifiedReq.WithContext(ctx) } if isTPCRequest(c.Request) { @@ -856,19 +872,19 @@ func RegisterHandlers(engine *gin.Engine, directorEnabled bool) error { // For HEAD requests, pass the original request to the WebDAV handler // (it needs the full URL so its Prefix stripping works correctly). // wildcardPath is used only for checksum lookup on the filesystem. - handleHeadWithChecksum(c, handler, req, wildcardPath, backend) + handleHeadWithChecksum(c, handler, modifiedReq, wildcardPath, backend) } else if c.Request.Method == http.MethodGet { // For GET requests, add ETag header based on file metadata - handleGetWithETag(c, handler, req, wildcardPath, exportPrefixMap[prefix]) + handleGetWithETag(c, handler, modifiedReq, wildcardPath, exportPrefixMap[prefix]) } else if c.Request.Method == http.MethodPut { // For PUT requests, return ETag of the newly written file - handlePutWithETag(c, handler, req, wildcardPath, exportPrefixMap[prefix]) + handlePutWithETag(c, handler, modifiedReq, wildcardPath, exportPrefixMap[prefix]) } else { // For all other methods (including PROPFIND), pass the original request // to the WebDAV handler. The handler's Prefix field ensures it strips // the route prefix for filesystem access while using it to construct // correct href values in responses. - handler.ServeHTTP(c.Writer, req) + handler.ServeHTTP(c.Writer, modifiedReq) } } diff --git a/origin_serve/path_traversal_test.go b/origin_serve/path_traversal_test.go new file mode 100644 index 0000000000..a8913413b9 --- /dev/null +++ b/origin_serve/path_traversal_test.go @@ -0,0 +1,403 @@ +/*************************************************************** + * + * Copyright (C) 2026, Pelican Project, Morgridge Institute for Research + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You may + * obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************/ + +package origin_serve + +import ( + "context" + "net/http" + "net/http/httptest" + "os" + "sync" + "testing" + + "github.com/gin-gonic/gin" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "golang.org/x/net/webdav" + + "github.com/pelicanplatform/pelican/server_structs" + "github.com/pelicanplatform/pelican/server_utils" +) + +// spyFileSystem records the paths passed to webdav.FileSystem methods so tests +// can verify which path actually reaches the storage backend. +type spyFileSystem struct { + mu sync.Mutex + lastPath string +} + +func (s *spyFileSystem) OpenFile(_ context.Context, name string, _ int, _ os.FileMode) (webdav.File, error) { + s.mu.Lock() + s.lastPath = name + s.mu.Unlock() + return nil, os.ErrNotExist +} + +func (s *spyFileSystem) Stat(_ context.Context, name string) (os.FileInfo, error) { + s.mu.Lock() + s.lastPath = name + s.mu.Unlock() + return nil, os.ErrNotExist +} + +func (s *spyFileSystem) Mkdir(context.Context, string, os.FileMode) error { return os.ErrPermission } +func (s *spyFileSystem) RemoveAll(context.Context, string) error { return os.ErrPermission } +func (s *spyFileSystem) Rename(context.Context, string, string) error { return os.ErrPermission } + +// spyBackend implements server_utils.OriginBackend backed by a spyFileSystem. +type spyBackend struct { + fs *spyFileSystem +} + +func (b *spyBackend) CheckAvailability() error { return nil } +func (b *spyBackend) FileSystem() webdav.FileSystem { return b.fs } +func (b *spyBackend) Checksummer() server_utils.OriginChecksummer { return nil } + +// TestPathTraversal_HasPathPrefix verifies that hasPathPrefix is robust +// against attempts to escape an authorized prefix using ".." sequences. +func TestPathTraversal_HasPathPrefix(t *testing.T) { + tests := []struct { + name string + requestPath string + authorizedPrefix string + expected bool + }{ + // Baseline: normal access within the prefix + { + name: "NormalSubpath", + requestPath: "/data/project/file.txt", + authorizedPrefix: "/data/project", + expected: true, + }, + // Dot-dot that escapes the prefix entirely + { + name: "DotDotEscapesPrefix", + requestPath: "/data/project/../../etc/passwd", + authorizedPrefix: "/data/project", + expected: false, + }, + // Dot-dot that escapes one level above prefix + { + name: "DotDotToSibling", + requestPath: "/data/project/../other/secret", + authorizedPrefix: "/data/project", + expected: false, + }, + // Dot-dot that stays within (normalizes back into) the prefix + { + name: "DotDotStaysWithin", + requestPath: "/data/project/sub/../file.txt", + authorizedPrefix: "/data/project", + expected: true, + }, + // Many dot-dots that would traverse past the root + { + name: "ManyDotDotsPastRoot", + requestPath: "/data/project/../../../../../etc/shadow", + authorizedPrefix: "/data/project", + expected: false, + }, + // Single dot (current dir) should normalize cleanly + { + name: "DotCurrent", + requestPath: "/data/project/./file.txt", + authorizedPrefix: "/data/project", + expected: true, + }, + // Double slashes should normalize + { + name: "DoubleSlash", + requestPath: "/data/project//file.txt", + authorizedPrefix: "/data/project", + expected: true, + }, + // Trailing slash normalization + { + name: "TrailingSlash", + requestPath: "/data/project/sub/", + authorizedPrefix: "/data/project", + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := hasPathPrefix(tt.requestPath, tt.authorizedPrefix) + assert.Equal(t, tt.expected, result) + }) + } +} + +// TestPathTraversal_DavPath verifies that the HTTPS backend's davPath +// function normalizes path traversal sequences. +func TestPathTraversal_DavPath(t *testing.T) { + tests := []struct { + name string + storagePrefix string + inputPath string + expected string + }{ + { + name: "Normal", + storagePrefix: "/store", + inputPath: "/file.txt", + expected: "/store/file.txt", + }, + { + name: "DotDotEscapesStorage", + storagePrefix: "/store", + inputPath: "/../../etc/passwd", + expected: "/store/etc/passwd", + }, + { + name: "DotDotPartial", + storagePrefix: "/store", + inputPath: "/sub/../other", + expected: "/store/other", + }, + { + name: "DotDotWithoutPrefix", + storagePrefix: "", + inputPath: "/foo/../bar", + expected: "/bar", + }, + { + name: "DotDotMultiple", + storagePrefix: "/store", + inputPath: "/a/b/../../c", + expected: "/store/c", + }, + { + name: "DotOnly", + storagePrefix: "/store", + inputPath: "/./file.txt", + expected: "/store/file.txt", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + fs := &httpsFileSystem{ + serviceURL: "https://example.com", + storagePrefix: tt.storagePrefix, + } + result := fs.davPath(tt.inputPath) + assert.Equal(t, tt.expected, result) + }) + } +} + +// TestPathTraversal_UpstreamURL verifies that the HTTPS backend's +// upstreamURL function normalizes path traversal. +func TestPathTraversal_UpstreamURL(t *testing.T) { + tests := []struct { + name string + storagePrefix string + inputPath string + expected string + }{ + { + name: "Normal", + storagePrefix: "/store", + inputPath: "/file.txt", + expected: "https://example.com/store/file.txt", + }, + { + name: "DotDotEscape", + storagePrefix: "/store", + inputPath: "/../../etc/passwd", + expected: "https://example.com/store/etc/passwd", + }, + { + name: "NoPrefix_DotDot", + storagePrefix: "", + inputPath: "/foo/../bar", + expected: "https://example.com/bar", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + fs := &httpsFileSystem{ + serviceURL: "https://example.com", + storagePrefix: tt.storagePrefix, + } + result := fs.upstreamURL(tt.inputPath) + assert.Equal(t, tt.expected, result) + }) + } +} + +// TestPathTraversal_BlobKey verifies that the blob backend's blobKey +// function normalizes path traversal sequences. +func TestPathTraversal_BlobKey(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "Normal", + input: "/foo/bar", + expected: "foo/bar", + }, + { + name: "DotDotEscape", + input: "/foo/../../etc/passwd", + expected: "etc/passwd", + }, + { + name: "DotDotPartial", + input: "/foo/bar/../baz", + expected: "foo/baz", + }, + { + name: "Root", + input: "/", + expected: "", + }, + { + name: "DotOnly", + input: "/./foo", + expected: "foo", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := blobKey(tt.input) + assert.Equal(t, tt.expected, result) + }) + } +} + +// TestPathTraversal_HandleRequest exercises the real RegisterHandlers / +// handleRequest code path to verify that path.Clean is applied before the +// request reaches the storage backend. If the path.Clean call in +// handleRequest were removed, the spy filesystem would receive uncleaned +// ".." sequences and the assertions would fail. +func TestPathTraversal_HandleRequest(t *testing.T) { + gin.SetMode(gin.TestMode) + + // --- set up spy backend ------------------------------------------------ + spy := &spyFileSystem{} + be := &spyBackend{fs: spy} + + // --- wire package-level state ------------------------------------------ + ResetHandlers() + t.Cleanup(func() { + ResetHandlers() + globalAuthConfig = nil + }) + + backends = map[string]server_utils.OriginBackend{ + "/test": be, + } + webdavHandlers = map[string]*webdav.Handler{ + "/test": { + FileSystem: spy, + LockSystem: webdav.NewMemLS(), + }, + } + exportPrefixMap = map[string]string{ + "/test": "/storage", + } + + // --- minimal auth config with PublicReads so no token is required ------ + exports := []server_utils.OriginExport{{ + FederationPrefix: "/test", + StoragePrefix: "/storage", + Capabilities: server_structs.Capabilities{PublicReads: true}, + }} + ac := &authConfig{} + ac.exports.Store(&exports) + globalAuthConfig = ac + + // --- register the real handlers ---------------------------------------- + engine := gin.New() + require.NoError(t, RegisterHandlers(engine, false)) + + // --- test cases -------------------------------------------------------- + tests := []struct { + name string + requestPath string // sent to the router + expectedFSPath string // what the spy filesystem should see ("" = not called) + expectAuthz401 bool // true if auth middleware should block the request + }{ + { + name: "NormalPath", + requestPath: "/test/sub/file.txt", + expectedFSPath: "/sub/file.txt", + }, + { + name: "DotDotPartial", + requestPath: "/test/sub/../other", + expectedFSPath: "/other", + }, + { + name: "DotDotDeep", + requestPath: "/test/a/b/../../c", + expectedFSPath: "/c", + }, + { + // When .. escapes the export prefix entirely, the auth + // middleware (which also path.Clean's) correctly blocks + // the request because the resolved path is outside /test. + name: "DotDotEscapesExport_blocked", + requestPath: "/test/sub/../../etc/passwd", + expectAuthz401: true, + }, + { + name: "DotOnly", + requestPath: "/test/./sub/file.txt", + expectedFSPath: "/sub/file.txt", + }, + { + name: "DoubleSlash", + requestPath: "/test/sub//file.txt", + expectedFSPath: "/sub/file.txt", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + spy.mu.Lock() + spy.lastPath = "" + spy.mu.Unlock() + + w := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, tt.requestPath, nil) + engine.ServeHTTP(w, req) + + spy.mu.Lock() + got := spy.lastPath + spy.mu.Unlock() + + if tt.expectAuthz401 { + assert.Equal(t, http.StatusUnauthorized, w.Code, + "auth middleware should block paths that escape the export") + assert.Empty(t, got, "filesystem should NOT be called for blocked requests") + } else { + assert.NotEmpty(t, got, "filesystem should have been called") + assert.Equal(t, tt.expectedFSPath, got, + "filesystem should receive the path.Clean'd path") + } + }) + } +} From e6f514e439c7bff5c344847adc6d5937f19093ee Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Sat, 14 Mar 2026 13:27:10 -0500 Subject: [PATCH 04/10] Add extensive Globus and WebDAV E2E tests --- config/parameter_defaults.go | 8 + docs/parameters.yaml | 28 ++ e2e_fed_tests/globusv2_test.go | 432 +++++++++++++++++++++ e2e_fed_tests/httpsv2_test.go | 568 ++++++++++++++++++++++++++++ launchers/origin_serve.go | 2 +- origin/globus.go | 5 +- origin/globus_client.go | 35 +- origin_serve/backend_globus.go | 50 ++- origin_serve/backend_globus_test.go | 2 +- origin_serve/backend_https.go | 11 + origin_serve/path_traversal_test.go | 6 +- param/parameters.go | 15 + param/parameters_struct.go | 6 + 13 files changed, 1143 insertions(+), 25 deletions(-) create mode 100644 e2e_fed_tests/globusv2_test.go create mode 100644 e2e_fed_tests/httpsv2_test.go diff --git a/config/parameter_defaults.go b/config/parameter_defaults.go index e974e833fd..4f632d3edf 100644 --- a/config/parameter_defaults.go +++ b/config/parameter_defaults.go @@ -550,6 +550,14 @@ func SetParameterDefaults(v *viper.Viper, isRoot bool, isOSDF bool) { v.SetDefault(param.Origin_GlobusConfigLocation.GetName(), val) } } + // Origin.GlobusIssuerURL + v.SetDefault(param.Origin_GlobusIssuerURL.GetName(), "https://auth.globus.org/") + // Origin.GlobusTransferAPIBaseUrl + v.SetDefault(param.Origin_GlobusTransferAPIBaseUrl.GetName(), "https://transfer.api.globusonline.org/v0.10/") + // Origin.Globusv2TokenRefreshInterval + v.SetDefault(param.Origin_Globusv2TokenRefreshInterval.GetName(), "5m") + // Origin.HttpAuthTokenPassthrough + v.SetDefault(param.Origin_HttpAuthTokenPassthrough.GetName(), false) // Origin.IssuerMode v.SetDefault(param.Origin_IssuerMode.GetName(), "oa4mp") // Origin.Multiuser diff --git a/docs/parameters.yaml b/docs/parameters.yaml index 938691c419..6c6fa4f6d6 100644 --- a/docs/parameters.yaml +++ b/docs/parameters.yaml @@ -1802,6 +1802,34 @@ type: filename default: none components: ["origin"] --- +name: Origin.GlobusIssuerURL +description: |+ + The OIDC issuer URL for Globus authentication. + This is used for OIDC discovery and constructing OAuth2 endpoints. + Only override this for testing with a mock Globus server. +type: url +default: https://auth.globus.org/ +hidden: true +components: ["origin"] +--- +name: Origin.GlobusTransferAPIBaseUrl +description: |+ + The base URL for the Globus Transfer API, used to look up collection endpoint information. + Only override this for testing with a mock Globus server. +type: url +default: https://transfer.api.globusonline.org/v0.10/ +hidden: true +components: ["origin"] +--- +name: Origin.Globusv2TokenRefreshInterval +description: |+ + The interval between periodic token refreshes for native Globus v2 backends. + Only override this for testing. +type: duration +default: 5m +hidden: true +components: ["origin"] +--- name: Origin.FedTokenLocation description: |+ A path to the file containing a token issued by the federation's issuer. This token may be consumed by other federation services diff --git a/e2e_fed_tests/globusv2_test.go b/e2e_fed_tests/globusv2_test.go new file mode 100644 index 0000000000..ae09c9fee0 --- /dev/null +++ b/e2e_fed_tests/globusv2_test.go @@ -0,0 +1,432 @@ +//go:build !windows + +/*************************************************************** + * + * Copyright (C) 2026, Pelican Project, Morgridge Institute for Research + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You may + * obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************/ + +package fed_tests + +import ( + "encoding/json" + "fmt" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "strings" + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "golang.org/x/oauth2" + + "github.com/pelicanplatform/pelican/client" + "github.com/pelicanplatform/pelican/config" + "github.com/pelicanplatform/pelican/fed_test_utils" + "github.com/pelicanplatform/pelican/origin" + "github.com/pelicanplatform/pelican/origin_serve" + "github.com/pelicanplatform/pelican/param" + "github.com/pelicanplatform/pelican/server_utils" + "github.com/pelicanplatform/pelican/test_utils" + "github.com/pelicanplatform/pelican/token" + "github.com/pelicanplatform/pelican/token_scopes" +) + +const testCollectionID = "e2e-test-collection-00000001" + +// --------------------------------------------------------------------------- +// Mock Globus servers +// --------------------------------------------------------------------------- + +// mockGlobusOIDC serves the OIDC discovery document. +// GET /.well-known/openid-configuration → returns token_endpoint, etc. +func mockGlobusOIDC(tokenEndpointURL string) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/.well-known/openid-configuration" { + http.NotFound(w, r) + return + } + resp := map[string]interface{}{ + "issuer": "https://mock-globus-auth.test/", + "authorization_endpoint": "https://mock-globus-auth.test/v2/oauth2/authorize", + "token_endpoint": tokenEndpointURL, + "device_authorization_endpoint": "https://mock-globus-auth.test/v2/oauth2/device/authorize", + "scopes_supported": []string{"openid", "email", "profile"}, + } + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(resp) + }) +} + +// mockGlobusTokenServer serves the OAuth2 token endpoint. +// POST /v2/oauth2/token → returns access/refresh tokens. +// Also tracks how many refresh requests have been made. +func mockGlobusTokenServer(refreshCount *atomic.Int64) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost || r.URL.Path != "/v2/oauth2/token" { + http.NotFound(w, r) + return + } + if err := r.ParseForm(); err != nil { + http.Error(w, "bad form", http.StatusBadRequest) + return + } + grantType := r.FormValue("grant_type") + if grantType == "refresh_token" { + refreshCount.Add(1) + } + + // Return a new access token with short expiry + resp := map[string]interface{}{ + "access_token": fmt.Sprintf("mock-access-token-%d", time.Now().UnixNano()), + "refresh_token": "mock-refresh-token-stable", + "expires_in": 3600, + "token_type": "Bearer", + } + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(resp) + }) +} + +// mockGlobusTransferAPI serves the Globus Transfer API. +// GET /v0.10/endpoint/{cid} → returns collection HTTPS URL. +func mockGlobusTransferAPI(httpsServerURL string) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + prefix := "/v0.10/endpoint/" + if !strings.HasPrefix(r.URL.Path, prefix) { + http.NotFound(w, r) + return + } + resp := map[string]interface{}{ + "DATA_TYPE": "endpoint", + "id": strings.TrimPrefix(r.URL.Path, prefix), + "display_name": "Mock Test Collection", + "https_server": httpsServerURL, + } + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(resp) + }) +} + +// startMockGlobusServers starts all mock Globus services and returns: +// - oidcURL: the base URL for OIDC discovery +// - tokenURL: the token endpoint URL +// - transferAPIBaseURL: the Transfer API base URL (with trailing slash) +// - refreshCount: an atomic counter for token refresh requests +func startMockGlobusServers(t *testing.T, webdavURL string) (oidcURL, tokenURL, transferAPIBaseURL string, refreshCount *atomic.Int64) { + t.Helper() + refreshCount = &atomic.Int64{} + + // Token server + tokenSrv := httptest.NewServer(mockGlobusTokenServer(refreshCount)) + t.Cleanup(tokenSrv.Close) + tokenURL = tokenSrv.URL + "/v2/oauth2/token" + + // OIDC discovery server + oidcSrv := httptest.NewServer(mockGlobusOIDC(tokenURL)) + t.Cleanup(oidcSrv.Close) + oidcURL = oidcSrv.URL + "/" + + // Transfer API server + transferSrv := httptest.NewServer(mockGlobusTransferAPI(webdavURL)) + t.Cleanup(transferSrv.Close) + transferAPIBaseURL = transferSrv.URL + "/v0.10/" + + return +} + +// globusv2OriginConfig returns a YAML origin config for the Globus v2 E2E test. +func globusv2OriginConfig(collectionID, collectionName string) string { + return fmt.Sprintf(` +Origin: + StorageType: globusv2 + Exports: + - FederationPrefix: /test + StoragePrefix: "/" + GlobusCollectionID: "%s" + GlobusCollectionName: "%s" + Capabilities: ["PublicReads", "Writes", "Listings"] +Director: + MinStatResponse: 1 + MaxStatResponse: 1 +`, collectionID, collectionName) +} + +// getGlobusv2Token creates a federation token for the test. +func getGlobusv2Token(t *testing.T) string { + t.Helper() + issuer, err := config.GetServerIssuerURL() + require.NoError(t, err) + + tokenConfig := token.NewWLCGToken() + tokenConfig.Lifetime = time.Minute + tokenConfig.Issuer = issuer + tokenConfig.Subject = "origin" + tokenConfig.AddAudienceAny() + + readScope, err := token_scopes.Wlcg_Storage_Read.Path("/") + require.NoError(t, err) + createScope, err := token_scopes.Wlcg_Storage_Create.Path("/") + require.NoError(t, err) + modScope, err := token_scopes.Wlcg_Storage_Modify.Path("/") + require.NoError(t, err) + tokenConfig.AddScopes(readScope, createScope, modScope) + + tkn, err := tokenConfig.CreateToken() + require.NoError(t, err) + return tkn +} + +// --------------------------------------------------------------------------- +// TestGlobusv2Origin — E2E test with mocked Globus API +// --------------------------------------------------------------------------- + +func TestGlobusv2Origin(t *testing.T) { + t.Cleanup(test_utils.SetupTestLogging(t)) + server_utils.ResetTestState() + defer server_utils.ResetTestState() + + // Reset the Globus OAuth config singleton so it can pick up our mock endpoints + origin.ResetGlobusOAuthCfg() + t.Cleanup(origin.ResetGlobusOAuthCfg) + + // Start a WebDAV server as the "Globus collection HTTPS endpoint". + // This is what the real Globus HTTPS endpoint exposes: a WebDAV-capable + // HTTP server that accepts GET, PUT, MKCOL, PROPFIND, etc. + webdavRoot := t.TempDir() + webdavURL := startWebDAVServer(t, webdavRoot) + + // Start mock Globus API servers + oidcURL, tokenURL, transferAPIBaseURL, refreshCount := startMockGlobusServers(t, webdavURL) + + // Create Globus client credential files + tmpDir := t.TempDir() + clientIDFile := filepath.Join(tmpDir, "globus-client-id") + clientSecretFile := filepath.Join(tmpDir, "globus-client-secret") + require.NoError(t, os.WriteFile(clientIDFile, []byte("test-globus-client-id"), 0600)) + require.NoError(t, os.WriteFile(clientSecretFile, []byte("test-globus-client-secret"), 0600)) + + // Configure Globus hidden params to point at mock servers + require.NoError(t, param.Set(param.Origin_GlobusIssuerURL.GetName(), oidcURL)) + require.NoError(t, param.Set(param.Origin_GlobusTransferAPIBaseUrl.GetName(), transferAPIBaseURL)) + require.NoError(t, param.Set(param.Origin_GlobusClientIDFile.GetName(), clientIDFile)) + require.NoError(t, param.Set(param.Origin_GlobusClientSecretFile.GetName(), clientSecretFile)) + // Set a short refresh interval so we can verify token refresh in the test + require.NoError(t, param.Set(param.Origin_Globusv2TokenRefreshInterval.GetName(), "2s")) + + originConfig := globusv2OriginConfig(testCollectionID, "Mock Test Collection") + + ft := fed_test_utils.NewFedTest(t, originConfig) + require.NotNil(t, ft) + require.Greater(t, len(ft.Exports), 0) + + // NewFedTest overrides StoragePrefix to a random temp path. The Globus + // backend was created with an empty HTTPSServer and not activated (because + // InitGlobusBackend found no DB records to load). + // Activate the backend directly with mock tokens pointing at the WebDAV server. + storagePrefix := ft.Exports[0].StoragePrefix + + // Create the directory structure that the WebDAV server needs. + // The origin will send requests to //...; the + // WebDAV server maps that to //... + webdavDataDir := filepath.Join(webdavRoot, storagePrefix) + require.NoError(t, os.MkdirAll(webdavDataDir, 0755)) + + // Copy the hello_world.txt that NewFedTest created + hwSrc := filepath.Join(storagePrefix, "hello_world.txt") + hwDst := filepath.Join(webdavDataDir, "hello_world.txt") + if data, err := os.ReadFile(hwSrc); err == nil { + require.NoError(t, os.WriteFile(hwDst, data, 0644)) + } + + // Activate the Globus backend with mock tokens + gBackends := origin_serve.GetGlobusBackends() + require.Contains(t, gBackends, testCollectionID, "Globus backend for %s should exist", testCollectionID) + gb := gBackends[testCollectionID] + + collectionToken := &oauth2.Token{ + AccessToken: "mock-collection-access-token", + RefreshToken: "mock-collection-refresh-token", + Expiry: time.Now().Add(1 * time.Hour), + TokenType: "Bearer", + } + transferToken := &oauth2.Token{ + AccessToken: "mock-transfer-access-token", + RefreshToken: "mock-transfer-refresh-token", + Expiry: time.Now().Add(1 * time.Hour), + TokenType: "Bearer", + } + + // Create an OAuth2 config pointing at the mock token endpoint for refresh + mockOAuth2Cfg := &oauth2.Config{ + ClientID: "test-globus-client-id", + ClientSecret: "test-globus-client-secret", + Endpoint: oauth2.Endpoint{ + TokenURL: tokenURL, + AuthStyle: oauth2.AuthStyleInHeader, + }, + } + + gb.Activate(collectionToken, transferToken, webdavURL, mockOAuth2Cfg) + + testToken := getGlobusv2Token(t) + localTmpDir := t.TempDir() + + t.Run("UploadAndDownload", func(t *testing.T) { + testContent := "Hello from the Globus v2 E2E federation test!" + localFile := filepath.Join(localTmpDir, "globus_test.txt") + require.NoError(t, os.WriteFile(localFile, []byte(testContent), 0644)) + + uploadURL := fmt.Sprintf("pelican://%s:%d/test/globus_test.txt", + param.Server_Hostname.GetString(), param.Server_WebPort.GetInt()) + + uploadResults, err := client.DoPut(ft.Ctx, localFile, uploadURL, false, client.WithToken(testToken)) + require.NoError(t, err) + require.NotEmpty(t, uploadResults) + assert.Greater(t, uploadResults[0].TransferredBytes, int64(0)) + + downloadFile := filepath.Join(localTmpDir, "downloaded.txt") + downloadResults, err := client.DoGet(ft.Ctx, uploadURL, downloadFile, false, client.WithToken(testToken)) + require.NoError(t, err) + require.NotEmpty(t, downloadResults) + + got, err := os.ReadFile(downloadFile) + require.NoError(t, err) + assert.Equal(t, testContent, string(got)) + }) + + t.Run("RecursiveUploadDownload", func(t *testing.T) { + sourceDir := t.TempDir() + sourceSubdir := filepath.Join(sourceDir, "subdir") + sourceDeepdir := filepath.Join(sourceSubdir, "deepdir") + require.NoError(t, os.MkdirAll(sourceDeepdir, 0755)) + + require.NoError(t, os.WriteFile(filepath.Join(sourceDir, "file1.txt"), []byte("globus-content1"), 0644)) + require.NoError(t, os.WriteFile(filepath.Join(sourceDir, "file2.txt"), []byte("globus-content2"), 0644)) + require.NoError(t, os.WriteFile(filepath.Join(sourceSubdir, "file3.txt"), []byte("globus-content3"), 0644)) + require.NoError(t, os.WriteFile(filepath.Join(sourceDeepdir, "file4.txt"), []byte("globus-content4"), 0644)) + + uploadURL := fmt.Sprintf("pelican://%s:%d/test/recursive/", + param.Server_Hostname.GetString(), param.Server_WebPort.GetInt()) + + _, err := client.DoPut(ft.Ctx, sourceDir, uploadURL, true, client.WithToken(testToken)) + require.NoError(t, err, "recursive upload should succeed") + + downloadDir := t.TempDir() + _, err = client.DoGet(ft.Ctx, uploadURL, downloadDir, true, client.WithToken(testToken)) + require.NoError(t, err, "recursive download should succeed") + + testCases := []struct { + relativePath string + expectedContent string + }{ + {"file1.txt", "globus-content1"}, + {"file2.txt", "globus-content2"}, + {filepath.Join("subdir", "file3.txt"), "globus-content3"}, + {filepath.Join("subdir", "deepdir", "file4.txt"), "globus-content4"}, + } + for _, tc := range testCases { + downloadedPath := filepath.Join(downloadDir, tc.relativePath) + content, err := os.ReadFile(downloadedPath) + require.NoError(t, err, "should be able to read %s", tc.relativePath) + assert.Equal(t, tc.expectedContent, string(content), "content of %s should match", tc.relativePath) + } + }) + + t.Run("Listing", func(t *testing.T) { + files := []string{"list_a.txt", "list_b.txt", "list_c.txt"} + for _, name := range files { + localFile := filepath.Join(localTmpDir, name) + require.NoError(t, os.WriteFile(localFile, []byte("globus-list-"+name), 0644)) + + uploadURL := fmt.Sprintf("pelican://%s:%d/test/%s", + param.Server_Hostname.GetString(), param.Server_WebPort.GetInt(), name) + _, err := client.DoPut(ft.Ctx, localFile, uploadURL, false, client.WithToken(testToken)) + require.NoError(t, err, "failed to upload %s", name) + } + + listURL := fmt.Sprintf("pelican://%s:%d/test/", + param.Server_Hostname.GetString(), param.Server_WebPort.GetInt()) + entries, err := client.DoList(ft.Ctx, listURL, client.WithToken(testToken)) + require.NoError(t, err) + require.NotEmpty(t, entries) + + nameSet := make(map[string]bool) + for _, e := range entries { + nameSet[e.Name] = true + } + for _, name := range files { + found := false + for key := range nameSet { + if strings.Contains(key, name) { + found = true + break + } + } + assert.True(t, found, "listing should contain %s", name) + } + }) + + t.Run("TokenRefresh", func(t *testing.T) { + // The token refresh interval is set to 2s. Give short-lived tokens + // so the refresher actually hits the mock token endpoint. + shortCollectionToken := &oauth2.Token{ + AccessToken: "short-lived-collection-token", + RefreshToken: "mock-collection-refresh-token", + Expiry: time.Now().Add(30 * time.Second), // expiring soon (within 10min threshold) + TokenType: "Bearer", + } + shortTransferToken := &oauth2.Token{ + AccessToken: "short-lived-transfer-token", + RefreshToken: "mock-transfer-refresh-token", + Expiry: time.Now().Add(30 * time.Second), + TokenType: "Bearer", + } + gb.Activate(shortCollectionToken, shortTransferToken, webdavURL, mockOAuth2Cfg) + + initialRefreshes := refreshCount.Load() + // Wait for the periodic refresh (interval = 2s) to trigger + require.Eventually(t, func() bool { + return refreshCount.Load() > initialRefreshes + }, 10*time.Second, 500*time.Millisecond, "expected at least one token refresh to occur") + + t.Logf("Token refresh count increased from %d to %d", initialRefreshes, refreshCount.Load()) + + // Verify the backend is still activated after refresh + assert.True(t, gb.IsActivated(), "backend should remain activated after token refresh") + + // Verify file operations still work after token refresh + testContent := "post-refresh content" + localFile := filepath.Join(localTmpDir, "post_refresh.txt") + require.NoError(t, os.WriteFile(localFile, []byte(testContent), 0644)) + + uploadURL := fmt.Sprintf("pelican://%s:%d/test/post_refresh.txt", + param.Server_Hostname.GetString(), param.Server_WebPort.GetInt()) + _, err := client.DoPut(ft.Ctx, localFile, uploadURL, false, client.WithToken(testToken)) + require.NoError(t, err, "upload should succeed after token refresh") + + downloadFile := filepath.Join(localTmpDir, "post_refresh_download.txt") + _, err = client.DoGet(ft.Ctx, uploadURL, downloadFile, false, client.WithToken(testToken)) + require.NoError(t, err, "download should succeed after token refresh") + + got, err := os.ReadFile(downloadFile) + require.NoError(t, err) + assert.Equal(t, testContent, string(got)) + }) +} diff --git a/e2e_fed_tests/httpsv2_test.go b/e2e_fed_tests/httpsv2_test.go new file mode 100644 index 0000000000..ccc1c804fc --- /dev/null +++ b/e2e_fed_tests/httpsv2_test.go @@ -0,0 +1,568 @@ +//go:build !windows + +/*************************************************************** + * + * Copyright (C) 2026, Pelican Project, Morgridge Institute for Research + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You may + * obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************/ + +package fed_tests + +import ( + "fmt" + "net/http" + "net/http/httptest" + "os" + "os/exec" + "os/user" + "path/filepath" + "regexp" + "strconv" + "strings" + "syscall" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "golang.org/x/net/webdav" + + "github.com/pelicanplatform/pelican/client" + "github.com/pelicanplatform/pelican/config" + "github.com/pelicanplatform/pelican/fed_test_utils" + "github.com/pelicanplatform/pelican/param" + "github.com/pelicanplatform/pelican/server_utils" + "github.com/pelicanplatform/pelican/test_utils" + "github.com/pelicanplatform/pelican/token" + "github.com/pelicanplatform/pelican/token_scopes" +) + +// getHTTPSv2Token creates a token with read/create/modify scopes for HTTPSv2 tests. +func getHTTPSv2Token(t *testing.T) string { + t.Helper() + issuer, err := config.GetServerIssuerURL() + require.NoError(t, err) + + tokenConfig := token.NewWLCGToken() + tokenConfig.Lifetime = time.Minute + tokenConfig.Issuer = issuer + tokenConfig.Subject = "origin" + tokenConfig.AddAudienceAny() + + readScope, err := token_scopes.Wlcg_Storage_Read.Path("/") + require.NoError(t, err) + createScope, err := token_scopes.Wlcg_Storage_Create.Path("/") + require.NoError(t, err) + modScope, err := token_scopes.Wlcg_Storage_Modify.Path("/") + require.NoError(t, err) + tokenConfig.AddScopes(readScope, createScope, modScope) + + tkn, err := tokenConfig.CreateToken() + require.NoError(t, err) + return tkn +} + +// startWebDAVServer starts a real WebDAV server backed by the given directory, +// and returns its URL. The server is stopped when the test completes. +func startWebDAVServer(t *testing.T, root string) string { + t.Helper() + + handler := &webdav.Handler{ + FileSystem: webdav.Dir(root), + LockSystem: webdav.NewMemLS(), + Logger: func(_ *http.Request, err error) { + if err != nil { + t.Logf("WebDAV: %v", err) + } + }, + } + + srv := httptest.NewServer(handler) + t.Cleanup(srv.Close) + return srv.URL +} + +// startXRootDHTTPServer starts an XRootD HTTP server whose filesystem root is +// localRoot (via oss.localroot). all.export is set to "/" so any URL path is +// accepted, but because oss.localroot confines all I/O to localRoot, the server +// cannot read or write anything outside that directory tree. +// +// Returns the base URL of the XRootD HTTP service (e.g., "http://localhost:34567"). +func startXRootDHTTPServer(t *testing.T, localRoot string) string { + t.Helper() + + // Create work directories under /tmp so they have predictable paths + // and are accessible to the xrootd user after chown. + cfgDir, err := os.MkdirTemp("/tmp", "xrdcfg-test-*") + require.NoError(t, err) + t.Cleanup(func() { os.RemoveAll(cfgDir) }) + + logDir := filepath.Join(cfgDir, "log") + adminDir := filepath.Join(cfgDir, "admin") + pidDir := filepath.Join(cfgDir, "pid") + for _, d := range []string{logDir, adminDir, pidDir} { + require.NoError(t, os.MkdirAll(d, 0755)) + } + + logFile := filepath.Join(logDir, "xrootd.log") + + cfgContent := fmt.Sprintf(`all.export / +oss.localroot %s +xrd.port any +xrd.protocol http:any libXrdHttp-5.so +http.desthttps no +http.selfhttps no +http.listingdeny no +http.listingredir no +sec.protocol host +all.adminpath %s +all.pidpath %s +`, localRoot, adminDir, pidDir) + + cfgFile := filepath.Join(cfgDir, "xrootd.cfg") + require.NoError(t, os.WriteFile(cfgFile, []byte(cfgContent), 0644)) + + // Build the command. XRootD refuses to run as UID 0 (root), so when + // running as root we drop privileges to the xrootd user via + // SysProcAttr.Credential. Non-root runners can invoke xrootd directly. + cmd := exec.Command("xrootd", "-c", cfgFile, "-l", logFile) + if os.Getuid() == 0 { + xrdUser, err := user.Lookup("xrootd") + require.NoError(t, err, "xrootd user must exist when running as root") + uid, err := strconv.Atoi(xrdUser.Uid) + require.NoError(t, err) + gid, err := strconv.Atoi(xrdUser.Gid) + require.NoError(t, err) + + // chown the work dirs so xrootd can write to them + require.NoError(t, chownRecursive(cfgDir, uid, gid)) + require.NoError(t, chownRecursive(localRoot, uid, gid)) + + cmd.SysProcAttr = &syscall.SysProcAttr{ + Credential: &syscall.Credential{ + Uid: uint32(uid), + Gid: uint32(gid), + }, + } + } + + require.NoError(t, cmd.Start(), "failed to start xrootd") + t.Cleanup(func() { + cmd.Process.Kill() //nolint:errcheck + cmd.Wait() //nolint:errcheck + }) + + // Parse the port from XRootD's log output. It prints a line like: + // ------ xrootd anon@hostname:34567 initialization completed. + portRe := regexp.MustCompile(`initialization completed\.\s*$`) + addrRe := regexp.MustCompile(`anon@[^:]+:(\d+)`) + var port string + require.Eventually(t, func() bool { + data, err := os.ReadFile(logFile) + if err != nil { + return false + } + if !portRe.Match(data) { + return false + } + if m := addrRe.FindSubmatch(data); m != nil { + port = string(m[1]) + return true + } + return false + }, 30*time.Second, 200*time.Millisecond, "xrootd never finished initialization") + + baseURL := fmt.Sprintf("http://localhost:%s", port) + + // Verify the server is responding + require.Eventually(t, func() bool { + resp, err := http.Get(baseURL + "/") + if err != nil { + return false + } + resp.Body.Close() + return resp.StatusCode < 500 + }, 10*time.Second, 200*time.Millisecond, "xrootd HTTP not responding") + + return baseURL +} + +// chownRecursive changes ownership of a directory tree to the given uid/gid. +func chownRecursive(dir string, uid, gid int) error { + return filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + return os.Chown(path, uid, gid) + }) +} + +// httpsv2OriginConfig returns a Pelican origin YAML config for HTTPSv2 tests. +// The storagePrefix is the URL-path prefix on the upstream HTTP server where +// files are stored. For a Go WebDAV server this is typically "/" (the root). +// For XRootD it might be "/data" or whatever path is exported. +func httpsv2OriginConfig(httpServiceURL, storagePrefix string) string { + return fmt.Sprintf(` +Origin: + StorageType: httpsv2 + HttpServiceUrl: "%s" + Exports: + - FederationPrefix: /test + StoragePrefix: "%s" + Capabilities: ["PublicReads", "Writes", "Listings"] +Director: + MinStatResponse: 1 + MaxStatResponse: 1 +`, httpServiceURL, storagePrefix) +} + +// -------------------------------------------------------------------------- +// Test with a Go-native WebDAV server +// -------------------------------------------------------------------------- + +func TestHTTPSv2WebDAVOrigin(t *testing.T) { + t.Cleanup(test_utils.SetupTestLogging(t)) + server_utils.ResetTestState() + defer server_utils.ResetTestState() + + // The Go WebDAV server needs its own root directory. We'll create it + // up front; NewFedTest will override StoragePrefix to a temp dir, + // but for HTTPSv2 that's the path prefix on the upstream URL (not + // a local dir). We set StoragePrefix to "/" so the origin hits the + // WebDAV root, and map the WebDAV server's root to the directory + // that NewFedTest creates for us. + // + // Strategy: start WebDAV on a temporary dir, pass that as the + // StoragePrefix in the config so NewFedTest won't break the mapping. + // After NewFedTest overrides StoragePrefix, we re-point the WebDAV + // server at the directory NewFedTest created. + // + // Actually simpler: we start the WebDAV server on "/" and after + // NewFedTest we know ft.Exports[0].StoragePrefix. The WebDAV server + // has webdav.Dir(webdavRoot), so URL path /tmp/ExportXXX/file.txt + // maps to webdavRoot/tmp/ExportXXX/file.txt. We just need to create + // that path structure in the webdav root. + // + // Simplest: use a placeholder StoragePrefix of "/data", start WebDAV + // on a temp dir, and create a "data" subdir in it that holds test files. + // NewFedTest will override StoragePrefix, but we handle that below. + + // Create a top-level dir for the WebDAV server + webdavRoot := t.TempDir() + webdavURL := startWebDAVServer(t, webdavRoot) + + // We'll configure StoragePrefix = "/data" but NewFedTest overrides it. + // So after NewFedTest we need to create the directory structure that + // matches the overridden StoragePrefix inside webdavRoot. + originConfig := httpsv2OriginConfig(webdavURL, "/data") + + ft := fed_test_utils.NewFedTest(t, originConfig) + require.NotNil(t, ft) + require.Greater(t, len(ft.Exports), 0) + + // NewFedTest overrode StoragePrefix to a temp path. The HTTPSv2 + // backend will use this as the URL path prefix. Since the WebDAV + // server root is webdavRoot, URL path /foo maps to + // webdavRoot//foo. Create that directory structure. + storagePrefix := ft.Exports[0].StoragePrefix + webdavDataDir := filepath.Join(webdavRoot, storagePrefix) + require.NoError(t, os.MkdirAll(webdavDataDir, 0755)) + + // Copy the hello_world.txt that NewFedTest created into the WebDAV data dir + hwSrc := filepath.Join(storagePrefix, "hello_world.txt") + hwDst := filepath.Join(webdavDataDir, "hello_world.txt") + if data, err := os.ReadFile(hwSrc); err == nil { + require.NoError(t, os.WriteFile(hwDst, data, 0644)) + } + + testToken := getHTTPSv2Token(t) + localTmpDir := t.TempDir() + + t.Run("UploadAndDownload", func(t *testing.T) { + testContent := "Hello from the HTTPSv2 WebDAV federation test!" + localFile := filepath.Join(localTmpDir, "test_file.txt") + require.NoError(t, os.WriteFile(localFile, []byte(testContent), 0644)) + + uploadURL := fmt.Sprintf("pelican://%s:%d/test/test_file.txt", + param.Server_Hostname.GetString(), param.Server_WebPort.GetInt()) + + uploadResults, err := client.DoPut(ft.Ctx, localFile, uploadURL, false, client.WithToken(testToken)) + require.NoError(t, err) + require.NotEmpty(t, uploadResults) + assert.Greater(t, uploadResults[0].TransferredBytes, int64(0)) + + downloadFile := filepath.Join(localTmpDir, "downloaded.txt") + downloadResults, err := client.DoGet(ft.Ctx, uploadURL, downloadFile, false, client.WithToken(testToken)) + require.NoError(t, err) + require.NotEmpty(t, downloadResults) + + got, err := os.ReadFile(downloadFile) + require.NoError(t, err) + assert.Equal(t, testContent, string(got)) + }) + + t.Run("RecursiveUploadDownload", func(t *testing.T) { + // Create nested directory structure + sourceDir := t.TempDir() + sourceSubdir := filepath.Join(sourceDir, "subdir") + sourceDeepdir := filepath.Join(sourceSubdir, "deepdir") + require.NoError(t, os.MkdirAll(sourceDeepdir, 0755)) + + require.NoError(t, os.WriteFile(filepath.Join(sourceDir, "file1.txt"), []byte("content1"), 0644)) + require.NoError(t, os.WriteFile(filepath.Join(sourceDir, "file2.txt"), []byte("content2"), 0644)) + require.NoError(t, os.WriteFile(filepath.Join(sourceSubdir, "file3.txt"), []byte("content3"), 0644)) + require.NoError(t, os.WriteFile(filepath.Join(sourceDeepdir, "file4.txt"), []byte("content4"), 0644)) + + // Recursive upload + uploadURL := fmt.Sprintf("pelican://%s:%d/test/recursive/", + param.Server_Hostname.GetString(), param.Server_WebPort.GetInt()) + + _, err := client.DoPut(ft.Ctx, sourceDir, uploadURL, true, client.WithToken(testToken)) + require.NoError(t, err, "recursive upload should succeed") + + // Recursive download + downloadDir := t.TempDir() + _, err = client.DoGet(ft.Ctx, uploadURL, downloadDir, true, client.WithToken(testToken)) + require.NoError(t, err, "recursive download should succeed") + + // Verify all files + testCases := []struct { + relativePath string + expectedContent string + }{ + {"file1.txt", "content1"}, + {"file2.txt", "content2"}, + {filepath.Join("subdir", "file3.txt"), "content3"}, + {filepath.Join("subdir", "deepdir", "file4.txt"), "content4"}, + } + for _, tc := range testCases { + downloadedPath := filepath.Join(downloadDir, tc.relativePath) + content, err := os.ReadFile(downloadedPath) + require.NoError(t, err, "should be able to read %s", tc.relativePath) + assert.Equal(t, tc.expectedContent, string(content), "content of %s should match", tc.relativePath) + } + }) + + t.Run("Listing", func(t *testing.T) { + // Upload a few files + files := []string{"list_a.txt", "list_b.txt", "list_c.txt"} + for _, name := range files { + localFile := filepath.Join(localTmpDir, name) + require.NoError(t, os.WriteFile(localFile, []byte("list-content-"+name), 0644)) + + uploadURL := fmt.Sprintf("pelican://%s:%d/test/%s", + param.Server_Hostname.GetString(), param.Server_WebPort.GetInt(), name) + _, err := client.DoPut(ft.Ctx, localFile, uploadURL, false, client.WithToken(testToken)) + require.NoError(t, err, "failed to upload %s", name) + } + + listURL := fmt.Sprintf("pelican://%s:%d/test/", + param.Server_Hostname.GetString(), param.Server_WebPort.GetInt()) + entries, err := client.DoList(ft.Ctx, listURL, client.WithToken(testToken)) + require.NoError(t, err) + require.NotEmpty(t, entries) + + nameSet := make(map[string]bool) + for _, e := range entries { + nameSet[e.Name] = true + } + for _, name := range files { + found := false + for key := range nameSet { + if strings.Contains(key, name) { + found = true + break + } + } + assert.True(t, found, "listing should contain %s", name) + } + }) +} + +// -------------------------------------------------------------------------- +// Test with a real XRootD HTTP server +// -------------------------------------------------------------------------- + +// skipIfNoXRootD skips the test if the xrootd binary is not available. +// When running as root, also checks that the xrootd user exists (needed +// because XRootD refuses to run as UID 0). +func skipIfNoXRootD(t *testing.T) { + t.Helper() + if _, err := exec.LookPath("xrootd"); err != nil { + t.Skip("xrootd not found on PATH; skipping XRootD-backed test") + } + if os.Getuid() == 0 { + if _, err := user.Lookup("xrootd"); err != nil { + t.Skip("xrootd user does not exist; skipping XRootD-backed test (running as root)") + } + } +} + +func TestHTTPSv2XRootDOrigin(t *testing.T) { + skipIfNoXRootD(t) + t.Cleanup(test_utils.SetupTestLogging(t)) + server_utils.ResetTestState() + defer server_utils.ResetTestState() + + // Create the data dir under /tmp so it's accessible to the xrootd user. + // This becomes XRootD's oss.localroot — all I/O is confined here. + xrdDataDir, err := os.MkdirTemp("/tmp", "xrddata-test-*") + require.NoError(t, err) + t.Cleanup(func() { os.RemoveAll(xrdDataDir) }) + require.NoError(t, os.Chmod(xrdDataDir, 0755)) + + // When running as root, chown the data dir to the xrootd user so that + // XRootD (which drops to that UID) can write to it. + if os.Getuid() == 0 { + xrdUser, err := user.Lookup("xrootd") + require.NoError(t, err) + uid, err := strconv.Atoi(xrdUser.Uid) + require.NoError(t, err) + gid, err := strconv.Atoi(xrdUser.Gid) + require.NoError(t, err) + require.NoError(t, chownRecursive(xrdDataDir, uid, gid)) + } + + xrootdURL := startXRootDHTTPServer(t, xrdDataDir) + + // StoragePrefix placeholder — NewFedTest will override it to a random temp path. + originConfig := httpsv2OriginConfig(xrootdURL, "/placeholder") + + ft := fed_test_utils.NewFedTest(t, originConfig) + require.NotNil(t, ft) + require.Greater(t, len(ft.Exports), 0) + + // NewFedTest overrode StoragePrefix to a temp dir (e.g. /tmp/Export0XXXXX) + // and created hello_world.txt there, chowned to xrootd. The origin will + // request URL paths like //file.txt from XRootD, which maps + // to //file.txt via oss.localroot. Create that + // path inside the localroot and copy the hello_world.txt. + storagePrefix := ft.Exports[0].StoragePrefix + xrdExportDir := filepath.Join(xrdDataDir, storagePrefix) + require.NoError(t, os.MkdirAll(xrdExportDir, 0755)) + hwSrc := filepath.Join(storagePrefix, "hello_world.txt") + hwDst := filepath.Join(xrdExportDir, "hello_world.txt") + if data, err := os.ReadFile(hwSrc); err == nil { + require.NoError(t, os.WriteFile(hwDst, data, 0644)) + } + // Re-chown so XRootD can access the newly created directories + if os.Getuid() == 0 { + xrdUser, err := user.Lookup("xrootd") + require.NoError(t, err) + uid, err := strconv.Atoi(xrdUser.Uid) + require.NoError(t, err) + gid, err := strconv.Atoi(xrdUser.Gid) + require.NoError(t, err) + require.NoError(t, chownRecursive(xrdDataDir, uid, gid)) + } + + testToken := getHTTPSv2Token(t) + localTmpDir := t.TempDir() + + t.Run("UploadAndDownload", func(t *testing.T) { + testContent := "Hello from the HTTPSv2 XRootD federation test!" + localFile := filepath.Join(localTmpDir, "test_file.txt") + require.NoError(t, os.WriteFile(localFile, []byte(testContent), 0644)) + + uploadURL := fmt.Sprintf("pelican://%s:%d/test/xrd_test.txt", + param.Server_Hostname.GetString(), param.Server_WebPort.GetInt()) + + uploadResults, err := client.DoPut(ft.Ctx, localFile, uploadURL, false, client.WithToken(testToken)) + require.NoError(t, err) + require.NotEmpty(t, uploadResults) + assert.Greater(t, uploadResults[0].TransferredBytes, int64(0)) + + downloadFile := filepath.Join(localTmpDir, "downloaded.txt") + downloadResults, err := client.DoGet(ft.Ctx, uploadURL, downloadFile, false, client.WithToken(testToken)) + require.NoError(t, err) + require.NotEmpty(t, downloadResults) + + got, err := os.ReadFile(downloadFile) + require.NoError(t, err) + assert.Equal(t, testContent, string(got)) + }) + + t.Run("RecursiveUploadDownload", func(t *testing.T) { + sourceDir := t.TempDir() + sourceSubdir := filepath.Join(sourceDir, "subdir") + sourceDeepdir := filepath.Join(sourceSubdir, "deepdir") + require.NoError(t, os.MkdirAll(sourceDeepdir, 0755)) + + require.NoError(t, os.WriteFile(filepath.Join(sourceDir, "file1.txt"), []byte("xrd-content1"), 0644)) + require.NoError(t, os.WriteFile(filepath.Join(sourceDir, "file2.txt"), []byte("xrd-content2"), 0644)) + require.NoError(t, os.WriteFile(filepath.Join(sourceSubdir, "file3.txt"), []byte("xrd-content3"), 0644)) + require.NoError(t, os.WriteFile(filepath.Join(sourceDeepdir, "file4.txt"), []byte("xrd-content4"), 0644)) + + uploadURL := fmt.Sprintf("pelican://%s:%d/test/recursive/", + param.Server_Hostname.GetString(), param.Server_WebPort.GetInt()) + + _, err := client.DoPut(ft.Ctx, sourceDir, uploadURL, true, client.WithToken(testToken)) + require.NoError(t, err, "recursive upload should succeed") + + downloadDir := t.TempDir() + _, err = client.DoGet(ft.Ctx, uploadURL, downloadDir, true, client.WithToken(testToken)) + require.NoError(t, err, "recursive download should succeed") + + testCases := []struct { + relativePath string + expectedContent string + }{ + {"file1.txt", "xrd-content1"}, + {"file2.txt", "xrd-content2"}, + {filepath.Join("subdir", "file3.txt"), "xrd-content3"}, + {filepath.Join("subdir", "deepdir", "file4.txt"), "xrd-content4"}, + } + for _, tc := range testCases { + downloadedPath := filepath.Join(downloadDir, tc.relativePath) + content, err := os.ReadFile(downloadedPath) + require.NoError(t, err, "should be able to read %s", tc.relativePath) + assert.Equal(t, tc.expectedContent, string(content), "content of %s should match", tc.relativePath) + } + }) + + t.Run("Listing", func(t *testing.T) { + files := []string{"list_a.txt", "list_b.txt", "list_c.txt"} + for _, name := range files { + localFile := filepath.Join(localTmpDir, name) + require.NoError(t, os.WriteFile(localFile, []byte("xrd-list-"+name), 0644)) + + uploadURL := fmt.Sprintf("pelican://%s:%d/test/%s", + param.Server_Hostname.GetString(), param.Server_WebPort.GetInt(), name) + _, err := client.DoPut(ft.Ctx, localFile, uploadURL, false, client.WithToken(testToken)) + require.NoError(t, err, "failed to upload %s", name) + } + + listURL := fmt.Sprintf("pelican://%s:%d/test/", + param.Server_Hostname.GetString(), param.Server_WebPort.GetInt()) + entries, err := client.DoList(ft.Ctx, listURL, client.WithToken(testToken)) + require.NoError(t, err) + require.NotEmpty(t, entries) + + nameSet := make(map[string]bool) + for _, e := range entries { + nameSet[e.Name] = true + } + for _, name := range files { + found := false + for key := range nameSet { + if strings.Contains(key, name) { + found = true + break + } + } + assert.True(t, found, "listing should contain %s", name) + } + }) +} diff --git a/launchers/origin_serve.go b/launchers/origin_serve.go index 4028bdbe57..97baefd1cb 100644 --- a/launchers/origin_serve.go +++ b/launchers/origin_serve.go @@ -286,7 +286,7 @@ func OriginServeFinish(ctx context.Context, egrp *errgroup.Group, engine *gin.En gBackends := origin_serve.GetGlobusBackends() for _, col := range collections { if gb, ok := gBackends[col.CollectionID]; ok { - gb.Activate(col.CollectionToken, col.TransferToken, col.HTTPSServer) + gb.Activate(col.CollectionToken, col.TransferToken, col.HTTPSServer, col.OAuth2Config) log.Infof("Activated Globus v2 backend for collection %s", col.CollectionID) } } diff --git a/origin/globus.go b/origin/globus.go index 53c7701ce0..d60be08b9d 100644 --- a/origin/globus.go +++ b/origin/globus.go @@ -134,8 +134,9 @@ func InitGlobusBackend(exps []server_utils.OriginExport) error { return errors.Wrap(err, "failed to initialize Globus backend: failed to get xrootd gid") } - if server_structs.OriginStorageType(param.Origin_StorageType.GetString()) != server_structs.OriginStorageGlobus { - return errors.Errorf("failed to initialize Globus backend: Origin.StorageType is not Globus: %s", + ost := server_structs.OriginStorageType(param.Origin_StorageType.GetString()) + if ost != server_structs.OriginStorageGlobus && ost != server_structs.OriginStorageGlobusv2 { + return errors.Errorf("failed to initialize Globus backend: Origin.StorageType is not Globus or Globusv2: %s", param.Origin_StorageType.GetString()) } // Init map diff --git a/origin/globus_client.go b/origin/globus_client.go index 307c932364..22e7559740 100644 --- a/origin/globus_client.go +++ b/origin/globus_client.go @@ -61,13 +61,36 @@ var ( globusOAuthCfgError error ) +// ResetGlobusOAuthCfg resets the cached OAuth2 configuration so it will +// be rediscovered on the next call to GetGlobusOAuthCfg. This is +// intended for use in tests only. +func ResetGlobusOAuthCfg() { + onceGlobusOAuthCfg = sync.Once{} + globusOAuthCfg = nil + globusOAuthCfgError = nil +} + const ( - globusIssuerEndpoint = "https://auth.globus.org/" // Globus issuer endpoint - globusTransferServer = "transfer.api.globus.org" // The resource name for the Globus transfer API server - globusTransferEndpointBaseUrl = "https://transfer.api.globusonline.org/v0.10/" - globusTransferBaseScope = "urn:globus:auth:scope:transfer.api.globus.org:all" + globusTransferServer = "transfer.api.globus.org" // The resource name for the Globus transfer API server + globusTransferBaseScope = "urn:globus:auth:scope:transfer.api.globus.org:all" ) +// globusIssuerURL returns the configurable Globus OIDC issuer URL. +func globusIssuerURL() string { + if v := param.Origin_GlobusIssuerURL.GetString(); v != "" { + return v + } + return "https://auth.globus.org/" +} + +// globusTransferAPIBaseURL returns the configurable Globus Transfer API base URL. +func globusTransferAPIBaseURL() string { + if v := param.Origin_GlobusTransferAPIBaseUrl.GetString(); v != "" { + return v + } + return "https://transfer.api.globusonline.org/v0.10/" +} + const ( // We render the frontend and call the API from there for better user experience globusCallbackPath = "/view/origin/globus/callback" @@ -119,7 +142,7 @@ func setupGlobusOAuthCfg() { } // 2. Get Globus OAuth endpoints - iss, err := config.GetIssuerMetadata(globusIssuerEndpoint) + iss, err := config.GetIssuerMetadata(globusIssuerURL()) if err != nil { globusOAuthCfgError = err return @@ -376,7 +399,7 @@ func handleGlobusCallback(ctx *gin.Context) { } // Get the https server of the collection from Globus transfer API server - transferReq, err := http.NewRequest(http.MethodGet, globusTransferEndpointBaseUrl+"endpoint/"+cid, nil) + transferReq, err := http.NewRequest(http.MethodGet, globusTransferAPIBaseURL()+"endpoint/"+cid, nil) if err != nil { log.Errorf("Error creating http request for Globus transfer API: %v", err) ctx.JSON(http.StatusInternalServerError, diff --git a/origin_serve/backend_globus.go b/origin_serve/backend_globus.go index 5680af43e3..fd96ff9b8f 100644 --- a/origin_serve/backend_globus.go +++ b/origin_serve/backend_globus.go @@ -30,6 +30,7 @@ import ( "golang.org/x/oauth2" "golang.org/x/sync/errgroup" + "github.com/pelicanplatform/pelican/param" "github.com/pelicanplatform/pelican/server_utils" ) @@ -41,8 +42,8 @@ import ( // a Globus v2 backend. It is the exported interface that external packages // (e.g. launchers) use to manage Globus backends. type GlobusBackendActivator interface { - // Activate marks the collection as activated with the given tokens. - Activate(collectionToken, transferToken *oauth2.Token, httpsServer string) + // Activate marks the collection as activated with the given tokens and OAuth2 config. + Activate(collectionToken, transferToken *oauth2.Token, httpsServer string, oauth2Cfg *oauth2.Config) // RefreshTokens refreshes both the collection and transfer tokens. RefreshTokens() error // IsActivated returns whether the Globus collection has been activated. @@ -136,16 +137,26 @@ func (b *globusBackend) IsActivated() bool { } // Activate marks the collection as activated with the given tokens. -func (b *globusBackend) Activate(collectionToken, transferToken *oauth2.Token, httpsServer string) { +func (b *globusBackend) Activate(collectionToken, transferToken *oauth2.Token, httpsServer string, oauth2Cfg *oauth2.Config) { b.mu.Lock() defer b.mu.Unlock() b.collectionToken = collectionToken b.transferToken = transferToken b.httpsServer = httpsServer b.activated = true + if oauth2Cfg != nil { + b.oauth2Cfg = oauth2Cfg + } - // Update the inner HTTPS backend's OAuth2 token + // Update the inner HTTPS backend's service URL and OAuth2 token + b.inner.SetServiceURL(httpsServer) b.inner.SetOAuth2Token(collectionToken) + + // Re-probe backend mode (WebDAV vs plain HTTP) since SetServiceURL + // resets it to Unknown. + if err := b.inner.CheckAvailability(); err != nil { + log.Warningf("Globus collection %s: failed to probe backend mode: %v", b.collectionID, err) + } } // RefreshTokens refreshes both the collection and transfer tokens. @@ -157,9 +168,15 @@ func (b *globusBackend) RefreshTokens() error { return nil } - // Refresh collection token + // Refresh collection token. + // We present an already-expired copy to oauth2.TokenSource so it always + // performs a refresh_token grant instead of silently reusing the cached token. if b.collectionToken != nil && b.collectionToken.Expiry.Before(time.Now().Add(10*time.Minute)) { - ts := b.oauth2Cfg.TokenSource(nil, b.collectionToken) + expiredCopy := &oauth2.Token{ + RefreshToken: b.collectionToken.RefreshToken, + Expiry: time.Now().Add(-time.Minute), + } + ts := b.oauth2Cfg.TokenSource(context.Background(), expiredCopy) newTok, err := ts.Token() if err != nil { log.Warningf("Failed to refresh Globus collection token for %s: %v", b.collectionID, err) @@ -171,9 +188,13 @@ func (b *globusBackend) RefreshTokens() error { log.Debugf("Refreshed Globus collection token for %s", b.collectionID) } - // Refresh transfer token + // Refresh transfer token (same expired-copy trick). if b.transferToken != nil && b.transferToken.Expiry.Before(time.Now().Add(10*time.Minute)) { - ts := b.oauth2Cfg.TokenSource(nil, b.transferToken) + expiredCopy := &oauth2.Token{ + RefreshToken: b.transferToken.RefreshToken, + Expiry: time.Now().Add(-time.Minute), + } + ts := b.oauth2Cfg.TokenSource(context.Background(), expiredCopy) newTok, err := ts.Token() if err != nil { log.Warningf("Failed to refresh Globus transfer token for %s: %v", b.collectionID, err) @@ -213,15 +234,20 @@ func GetGlobusBackends() map[string]GlobusBackendActivator { return result } -// LaunchGlobusv2TokenRefresh starts a periodic goroutine (every 5 min) that -// refreshes the OAuth2 tokens for all activated Globus v2 backends. +// LaunchGlobusv2TokenRefresh starts a periodic goroutine that refreshes the +// OAuth2 tokens for all activated Globus v2 backends. The refresh interval +// defaults to 5 min but can be overridden via Origin.Globusv2TokenRefreshInterval. func LaunchGlobusv2TokenRefresh(ctx context.Context, egrp *errgroup.Group) { if len(globusBackends) == 0 { return } - log.Info("Launching periodic Globus v2 token refresh") + interval := param.Origin_Globusv2TokenRefreshInterval.GetDuration() + if interval <= 0 { + interval = 5 * time.Minute + } + log.Infof("Launching periodic Globus v2 token refresh (interval=%s)", interval) egrp.Go(func() error { - ticker := time.NewTicker(5 * time.Minute) + ticker := time.NewTicker(interval) defer ticker.Stop() for { select { diff --git a/origin_serve/backend_globus_test.go b/origin_serve/backend_globus_test.go index 3a9930b62c..c1e21cfbd7 100644 --- a/origin_serve/backend_globus_test.go +++ b/origin_serve/backend_globus_test.go @@ -116,7 +116,7 @@ func TestGlobusBackend_Activate(t *testing.T) { Expiry: time.Now().Add(1 * time.Hour), } - gb.Activate(collTok, transTok, "https://g-new.data.globus.org") + gb.Activate(collTok, transTok, "https://g-new.data.globus.org", nil) assert.True(t, gb.IsActivated()) require.NoError(t, gb.CheckAvailability()) } diff --git a/origin_serve/backend_https.go b/origin_serve/backend_https.go index 76f8bb54cd..defcea4396 100644 --- a/origin_serve/backend_https.go +++ b/origin_serve/backend_https.go @@ -135,6 +135,17 @@ func (b *httpsBackend) SetOAuth2Token(tok *oauth2.Token) { } } +// SetServiceURL updates the upstream service URL at runtime. +// This is used by the Globus backend which discovers the collection +// HTTPS endpoint after initial construction. +func (b *httpsBackend) SetServiceURL(u string) { + if b.fs != nil { + b.fs.serviceURL = strings.TrimSuffix(u, "/") + // Reset detected mode so it will be re-probed on next request + b.fs.backendMode = BackendModeUnknown + } +} + // BackendMode returns the detected mode (WebDAV or HTTP). func (b *httpsBackend) BackendMode() BackendMode { return b.fs.backendMode diff --git a/origin_serve/path_traversal_test.go b/origin_serve/path_traversal_test.go index a8913413b9..b91408a3a0 100644 --- a/origin_serve/path_traversal_test.go +++ b/origin_serve/path_traversal_test.go @@ -56,9 +56,9 @@ func (s *spyFileSystem) Stat(_ context.Context, name string) (os.FileInfo, error return nil, os.ErrNotExist } -func (s *spyFileSystem) Mkdir(context.Context, string, os.FileMode) error { return os.ErrPermission } -func (s *spyFileSystem) RemoveAll(context.Context, string) error { return os.ErrPermission } -func (s *spyFileSystem) Rename(context.Context, string, string) error { return os.ErrPermission } +func (s *spyFileSystem) Mkdir(context.Context, string, os.FileMode) error { return os.ErrPermission } +func (s *spyFileSystem) RemoveAll(context.Context, string) error { return os.ErrPermission } +func (s *spyFileSystem) Rename(context.Context, string, string) error { return os.ErrPermission } // spyBackend implements server_utils.OriginBackend backed by a spyFileSystem. type spyBackend struct { diff --git a/param/parameters.go b/param/parameters.go index b64c14469e..9c517ecb78 100644 --- a/param/parameters.go +++ b/param/parameters.go @@ -354,7 +354,10 @@ var runtimeConfigurableMap = map[string]bool{ "Origin.GlobusCollectionID": false, "Origin.GlobusCollectionName": false, "Origin.GlobusConfigLocation": false, + "Origin.GlobusIssuerURL": false, + "Origin.GlobusTransferAPIBaseUrl": false, "Origin.GlobusTransferTokenFile": false, + "Origin.Globusv2TokenRefreshInterval": false, "Origin.HttpAuthOAuth2ClientID": false, "Origin.HttpAuthOAuth2ClientSecretFile": false, "Origin.HttpAuthOAuth2Issuer": false, @@ -662,6 +665,8 @@ var stringAccessors = map[string]func(*Config) string{ "Origin.GlobusCollectionID": func(c *Config) string { return c.Origin.GlobusCollectionID }, "Origin.GlobusCollectionName": func(c *Config) string { return c.Origin.GlobusCollectionName }, "Origin.GlobusConfigLocation": func(c *Config) string { return c.Origin.GlobusConfigLocation }, + "Origin.GlobusIssuerURL": func(c *Config) string { return c.Origin.GlobusIssuerURL }, + "Origin.GlobusTransferAPIBaseUrl": func(c *Config) string { return c.Origin.GlobusTransferAPIBaseUrl }, "Origin.GlobusTransferTokenFile": func(c *Config) string { return c.Origin.GlobusTransferTokenFile }, "Origin.HttpAuthOAuth2ClientID": func(c *Config) string { return c.Origin.HttpAuthOAuth2ClientID }, "Origin.HttpAuthOAuth2ClientSecretFile": func(c *Config) string { return c.Origin.HttpAuthOAuth2ClientSecretFile }, @@ -1122,6 +1127,7 @@ var durationAccessors = map[string]func(*Config) time.Duration{ "Monitoring.TokenRefreshInterval": func(c *Config) time.Duration { return c.Monitoring.TokenRefreshInterval }, "Origin.DiskUsageCalculationDelay": func(c *Config) time.Duration { return c.Origin.DiskUsageCalculationDelay }, "Origin.DiskUsageCalculationInterval": func(c *Config) time.Duration { return c.Origin.DiskUsageCalculationInterval }, + "Origin.Globusv2TokenRefreshInterval": func(c *Config) time.Duration { return c.Origin.Globusv2TokenRefreshInterval }, "Origin.SSH.ChallengeTimeout": func(c *Config) time.Duration { return c.Origin.SSH.ChallengeTimeout }, "Origin.SSH.ConnectTimeout": func(c *Config) time.Duration { return c.Origin.SSH.ConnectTimeout }, "Origin.SSH.KeepaliveInterval": func(c *Config) time.Duration { return c.Origin.SSH.KeepaliveInterval }, @@ -1495,7 +1501,10 @@ var allParameterNames = []string{ "Origin.GlobusCollectionID", "Origin.GlobusCollectionName", "Origin.GlobusConfigLocation", + "Origin.GlobusIssuerURL", + "Origin.GlobusTransferAPIBaseUrl", "Origin.GlobusTransferTokenFile", + "Origin.Globusv2TokenRefreshInterval", "Origin.HttpAuthOAuth2ClientID", "Origin.HttpAuthOAuth2ClientSecretFile", "Origin.HttpAuthOAuth2Issuer", @@ -1776,6 +1785,8 @@ var ( Origin_GlobusCollectionID = StringParam{"Origin.GlobusCollectionID"} Origin_GlobusCollectionName = StringParam{"Origin.GlobusCollectionName"} Origin_GlobusConfigLocation = StringParam{"Origin.GlobusConfigLocation"} + Origin_GlobusIssuerURL = StringParam{"Origin.GlobusIssuerURL"} + Origin_GlobusTransferAPIBaseUrl = StringParam{"Origin.GlobusTransferAPIBaseUrl"} Origin_GlobusTransferTokenFile = StringParam{"Origin.GlobusTransferTokenFile"} Origin_HttpAuthOAuth2ClientID = StringParam{"Origin.HttpAuthOAuth2ClientID"} Origin_HttpAuthOAuth2ClientSecretFile = StringParam{"Origin.HttpAuthOAuth2ClientSecretFile"} @@ -2087,6 +2098,7 @@ var ( Monitoring_TokenRefreshInterval = DurationParam{"Monitoring.TokenRefreshInterval"} Origin_DiskUsageCalculationDelay = DurationParam{"Origin.DiskUsageCalculationDelay"} Origin_DiskUsageCalculationInterval = DurationParam{"Origin.DiskUsageCalculationInterval"} + Origin_Globusv2TokenRefreshInterval = DurationParam{"Origin.Globusv2TokenRefreshInterval"} Origin_SSH_ChallengeTimeout = DurationParam{"Origin.SSH.ChallengeTimeout"} Origin_SSH_ConnectTimeout = DurationParam{"Origin.SSH.ConnectTimeout"} Origin_SSH_KeepaliveInterval = DurationParam{"Origin.SSH.KeepaliveInterval"} @@ -2245,6 +2257,8 @@ func init() { "Origin.GlobusCollectionID": Origin_GlobusCollectionID, "Origin.GlobusCollectionName": Origin_GlobusCollectionName, "Origin.GlobusConfigLocation": Origin_GlobusConfigLocation, + "Origin.GlobusIssuerURL": Origin_GlobusIssuerURL, + "Origin.GlobusTransferAPIBaseUrl": Origin_GlobusTransferAPIBaseUrl, "Origin.GlobusTransferTokenFile": Origin_GlobusTransferTokenFile, "Origin.HttpAuthOAuth2ClientID": Origin_HttpAuthOAuth2ClientID, "Origin.HttpAuthOAuth2ClientSecretFile": Origin_HttpAuthOAuth2ClientSecretFile, @@ -2541,6 +2555,7 @@ func init() { "Monitoring.TokenRefreshInterval": Monitoring_TokenRefreshInterval, "Origin.DiskUsageCalculationDelay": Origin_DiskUsageCalculationDelay, "Origin.DiskUsageCalculationInterval": Origin_DiskUsageCalculationInterval, + "Origin.Globusv2TokenRefreshInterval": Origin_Globusv2TokenRefreshInterval, "Origin.SSH.ChallengeTimeout": Origin_SSH_ChallengeTimeout, "Origin.SSH.ConnectTimeout": Origin_SSH_ConnectTimeout, "Origin.SSH.KeepaliveInterval": Origin_SSH_KeepaliveInterval, diff --git a/param/parameters_struct.go b/param/parameters_struct.go index a93e9c323e..6ed98aab0f 100644 --- a/param/parameters_struct.go +++ b/param/parameters_struct.go @@ -312,7 +312,10 @@ type Config struct { GlobusCollectionID string `mapstructure:"globuscollectionid" yaml:"GlobusCollectionID"` GlobusCollectionName string `mapstructure:"globuscollectionname" yaml:"GlobusCollectionName"` GlobusConfigLocation string `mapstructure:"globusconfiglocation" yaml:"GlobusConfigLocation"` + GlobusIssuerURL string `mapstructure:"globusissuerurl" yaml:"GlobusIssuerURL"` + GlobusTransferAPIBaseUrl string `mapstructure:"globustransferapibaseurl" yaml:"GlobusTransferAPIBaseUrl"` GlobusTransferTokenFile string `mapstructure:"globustransfertokenfile" yaml:"GlobusTransferTokenFile"` + Globusv2TokenRefreshInterval time.Duration `mapstructure:"globusv2tokenrefreshinterval" yaml:"Globusv2TokenRefreshInterval"` HttpAuthOAuth2ClientID string `mapstructure:"httpauthoauth2clientid" yaml:"HttpAuthOAuth2ClientID"` HttpAuthOAuth2ClientSecretFile string `mapstructure:"httpauthoauth2clientsecretfile" yaml:"HttpAuthOAuth2ClientSecretFile"` HttpAuthOAuth2Issuer string `mapstructure:"httpauthoauth2issuer" yaml:"HttpAuthOAuth2Issuer"` @@ -803,7 +806,10 @@ type configWithType struct { GlobusCollectionID struct { Type string; Value string } GlobusCollectionName struct { Type string; Value string } GlobusConfigLocation struct { Type string; Value string } + GlobusIssuerURL struct { Type string; Value string } + GlobusTransferAPIBaseUrl struct { Type string; Value string } GlobusTransferTokenFile struct { Type string; Value string } + Globusv2TokenRefreshInterval struct { Type string; Value time.Duration } HttpAuthOAuth2ClientID struct { Type string; Value string } HttpAuthOAuth2ClientSecretFile struct { Type string; Value string } HttpAuthOAuth2Issuer struct { Type string; Value string } From 8e3b0577da2d167f3868c2f26cfa4837dc2d7915 Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Sat, 14 Mar 2026 15:35:06 -0500 Subject: [PATCH 05/10] Fix range read; minio on mac --- e2e_fed_tests/httpsv2_test.go | 177 ++++++++++++++++++++++++ origin_serve/backend_blob_minio_test.go | 7 +- origin_serve/backend_https.go | 104 +++++++++++--- 3 files changed, 269 insertions(+), 19 deletions(-) diff --git a/e2e_fed_tests/httpsv2_test.go b/e2e_fed_tests/httpsv2_test.go index ccc1c804fc..667ae18e51 100644 --- a/e2e_fed_tests/httpsv2_test.go +++ b/e2e_fed_tests/httpsv2_test.go @@ -21,7 +21,9 @@ package fed_tests import ( + "crypto/tls" "fmt" + "io" "net/http" "net/http/httptest" "os" @@ -389,6 +391,181 @@ func TestHTTPSv2WebDAVOrigin(t *testing.T) { assert.True(t, found, "listing should contain %s", name) } }) + + t.Run("RangeRead", func(t *testing.T) { + // Upload a file with deterministic content large enough to exercise + // multi-range behaviour. Each byte position is predictable so we can + // verify any sub-range independently: + // content[i] = byte(i % 251) (251 is prime → avoids alignment artifacts) + const fileSize = 256 * 1024 // 256 KiB — spans two 128 KiB PFC blocks + content := make([]byte, fileSize) + for i := range content { + content[i] = byte(i % 251) + } + localFile := filepath.Join(localTmpDir, "range_test.bin") + require.NoError(t, os.WriteFile(localFile, content, 0644)) + + uploadURL := fmt.Sprintf("pelican://%s:%d/test/range_test.bin", + param.Server_Hostname.GetString(), param.Server_WebPort.GetInt()) + _, err := client.DoPut(ft.Ctx, localFile, uploadURL, false, client.WithToken(testToken)) + require.NoError(t, err, "upload for range test should succeed") + + // Build a direct HTTPS URL to the origin's data endpoint so we + // bypass the cache and can send arbitrary Range headers. + originDataURL := fmt.Sprintf("https://%s:%d/api/v1.0/origin/data/test/range_test.bin", + param.Server_Hostname.GetString(), param.Server_WebPort.GetInt()) + + httpClient := &http.Client{ + Transport: &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, //nolint:gosec + }, + } + + type rangeCase struct { + name string + rangeHdr string + wantStatus int + wantBytes []byte // expected body (nil → skip body check) + } + + cases := []rangeCase{ + { + name: "FirstByte", + rangeHdr: "bytes=0-0", + wantStatus: http.StatusPartialContent, + wantBytes: content[0:1], + }, + { + name: "LastByte", + rangeHdr: fmt.Sprintf("bytes=%d-%d", fileSize-1, fileSize-1), + wantStatus: http.StatusPartialContent, + wantBytes: content[fileSize-1 : fileSize], + }, + { + name: "First128KiB", + rangeHdr: "bytes=0-131071", + wantStatus: http.StatusPartialContent, + wantBytes: content[0:131072], + }, + { + name: "Second128KiB", + rangeHdr: "bytes=131072-262143", + wantStatus: http.StatusPartialContent, + wantBytes: content[131072:262144], + }, + { + name: "MidRange", + rangeHdr: "bytes=1000-1999", + wantStatus: http.StatusPartialContent, + wantBytes: content[1000:2000], + }, + { + name: "OffsetNotAligned", + rangeHdr: "bytes=100000-100099", + wantStatus: http.StatusPartialContent, + wantBytes: content[100000:100100], + }, + { + name: "SuffixRange", + rangeHdr: "bytes=-100", + wantStatus: http.StatusPartialContent, + wantBytes: content[fileSize-100 : fileSize], + }, + { + name: "NoRangeHeader", + rangeHdr: "", + wantStatus: http.StatusOK, + wantBytes: content, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + req, err := http.NewRequest("GET", originDataURL, nil) + require.NoError(t, err) + req.Header.Set("Authorization", "Bearer "+testToken) + if tc.rangeHdr != "" { + req.Header.Set("Range", tc.rangeHdr) + } + + resp, err := httpClient.Do(req) + require.NoError(t, err) + defer resp.Body.Close() + + assert.Equal(t, tc.wantStatus, resp.StatusCode, + "unexpected status for Range: %s", tc.rangeHdr) + + if tc.wantBytes != nil { + got, err := io.ReadAll(resp.Body) + require.NoError(t, err) + assert.Equal(t, len(tc.wantBytes), len(got), + "body length mismatch for Range: %s", tc.rangeHdr) + assert.Equal(t, tc.wantBytes, got, + "body content mismatch for Range: %s", tc.rangeHdr) + } + }) + } + }) + + t.Run("ETagPassthrough", func(t *testing.T) { + // Upload a small file and verify the origin's response includes an + // ETag header that matches what the upstream WebDAV server provides. + testContent := "etag-passthrough-test-content" + localFile := filepath.Join(localTmpDir, "etag_test.txt") + require.NoError(t, os.WriteFile(localFile, []byte(testContent), 0644)) + + uploadURL := fmt.Sprintf("pelican://%s:%d/test/etag_test.txt", + param.Server_Hostname.GetString(), param.Server_WebPort.GetInt()) + _, err := client.DoPut(ft.Ctx, localFile, uploadURL, false, client.WithToken(testToken)) + require.NoError(t, err) + + originDataURL := fmt.Sprintf("https://%s:%d/api/v1.0/origin/data/test/etag_test.txt", + param.Server_Hostname.GetString(), param.Server_WebPort.GetInt()) + + httpClient := &http.Client{ + Transport: &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, //nolint:gosec + }, + } + + // GET the file from the origin — expect an ETag header. + req, err := http.NewRequest("GET", originDataURL, nil) + require.NoError(t, err) + req.Header.Set("Authorization", "Bearer "+testToken) + resp, err := httpClient.Do(req) + require.NoError(t, err) + defer resp.Body.Close() + + require.Equal(t, http.StatusOK, resp.StatusCode) + etag := resp.Header.Get("ETag") + assert.NotEmpty(t, etag, "origin response should include an ETag header") + + // Query the upstream WebDAV server directly and compare ETags. + upstreamURL := webdavURL + storagePrefix + "/etag_test.txt" + upReq, err := http.NewRequest("HEAD", upstreamURL, nil) + require.NoError(t, err) + upResp, err := http.DefaultClient.Do(upReq) + require.NoError(t, err) + upResp.Body.Close() + upstreamETag := upResp.Header.Get("ETag") + + if upstreamETag != "" { + // The upstream WebDAV server provides an ETag — verify passthrough. + assert.Equal(t, upstreamETag, etag, + "origin ETag should match the upstream WebDAV server's ETag") + } + + // Verify conditional GET: If-None-Match with the ETag should return 304. + condReq, err := http.NewRequest("GET", originDataURL, nil) + require.NoError(t, err) + condReq.Header.Set("Authorization", "Bearer "+testToken) + condReq.Header.Set("If-None-Match", etag) + condResp, err := httpClient.Do(condReq) + require.NoError(t, err) + condResp.Body.Close() + assert.Equal(t, http.StatusNotModified, condResp.StatusCode, + "conditional GET with matching ETag should return 304") + }) } // -------------------------------------------------------------------------- diff --git a/origin_serve/backend_blob_minio_test.go b/origin_serve/backend_blob_minio_test.go index dc779226a8..619b22b76c 100644 --- a/origin_serve/backend_blob_minio_test.go +++ b/origin_serve/backend_blob_minio_test.go @@ -99,10 +99,11 @@ func startMinio(t *testing.T) (endpoint, accessKey, secretKey string) { }) // Minio prints a line like: - // S3-API: http://127.0.0.1:43219 + // S3-API: http://127.0.0.1:43219 (older versions) + // API: http://127.0.0.1:43219 (newer versions) // Poll the log file until we find it (with a 30-second deadline). // Use assert (not require) so we can print minio's log on failure. - apiRe := regexp.MustCompile(`S3-API:\s+(https?://\S+)`) + apiRe := regexp.MustCompile(`(?:S3-)?API:\s+(https?://\S+)`) ok := assert.Eventually(t, func() bool { if minioDone.Load() { return false @@ -122,7 +123,7 @@ func startMinio(t *testing.T) (endpoint, accessKey, secretKey string) { if minioDone.Load() { t.Fatalf("minio exited early (err=%v); log output:\n%s", minioErr, logData) } - t.Fatalf("minio never printed an S3-API endpoint; log output:\n%s", logData) + t.Fatalf("minio never printed an API endpoint; log output:\n%s", logData) } // Pre-create the bucket directory on disk so it's available immediately. diff --git a/origin_serve/backend_https.go b/origin_serve/backend_https.go index defcea4396..bbe5e2d881 100644 --- a/origin_serve/backend_https.go +++ b/origin_serve/backend_https.go @@ -28,7 +28,6 @@ import ( "path" "strings" "sync" - "sync/atomic" "time" log "github.com/sirupsen/logrus" @@ -405,30 +404,50 @@ func (fs *httpsFileSystem) OpenFile(ctx context.Context, name string, flag int, if err != nil && gowebdav.IsErrNotFound(err) { return nil, os.ErrNotExist } - // Either it's a regular file or Stat failed for a non-404 reason — fall - // through to GET. + // Regular file — we already have size & mod-time from the Stat above, + // so skip the HEAD request and return a lazy-read file directly. + if err == nil { + var etag string + if gf, ok := info.(interface{ ETag() string }); ok { + etag = gf.ETag() + } + return &httpsReadFile{ + name: name, + fs: fs, + ctx: ctx, + contentLength: info.Size(), + lastModified: info.ModTime(), + etag: etag, + }, nil + } + // Stat failed for a non-404 reason — fall through to HEAD. } urlStr := fs.upstreamURL(name) - resp, err := fs.doRequest(ctx, http.MethodGet, urlStr, nil, nil) + + // Use HEAD to discover the file's size and last-modified time without + // downloading the body. The actual bytes are fetched lazily (possibly + // with a Range header) on the first Read call. + resp, err := fs.doRequest(ctx, http.MethodHead, urlStr, nil, nil) if err != nil { return nil, err } + resp.Body.Close() if resp.StatusCode == http.StatusNotFound { - resp.Body.Close() return nil, os.ErrNotExist } if resp.StatusCode != http.StatusOK { - resp.Body.Close() - return nil, fmt.Errorf("https get failed with status %d", resp.StatusCode) + return nil, fmt.Errorf("https head failed with status %d", resp.StatusCode) } return &httpsReadFile{ name: name, - body: resp.Body, + fs: fs, + ctx: ctx, contentLength: resp.ContentLength, lastModified: parseHTTPDate(resp.Header.Get("Last-Modified")), + etag: resp.Header.Get("ETag"), }, nil } @@ -590,22 +609,56 @@ func (fi *httpsFileInfo) Sys() interface{} { return nil } +// ETag implements the webdav.ETager interface so that the webdav handler +// can set the ETag response header from the upstream server's value. +func (fi *httpsFileInfo) ETag(_ context.Context) (string, error) { + if fi.etag != "" { + return fi.etag, nil + } + // Return ErrNotImplemented so the webdav handler falls back to its + // default ETag computation (modtime + size). + return "", webdav.ErrNotImplemented +} + // --------------------------------------------------------------------------- -// httpsReadFile — read-only file backed by an HTTPS GET response. -// Uses atomic offset for concurrent safety. +// httpsReadFile — read-only file backed by an HTTPS upstream. +// Seek is real: it records the desired offset and lazily opens a Range GET +// on the next Read call. This means only the requested byte range is fetched +// from the upstream server, which is critical for multi-gigabyte files. // --------------------------------------------------------------------------- type httpsReadFile struct { name string - body io.ReadCloser + fs *httpsFileSystem + ctx context.Context contentLength int64 lastModified time.Time - offset atomic.Int64 + etag string + + offset int64 // logical cursor position + body io.ReadCloser // current upstream body (nil until first Read after Seek) } func (f *httpsReadFile) Read(p []byte) (int, error) { + if f.body == nil { + // Open a GET with a Range header starting at the current offset. + urlStr := f.fs.upstreamURL(f.name) + headers := map[string]string{ + "Range": fmt.Sprintf("bytes=%d-", f.offset), + } + resp, err := f.fs.doRequest(f.ctx, http.MethodGet, urlStr, nil, headers) + if err != nil { + return 0, err + } + // Accept both 200 (server ignores Range) and 206 (partial content). + if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusPartialContent { + resp.Body.Close() + return 0, fmt.Errorf("https range get failed with status %d", resp.StatusCode) + } + f.body = resp.Body + } n, err := f.body.Read(p) - f.offset.Add(int64(n)) + f.offset += int64(n) return n, err } @@ -615,15 +668,33 @@ func (f *httpsReadFile) Seek(offset int64, whence int) (int64, error) { case io.SeekStart: newOff = offset case io.SeekCurrent: - newOff = f.offset.Load() + offset + newOff = f.offset + offset case io.SeekEnd: newOff = f.contentLength + offset + default: + return 0, fmt.Errorf("httpsReadFile.Seek: invalid whence %d", whence) } - f.offset.Store(newOff) + if newOff < 0 { + return 0, fmt.Errorf("httpsReadFile.Seek: negative position %d", newOff) + } + // If the position changed, discard the existing body so the next Read + // opens a fresh Range GET at the new offset. + if newOff != f.offset || f.body == nil { + if f.body != nil { + f.body.Close() + f.body = nil + } + } + f.offset = newOff return newOff, nil } -func (f *httpsReadFile) Close() error { return f.body.Close() } +func (f *httpsReadFile) Close() error { + if f.body != nil { + return f.body.Close() + } + return nil +} func (f *httpsReadFile) Write(_ []byte) (int, error) { return 0, fmt.Errorf("write not supported on read file") @@ -639,6 +710,7 @@ func (f *httpsReadFile) Stat() (os.FileInfo, error) { size: f.contentLength, modTime: f.lastModified, isDir: false, + etag: f.etag, }, nil } From 91452dce784541c9e884c1fe42a95627e54528f3 Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Sat, 14 Mar 2026 17:08:13 -0500 Subject: [PATCH 06/10] Unify minio startup scripts --- e2e_fed_tests/s3v2_test.go | 70 +------------ origin_serve/backend_blob_minio_test.go | 109 +------------------- test_utils/minio.go | 130 ++++++++++++++++++++++++ 3 files changed, 138 insertions(+), 171 deletions(-) create mode 100644 test_utils/minio.go diff --git a/e2e_fed_tests/s3v2_test.go b/e2e_fed_tests/s3v2_test.go index 21bafa959d..d1f16021d9 100644 --- a/e2e_fed_tests/s3v2_test.go +++ b/e2e_fed_tests/s3v2_test.go @@ -24,9 +24,7 @@ import ( "crypto/md5" "fmt" "os" - "os/exec" "path/filepath" - "regexp" "strings" "testing" "time" @@ -336,84 +334,24 @@ func TestS3v2MemOriginOverwrite(t *testing.T) { // Minio-backed federation tests // --------------------------------------------------------------------------- -// skipIfNoMinio skips the test if the minio binary is not available on PATH. -func skipIfNoMinio(t *testing.T) { - t.Helper() - if _, err := exec.LookPath("minio"); err != nil { - t.Skip("minio not found on PATH; skipping minio-backed test") - } -} - -// startMinioServer launches a minio server bound to 127.0.0.1:0 (OS-assigned port), -// parses the actual listening port from minio's log output, and returns the -// endpoint URL. The server is killed when the test completes. -func startMinioServer(t *testing.T) (endpoint string) { - t.Helper() - skipIfNoMinio(t) - - dataDir := t.TempDir() - - cmd := exec.Command("minio", "server", - "--address", "127.0.0.1:0", - "--console-address", "127.0.0.2:0", - dataDir, - ) - cmd.Env = append(os.Environ(), - "MINIO_ROOT_USER=minioadmin", - "MINIO_ROOT_PASSWORD=minioadmin", - ) - - logPath := filepath.Join(t.TempDir(), "minio.log") - logFile, err := os.Create(logPath) - require.NoError(t, err) - t.Cleanup(func() { logFile.Close() }) - cmd.Stdout = logFile - cmd.Stderr = logFile - - require.NoError(t, cmd.Start(), "failed to start minio") - t.Cleanup(func() { - cmd.Process.Kill() //nolint:errcheck - cmd.Wait() //nolint:errcheck - }) - - // Minio prints a line like: S3-API: http://127.0.0.1:43219 - apiRe := regexp.MustCompile(`S3-API:\s+(https?://\S+)`) - require.Eventually(t, func() bool { - data, err := os.ReadFile(logPath) - if err != nil { - return false - } - if m := apiRe.FindSubmatch(data); m != nil { - endpoint = string(m[1]) - return true - } - return false - }, 30*time.Second, 200*time.Millisecond, "minio never printed an S3-API endpoint") - - // Pre-create the bucket directory on disk. - require.NoError(t, os.Mkdir(filepath.Join(dataDir, "test-bucket"), 0755)) - - return endpoint -} - // TestS3v2MinioOriginUploadDownload runs a full Pelican federation backed by // a real MinIO server. It exercises the complete S3v2 data path: director // redirect → origin HTTP handler → gocloud.dev/blob/s3blob → MinIO. Skipped // if minio is not installed. func TestS3v2MinioOriginUploadDownload(t *testing.T) { - skipIfNoMinio(t) + test_utils.SkipIfNoMinio(t) t.Cleanup(test_utils.SetupTestLogging(t)) server_utils.ResetTestState() defer server_utils.ResetTestState() - minioEndpoint := startMinioServer(t) + minioEndpoint, accessKey, secretKey := test_utils.StartMinio(t, "test-bucket") // Write credential files for the origin to read. credDir := t.TempDir() akFile := filepath.Join(credDir, "access-key") skFile := filepath.Join(credDir, "secret-key") - require.NoError(t, os.WriteFile(akFile, []byte("minioadmin"), 0600)) - require.NoError(t, os.WriteFile(skFile, []byte("minioadmin"), 0600)) + require.NoError(t, os.WriteFile(akFile, []byte(accessKey), 0600)) + require.NoError(t, os.WriteFile(skFile, []byte(secretKey), 0600)) // S3 params must be in the YAML config so they survive NewFedTest's // config.InitServer → viper.MergeConfig flow and are available when diff --git a/origin_serve/backend_blob_minio_test.go b/origin_serve/backend_blob_minio_test.go index 619b22b76c..07541077df 100644 --- a/origin_serve/backend_blob_minio_test.go +++ b/origin_serve/backend_blob_minio_test.go @@ -24,114 +24,13 @@ import ( "context" "io" "os" - "os/exec" - "path/filepath" - "regexp" - "sync/atomic" "testing" - "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" -) - -// skipIfNoMinio skips the test if the minio binary is not available on PATH. -func skipIfNoMinio(t *testing.T) { - t.Helper() - if _, err := exec.LookPath("minio"); err != nil { - t.Skip("minio not found on PATH; skipping S3 integration test") - } -} - -// startMinio launches a minio server bound to 127.0.0.1:0 (OS-assigned port), -// parses the actual listening port from minio's log output, and returns the -// endpoint URL, access key, and secret key. The server is killed when the test -// completes. -// -// This avoids the TOCTOU race inherent in picking a free port first and then -// passing it to minio. -func startMinio(t *testing.T) (endpoint, accessKey, secretKey string) { - t.Helper() - skipIfNoMinio(t) - - dataDir := t.TempDir() - - accessKey = "minioadmin" - secretKey = "minioadmin" - - cmd := exec.Command("minio", "server", - "--address", "127.0.0.1:0", - dataDir, - ) - cmd.Env = append(os.Environ(), - "MINIO_ROOT_USER="+accessKey, - "MINIO_ROOT_PASSWORD="+secretKey, - // Disable the web console so we don't need --console-address. - // Using 127.0.0.2 for the console fails on macOS (only 127.0.0.1 - // is configured), and using 127.0.0.1:0 is rejected by minio - // because it matches --address. - "MINIO_BROWSER=off", - ) - - // Capture stdout so we can parse the "S3-API:" line for the real port. - // Minio writes its banner to stderr, so merge stderr into stdout. - logPath := filepath.Join(t.TempDir(), "minio.log") - logFile, err := os.Create(logPath) - require.NoError(t, err) - t.Cleanup(func() { logFile.Close() }) - cmd.Stdout = logFile - cmd.Stderr = logFile - - require.NoError(t, cmd.Start(), "failed to start minio") - - // Monitor for early exit so we fail fast with diagnostics. - var minioDone atomic.Bool - var minioErr error - go func() { - minioErr = cmd.Wait() - minioDone.Store(true) - }() - t.Cleanup(func() { - cmd.Process.Kill() //nolint:errcheck - for !minioDone.Load() { - time.Sleep(10 * time.Millisecond) - } - }) - - // Minio prints a line like: - // S3-API: http://127.0.0.1:43219 (older versions) - // API: http://127.0.0.1:43219 (newer versions) - // Poll the log file until we find it (with a 30-second deadline). - // Use assert (not require) so we can print minio's log on failure. - apiRe := regexp.MustCompile(`(?:S3-)?API:\s+(https?://\S+)`) - ok := assert.Eventually(t, func() bool { - if minioDone.Load() { - return false - } - data, err := os.ReadFile(logPath) - if err != nil { - return false - } - if m := apiRe.FindSubmatch(data); m != nil { - endpoint = string(m[1]) - return true - } - return false - }, 30*time.Second, 200*time.Millisecond) - if !ok { - logData, _ := os.ReadFile(logPath) - if minioDone.Load() { - t.Fatalf("minio exited early (err=%v); log output:\n%s", minioErr, logData) - } - t.Fatalf("minio never printed an API endpoint; log output:\n%s", logData) - } - - // Pre-create the bucket directory on disk so it's available immediately. - bucketDir := filepath.Join(dataDir, "test-bucket") - require.NoError(t, os.Mkdir(bucketDir, 0755)) - return endpoint, accessKey, secretKey -} + "github.com/pelicanplatform/pelican/test_utils" +) // --------------------------------------------------------------------------- // TestBlobBackend_MinioS3 — full integration test using a real minio server. @@ -141,9 +40,9 @@ func startMinio(t *testing.T) (endpoint, accessKey, secretKey string) { // --------------------------------------------------------------------------- func TestBlobBackend_MinioS3(t *testing.T) { - skipIfNoMinio(t) + test_utils.SkipIfNoMinio(t) - endpoint, accessKey, secretKey := startMinio(t) + endpoint, accessKey, secretKey := test_utils.StartMinio(t, "test-bucket") backend, err := newBlobBackend(BlobBackendOptions{ ServiceURL: endpoint, diff --git a/test_utils/minio.go b/test_utils/minio.go new file mode 100644 index 0000000000..0853541129 --- /dev/null +++ b/test_utils/minio.go @@ -0,0 +1,130 @@ +//go:build !windows + +/*************************************************************** + * + * Copyright (C) 2025, Pelican Project, Morgridge Institute for Research + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You may + * obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************/ + +package test_utils + +import ( + "os" + "os/exec" + "path/filepath" + "regexp" + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// SkipIfNoMinio skips the test if the minio binary is not available on PATH. +func SkipIfNoMinio(t *testing.T) { + t.Helper() + if _, err := exec.LookPath("minio"); err != nil { + t.Skip("minio not found on PATH; skipping minio-backed test") + } +} + +// StartMinio launches a minio server bound to 127.0.0.1:0 (OS-assigned port), +// parses the actual listening port from minio's log output, and returns the +// endpoint URL, access key, and secret key. The server is killed when the test +// completes. +// +// This avoids the TOCTOU race inherent in picking a free port first and then +// passing it to minio. +func StartMinio(t *testing.T, bucket string) (endpoint, accessKey, secretKey string) { + t.Helper() + SkipIfNoMinio(t) + + dataDir := t.TempDir() + + accessKey = "minioadmin" + secretKey = "minioadmin" + + cmd := exec.Command("minio", "server", + "--address", "127.0.0.1:0", + dataDir, + ) + cmd.Env = append(os.Environ(), + "MINIO_ROOT_USER="+accessKey, + "MINIO_ROOT_PASSWORD="+secretKey, + // Disable the web console so we don't need --console-address. + // Using 127.0.0.2 for the console fails on macOS (only 127.0.0.1 + // is configured), and using 127.0.0.1:0 is rejected by minio + // because it matches --address. + "MINIO_BROWSER=off", + ) + + logPath := filepath.Join(t.TempDir(), "minio.log") + logFile, err := os.Create(logPath) + require.NoError(t, err) + t.Cleanup(func() { logFile.Close() }) + cmd.Stdout = logFile + cmd.Stderr = logFile + + require.NoError(t, cmd.Start(), "failed to start minio") + + // Monitor for early exit so we fail fast with diagnostics. + var minioDone atomic.Bool + var minioErr error + go func() { + minioErr = cmd.Wait() + minioDone.Store(true) + }() + t.Cleanup(func() { + cmd.Process.Kill() //nolint:errcheck + for !minioDone.Load() { + time.Sleep(10 * time.Millisecond) + } + }) + + // Minio prints a line like: + // S3-API: http://127.0.0.1:43219 (older versions) + // API: http://127.0.0.1:43219 (newer versions) + // Poll the log file until we find it (with a 30-second deadline). + // Use assert (not require) so we can print minio's log on failure. + apiRe := regexp.MustCompile(`(?:S3-)?API:\s+(https?://\S+)`) + ok := assert.Eventually(t, func() bool { + if minioDone.Load() { + return false + } + data, err := os.ReadFile(logPath) + if err != nil { + return false + } + if m := apiRe.FindSubmatch(data); m != nil { + endpoint = string(m[1]) + return true + } + return false + }, 30*time.Second, 200*time.Millisecond) + if !ok { + logData, _ := os.ReadFile(logPath) + if minioDone.Load() { + t.Fatalf("minio exited early (err=%v); log output:\n%s", minioErr, logData) + } + t.Fatalf("minio never printed an API endpoint; log output:\n%s", logData) + } + + // Pre-create the bucket directory on disk so it's available immediately. + bucketDir := filepath.Join(dataDir, bucket) + require.NoError(t, os.Mkdir(bucketDir, 0755)) + + return endpoint, accessKey, secretKey +} From 32c5d13dafe00af7bbba1acc1ec1df36d3880ad3 Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Sun, 15 Mar 2026 20:34:20 -0500 Subject: [PATCH 07/10] Allow retry of concurrent MKCOL --- origin_serve/backend_https.go | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/origin_serve/backend_https.go b/origin_serve/backend_https.go index bbe5e2d881..22627b6524 100644 --- a/origin_serve/backend_https.go +++ b/origin_serve/backend_https.go @@ -368,14 +368,35 @@ func (fs *httpsFileSystem) ensureParentDirs(ctx context.Context, name string) er // Create from shallowest missing toward deepest. for i := firstMissingIdx - 1; i >= 0; i-- { - err := fs.Mkdir(ctx, prefixes[i], 0755) - if err != nil { + var lastErr error + for attempt := 0; attempt < 5; attempt++ { + lastErr = fs.Mkdir(ctx, prefixes[i], 0755) + if lastErr == nil { + break + } // Tolerate "already exists" (405 Method Not Allowed in WebDAV) in // case a concurrent writer created the directory between our Stat // and Mkdir calls. - if !gowebdav.IsErrCode(err, http.StatusMethodNotAllowed) { - return fmt.Errorf("failed to create directory %q: %w", prefixes[i], err) + if gowebdav.IsErrCode(lastErr, http.StatusMethodNotAllowed) { + lastErr = nil + break } + // A 423 Locked response means a concurrent MKCOL is in progress + // on the same resource. Retry after a short backoff; the lock + // will be released once the other writer finishes. + if gowebdav.IsErrCode(lastErr, http.StatusLocked) { + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(100 * time.Millisecond * time.Duration(attempt+1)): + } + continue + } + // Any other error is fatal. + return fmt.Errorf("failed to create directory %q: %w", prefixes[i], lastErr) + } + if lastErr != nil { + return fmt.Errorf("failed to create directory %q after retries: %w", prefixes[i], lastErr) } } From b2c55945eae280f1c73dc3e22332576d9ca15ece Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Tue, 28 Apr 2026 15:17:02 -0500 Subject: [PATCH 08/10] Checkpoint of some self-code review --- e2e_fed_tests/globusv2_test.go | 10 +++--- e2e_fed_tests/s3v2_test.go | 8 ++--- origin_serve/backend_blob.go | 57 ++++++++++++++++++++++++++++++++-- origin_serve/backend_https.go | 20 +++++++++--- origin_serve/handlers.go | 14 +++++++-- 5 files changed, 91 insertions(+), 18 deletions(-) diff --git a/e2e_fed_tests/globusv2_test.go b/e2e_fed_tests/globusv2_test.go index ae09c9fee0..8b63461663 100644 --- a/e2e_fed_tests/globusv2_test.go +++ b/e2e_fed_tests/globusv2_test.go @@ -223,12 +223,12 @@ func TestGlobusv2Origin(t *testing.T) { require.NoError(t, os.WriteFile(clientSecretFile, []byte("test-globus-client-secret"), 0600)) // Configure Globus hidden params to point at mock servers - require.NoError(t, param.Set(param.Origin_GlobusIssuerURL.GetName(), oidcURL)) - require.NoError(t, param.Set(param.Origin_GlobusTransferAPIBaseUrl.GetName(), transferAPIBaseURL)) - require.NoError(t, param.Set(param.Origin_GlobusClientIDFile.GetName(), clientIDFile)) - require.NoError(t, param.Set(param.Origin_GlobusClientSecretFile.GetName(), clientSecretFile)) + require.NoError(t, param.Set(param.Origin_GlobusIssuerURL, oidcURL)) + require.NoError(t, param.Set(param.Origin_GlobusTransferAPIBaseUrl, transferAPIBaseURL)) + require.NoError(t, param.Set(param.Origin_GlobusClientIDFile, clientIDFile)) + require.NoError(t, param.Set(param.Origin_GlobusClientSecretFile, clientSecretFile)) // Set a short refresh interval so we can verify token refresh in the test - require.NoError(t, param.Set(param.Origin_Globusv2TokenRefreshInterval.GetName(), "2s")) + require.NoError(t, param.Set(param.Origin_Globusv2TokenRefreshInterval, "2s")) originConfig := globusv2OriginConfig(testCollectionID, "Mock Test Collection") diff --git a/e2e_fed_tests/s3v2_test.go b/e2e_fed_tests/s3v2_test.go index d1f16021d9..b118196bc7 100644 --- a/e2e_fed_tests/s3v2_test.go +++ b/e2e_fed_tests/s3v2_test.go @@ -300,9 +300,9 @@ func TestS3v2MemOriginOverwrite(t *testing.T) { localTmpDir := t.TempDir() // Enable client-side overwrites so the second PUT doesn't fail with FileAlreadyExists - require.NoError(t, param.Set(param.Client_EnableOverwrites.GetName(), true)) + require.NoError(t, param.Set(param.Client_EnableOverwrites, true)) defer func() { - require.NoError(t, param.Set(param.Client_EnableOverwrites.GetName(), false)) + require.NoError(t, param.Set(param.Client_EnableOverwrites, false)) }() uploadURL := fmt.Sprintf("pelican://%s:%d/test/overwrite.txt", @@ -479,9 +479,9 @@ Director: t.Run("Overwrite", func(t *testing.T) { // Enable client-side overwrites so the second PUT doesn't fail with FileAlreadyExists - require.NoError(t, param.Set(param.Client_EnableOverwrites.GetName(), true)) + require.NoError(t, param.Set(param.Client_EnableOverwrites, true)) defer func() { - require.NoError(t, param.Set(param.Client_EnableOverwrites.GetName(), false)) + require.NoError(t, param.Set(param.Client_EnableOverwrites, false)) }() uploadURL := fmt.Sprintf("pelican://%s:%d/test/overwrite_minio.txt", diff --git a/origin_serve/backend_blob.go b/origin_serve/backend_blob.go index 92d74f1a95..685de91f24 100644 --- a/origin_serve/backend_blob.go +++ b/origin_serve/backend_blob.go @@ -124,6 +124,21 @@ func newBlobBackend(opts BlobBackendOptions) (*blobBackend, error) { // If per-export S3 credentials were provided, set them in the environment // so the gocloud AWS credential chain picks them up. + // + // FIXME(deploy): this mutates global process environment, which has two + // real-world consequences in production: + // 1. With multiple S3 backends configured against different accounts, + // whichever export is initialized last "wins" -- subsequent SDK + // calls (including from other backends, presigners, debug hooks, + // and any subprocess we spawn) see the last-set credentials. + // 2. Because os.Setenv writes to the global env, it leaks into any + // child process inheriting our env (notably the xrootd workers in + // mixed deployments). + // The proper fix is to construct an *s3.Client with explicit + // aws.Credentials and call s3blob.OpenBucket(ctx, client, bucket, opts) + // directly instead of going through blob.OpenBucket(URL). Until then, + // per-export AccessKey/SecretKey is only safe when one S3 export is + // configured per origin process. if opts.AccessKey != "" && opts.SecretKey != "" { os.Setenv("AWS_ACCESS_KEY_ID", opts.AccessKey) os.Setenv("AWS_SECRET_ACCESS_KEY", opts.SecretKey) @@ -303,18 +318,54 @@ func (fs *blobFileSystem) OpenFile(ctx context.Context, name string, flag int, _ } // RemoveAll implements webdav.FileSystem. +// +// Per the webdav.FileSystem contract this must remove `name` and, if it is a +// directory, everything underneath it. The previous implementation only +// deleted the named object plus its directory marker, leaving children +// orphaned. We list the prefix and delete every key, then remove the marker. +// +// Listing is paginated so memory stays bounded for large directories. Each +// delete is best-effort -- a partial failure returns the first error but +// continues so we don't strand half a tree. func (fs *blobFileSystem) RemoveAll(ctx context.Context, name string) error { key := blobKey(name) - // Try deleting as a plain object first. + // First try a plain-object delete (handles non-directory paths). err := fs.bucket.Delete(ctx, key) if err != nil && !isNotFound(err) { return err } - // Also try the directory marker. + // Recursively delete anything under the directory prefix. Note we + // intentionally don't pass a Delimiter here -- we want every descendant. + dirPrefix := key + if dirPrefix != "" && !strings.HasSuffix(dirPrefix, "/") { + dirPrefix += "/" + } + iter := fs.bucket.List(&blob.ListOptions{Prefix: dirPrefix}) + var firstErr error + for { + obj, listErr := iter.Next(ctx) + if listErr == io.EOF { + break + } + if listErr != nil { + if firstErr == nil { + firstErr = listErr + } + break + } + if delErr := fs.bucket.Delete(ctx, obj.Key); delErr != nil && !isNotFound(delErr) { + if firstErr == nil { + firstErr = delErr + } + } + } + + // Finally, the directory marker (some providers return it as a child of + // the prefix above and some don't, so this is belt-and-suspenders). _ = fs.bucket.Delete(ctx, key+"/") - return nil + return firstErr } // Rename implements webdav.FileSystem. diff --git a/origin_serve/backend_https.go b/origin_serve/backend_https.go index 22627b6524..f45d5a2fd9 100644 --- a/origin_serve/backend_https.go +++ b/origin_serve/backend_https.go @@ -19,6 +19,7 @@ package origin_serve import ( + "bytes" "context" "errors" "fmt" @@ -762,9 +763,15 @@ func (f *httpsWriteFile) Write(p []byte) (int, error) { } func (f *httpsWriteFile) Close() error { + // NOTE: This file buffers the entire upload in memory before issuing + // the PUT. For multi-gigabyte writes that is a memory bomb / DoS vector + // in production -- a real fix should stream via io.Pipe and a chunked + // PUT (or use a different write file for large transfers). We skirt + // double-copying the buffer here, but the underlying memory profile + // is still bounded only by client behavior. f.mu.Lock() - data := make([]byte, len(f.buf)) - copy(data, f.buf) + data := f.buf + f.buf = nil // hand ownership to the request body; subsequent Writes are forbidden post-Close f.mu.Unlock() urlStr := f.fs.upstreamURL(f.name) @@ -772,7 +779,12 @@ func (f *httpsWriteFile) Close() error { "Content-Length": fmt.Sprintf("%d", len(data)), } - resp, err := f.fs.doRequest(f.ctx, http.MethodPut, urlStr, strings.NewReader(string(data)), headers) + doPut := func() (*http.Response, error) { + // bytes.NewReader avoids the []byte->string->Reader copy chain. + return f.fs.doRequest(f.ctx, http.MethodPut, urlStr, bytes.NewReader(data), headers) + } + + resp, err := doPut() if err != nil { return err } @@ -797,7 +809,7 @@ func (f *httpsWriteFile) Close() error { } // Retry the PUT after creating parent directories. - retryResp, retryErr := f.fs.doRequest(f.ctx, http.MethodPut, urlStr, strings.NewReader(string(data)), headers) + retryResp, retryErr := doPut() if retryErr != nil { return retryErr } diff --git a/origin_serve/handlers.go b/origin_serve/handlers.go index ee41740546..a2c7afa713 100644 --- a/origin_serve/handlers.go +++ b/origin_serve/handlers.go @@ -847,11 +847,21 @@ func RegisterHandlers(engine *gin.Engine, directorEnabled bool) error { // the backend with ".." intact, potentially escaping // the storage root. newPath := path.Clean(wildcardPath) + // path.Clean("") == "." and path.Clean("/") == "/"; collapse the + // degenerate "." back to "/" so the URL we hand the WebDAV + // handler stays well-formed. + if newPath == "." { + newPath = "/" + } - // Create a shallow copy of the request and modify its URL + // Create a shallow copy of the request and modify its URL. + // The WebDAV handler's stripPrefix relies on URL.Path still + // starting with handler.Prefix (= routePrefix), so put the + // route prefix back on. Keeping the prefix here also keeps + // PROPFIND href values aligned with the public URL. modifiedReq := c.Request.Clone(c.Request.Context()) modifiedURL := *c.Request.URL - modifiedURL.Path = newPath + modifiedURL.Path = routePrefix + newPath modifiedReq.URL = &modifiedURL // Stash client tracing headers (X-Pelican-JobId, From 6768e11c1bee196c63340ef0a15ab384336bdc8a Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Sat, 2 May 2026 09:02:21 -0500 Subject: [PATCH 09/10] Bugfix handler to not buffer HTTP PUTs in-memory --- origin_serve/backend_https.go | 206 +++++++++++++++++++---------- origin_serve/backend_https_test.go | 152 ++++++++++++++++++++- 2 files changed, 285 insertions(+), 73 deletions(-) diff --git a/origin_serve/backend_https.go b/origin_serve/backend_https.go index f45d5a2fd9..55c80e721d 100644 --- a/origin_serve/backend_https.go +++ b/origin_serve/backend_https.go @@ -19,7 +19,6 @@ package origin_serve import ( - "bytes" "context" "errors" "fmt" @@ -737,94 +736,161 @@ func (f *httpsReadFile) Stat() (os.FileInfo, error) { } // --------------------------------------------------------------------------- -// httpsWriteFile — write file that PUTs to the upstream server on Close. -// Uses a mutex to protect concurrent appends to the buffer. +// httpsWriteFile — write file that streams PUTs to the upstream server. +// +// Bytes flow Write -> io.Pipe -> http.Request.Body -> upstream. We do NOT +// buffer the whole upload in memory anymore; multi-GB writes therefore +// no longer balloon RSS in proportion to the body size. +// +// Lifecycle: +// - newHTTPSWriteFile only records intent; nothing happens on the wire. +// - The first Write (or Close, for an empty PUT) triggers ensureStarted, +// which optionally pre-flights MKCOL for missing parents and then kicks +// off a goroutine that runs the upstream PUT with the pipe reader as +// its body. Subsequent Writes feed the pipe. +// - Close shuts the pipe writer (signaling EOF to the body) and waits +// for the PUT goroutine to return its status. +// +// Auto-mkdir trade-off: the old code retried on 409/404, but with a +// streamed body the request body is already consumed by the time we'd +// know we need to retry. So when auto-mkdir is on for a WebDAV upstream +// we Stat/MKCOL the parents up front and skip the retry path. For +// plain-HTTP upstreams (or auto-mkdir disabled) we just PUT and surface +// whatever status the server returns. // --------------------------------------------------------------------------- type httpsWriteFile struct { - ctx context.Context - fs *httpsFileSystem - name string + ctx context.Context + fs *httpsFileSystem + name string + + // startOnce gates the lazy spawn of the PUT goroutine. Subsequent + // Writes / a single Close all funnel through it. + startOnce sync.Once + startErr error // non-nil if pre-flight (e.g. ensureParentDirs) failed + + // pipeW is the local end the WebDAV handler's bytes are written into; + // the goroutine consumes the matching reader as the request body. + pipeW *io.PipeWriter + // putErrCh receives exactly one value: the PUT goroutine's terminal + // error (or nil on success). Buffered so the goroutine never blocks. + putErrCh chan error + + // mu guards offset; serializes Stat/Seek against Write. mu sync.Mutex - buf []byte offset int64 + closed bool } func newHTTPSWriteFile(ctx context.Context, fs *httpsFileSystem, name string) *httpsWriteFile { return &httpsWriteFile{ctx: ctx, fs: fs, name: name} } -func (f *httpsWriteFile) Write(p []byte) (int, error) { - f.mu.Lock() - defer f.mu.Unlock() - f.buf = append(f.buf, p...) - f.offset += int64(len(p)) - return len(p), nil -} +// ensureStarted lazily kicks off the upstream PUT. It is safe to call +// repeatedly; only the first call does work. If the pre-flight mkdir +// fails, the cached startErr is returned to every caller. +func (f *httpsWriteFile) ensureStarted() error { + f.startOnce.Do(func() { + // When auto-mkdir is on with a WebDAV upstream we cannot rely on + // the old retry-on-409/404 path -- the streamed body is already + // consumed by the time we know we need a retry. Pay the Stat + // cost up front instead. For the common case the parent already + // exists and Stat short-circuits cheaply. + if f.fs.enableAutoMkdir && f.fs.backendMode == BackendModeWebDAV { + if err := f.fs.ensureParentDirs(f.ctx, f.name); err != nil { + f.startErr = fmt.Errorf("auto-mkdir before HTTPS PUT failed: %w", err) + return + } + } -func (f *httpsWriteFile) Close() error { - // NOTE: This file buffers the entire upload in memory before issuing - // the PUT. For multi-gigabyte writes that is a memory bomb / DoS vector - // in production -- a real fix should stream via io.Pipe and a chunked - // PUT (or use a different write file for large transfers). We skirt - // double-copying the buffer here, but the underlying memory profile - // is still bounded only by client behavior. - f.mu.Lock() - data := f.buf - f.buf = nil // hand ownership to the request body; subsequent Writes are forbidden post-Close - f.mu.Unlock() + pipeR, pipeW := io.Pipe() + f.pipeW = pipeW + f.putErrCh = make(chan error, 1) - urlStr := f.fs.upstreamURL(f.name) - headers := map[string]string{ - "Content-Length": fmt.Sprintf("%d", len(data)), - } + urlStr := f.fs.upstreamURL(f.name) + req, err := http.NewRequestWithContext(f.ctx, http.MethodPut, urlStr, pipeR) + if err != nil { + _ = pipeR.CloseWithError(err) + f.startErr = err + return + } + if token := f.fs.getToken(f.ctx); token != "" { + req.Header.Set("Authorization", "Bearer "+token) + } + if ph := server_utils.PelicanHeadersFromContext(f.ctx); ph != nil { + if ph.JobId != "" { + req.Header.Set("X-Pelican-JobId", ph.JobId) + } + if ph.Timeout != "" { + req.Header.Set("X-Pelican-Timeout", ph.Timeout) + } + } + // Honor a Content-Length hint from the request context so the wire + // stays non-chunked when the size is known up front. With no hint + // we leave ContentLength == -1 and the transport will use chunked + // transfer-encoding. + if hint := contentLengthFromCtx(f.ctx); hint >= 0 { + req.ContentLength = hint + } - doPut := func() (*http.Response, error) { - // bytes.NewReader avoids the []byte->string->Reader copy chain. - return f.fs.doRequest(f.ctx, http.MethodPut, urlStr, bytes.NewReader(data), headers) - } + go func() { + resp, err := f.fs.httpClient.Do(req) + if err != nil { + // Wake any blocked Write with the same error so the caller + // learns about the failure synchronously. + _ = pipeR.CloseWithError(err) + f.putErrCh <- err + return + } + defer resp.Body.Close() + body, _ := io.ReadAll(resp.Body) + switch resp.StatusCode { + case http.StatusOK, http.StatusCreated, http.StatusNoContent: + f.putErrCh <- nil + return + } + log.Debugf("HTTPS PUT to %s response (%d): %s", urlStr, resp.StatusCode, string(body)) + putErr := fmt.Errorf("https put failed with status %d", resp.StatusCode) + _ = pipeR.CloseWithError(putErr) + f.putErrCh <- putErr + }() + }) + return f.startErr +} - resp, err := doPut() - if err != nil { - return err +func (f *httpsWriteFile) Write(p []byte) (int, error) { + if err := f.ensureStarted(); err != nil { + return 0, err } - defer resp.Body.Close() + n, err := f.pipeW.Write(p) + if n > 0 { + f.mu.Lock() + f.offset += int64(n) + f.mu.Unlock() + } + return n, err +} - if resp.StatusCode == http.StatusOK || resp.StatusCode == http.StatusCreated || resp.StatusCode == http.StatusNoContent { +func (f *httpsWriteFile) Close() error { + f.mu.Lock() + if f.closed { + f.mu.Unlock() return nil } + f.closed = true + f.mu.Unlock() - respBody, _ := io.ReadAll(resp.Body) - resp.Body.Close() - - // If auto-mkdir is enabled and the server indicates a missing parent - // directory (409 Conflict in WebDAV, or 404 Not Found), create the - // directory tree and retry the PUT. - if f.fs.enableAutoMkdir && (resp.StatusCode == http.StatusConflict || resp.StatusCode == http.StatusNotFound) { - log.Debugf("HTTPS PUT to %s returned %d; attempting auto-mkdir for parent directories", urlStr, resp.StatusCode) - - if mkdirErr := f.fs.ensureParentDirs(f.ctx, f.name); mkdirErr != nil { - log.Warningf("Auto-mkdir failed for %s: %v", f.name, mkdirErr) - return fmt.Errorf("https put failed with status %d (auto-mkdir also failed: %v)", resp.StatusCode, mkdirErr) - } - - // Retry the PUT after creating parent directories. - retryResp, retryErr := doPut() - if retryErr != nil { - return retryErr - } - defer retryResp.Body.Close() - - if retryResp.StatusCode == http.StatusOK || retryResp.StatusCode == http.StatusCreated || retryResp.StatusCode == http.StatusNoContent { - return nil - } - retryBody, _ := io.ReadAll(retryResp.Body) - log.Debugf("HTTPS PUT retry response (%d): %s", retryResp.StatusCode, string(retryBody)) - return fmt.Errorf("https put failed with status %d after auto-mkdir", retryResp.StatusCode) + // Even an empty PUT (no Write calls) must hit the wire so the upstream + // observes a zero-byte file. ensureStarted is idempotent. + if err := f.ensureStarted(); err != nil { + return err } - - log.Debugf("HTTPS PUT response (%d): %s", resp.StatusCode, string(respBody)) - return fmt.Errorf("https put failed with status %d", resp.StatusCode) + // Signal EOF to the request body and wait for the PUT goroutine to + // surface the upstream's status. + if err := f.pipeW.Close(); err != nil { + return err + } + return <-f.putErrCh } func (f *httpsWriteFile) Read(_ []byte) (int, error) { @@ -861,11 +927,11 @@ func (f *httpsWriteFile) Readdir(_ int) ([]os.FileInfo, error) { func (f *httpsWriteFile) Stat() (os.FileInfo, error) { f.mu.Lock() - n := len(f.buf) + n := f.offset f.mu.Unlock() return &httpsFileInfo{ name: path.Base(f.name), - size: int64(n), + size: n, isDir: false, }, nil } diff --git a/origin_serve/backend_https_test.go b/origin_serve/backend_https_test.go index 3171d1a376..0b019db9c0 100644 --- a/origin_serve/backend_https_test.go +++ b/origin_serve/backend_https_test.go @@ -19,6 +19,7 @@ package origin_serve import ( + "bytes" "context" "errors" "fmt" @@ -28,6 +29,7 @@ import ( "os" "strings" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -222,8 +224,10 @@ func TestHTTPSWriteFile_NoOpSeek(t *testing.T) { require.NoError(t, err) assert.Equal(t, int64(0), pos) - // Write some data - _, _ = wf.Write([]byte("hello")) + // Pretend 5 bytes were streamed through. We bypass Write here because + // the streaming impl needs a real upstream + fs to start the PUT + // goroutine; this test only exercises Seek's no-op semantics. + wf.offset = 5 // Seeking to current offset (5) should succeed pos, err = wf.Seek(5, io.SeekStart) @@ -246,13 +250,155 @@ func TestHTTPSWriteFile_NoOpSeek(t *testing.T) { func TestHTTPSWriteFile_Stat(t *testing.T) { wf := &httpsWriteFile{name: "/test.txt"} - wf.buf = []byte("hello") + // Stat reports the cumulative bytes accepted by the pipe; the + // streaming impl no longer keeps a buffer field, but it tracks the + // offset so size reporting still works. + wf.offset = 5 info, err := wf.Stat() require.NoError(t, err) assert.Equal(t, "test.txt", info.Name()) assert.Equal(t, int64(5), info.Size()) } +// TestHTTPSWriteFile_Streams asserts the streaming behavior of Write/Close: +// bytes flow to the wire as Write is called, not buffered until Close, so a +// multi-GB upload no longer requires multi-GB of RSS. We run with a body +// large enough that any in-memory buffering would be obvious, and we use +// a server that records each chunk and unblocks the test once it has +// observed bytes BEFORE Close was called. +func TestHTTPSWriteFile_Streams(t *testing.T) { + const chunkSize = 64 * 1024 + const numChunks = 16 + + receivedFirstChunk := make(chan struct{}, 1) + var totalBytes int64 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.Method { + case http.MethodOptions: + w.Header().Set("Allow", "GET, PUT, OPTIONS") + w.WriteHeader(http.StatusOK) + case http.MethodPut: + defer r.Body.Close() + buf := make([]byte, chunkSize) + for { + n, err := r.Body.Read(buf) + if n > 0 { + if totalBytes == 0 { + // Signal the test that the upstream has actually + // received bytes -- proving Write is flushing, + // not buffering until Close. + select { + case receivedFirstChunk <- struct{}{}: + default: + } + } + totalBytes += int64(n) + } + if err == io.EOF { + break + } + require.NoError(t, err) + } + w.WriteHeader(http.StatusCreated) + default: + w.WriteHeader(http.StatusMethodNotAllowed) + } + })) + defer server.Close() + + backend := newHTTPSBackend(HTTPSBackendOptions{ + ServiceURL: server.URL, + TokenMode: HTTPSTokenNone, + }) + require.NoError(t, backend.CheckAvailability()) + + wf, err := backend.FileSystem().OpenFile(context.Background(), "/big.bin", os.O_CREATE|os.O_WRONLY, 0644) + require.NoError(t, err) + + // Stream the body chunk-by-chunk. We deliberately wait between + // chunks long enough that the upstream-side handler has time to + // observe the first chunk; if the implementation regressed to + // buffering until Close, that would never happen. + chunk := bytes.Repeat([]byte{'A'}, chunkSize) + for i := 0; i < numChunks; i++ { + n, werr := wf.Write(chunk) + require.NoError(t, werr) + require.Equal(t, chunkSize, n) + if i == 0 { + // Allow the upstream goroutine time to read the first chunk + // before we issue the rest. With the streaming impl the + // channel fires; if not, the select below times out. + select { + case <-receivedFirstChunk: + case <-time.After(5 * time.Second): + t.Fatal("upstream never saw bytes before Close; Write is buffering instead of streaming") + } + } + } + require.NoError(t, wf.Close()) + assert.Equal(t, int64(chunkSize*numChunks), totalBytes) +} + +// TestHTTPSWriteFile_EmptyPUT verifies that Close() with no preceding +// Write still issues a zero-byte PUT (correct webdav.FileSystem semantics +// for creating an empty file). +func TestHTTPSWriteFile_EmptyPUT(t *testing.T) { + var sawPUT bool + var receivedBytes int + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.Method { + case http.MethodOptions: + w.Header().Set("Allow", "GET, PUT, OPTIONS") + w.WriteHeader(http.StatusOK) + case http.MethodPut: + sawPUT = true + body, _ := io.ReadAll(r.Body) + receivedBytes = len(body) + w.WriteHeader(http.StatusCreated) + default: + w.WriteHeader(http.StatusMethodNotAllowed) + } + })) + defer server.Close() + + backend := newHTTPSBackend(HTTPSBackendOptions{ + ServiceURL: server.URL, + TokenMode: HTTPSTokenNone, + }) + require.NoError(t, backend.CheckAvailability()) + + wf, err := backend.FileSystem().OpenFile(context.Background(), "/empty.bin", os.O_CREATE|os.O_WRONLY, 0644) + require.NoError(t, err) + require.NoError(t, wf.Close()) + assert.True(t, sawPUT, "Close on a never-written file must still issue a zero-byte PUT") + assert.Equal(t, 0, receivedBytes) +} + +// TestHTTPSWriteFile_PreflightMkdirOn409 verifies that with auto-mkdir +// enabled, the parent directory is created via MKCOL before the streamed +// PUT runs (so a 409-on-PUT is never seen even though we cannot retry +// the streamed body). +func TestHTTPSWriteFile_PreflightMkdirOn409(t *testing.T) { + server, files := mockWebDAVServer() + defer server.Close() + + backend := newHTTPSBackend(HTTPSBackendOptions{ + ServiceURL: server.URL, + TokenMode: HTTPSTokenNone, + EnableAutoMkdir: true, + }) + require.NoError(t, backend.CheckAvailability()) + + // /deep/dir does not exist; without pre-flight MKCOL the streamed + // PUT would receive 409 with no way to retry. + wf, err := backend.FileSystem().OpenFile(context.Background(), "/deep/dir/file.txt", os.O_CREATE|os.O_WRONLY, 0644) + require.NoError(t, err) + _, err = wf.Write([]byte("streamed")) + require.NoError(t, err) + require.NoError(t, wf.Close()) + assert.Equal(t, []byte("streamed"), files["/deep/dir/file.txt"]) +} + // --------------------------------------------------------------------------- // httpsReadFile — unit tests // --------------------------------------------------------------------------- From 22221826054ca0f90038aa951791b8b1f1a08778 Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Thu, 18 Jun 2026 07:46:30 -0500 Subject: [PATCH 10/10] Update to reflect review comments: - Do not chown Globus files if we aren't accessing via XRootD - Use simpler detection of whether XRootD is in use. - Switch secret keys from being injected via environment to explicitly (and redacting, as needed) on open. --- go.mod | 6 +- launchers/origin_serve.go | 12 +- origin/globus.go | 33 +++-- origin_serve/backend_blob.go | 217 ++++++++++++++++++++++++------ origin_serve/backend_blob_test.go | 58 ++++++++ origin_serve/handlers.go | 2 +- server_structs/origin.go | 13 ++ 7 files changed, 272 insertions(+), 69 deletions(-) diff --git a/go.mod b/go.mod index 95945c6872..631b460fcc 100644 --- a/go.mod +++ b/go.mod @@ -10,6 +10,9 @@ require ( github.com/PelicanPlatform/classad v0.0.5 github.com/RoaringBitmap/roaring v1.9.4 github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 + github.com/aws/aws-sdk-go-v2/config v1.32.2 + github.com/aws/aws-sdk-go-v2/credentials v1.19.11 + github.com/aws/aws-sdk-go-v2/service/s3 v1.96.4 github.com/bbockelm/gosssd v0.0.1 github.com/charmbracelet/glamour v0.8.0 github.com/cyphar/filepath-securejoin v0.4.1 @@ -87,8 +90,6 @@ require ( github.com/alecthomas/chroma/v2 v2.14.0 // indirect github.com/aws/aws-sdk-go-v2 v1.41.3 // indirect github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.6 // indirect - github.com/aws/aws-sdk-go-v2/config v1.32.2 // indirect - github.com/aws/aws-sdk-go-v2/credentials v1.19.11 // indirect github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.19 // indirect github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.20.12 // indirect github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.19 // indirect @@ -99,7 +100,6 @@ require ( github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.11 // indirect github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.19 // indirect github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.19 // indirect - github.com/aws/aws-sdk-go-v2/service/s3 v1.96.4 // indirect github.com/aws/aws-sdk-go-v2/service/signin v1.0.7 // indirect github.com/aws/aws-sdk-go-v2/service/sso v1.30.12 // indirect github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.16 // indirect diff --git a/launchers/origin_serve.go b/launchers/origin_serve.go index 97baefd1cb..629e1623c2 100644 --- a/launchers/origin_serve.go +++ b/launchers/origin_serve.go @@ -56,11 +56,7 @@ func OriginServe(ctx context.Context, engine *gin.Engine, egrp *errgroup.Group, // Determine if we should use XRootD or native HTTP server storageType := param.Origin_StorageType.GetString() - useXRootD := storageType != string(server_structs.OriginStoragePosixv2) && - storageType != string(server_structs.OriginStorageSSH) && - storageType != string(server_structs.OriginStorageS3v2) && - storageType != string(server_structs.OriginStorageHTTPSv2) && - storageType != string(server_structs.OriginStorageGlobusv2) + useXRootD := server_structs.OriginStorageType(storageType).UsesXRootD() if useXRootD { metrics.SetComponentHealthStatus(metrics.OriginCache_XRootD, metrics.StatusWarning, "XRootD is initializing") @@ -236,11 +232,7 @@ func OriginServeFinish(ctx context.Context, egrp *errgroup.Group, engine *gin.En // Handle POSIXv2 and SSH-specific initialization now that the web server is running storageType := param.Origin_StorageType.GetString() - useXRootD := storageType != string(server_structs.OriginStoragePosixv2) && - storageType != string(server_structs.OriginStorageSSH) && - storageType != string(server_structs.OriginStorageS3v2) && - storageType != string(server_structs.OriginStorageHTTPSv2) && - storageType != string(server_structs.OriginStorageGlobusv2) + useXRootD := server_structs.OriginStorageType(storageType).UsesXRootD() if !useXRootD { // For SSH backend, initialize the SSH connection before setting up handlers if storageType == string(server_structs.OriginStorageSSH) { diff --git a/origin/globus.go b/origin/globus.go index d60be08b9d..dcaf97cd8f 100644 --- a/origin/globus.go +++ b/origin/globus.go @@ -128,37 +128,46 @@ func InitGlobusBackend(exps []server_utils.OriginExport) error { return errors.Wrap(err, "failed to initialize Globus backend: failed to get pelican user") } - // Get xrootd group so XRootD can read the token files - xrootdGid, err := config.GetDaemonGID() - if err != nil { - return errors.Wrap(err, "failed to initialize Globus backend: failed to get xrootd gid") - } - ost := server_structs.OriginStorageType(param.Origin_StorageType.GetString()) if ost != server_structs.OriginStorageGlobus && ost != server_structs.OriginStorageGlobusv2 { return errors.Errorf("failed to initialize Globus backend: Origin.StorageType is not Globus or Globusv2: %s", param.Origin_StorageType.GetString()) } + + // For the XRootD-backed Globus origin, the token files must be group-readable + // by the xrootd daemon. The native Globusv2 backend is served directly by the + // pelican process, so the xrootd group does not apply -- own the files with + // the pelican user's own group instead. + tokenGid := puser.Gid + if ost == server_structs.OriginStorageGlobus { + xrootdGid, err := config.GetDaemonGID() + if err != nil { + return errors.Wrap(err, "failed to initialize Globus backend: failed to get xrootd gid") + } + tokenGid = xrootdGid + } // Init map globusExports = make(map[string]*globusExport) // Check and setup token location - // Directories are owned by pelican:xrootd with mode 0750: + // Directories are owned by pelican: with mode 0750: // - pelican (owner) can write token files even after dropPrivileges - // - xrootd (group) can read token files at runtime + // - the group can read token files at runtime; for the XRootD-backed + // Globus origin that group is xrootd, while the native Globusv2 backend + // uses pelican's own group since no xrootd daemon is involved globusFdr := param.Origin_GlobusConfigLocation.GetString() tokFdr := filepath.Join(globusFdr, "tokens") if err := os.MkdirAll(tokFdr, 0750); err != nil { return errors.Wrapf(err, "failed to create directory for Globus tokens: %s", tokFdr) } - if err = os.Chown(globusFdr, puser.Uid, xrootdGid); err != nil { - return errors.Wrapf(err, "unable to change the ownership of %s to pelican uid %d and xrootd gid %d for Globus config", globusFdr, puser.Uid, xrootdGid) + if err = os.Chown(globusFdr, puser.Uid, tokenGid); err != nil { + return errors.Wrapf(err, "unable to change the ownership of %s to pelican uid %d and gid %d for Globus config", globusFdr, puser.Uid, tokenGid) } if err = os.Chmod(globusFdr, 0750); err != nil { return errors.Wrapf(err, "unable to change the permissions of %s for Globus config", globusFdr) } - if err = os.Chown(tokFdr, puser.Uid, xrootdGid); err != nil { - return errors.Wrapf(err, "unable to change the ownership of %s to pelican uid %d and xrootd gid %d for Globus tokens", tokFdr, puser.Uid, xrootdGid) + if err = os.Chown(tokFdr, puser.Uid, tokenGid); err != nil { + return errors.Wrapf(err, "unable to change the ownership of %s to pelican uid %d and gid %d for Globus tokens", tokFdr, puser.Uid, tokenGid) } globusAuthCfg, err := GetGlobusOAuthCfg() diff --git a/origin_serve/backend_blob.go b/origin_serve/backend_blob.go index 685de91f24..f4a7f2a249 100644 --- a/origin_serve/backend_blob.go +++ b/origin_serve/backend_blob.go @@ -31,12 +31,15 @@ import ( "sync/atomic" "time" + "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/credentials" + "github.com/aws/aws-sdk-go-v2/service/s3" log "github.com/sirupsen/logrus" "gocloud.dev/blob" _ "gocloud.dev/blob/azureblob" // register azblob:// URL opener _ "gocloud.dev/blob/gcsblob" // register gs:// URL opener _ "gocloud.dev/blob/memblob" // register mem:// URL opener (useful for testing) - _ "gocloud.dev/blob/s3blob" // register s3:// URL opener + "gocloud.dev/blob/s3blob" "gocloud.dev/gcerrors" "golang.org/x/net/webdav" @@ -108,57 +111,51 @@ func buildS3BlobURL(opts BlobBackendOptions) (string, error) { // newBlobBackend opens a gocloud.dev/blob bucket according to opts and returns // a blobBackend. func newBlobBackend(opts BlobBackendOptions) (*blobBackend, error) { + ctx := context.Background() + var ( bucket *blob.Bucket err error ) - blobURL := opts.BlobURL - if blobURL == "" { - // Build an s3:// URL from the backward-compatible S3-specific fields. - blobURL, err = buildS3BlobURL(opts) + switch { + case opts.BlobURL == "" && opts.AccessKey != "" && opts.SecretKey != "": + // Native S3 export with per-export static credentials. Construct an + // explicit *s3.Client so each backend carries its own credentials. + // Previously the keys were exported into the global process + // environment, which meant two S3 exports configured against + // different accounts would clobber one another -- whichever was + // initialized last won. + bucket, err = openS3BucketWithCredentials(ctx, opts) if err != nil { return nil, err } - } - - // If per-export S3 credentials were provided, set them in the environment - // so the gocloud AWS credential chain picks them up. - // - // FIXME(deploy): this mutates global process environment, which has two - // real-world consequences in production: - // 1. With multiple S3 backends configured against different accounts, - // whichever export is initialized last "wins" -- subsequent SDK - // calls (including from other backends, presigners, debug hooks, - // and any subprocess we spawn) see the last-set credentials. - // 2. Because os.Setenv writes to the global env, it leaks into any - // child process inheriting our env (notably the xrootd workers in - // mixed deployments). - // The proper fix is to construct an *s3.Client with explicit - // aws.Credentials and call s3blob.OpenBucket(ctx, client, bucket, opts) - // directly instead of going through blob.OpenBucket(URL). Until then, - // per-export AccessKey/SecretKey is only safe when one S3 export is - // configured per origin process. - if opts.AccessKey != "" && opts.SecretKey != "" { - os.Setenv("AWS_ACCESS_KEY_ID", opts.AccessKey) - os.Setenv("AWS_SECRET_ACCESS_KEY", opts.SecretKey) - } else if strings.HasPrefix(blobURL, "s3://") { - // No credentials supplied — request anonymous access unless the env - // already has credentials configured. - if os.Getenv("AWS_ACCESS_KEY_ID") == "" { - // Append anonymous=true so the SDK doesn't try IAM, etc. + default: + // Generic gocloud.dev path: an explicit BlobURL (s3/gs/azblob/mem), or + // an S3 bucket with no per-export credentials (anonymous, or ambient + // credentials from the environment / instance role). + blobURL := opts.BlobURL + if blobURL == "" { + // Build an s3:// URL from the backward-compatible S3-specific fields. + blobURL, err = buildS3BlobURL(opts) + if err != nil { + return nil, err + } + } + if strings.HasPrefix(blobURL, "s3://") && os.Getenv("AWS_ACCESS_KEY_ID") == "" { + // No credentials available — request anonymous access so the SDK + // doesn't probe IAM, instance metadata, etc. if strings.Contains(blobURL, "?") { blobURL += "&anonymous=true" } else { blobURL += "?anonymous=true" } } - } - - log.Infof("Opening blob bucket via URL: %s", blobURL) - bucket, err = blob.OpenBucket(context.Background(), blobURL) - if err != nil { - return nil, fmt.Errorf("failed to open blob bucket from URL %q: %w", blobURL, err) + log.Infof("Opening blob bucket via URL: %s", redactBlobURL(blobURL)) + bucket, err = blob.OpenBucket(ctx, blobURL) + if err != nil { + return nil, fmt.Errorf("failed to open blob bucket from URL %q: %w", redactBlobURL(blobURL), err) + } } // If a storagePrefix is configured, scope all operations to it. @@ -172,6 +169,83 @@ func newBlobBackend(opts BlobBackendOptions) (*blobBackend, error) { return &blobBackend{bucket: bucket, fs: fs}, nil } +// openS3BucketWithCredentials opens an S3 bucket using an explicit *s3.Client +// configured with static, per-export credentials. Unlike opening via an s3:// +// URL (which relies on the ambient AWS credential chain backed by process-wide +// environment variables), this keeps each export's credentials local to its +// own client, so multiple S3 exports with distinct accounts can coexist within +// a single origin process. +func openS3BucketWithCredentials(ctx context.Context, opts BlobBackendOptions) (*blob.Bucket, error) { + if opts.Bucket == "" { + return nil, fmt.Errorf("S3 bucket name is required when BlobURL is not set") + } + + cfgOpts := []func(*config.LoadOptions) error{ + config.WithCredentialsProvider( + credentials.NewStaticCredentialsProvider(opts.AccessKey, opts.SecretKey, ""), + ), + } + if opts.Region != "" { + cfgOpts = append(cfgOpts, config.WithRegion(opts.Region)) + } + awsCfg, err := config.LoadDefaultConfig(ctx, cfgOpts...) + if err != nil { + return nil, fmt.Errorf("failed to load AWS config for bucket %q: %w", opts.Bucket, err) + } + + var s3Opts []func(*s3.Options) + // Default to path-style addressing (endpoint/bucket/key) unless virtual-host + // style is explicitly requested; path-style is required by most + // S3-compatible services (MinIO, Ceph) and custom endpoints. + if strings.ToLower(opts.URLStyle) != "virtual" { + s3Opts = append(s3Opts, func(o *s3.Options) { o.UsePathStyle = true }) + } + if opts.ServiceURL != "" { + endpoint := opts.ServiceURL + s3Opts = append(s3Opts, func(o *s3.Options) { o.BaseEndpoint = &endpoint }) + } + client := s3.NewFromConfig(awsCfg, s3Opts...) + + log.Infof("Opening S3 bucket %q with per-export credentials (endpoint: %q, region: %q)", + opts.Bucket, opts.ServiceURL, opts.Region) + + // Mirror gocloud's URL opener: the S3 upload manager doesn't pick up the + // checksum-calculation setting from the config, so propagate it explicitly + // to preserve compatibility with third-party S3 providers. + return s3blob.OpenBucket(ctx, client, opts.Bucket, &s3blob.Options{ + RequestChecksumCalculation: awsCfg.RequestChecksumCalculation, + }) +} + +// redactBlobURL strips any embedded credentials (the userinfo component and +// well-known secret query parameters) from a blob URL so it is safe to log. +// Operators may embed secrets directly in Origin.ObjectProviderURL, e.g. +// "s3://bucket?awssecretkey=...", and those must never reach the logs. +func redactBlobURL(raw string) string { + u, err := url.Parse(raw) + if err != nil { + // If it doesn't parse we can't reason about it; don't risk leaking. + return "[unparsable blob URL redacted]" + } + if u.User != nil { + u.User = url.UserPassword("redacted", "redacted") + } + if q := u.Query(); len(q) > 0 { + changed := false + for key := range q { + switch strings.ToLower(key) { + case "awssecretkey", "secretkey", "secret_access_key", "access_key", "awsaccesskeyid", "password", "token": + q.Set(key, "redacted") + changed = true + } + } + if changed { + u.RawQuery = q.Encode() + } + } + return u.Redacted() +} + func (b *blobBackend) CheckAvailability() error { ok, err := b.bucket.IsAccessible(context.Background()) if err != nil { @@ -296,8 +370,9 @@ func (fs *blobFileSystem) OpenFile(ctx context.Context, name string, flag int, _ peekIter := fs.bucket.List(&blob.ListOptions{Prefix: dirPrefix, Delimiter: "/"}) if _, peekErr := peekIter.Next(ctx); peekErr == nil { // It is a directory — return a lazy dir handle (a fresh iterator - // will be created when Readdir is called). - return &blobDirFile{name: name, bucket: fs.bucket, prefix: dirPrefix}, nil + // will be created when Readdir is called). Carry the request context + // so the deferred listing honours cancellation/deadlines. + return &blobDirFile{name: name, bucket: fs.bucket, prefix: dirPrefix, ctx: ctx}, nil } // Read mode — open via blob.NewReader (supports seek). @@ -369,14 +444,65 @@ func (fs *blobFileSystem) RemoveAll(ctx context.Context, name string) error { } // Rename implements webdav.FileSystem. +// +// Blob stores have no native rename, so we copy-then-delete. For a leaf object +// that is a single copy/delete. When oldName refers to a "directory" (a key +// prefix with children) we must also move every descendant -- otherwise the +// children would be orphaned under the old prefix. Listing is paginated so +// memory stays bounded for large trees, and each object is best-effort: a +// partial failure returns the first error but continues so we don't strand a +// half-moved tree. func (fs *blobFileSystem) Rename(ctx context.Context, oldName, newName string) error { oldKey := blobKey(oldName) newKey := blobKey(newName) + // Move the object at the exact key, if one exists. A missing object is not + // an error here: oldName may be a pure directory prefix with no marker. if err := fs.bucket.Copy(ctx, newKey, oldKey, nil); err != nil { - return fmt.Errorf("blob copy %q -> %q: %w", oldKey, newKey, err) + if !isNotFound(err) { + return fmt.Errorf("blob copy %q -> %q: %w", oldKey, newKey, err) + } + } else if err := fs.bucket.Delete(ctx, oldKey); err != nil && !isNotFound(err) { + return fmt.Errorf("blob delete %q: %w", oldKey, err) + } + + // Move every descendant under the directory prefix. + oldPrefix := oldKey + if oldPrefix != "" && !strings.HasSuffix(oldPrefix, "/") { + oldPrefix += "/" + } + newPrefix := newKey + if newPrefix != "" && !strings.HasSuffix(newPrefix, "/") { + newPrefix += "/" + } + + iter := fs.bucket.List(&blob.ListOptions{Prefix: oldPrefix}) + var firstErr error + for { + obj, listErr := iter.Next(ctx) + if listErr == io.EOF { + break + } + if listErr != nil { + if firstErr == nil { + firstErr = listErr + } + break + } + destKey := newPrefix + strings.TrimPrefix(obj.Key, oldPrefix) + if err := fs.bucket.Copy(ctx, destKey, obj.Key, nil); err != nil { + if firstErr == nil { + firstErr = fmt.Errorf("blob copy %q -> %q: %w", obj.Key, destKey, err) + } + continue + } + if err := fs.bucket.Delete(ctx, obj.Key); err != nil && !isNotFound(err) { + if firstErr == nil { + firstErr = fmt.Errorf("blob delete %q: %w", obj.Key, err) + } + } } - return fs.bucket.Delete(ctx, oldKey) + return firstErr } // Stat implements webdav.FileSystem. @@ -585,6 +711,7 @@ type blobDirFile struct { name string bucket *blob.Bucket prefix string + ctx context.Context mu sync.Mutex iter *blob.ListIterator @@ -629,7 +756,11 @@ func (f *blobDirFile) Readdir(count int) ([]os.FileInfo, error) { break } - obj, err := f.iter.Next(context.Background()) + ctx := f.ctx + if ctx == nil { + ctx = context.Background() + } + obj, err := f.iter.Next(ctx) if err == io.EOF { f.done = true break diff --git a/origin_serve/backend_blob_test.go b/origin_serve/backend_blob_test.go index 500c8d5919..d355726a4c 100644 --- a/origin_serve/backend_blob_test.go +++ b/origin_serve/backend_blob_test.go @@ -85,6 +85,64 @@ func TestBuildS3BlobURL(t *testing.T) { }) } +// --------------------------------------------------------------------------- +// redactBlobURL unit tests +// --------------------------------------------------------------------------- + +func TestRedactBlobURL(t *testing.T) { + t.Run("StripsUserinfoPassword", func(t *testing.T) { + got := redactBlobURL("s3://AKIAEXAMPLE:supersecret@my-bucket?region=us-east-1") + assert.NotContains(t, got, "supersecret") + assert.Contains(t, got, "my-bucket") + }) + + t.Run("RedactsSecretQueryParams", func(t *testing.T) { + got := redactBlobURL("s3://my-bucket?awssecretkey=supersecret®ion=us-east-1") + assert.NotContains(t, got, "supersecret") + assert.Contains(t, got, "region=us-east-1") + }) + + t.Run("LeavesCleanURLUntouched", func(t *testing.T) { + in := "s3://my-bucket?region=us-east-1&use_path_style=true" + got := redactBlobURL(in) + assert.Contains(t, got, "my-bucket") + assert.Contains(t, got, "region=us-east-1") + }) + + t.Run("UnparsableIsFullyRedacted", func(t *testing.T) { + got := redactBlobURL("://::not-a-url::") + assert.Equal(t, "[unparsable blob URL redacted]", got) + }) +} + +// --------------------------------------------------------------------------- +// openS3BucketWithCredentials unit tests +// --------------------------------------------------------------------------- + +// TestOpenS3BucketWithCredentialsDoesNotMutateEnv guards the property that +// motivated the explicit-client path: per-export credentials must stay local +// to the client and never be written into the global process environment +// (where they would clobber other S3 exports). s3blob.OpenBucket is lazy, so +// no S3 server is contacted. +func TestOpenS3BucketWithCredentialsDoesNotMutateEnv(t *testing.T) { + t.Setenv("AWS_ACCESS_KEY_ID", "sentinel-access") + t.Setenv("AWS_SECRET_ACCESS_KEY", "sentinel-secret") + + bucket, err := openS3BucketWithCredentials(context.Background(), BlobBackendOptions{ + ServiceURL: "http://127.0.0.1:1", // never contacted; OpenBucket is lazy + Region: "us-east-1", + Bucket: "my-bucket", + AccessKey: "AKIAEXAMPLE", + SecretKey: "supersecret", + URLStyle: "path", + }) + require.NoError(t, err) + defer bucket.Close() + + assert.Equal(t, "sentinel-access", os.Getenv("AWS_ACCESS_KEY_ID")) + assert.Equal(t, "sentinel-secret", os.Getenv("AWS_SECRET_ACCESS_KEY")) +} + // --------------------------------------------------------------------------- // blobKey unit tests // --------------------------------------------------------------------------- diff --git a/origin_serve/handlers.go b/origin_serve/handlers.go index a2c7afa713..5206149e0e 100644 --- a/origin_serve/handlers.go +++ b/origin_serve/handlers.go @@ -647,7 +647,7 @@ func InitializeHandlers(ctx context.Context, exports []server_utils.OriginExport } backend = blobBe if blobURL != "" { - log.Infof("Initialized blob backend for %s (url: %s)", export.FederationPrefix, blobURL) + log.Infof("Initialized blob backend for %s (url: %s)", export.FederationPrefix, redactBlobURL(blobURL)) } else { log.Infof("Initialized native S3 backend for %s (bucket: %s, region: %s)", export.FederationPrefix, export.S3Bucket, param.Origin_S3Region.GetString()) } diff --git a/server_structs/origin.go b/server_structs/origin.go index 6d815e2aa9..d6df6e633a 100644 --- a/server_structs/origin.go +++ b/server_structs/origin.go @@ -58,6 +58,19 @@ func (t OriginStorageType) IsPosixLike() bool { } } +// UsesXRootD reports whether an origin of this storage type is fronted by an +// XRootD process. The native "v2" backends (and SSH) are served directly by +// the pelican process and never launch XRootD. New native backends should be +// added here rather than duplicating the list of storage-type comparisons. +func (t OriginStorageType) UsesXRootD() bool { + switch t { + case OriginStoragePosixv2, OriginStorageSSH, OriginStorageS3v2, OriginStorageHTTPSv2, OriginStorageGlobusv2: + return false + default: + return true + } +} + // Convert a string to an OriginStorageType func ParseOriginStorageType(storageType string) (ost OriginStorageType, err error) { switch storageType {