Skip to content

Commit

Permalink
sec: fix incorrect host checks for s3 and gcs
Browse files Browse the repository at this point in the history
  • Loading branch information
dduzgun-security committed Jan 6, 2025
1 parent 842d6c3 commit 06a7b84
Show file tree
Hide file tree
Showing 6 changed files with 298 additions and 5 deletions.
110 changes: 109 additions & 1 deletion detect_gcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@ package getter
import (
"fmt"
"net/url"
"path"
"regexp"
"strings"
"unicode"
)

// GCSDetector implements Detector to detect GCS URLs and turn
Expand All @@ -18,23 +21,39 @@ func (d *GCSDetector) Detect(src, _ string) (string, bool, error) {
return "", false, nil
}

if strings.Contains(src, "googleapis.com/") {
if strings.Contains(src, ".googleapis.com/") {
return d.detectHTTP(src)
}

return "", false, nil
}

func (d *GCSDetector) detectHTTP(src string) (string, bool, error) {
src = path.Clean(src)

parts := strings.Split(src, "/")
if len(parts) < 5 {
return "", false, fmt.Errorf(
"URL is not a valid GCS URL")
}

version := parts[2]
if !isValidGCSVersion(version) {
return "", false, fmt.Errorf(
"GCS URL version is not valid")
}

bucket := parts[3]
if !isValidGCSBucketName(bucket) {
return "", false, fmt.Errorf(
"GCS URL bucket name is not valid")
}

object := strings.Join(parts[4:], "/")
if !isValidGCSObjectName(object) {
return "", false, fmt.Errorf(
"GCS URL object name is not valid")
}

url, err := url.Parse(fmt.Sprintf("https://www.googleapis.com/storage/%s/%s/%s",
version, bucket, object))
Expand All @@ -44,3 +63,92 @@ func (d *GCSDetector) detectHTTP(src string) (string, bool, error) {

return "gcs::" + url.String(), true, nil
}

func isValidGCSVersion(version string) bool {
versionPattern := `^v\d+$`
if matched, _ := regexp.MatchString(versionPattern, version); !matched {
return false
}
return true
}

// Validate the bucket name using the following rules: https://cloud.google.com/storage/docs/naming-buckets
func isValidGCSBucketName(bucket string) bool {
// Rule 1: Must be between 3 and 63 characters (or up to 222 if it contains dots, each component up to 63 chars)
if len(bucket) < 3 || len(bucket) > 63 {
if len(bucket) > 63 && len(bucket) <= 222 {
// If it contains dots, each segment between dots must be <= 63 chars
components := strings.Split(bucket, ".")
for _, component := range components {
if len(component) > 63 {
return false
}
}
} else {
return false
}
}

// Rule 2: Bucket name cannot start or end with a hyphen, dot, or underscore
if bucket[0] == '-' || bucket[0] == '.' || bucket[len(bucket)-1] == '-' || bucket[len(bucket)-1] == '.' || bucket[len(bucket)-1] == '_' {
return false
}

// Rule 3: Bucket name cannot contain spaces
if strings.Contains(bucket, " ") {
return false
}

// Rule 4: Bucket name cannot be an IP address (only digits and dots, e.g., 192.168.5.4)
ipPattern := `^(\d{1,3}\.){3}\d{1,3}$`
if matched, _ := regexp.MatchString(ipPattern, bucket); matched {
return false
}

// Rule 5: Bucket name cannot start with "goog"
if strings.HasPrefix(bucket, "goog") {
return false
}

// Rule 6: Bucket name cannot contain "google" or common misspellings like "g00gle"
googlePattern := `google|g00gle`
if matched, _ := regexp.MatchString(googlePattern, bucket); matched {
return false
}

// Rule 7: Bucket name can only contain lowercase letters, digits, dashes, underscores, and dots
bucketPattern := `^[a-z0-9\-_\.]+$`
if matched, _ := regexp.MatchString(bucketPattern, bucket); !matched {
return false
}

return true
}

// Validate the object name using the following rules: https://cloud.google.com/storage/docs/naming-objects
func isValidGCSObjectName(object string) bool {
// Rule 1: Object names cannot contain Carriage Return (\r) or Line Feed (\n) characters
if strings.Contains(object, "\r") || strings.Contains(object, "\n") {
return false
}

// Rule 2: Object names cannot start with '.well-known/acme-challenge/'
if strings.HasPrefix(object, ".well-known/acme-challenge/") {
return false
}

// Rule 3: Object names cannot be exactly '.' or '..'
if object == "." || object == ".." {
return false
}

// Rule 4: Ensure that the object name contains only valid Unicode characters
// (for simplicity, let's ensure it's not empty and does not contain any forbidden control characters)
for _, r := range object {
if !unicode.IsPrint(r) && !unicode.IsSpace(r) && r != '.' && r != '-' && r != '/' {
return false
}
}

return true
}
143 changes: 143 additions & 0 deletions detect_gcs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ func TestGCSDetector(t *testing.T) {
"www.googleapis.com/storage/v1/foo/bar.baz",
"gcs::https://www.googleapis.com/storage/v1/foo/bar.baz",
},
{
"www.googleapis.com/storage/v2/foo/bar/toor.baz",
"gcs::https://www.googleapis.com/storage/v2/foo/bar/toor.baz",
},
}

pwd := "/pwd"
Expand All @@ -42,3 +46,142 @@ func TestGCSDetector(t *testing.T) {
}
}
}

func TestGCSDetector_MalformedDetectHTTP(t *testing.T) {
cases := []struct {
Name string
Input string
Expected string
Output string
}{
{
"valid url",
"www.googleapis.com/storage/v1/my-bucket/foo/bar",
"",
"gcs::https://www.googleapis.com/storage/v1/my-bucket/foo/bar",
},
{
"not valid url length",
"www.googleapis.com.invalid/storage/v1/",
"URL is not a valid GCS URL",
"",
},
{
"not valid version",
"www.googleapis.com/storage/invalid-version/my-bucket/foo",
"GCS URL version is not valid",
"",
},
{
"not valid bucket",
"www.googleapis.com/storage/v1/127.0.0.1/foo",
"GCS URL bucket name is not valid",
"",
},
{
"not valid object",
"www.googleapis.com/storage/v1/my-bucket/.well-known/acme-challenge/foo",
"GCS URL object name is not valid",
"",
},
{
"path traversal",
"www.googleapis.com/storage/v1/my-bucket/../../../foo/bar",
"URL is not a valid GCS URL",
"",
},
}

pwd := "/pwd"
f := new(GCSDetector)
for _, tc := range cases {
output, _, err := f.Detect(tc.Input, pwd)
if err != nil {
if err.Error() != tc.Expected {
t.Fatalf("expected error %s, got %s for %s", tc.Expected, err.Error(), tc.Name)
}
}

if output != tc.Output {
t.Fatalf("expected %s, got %s", tc.Output, output)
}
}
}

func TestIsValidGCSVersion(t *testing.T) {
cases := []struct {
Name string
Input string
Expected bool
}{
{
"valid version",
"v1",
true,
},
{
"invalid version",
"invalid1",
false,
},
}

for _, tc := range cases {
output := isValidGCSVersion(tc.Input)
if output != tc.Expected {
t.Fatalf("expected %t, got %t for test %s", tc.Expected, output, tc.Name)
}
}
}

func TestIsValidGCSBucketName(t *testing.T) {
cases := []struct {
Name string
Input string
Expected bool
}{
{
"valid bucket name",
"my-bucket",
true,
},
{
"invalid bucket name",
"..",
false,
},
}

for _, tc := range cases {
output := isValidGCSBucketName(tc.Input)
if output != tc.Expected {
t.Fatalf("expected %t, got %t for test %s", tc.Expected, output, tc.Name)
}
}
}

func TestIsValidGCSObjectName(t *testing.T) {
cases := []struct {
Name string
Input string
Expected bool
}{
{
"valid object name",
"my-object",
true,
},
{
"invalid object name",
"..",
false,
},
}

for _, tc := range cases {
output := isValidGCSObjectName(tc.Input)
if output != tc.Expected {
t.Fatalf("expected %t, got %t for test %s", tc.Expected, output, tc.Name)
}
}
}
4 changes: 3 additions & 1 deletion get_gcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ func (g *GCSGetter) getObject(ctx context.Context, client *storage.Client, dst,
}

func (g *GCSGetter) parseURL(u *url.URL) (bucket, path, fragment string, err error) {
if strings.Contains(u.Host, "googleapis.com") {
if strings.HasSuffix(u.Host, ".googleapis.com") {
hostParts := strings.Split(u.Host, ".")
if len(hostParts) != 3 {
err = fmt.Errorf("URL is not a valid GCS URL")
Expand All @@ -208,6 +208,8 @@ func (g *GCSGetter) parseURL(u *url.URL) (bucket, path, fragment string, err err
bucket = pathParts[3]
path = pathParts[4]
fragment = u.Fragment
} else {
err = fmt.Errorf("URL is not a valid GCS URL")
}
return
}
Expand Down
32 changes: 32 additions & 0 deletions get_gcs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -233,3 +233,35 @@ func TestGCSGetter_GetFile_OAuthAccessToken(t *testing.T) {
}
assertContents(t, dst, "# Main\n")
}

func Test_GCSGetter_ParseUrl_Malformed(t *testing.T) {
tests := []struct {
name string
url string
}{
{
name: "invalid host suffix",
url: "https://www.googleapis.com.invalid",
},
{
name: "host suffix with a typo",
url: "https://www.googleapi.com.",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
g := new(GCSGetter)
u, err := url.Parse(tt.url)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
_, _, _, err = g.parseURL(u)
if err == nil {
t.Fatalf("expected error, got none")
}
if err.Error() != "URL is not a valid GCS URL" {
t.Fatalf("expected error 'URL is not a valid GCS URL', got %s", err.Error())
}
})
}
}
6 changes: 3 additions & 3 deletions get_s3.go
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ func (g *S3Getter) parseUrl(u *url.URL) (region, bucket, path, version string, c
// This just check whether we are dealing with S3 or
// any other S3 compliant service. S3 has a predictable
// url as others do not
if strings.Contains(u.Host, "amazonaws.com") {
if strings.HasSuffix(u.Host, ".amazonaws.com") {
// Amazon S3 supports both virtual-hosted–style and path-style URLs to access a bucket, although path-style is deprecated
// In both cases few older regions supports dash-style region indication (s3-Region) even if AWS discourages their use.
// The same bucket could be reached with:
Expand Down Expand Up @@ -304,7 +304,7 @@ func (g *S3Getter) parseUrl(u *url.URL) (region, bucket, path, version string, c
path = pathParts[1]

}
if len(hostParts) < 3 && len(hostParts) > 5 {
if len(hostParts) < 3 || len(hostParts) > 5 {
err = fmt.Errorf("URL is not a valid S3 URL")
return
}
Expand All @@ -313,7 +313,7 @@ func (g *S3Getter) parseUrl(u *url.URL) (region, bucket, path, version string, c
} else {
pathParts := strings.SplitN(u.Path, "/", 3)
if len(pathParts) != 3 {
err = fmt.Errorf("URL is not a valid S3 compliant URL")
err = fmt.Errorf("URL is not a valid S3 URL")
return
}
bucket = pathParts[1]
Expand Down
8 changes: 8 additions & 0 deletions get_s3_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,14 @@ func Test_S3Getter_ParseUrl_Malformed(t *testing.T) {
name: "vhost-style, dot region indication",
url: "https://bucket.s3.us-east-1.amazonaws.com",
},
{
name: "invalid host parts",
url: "https://invalid.host.parts.lenght.s3.us-east-1.amazonaws.com",
},
{
name: "invalid host suffix",
url: "https://bucket.s3.amazonaws.com.invalid",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand Down

0 comments on commit 06a7b84

Please sign in to comment.