Skip to content

Commit 06a7b84

Browse files
sec: fix incorrect host checks for s3 and gcs
1 parent 842d6c3 commit 06a7b84

File tree

6 files changed

+298
-5
lines changed

6 files changed

+298
-5
lines changed

detect_gcs.go

Lines changed: 109 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@ package getter
66
import (
77
"fmt"
88
"net/url"
9+
"path"
10+
"regexp"
911
"strings"
12+
"unicode"
1013
)
1114

1215
// GCSDetector implements Detector to detect GCS URLs and turn
@@ -18,23 +21,39 @@ func (d *GCSDetector) Detect(src, _ string) (string, bool, error) {
1821
return "", false, nil
1922
}
2023

21-
if strings.Contains(src, "googleapis.com/") {
24+
if strings.Contains(src, ".googleapis.com/") {
2225
return d.detectHTTP(src)
2326
}
2427

2528
return "", false, nil
2629
}
2730

2831
func (d *GCSDetector) detectHTTP(src string) (string, bool, error) {
32+
src = path.Clean(src)
2933

3034
parts := strings.Split(src, "/")
3135
if len(parts) < 5 {
3236
return "", false, fmt.Errorf(
3337
"URL is not a valid GCS URL")
3438
}
39+
3540
version := parts[2]
41+
if !isValidGCSVersion(version) {
42+
return "", false, fmt.Errorf(
43+
"GCS URL version is not valid")
44+
}
45+
3646
bucket := parts[3]
47+
if !isValidGCSBucketName(bucket) {
48+
return "", false, fmt.Errorf(
49+
"GCS URL bucket name is not valid")
50+
}
51+
3752
object := strings.Join(parts[4:], "/")
53+
if !isValidGCSObjectName(object) {
54+
return "", false, fmt.Errorf(
55+
"GCS URL object name is not valid")
56+
}
3857

3958
url, err := url.Parse(fmt.Sprintf("https://www.googleapis.com/storage/%s/%s/%s",
4059
version, bucket, object))
@@ -44,3 +63,92 @@ func (d *GCSDetector) detectHTTP(src string) (string, bool, error) {
4463

4564
return "gcs::" + url.String(), true, nil
4665
}
66+
67+
// gcsVersionPattern matches a GCS API version path segment: the letter "v"
// followed by one or more decimal digits (for example "v1"). It is compiled
// once at package initialization instead of on every validation call.
var gcsVersionPattern = regexp.MustCompile(`^v\d+$`)

// isValidGCSVersion reports whether version is a well-formed GCS API version
// segment such as "v1" or "v2". The whole string must match, so an empty
// string, a bare "v", or any extra leading or trailing characters are rejected.
func isValidGCSVersion(version string) bool {
	return gcsVersionPattern.MatchString(version)
}
74+
75+
// Patterns used by isValidGCSBucketName, compiled once at package scope
// instead of on every call.
var (
	// gcsBucketIPPattern matches names written in dotted-decimal IP form
	// (only digits and dots, e.g. 192.168.5.4).
	gcsBucketIPPattern = regexp.MustCompile(`^(\d{1,3}\.){3}\d{1,3}$`)

	// gcsBucketCharsPattern matches names built only from lowercase letters,
	// digits, dashes, underscores, and dots.
	gcsBucketCharsPattern = regexp.MustCompile(`^[a-z0-9\-_\.]+$`)
)

// isValidGCSBucketName reports whether bucket satisfies the GCS bucket naming
// rules described at https://cloud.google.com/storage/docs/naming-buckets:
//
//   - length is 3-63 bytes, or up to 222 bytes when the name contains dots and
//     every dot-separated component is at most 63 bytes;
//   - only lowercase letters, digits, dashes, underscores, and dots are used
//     (which also excludes spaces and uppercase letters);
//   - the first and last characters are a letter or a digit;
//   - the name is not in dotted-decimal IP address form;
//   - the name does not start with "goog" and does not contain "google" or the
//     misspelling "g00gle".
func isValidGCSBucketName(bucket string) bool {
	// Length limits. Names longer than 63 bytes are only acceptable when they
	// are dotted and no single component exceeds 63 bytes; an undotted long
	// name is one oversized component and is rejected by the loop below.
	switch n := len(bucket); {
	case n < 3 || n > 222:
		return false
	case n > 63:
		for _, component := range strings.Split(bucket, ".") {
			if len(component) > 63 {
				return false
			}
		}
	}

	// Allowed character set.
	if !gcsBucketCharsPattern.MatchString(bucket) {
		return false
	}

	// The name must start and end with a letter or digit. The length check
	// above guarantees bucket is non-empty, so indexing is safe. This also
	// rejects a leading underscore, which was previously let through.
	isAlnum := func(c byte) bool {
		return (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')
	}
	if !isAlnum(bucket[0]) || !isAlnum(bucket[len(bucket)-1]) {
		return false
	}

	// The name must not look like an IP address.
	if gcsBucketIPPattern.MatchString(bucket) {
		return false
	}

	// Reserved prefix and reserved substrings.
	if strings.HasPrefix(bucket, "goog") {
		return false
	}
	if strings.Contains(bucket, "google") || strings.Contains(bucket, "g00gle") {
		return false
	}

	return true
}
127+
128+
// isValidGCSObjectName reports whether object satisfies the GCS object naming
// rules described at https://cloud.google.com/storage/docs/naming-objects:
//
//   - the name is not empty;
//   - the name is not exactly "." or "..";
//   - the name does not start with ".well-known/acme-challenge/";
//   - the name is valid UTF-8;
//   - the name contains no carriage return or line feed, and no other
//     character that is neither printable nor whitespace (e.g. NUL).
func isValidGCSObjectName(object string) bool {
	switch object {
	case "", ".", "..":
		return false
	}

	if strings.HasPrefix(object, ".well-known/acme-challenge/") {
		return false
	}

	// ToValidUTF8 returns the input unchanged only when it holds no invalid
	// byte sequences. Without this check, invalid bytes would decode to U+FFFD
	// in the loop below, which is printable and would be accepted.
	if strings.ToValidUTF8(object, "") != object {
		return false
	}

	for _, r := range object {
		// CR and LF count as whitespace for unicode.IsSpace, so they have to
		// be rejected explicitly.
		if r == '\r' || r == '\n' {
			return false
		}
		if !unicode.IsPrint(r) && !unicode.IsSpace(r) {
			return false
		}
	}

	return true
}

detect_gcs_test.go

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ func TestGCSDetector(t *testing.T) {
2424
"www.googleapis.com/storage/v1/foo/bar.baz",
2525
"gcs::https://www.googleapis.com/storage/v1/foo/bar.baz",
2626
},
27+
{
28+
"www.googleapis.com/storage/v2/foo/bar/toor.baz",
29+
"gcs::https://www.googleapis.com/storage/v2/foo/bar/toor.baz",
30+
},
2731
}
2832

2933
pwd := "/pwd"
@@ -42,3 +46,142 @@ func TestGCSDetector(t *testing.T) {
4246
}
4347
}
4448
}
49+
50+
func TestGCSDetector_MalformedDetectHTTP(t *testing.T) {
51+
cases := []struct {
52+
Name string
53+
Input string
54+
Expected string
55+
Output string
56+
}{
57+
{
58+
"valid url",
59+
"www.googleapis.com/storage/v1/my-bucket/foo/bar",
60+
"",
61+
"gcs::https://www.googleapis.com/storage/v1/my-bucket/foo/bar",
62+
},
63+
{
64+
"not valid url length",
65+
"www.googleapis.com.invalid/storage/v1/",
66+
"URL is not a valid GCS URL",
67+
"",
68+
},
69+
{
70+
"not valid version",
71+
"www.googleapis.com/storage/invalid-version/my-bucket/foo",
72+
"GCS URL version is not valid",
73+
"",
74+
},
75+
{
76+
"not valid bucket",
77+
"www.googleapis.com/storage/v1/127.0.0.1/foo",
78+
"GCS URL bucket name is not valid",
79+
"",
80+
},
81+
{
82+
"not valid object",
83+
"www.googleapis.com/storage/v1/my-bucket/.well-known/acme-challenge/foo",
84+
"GCS URL object name is not valid",
85+
"",
86+
},
87+
{
88+
"path traversal",
89+
"www.googleapis.com/storage/v1/my-bucket/../../../foo/bar",
90+
"URL is not a valid GCS URL",
91+
"",
92+
},
93+
}
94+
95+
pwd := "/pwd"
96+
f := new(GCSDetector)
97+
for _, tc := range cases {
98+
output, _, err := f.Detect(tc.Input, pwd)
99+
if err != nil {
100+
if err.Error() != tc.Expected {
101+
t.Fatalf("expected error %s, got %s for %s", tc.Expected, err.Error(), tc.Name)
102+
}
103+
}
104+
105+
if output != tc.Output {
106+
t.Fatalf("expected %s, got %s", tc.Output, output)
107+
}
108+
}
109+
}
110+
111+
func TestIsValidGCSVersion(t *testing.T) {
112+
cases := []struct {
113+
Name string
114+
Input string
115+
Expected bool
116+
}{
117+
{
118+
"valid version",
119+
"v1",
120+
true,
121+
},
122+
{
123+
"invalid version",
124+
"invalid1",
125+
false,
126+
},
127+
}
128+
129+
for _, tc := range cases {
130+
output := isValidGCSVersion(tc.Input)
131+
if output != tc.Expected {
132+
t.Fatalf("expected %t, got %t for test %s", tc.Expected, output, tc.Name)
133+
}
134+
}
135+
}
136+
137+
func TestIsValidGCSBucketName(t *testing.T) {
138+
cases := []struct {
139+
Name string
140+
Input string
141+
Expected bool
142+
}{
143+
{
144+
"valid bucket name",
145+
"my-bucket",
146+
true,
147+
},
148+
{
149+
"invalid bucket name",
150+
"..",
151+
false,
152+
},
153+
}
154+
155+
for _, tc := range cases {
156+
output := isValidGCSBucketName(tc.Input)
157+
if output != tc.Expected {
158+
t.Fatalf("expected %t, got %t for test %s", tc.Expected, output, tc.Name)
159+
}
160+
}
161+
}
162+
163+
func TestIsValidGCSObjectName(t *testing.T) {
164+
cases := []struct {
165+
Name string
166+
Input string
167+
Expected bool
168+
}{
169+
{
170+
"valid object name",
171+
"my-object",
172+
true,
173+
},
174+
{
175+
"invalid object name",
176+
"..",
177+
false,
178+
},
179+
}
180+
181+
for _, tc := range cases {
182+
output := isValidGCSObjectName(tc.Input)
183+
if output != tc.Expected {
184+
t.Fatalf("expected %t, got %t for test %s", tc.Expected, output, tc.Name)
185+
}
186+
}
187+
}

get_gcs.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ func (g *GCSGetter) getObject(ctx context.Context, client *storage.Client, dst,
193193
}
194194

195195
func (g *GCSGetter) parseURL(u *url.URL) (bucket, path, fragment string, err error) {
196-
if strings.Contains(u.Host, "googleapis.com") {
196+
if strings.HasSuffix(u.Host, ".googleapis.com") {
197197
hostParts := strings.Split(u.Host, ".")
198198
if len(hostParts) != 3 {
199199
err = fmt.Errorf("URL is not a valid GCS URL")
@@ -208,6 +208,8 @@ func (g *GCSGetter) parseURL(u *url.URL) (bucket, path, fragment string, err err
208208
bucket = pathParts[3]
209209
path = pathParts[4]
210210
fragment = u.Fragment
211+
} else {
212+
err = fmt.Errorf("URL is not a valid GCS URL")
211213
}
212214
return
213215
}

get_gcs_test.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,3 +233,35 @@ func TestGCSGetter_GetFile_OAuthAccessToken(t *testing.T) {
233233
}
234234
assertContents(t, dst, "# Main\n")
235235
}
236+
237+
func Test_GCSGetter_ParseUrl_Malformed(t *testing.T) {
238+
tests := []struct {
239+
name string
240+
url string
241+
}{
242+
{
243+
name: "invalid host suffix",
244+
url: "https://www.googleapis.com.invalid",
245+
},
246+
{
247+
name: "host suffix with a typo",
248+
url: "https://www.googleapi.com.",
249+
},
250+
}
251+
for _, tt := range tests {
252+
t.Run(tt.name, func(t *testing.T) {
253+
g := new(GCSGetter)
254+
u, err := url.Parse(tt.url)
255+
if err != nil {
256+
t.Fatalf("unexpected error: %s", err)
257+
}
258+
_, _, _, err = g.parseURL(u)
259+
if err == nil {
260+
t.Fatalf("expected error, got none")
261+
}
262+
if err.Error() != "URL is not a valid GCS URL" {
263+
t.Fatalf("expected error 'URL is not a valid GCS URL', got %s", err.Error())
264+
}
265+
})
266+
}
267+
}

get_s3.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,7 @@ func (g *S3Getter) parseUrl(u *url.URL) (region, bucket, path, version string, c
252252
// This just check whether we are dealing with S3 or
253253
// any other S3 compliant service. S3 has a predictable
254254
// url as others do not
255-
if strings.Contains(u.Host, "amazonaws.com") {
255+
if strings.HasSuffix(u.Host, ".amazonaws.com") {
256256
// Amazon S3 supports both virtual-hosted–style and path-style URLs to access a bucket, although path-style is deprecated
257257
// In both cases few older regions supports dash-style region indication (s3-Region) even if AWS discourages their use.
258258
// The same bucket could be reached with:
@@ -304,7 +304,7 @@ func (g *S3Getter) parseUrl(u *url.URL) (region, bucket, path, version string, c
304304
path = pathParts[1]
305305

306306
}
307-
if len(hostParts) < 3 && len(hostParts) > 5 {
307+
if len(hostParts) < 3 || len(hostParts) > 5 {
308308
err = fmt.Errorf("URL is not a valid S3 URL")
309309
return
310310
}
@@ -313,7 +313,7 @@ func (g *S3Getter) parseUrl(u *url.URL) (region, bucket, path, version string, c
313313
} else {
314314
pathParts := strings.SplitN(u.Path, "/", 3)
315315
if len(pathParts) != 3 {
316-
err = fmt.Errorf("URL is not a valid S3 compliant URL")
316+
err = fmt.Errorf("URL is not a valid S3 URL")
317317
return
318318
}
319319
bucket = pathParts[1]

get_s3_test.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,14 @@ func Test_S3Getter_ParseUrl_Malformed(t *testing.T) {
293293
name: "vhost-style, dot region indication",
294294
url: "https://bucket.s3.us-east-1.amazonaws.com",
295295
},
296+
{
297+
name: "invalid host parts",
298+
url: "https://invalid.host.parts.lenght.s3.us-east-1.amazonaws.com",
299+
},
300+
{
301+
name: "invalid host suffix",
302+
url: "https://bucket.s3.amazonaws.com.invalid",
303+
},
296304
}
297305
for _, tt := range tests {
298306
t.Run(tt.name, func(t *testing.T) {

0 commit comments

Comments
 (0)