Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions internal/urlutil/domains.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,12 @@ import (
"regexp"
"sort"
"strings"

"github.com/github/gh-aw-mcpg/internal/logger"
)

// logDomains is the logger used by the domain-extraction helpers in this file
// (ExtractURLDomainsFromValue and ExtractURLDomains). It is created with the
// name "urlutil:domains".
var logDomains = logger.New("urlutil:domains")

// urlPattern requires a non-empty hostname candidate and then captures the rest
// of the URL until common delimiter characters. The (?i) flag makes the scheme
// match case-insensitive (e.g. "HTTPS://"). Matches are still validated with
Expand All @@ -18,6 +22,7 @@ func ExtractURLDomainsFromValue(value any) []string {
domainSet := make(map[string]struct{})
collectURLDomains(value, domainSet)
if len(domainSet) == 0 {
logDomains.Print("ExtractURLDomainsFromValue: no domains found in value tree")
return nil
}

Expand All @@ -26,6 +31,7 @@ func ExtractURLDomainsFromValue(value any) []string {
domains = append(domains, domain)
}
sort.Strings(domains)
logDomains.Printf("ExtractURLDomainsFromValue: extracted %d unique domain(s)", len(domains))
return domains
}

Expand Down Expand Up @@ -56,6 +62,7 @@ func ExtractURLDomains(text string) []string {
if len(matches) == 0 {
return nil
}
logDomains.Printf("ExtractURLDomains: found %d URL candidate(s) in text", len(matches))

domainSet := make(map[string]struct{})
for _, match := range matches {
Expand All @@ -67,6 +74,11 @@ func ExtractURLDomains(text string) []string {
match = strings.TrimRight(match, ".,;:!?)]}\"'")
parsed, err := url.Parse(match)
if err != nil {
if uerr, ok := err.(*url.Error); ok {
logDomains.Printf("ExtractURLDomains: skipping unparseable URL candidate: %v", uerr.Err)
} else {
logDomains.Printf("ExtractURLDomains: skipping unparseable URL candidate (%T)", err)
}
continue
}
Comment thread
Copilot marked this conversation as resolved.
host := strings.ToLower(parsed.Hostname())
Expand All @@ -84,5 +96,6 @@ func ExtractURLDomains(text string) []string {
domains = append(domains, domain)
}
sort.Strings(domains)
logDomains.Printf("ExtractURLDomains: resolved %d unique domain(s) from %d candidate(s)", len(domains), len(matches))
return domains
}
Loading