Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pkg/workflow/awf_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ func injectMaxAICreditsExpression(awfConfigJSON string, expr string) string {
}

func buildWorkflowCallNetworkAllowedUpdateScript() (string, error) {
ecosystemDomains := getLoadedEcosystemDomains()
ecosystemMap := make(map[string][]string, safeAllocationCapacity(len(ecosystemDomains), len(compoundEcosystems)))
for ecosystem := range ecosystemDomains {
ecosystemMap[ecosystem] = getEcosystemDomains(ecosystem)
Expand Down
19 changes: 15 additions & 4 deletions pkg/workflow/compiler_activation_job_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,9 @@ func (c *Compiler) newActivationJobBuildContext(
}

ctx := newActivationBuildContext(data, preActivationJobCreated, workflowRunRepoSafety, lockFilename)
cacheActivationPreStepPermissions(ctx)
if err := cacheActivationPreStepPermissions(ctx); err != nil {
return nil, err
}
c.addActivationSetupAndWorkflowCallSteps(ctx, setupActionRef)

engine, err := c.getAgenticEngine(data.AI)
Expand Down Expand Up @@ -99,7 +101,7 @@ func newActivationBuildContext(data *WorkflowData, preActivationJobCreated bool,
return ctx
}

func cacheActivationPreStepPermissions(ctx *activationJobBuildContext) {
func cacheActivationPreStepPermissions(ctx *activationJobBuildContext) error {
// Cache scripts from setup/pre-steps and inferred permissions once to avoid redundant
// extraction and inference calls in buildActivationPermissions and
// addActivationFeedbackAndValidationSteps.
Expand All @@ -111,8 +113,13 @@ func cacheActivationPreStepPermissions(ctx *activationJobBuildContext) {
ctx.activationAllScripts = extractRunScriptsFromJobSection(ctx.data.Jobs, activationJobName, "setup-steps")
ctx.activationAllScripts = append(ctx.activationAllScripts, extractRunScriptsFromJobSection(ctx.data.Jobs, activationJobName, "pre-steps")...)
if len(ctx.activationAllScripts) > 0 {
ctx.activationInferredPerms = inferPermissionsFromShellScripts(ctx.activationAllScripts)
inferredPerms, err := inferPermissionsFromShellScripts(ctx.activationAllScripts)
if err != nil {
return err
}
ctx.activationInferredPerms = inferredPerms
}
return nil
}

func (c *Compiler) addActivationSetupAndWorkflowCallSteps(ctx *activationJobBuildContext, setupActionRef string) {
Expand Down Expand Up @@ -855,7 +862,11 @@ func (c *Compiler) addActivationScriptPermissions(permsMap map[PermissionScope]P
if len(ctx.activationAllScripts) > 0 {
// Detect write commands first — these are not permitted in the activation job
// because it intentionally operates with read-only permissions.
if writeCmds := detectWriteCommandsInShellScripts(ctx.activationAllScripts); len(writeCmds) > 0 {
writeCmds, err := detectWriteCommandsInShellScripts(ctx.activationAllScripts)
if err != nil {
return err
}
if len(writeCmds) > 0 {
return fmt.Errorf(
"activation job uses write gh command(s) [%s]; write operations are not permitted in activation job steps because the activation job runs with read-only permissions. Move write operations to the agent job steps or use safe-outputs. See: https://github.github.com/gh-aw/reference/safe-outputs/",
strings.Join(writeCmds, ", "),
Expand Down
11 changes: 9 additions & 2 deletions pkg/workflow/compiler_main_job.go
Original file line number Diff line number Diff line change
Expand Up @@ -398,7 +398,11 @@ func (c *Compiler) buildMainJob(data *WorkflowData, activationJobCreated bool) (
agentAllScripts = append(agentAllScripts, extractRunScriptsFromJobSection(data.Jobs, agentJobName, "pre-steps")...)
}
if len(agentAllScripts) > 0 {
if writeCmds := detectWriteCommandsInShellScripts(agentAllScripts); len(writeCmds) > 0 {
writeCmds, err := detectWriteCommandsInShellScripts(agentAllScripts)
if err != nil {
return nil, err
}
if len(writeCmds) > 0 {
return nil, fmt.Errorf(
"agent job uses write gh command(s) [%s]; write operations are not permitted in agent job steps because the agent job runs with read-only permissions. Use safe-outputs for write operations. See: https://github.github.com/gh-aw/reference/safe-outputs/",
strings.Join(writeCmds, ", "),
Expand All @@ -410,7 +414,10 @@ func (c *Compiler) buildMainJob(data *WorkflowData, activationJobCreated bool) (
// Uses the same exact-string check as tools.go (the YAML parser always normalizes
// "permissions: {}" to this canonical form when parsing the frontmatter).
if data.Permissions != "permissions: {}" && permissions != "" {
inferred := inferPermissionsFromShellScripts(agentAllScripts)
inferred, err := inferPermissionsFromShellScripts(agentAllScripts)
if err != nil {
return nil, err
}
if len(inferred) > 0 {
permissions = mergeInferredIntoPermissionsYAML(permissions, inferred)
}
Expand Down
50 changes: 28 additions & 22 deletions pkg/workflow/domains.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"
"sort"
"strings"
"sync"

"github.com/github/gh-aw/pkg/constants"
"github.com/github/gh-aw/pkg/logger"
Expand All @@ -18,8 +19,31 @@ var domainsLog = logger.New("workflow:domains")
//go:embed data/ecosystem_domains.json
var ecosystemDomainsJSON []byte

// ecosystemDomains holds the loaded domain data
var ecosystemDomains map[string][]string
// loadEcosystemDomains decodes the embedded ecosystem domains JSON into a map keyed
// by ecosystem category and sorts every domain list. The loader is wrapped in
// sync.OnceValues, so the decode and sort run at most once; every later call gets
// the same map and error back.
var loadEcosystemDomains = sync.OnceValues(func() (map[string][]string, error) {
	domainsLog.Print("Loading ecosystem domains from embedded JSON")

	byCategory := make(map[string][]string)
	err := json.Unmarshal(ecosystemDomainsJSON, &byCategory)
	if err != nil {
		return nil, fmt.Errorf("failed to load ecosystem domains from JSON: %w", err)
	}

	// Sort each list in place a single time here, so getEcosystemDomains can
	// hand out copies without sorting again.
	for _, domains := range byCategory {
		sort.Strings(domains)
	}

	domainsLog.Printf("Loaded %d ecosystem categories", len(byCategory))
	return byCategory, nil
})

// getLoadedEcosystemDomains returns the map produced by loadEcosystemDomains.
// If loading failed, the error is logged (not returned) and a fresh empty map
// is handed back so callers can range over or index the result unconditionally.
func getLoadedEcosystemDomains() map[string][]string {
	loaded, err := loadEcosystemDomains()
	if err == nil {
		return loaded
	}

	domainsLog.Printf("Failed to load ecosystem domains: %v", err)
	return map[string][]string{}
}

// CopilotDefaultDomains are the default domains required for GitHub Copilot CLI authentication and operation
var CopilotDefaultDomains = []string{
Expand Down Expand Up @@ -318,25 +342,6 @@ var PlaywrightDomains = []string{
"playwright.download.prss.microsoft.com",
}

// init decodes the embedded ecosystem domains JSON into the package-level
// ecosystemDomains map and sorts each domain list once. Sorting here means
// getEcosystemDomains, which runs on every compilation, does not have to
// allocate and sort a list on each call. A decode failure panics.
func init() {
	domainsLog.Print("Loading ecosystem domains from embedded JSON")

	err := json.Unmarshal(ecosystemDomainsJSON, &ecosystemDomains)
	if err != nil {
		panic(fmt.Sprintf("failed to load ecosystem domains from JSON: %v", err))
	}

	// Sort every list in place up front so getEcosystemDomains only copies.
	for _, domains := range ecosystemDomains {
		sort.Strings(domains)
	}

	domainsLog.Printf("Loaded %d ecosystem categories", len(ecosystemDomains))
}

// compoundEcosystems defines ecosystem identifiers that expand to the union of multiple
// component ecosystems. These are resolved at lookup time, so they stay in sync with
// any future changes to the component ecosystems.
Expand Down Expand Up @@ -364,6 +369,7 @@ func getEcosystemDomains(category string) []string {
return result
}

ecosystemDomains := getLoadedEcosystemDomains()
domains, exists := ecosystemDomains[category]
if !exists {
return []string{}
Expand Down Expand Up @@ -586,7 +592,7 @@ func GetDomainEcosystem(domain string) string {

// Fall back to any ecosystems not in the priority list, sorted for determinism
remaining := make([]string, 0)
for ecosystem := range ecosystemDomains {
for ecosystem := range getLoadedEcosystemDomains() {
if _, ok := checked[ecosystem]; !ok {
remaining = append(remaining, ecosystem)
}
Expand Down
37 changes: 25 additions & 12 deletions pkg/workflow/gh_cli_permissions.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"regexp"
"sort"
"strings"
"sync"

"github.com/github/gh-aw/pkg/logger"
"github.com/goccy/go-yaml"
Expand Down Expand Up @@ -74,12 +75,10 @@ type compiledAPIPathPattern struct {
appPermissions []PermissionScope
}

var ghCLIPermissions compiledGHCLIPermissions

func init() {
var getCompiledGHCLIPermissions = sync.OnceValues(func() (compiledGHCLIPermissions, error) {
var data ghCLIPermissionsData
if err := json.Unmarshal(ghCLIPermissionsJSON, &data); err != nil {
panic(fmt.Sprintf("failed to load gh CLI permissions from JSON: %v", err))
return compiledGHCLIPermissions{}, fmt.Errorf("failed to load gh CLI permissions from JSON: %w", err)
}

cp := compiledGHCLIPermissions{
Expand All @@ -99,7 +98,13 @@ func init() {
}
sort.Strings(groups) // deterministic alternation order
subcommandPattern := `(?m)(?:^|[\s|;])gh\s+(` + strings.Join(groups, "|") + `)\s+([\w][\w-]*)\b`
cp.subcommandRE = regexp.MustCompile(subcommandPattern)
// Defensive check: the pattern is built from embedded JSON keys quoted with
// regexp.QuoteMeta, so a compile error would indicate unexpected data corruption.
subcommandRE, err := regexp.Compile(subcommandPattern)
if err != nil {
return compiledGHCLIPermissions{}, fmt.Errorf("invalid gh subcommand pattern %q: %w", subcommandPattern, err)
}
cp.subcommandRE = subcommandRE

for group, sg := range data.SubcommandGroups {
readPerms := make([]PermissionScope, len(sg.ReadPermissions))
Expand Down Expand Up @@ -138,7 +143,7 @@ func init() {
for _, ap := range data.APIPathPatterns {
re, err := regexp.Compile(ap.Pattern)
if err != nil {
panic(fmt.Sprintf("invalid gh API path pattern %q in gh_cli_permissions.json: %v", ap.Pattern, err))
return compiledGHCLIPermissions{}, fmt.Errorf("invalid gh API path pattern %q in gh_cli_permissions.json: %w", ap.Pattern, err)
}
perms := make([]PermissionScope, len(ap.Permissions))
for i, p := range ap.Permissions {
Expand All @@ -155,9 +160,9 @@ func init() {
})
}

ghCLIPermissions = cp
ghCLIPermissionsLog.Printf("Loaded gh CLI permissions: version=%s, subcommand_groups=%d, api_path_patterns=%d", data.Version, len(data.SubcommandGroups), len(data.APIPathPatterns))
}
return cp, nil
})

// ghAPICmdRE matches `gh api` at a command boundary, capturing the rest of the line.
var ghAPICmdRE = regexp.MustCompile(`(?m)(?:^|[\s|;])gh\s+api\s+(.+)`)
Expand Down Expand Up @@ -275,9 +280,13 @@ func splitShellTokens(s string) []string {
// Only read-level permissions are inferred here; write-level operations are
// intentionally not auto-escalated. Use detectWriteCommandsInShellScripts to
// surface write commands as validation errors.
func inferPermissionsFromShellScripts(scripts []string) map[PermissionScope]PermissionLevel {
func inferPermissionsFromShellScripts(scripts []string) (map[PermissionScope]PermissionLevel, error) {
ghCLIPermissionsLog.Printf("Inferring permissions from %d shell script(s)", len(scripts))
perms := make(map[PermissionScope]PermissionLevel)
ghCLIPermissions, err := getCompiledGHCLIPermissions()
if err != nil {
return nil, fmt.Errorf("load gh CLI permissions: %w", err)
}

addScopes := func(scopes []PermissionScope) {
for _, scope := range scopes {
Expand Down Expand Up @@ -337,14 +346,18 @@ func inferPermissionsFromShellScripts(scripts []string) map[PermissionScope]Perm
}

ghCLIPermissionsLog.Printf("Inferred %d permission scope(s) from shell scripts", len(perms))
return perms
return perms, nil
}

// detectWriteCommandsInShellScripts returns all write gh CLI commands found in the
// given scripts, formatted as "gh <group> <action>" (e.g. "gh pr create").
// The slice contains no duplicates and is sorted deterministically in discovery order.
func detectWriteCommandsInShellScripts(scripts []string) []string {
func detectWriteCommandsInShellScripts(scripts []string) ([]string, error) {
ghCLIPermissionsLog.Printf("Scanning %d shell script(s) for write gh CLI commands", len(scripts))
ghCLIPermissions, err := getCompiledGHCLIPermissions()
if err != nil {
return nil, fmt.Errorf("load gh CLI permissions: %w", err)
}
var found []string
seen := make(map[string]struct{})

Expand All @@ -367,7 +380,7 @@ func detectWriteCommandsInShellScripts(scripts []string) []string {
if len(found) > 0 {
ghCLIPermissionsLog.Printf("Detected %d write gh CLI command(s) in shell scripts", len(found))
}
return found
return found, nil
}

// extractRunScriptsFromSectionYAML parses a step-section YAML string (e.g. as stored in
Expand Down
Loading
Loading