diff --git a/cli/docs/flags.go b/cli/docs/flags.go index f3723c3f3..d18d3abdf 100644 --- a/cli/docs/flags.go +++ b/cli/docs/flags.go @@ -316,7 +316,7 @@ var flagsMap = map[string]components.Flag{ WorkingDirs: components.NewStringFlag(WorkingDirs, "A comma-separated(,) list of relative working directories, to determine the audit targets locations. If flag isn't provided, a recursive scan is triggered from the root directory of the project."), OutputDir: components.NewStringFlag(OutputDir, "Target directory to save partial results to.", components.SetHiddenStrFlag()), UploadRepoPath: components.NewStringFlag(UploadRepoPath, "Artifactory repository name or path to upload the cyclonedx file to. If no name or path are provided, a local generic repository will be created which will automatically be indexed by Xray.", components.WithStrDefaultValue("import-cdx-scan-results")), - SkipAutoInstall: components.NewBoolFlag(SkipAutoInstall, "Set to true to skip auto-install of dependencies in un-built modules. Currently supported for Yarn and NPM only.", components.SetHiddenBoolFlag()), + SkipAutoInstall: components.NewBoolFlag(SkipAutoInstall, "Set to true to skip auto-install of dependencies in un-built modules. 
Currently supported for Yarn, NPM, Pip, and Poetry.", components.SetHiddenBoolFlag()), AllowPartialResults: components.NewBoolFlag(AllowPartialResults, "Set to true to allow partial results and continuance of the scan in case of certain errors.", components.SetHiddenBoolFlag()), ExclusionsAudit: components.NewStringFlag( Exclusions, diff --git a/commands/curation/curationaudit.go b/commands/curation/curationaudit.go index e37ff7cee..dc674643e 100644 --- a/commands/curation/curationaudit.go +++ b/commands/curation/curationaudit.go @@ -108,6 +108,9 @@ var supportedTech = map[techutils.Technology]func(ca *CurationAuditCommand) (boo return ca.checkSupportByVersionOrEnv(techutils.Gem, MinArtiGradleGemSupport) }, techutils.Docker: func(ca *CurationAuditCommand) (bool, error) { return true, nil }, + techutils.Poetry: func(ca *CurationAuditCommand) (bool, error) { + return ca.checkSupportByVersionOrEnv(techutils.Poetry, MinArtiPassThroughSupport) + }, } func (ca *CurationAuditCommand) checkSupportByVersionOrEnv(tech techutils.Technology, minArtiVersion string) (bool, error) { @@ -447,6 +450,7 @@ func (ca *CurationAuditCommand) getBuildInfoParamsByTech() (technologies.BuildIn IgnoreConfigFile: ca.IgnoreConfigFile(), InsecureTls: ca.InsecureTls(), // Install params + SkipAutoInstall: ca.SkipAutoInstall(), InstallCommandName: ca.InstallCommandName(), Args: ca.Args(), InstallCommandArgs: ca.InstallCommandArgs(), @@ -1074,7 +1078,7 @@ func getUrlNameAndVersionByTech(tech techutils.Technology, node *xrayUtils.Graph return getGradleNameScopeAndVersion(node.Id, artiUrl, repo, node) case techutils.Gem: return getGemNameScopeAndVersion(node.Id, artiUrl, repo) - case techutils.Pip: + case techutils.Pip, techutils.Poetry: downloadUrls, name, version = getPythonNameVersion(node.Id, downloadUrlsMap) return case techutils.Go: @@ -1114,7 +1118,7 @@ func getPythonNameVersion(id string, downloadUrlsMap map[string]string) (downloa if dl, ok := downloadUrlsMap[normalizedId]; ok { downloadUrls 
= []string{dl} } else { - log.Warn(fmt.Sprintf("couldn't find download url for node id %s in report.json", id)) + log.Warn(fmt.Sprintf("Couldn't find download URL for node ID %s", id)) } return } diff --git a/commands/curation/curationaudit_test.go b/commands/curation/curationaudit_test.go index c48caf096..f822173d4 100644 --- a/commands/curation/curationaudit_test.go +++ b/commands/curation/curationaudit_test.go @@ -1693,3 +1693,128 @@ func TestFetchNodesStatusConcurrentMapWrite(t *testing.T) { }) assert.Equal(t, numNodes, count, "expected all %d packages to be recorded as blocked", numNodes) } +// ============================================================================= +// Tests for Poetry support added to curationaudit.go. +// Covers the new dispatcher case (Pip, Poetry -> getPythonNameVersion) and the +// supportedTech registration. +// ============================================================================= + +func Test_getPythonNameVersion(t *testing.T) { + const exampleUrl = "http://test.jfrog.io/artifactory/api/pypi/pypi-remote/packages/aa/bb/flask-2.0.0-py3-none-any.whl" + + tests := []struct { + name string + id string + downloadUrlsMap map[string]string + wantDownloadUrls []string + wantName string + wantVersion string + }{ + { + name: "pip id with matching download url", + id: "pypi://flask:2.0.0", + downloadUrlsMap: map[string]string{"pypi://flask:2.0.0": exampleUrl}, + wantDownloadUrls: []string{exampleUrl}, + wantName: "flask", + wantVersion: "2.0.0", + }, + { + name: "poetry id with matching download url (same pypi:// prefix)", + id: "pypi://click:8.0.1", + downloadUrlsMap: map[string]string{"pypi://click:8.0.1": exampleUrl}, + wantDownloadUrls: []string{exampleUrl}, + wantName: "click", + wantVersion: "8.0.1", + }, + { + name: "id present in map but no entry returns name+version only", + id: "pypi://requests:2.31.0", + downloadUrlsMap: map[string]string{"pypi://other:1.0.0": exampleUrl}, + wantDownloadUrls: nil, + wantName: "requests", + 
wantVersion: "2.31.0", + }, + { + name: "nil downloadUrlsMap returns name+version only", + id: "pypi://requests:2.31.0", + downloadUrlsMap: nil, + wantDownloadUrls: nil, + wantName: "requests", + wantVersion: "2.31.0", + }, + { + name: "malformed id (no version separator) returns empty", + id: "pypi://malformed", + downloadUrlsMap: nil, + wantDownloadUrls: nil, + wantName: "", + wantVersion: "", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotDownloadUrls, gotName, gotVersion := getPythonNameVersion(tt.id, tt.downloadUrlsMap) + assert.Equal(t, tt.wantDownloadUrls, gotDownloadUrls, "downloadUrls mismatch") + assert.Equal(t, tt.wantName, gotName, "name mismatch") + assert.Equal(t, tt.wantVersion, gotVersion, "version mismatch") + }) + } +} + +// TestGetBlockedPackageDetails_403UnparsableBodyReturnsError verifies that +// getBlockedPackageDetails returns an error (and no PackageStatus) when a 403 +// response body cannot be resolved to a known curation block reason: +// (1) the body is not valid JSON (e.g. an HTML error page), or +// (2) the body is valid JSON but the Errors array is empty. +func TestGetBlockedPackageDetails_403UnparsableBodyReturnsError(t *testing.T) { + tests := []struct { + name string + respBody string + expectedErrMsg string + }{ + { + name: "non-JSON body (HTML error page)", + respBody: "

403 Forbidden

", + expectedErrMsg: "invalid character", + }, + { + name: "JSON body with empty errors list", + respBody: `{"errors":[]}`, + expectedErrMsg: "received 403 for unknown reason", + }, + } + + const ( + pkgName = "telnyx" + pkgVersion = "4.87.1" + ) + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + serverMock, _, rtManager := coreCommonTests.CreateRtRestsMockServer(t, func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusForbidden) + _, _ = w.Write([]byte(tt.respBody)) + }) + defer serverMock.Close() + + rtAuth := rtManager.GetConfig().GetServiceDetails() + httpClientDetails := rtAuth.CreateHttpClientDetails() + analyzer := treeAnalyzer{ + rtManager: rtManager, + rtAuth: rtAuth, + httpClientDetails: httpClientDetails, + extractPoliciesRegex: regexp.MustCompile(extractPoliciesRegexTemplate), + url: rtAuth.GetUrl(), + repo: "pypi-remote", + tech: techutils.Poetry, + } + packageUrl := fmt.Sprintf("%sapi/pypi/pypi-remote/packages/%s-%s.tar.gz", rtAuth.GetUrl(), pkgName, pkgVersion) + + got, err := analyzer.getBlockedPackageDetails(packageUrl, pkgName, pkgVersion) + + require.Error(t, err, "unparseable 403 body must surface as an error") + assert.Nil(t, got, "no PackageStatus should be returned when the block reason cannot be determined") + assert.Contains(t, err.Error(), tt.expectedErrMsg) + }) + } +} diff --git a/go.mod b/go.mod index 10d3b0c80..144ce0f89 100644 --- a/go.mod +++ b/go.mod @@ -161,4 +161,4 @@ replace github.com/CycloneDX/cyclonedx-go => github.com/CycloneDX/cyclonedx-go v // replace github.com/jfrog/build-info-go => github.com/jfrog/build-info-go dev -// replace github.com/jfrog/froggit-go => github.com/jfrog/froggit-go master +// replace github.com/jfrog/froggit-go => github.com/jfrog/froggit-go master \ No newline at end of file diff --git a/sca/bom/buildinfo/technologies/common_test.go b/sca/bom/buildinfo/technologies/common_test.go index b9241384e..c69f8021d 100644 --- 
a/sca/bom/buildinfo/technologies/common_test.go +++ b/sca/bom/buildinfo/technologies/common_test.go @@ -142,6 +142,7 @@ func TestSuspectCurationBlockedError(t *testing.T) { mvnOutput2 := "status code: 500, reason phrase: Server Error (500)" pipOutput := "because of HTTP error 403 Client Error: Forbidden for url" goOutput := "Failed running Go command: 403 Forbidden" + poetryOutput := "because of HTTP error 403 Client Error: Forbidden for url" tests := []struct { name string @@ -190,6 +191,19 @@ func TestSuspectCurationBlockedError(t *testing.T) { output: goOutput, expect: fmt.Sprintf(CurationErrorMsgToUserTemplate, techutils.Go), }, + { + name: "poetry 403 error (pass-through disabled)", + isCurationCmd: true, + tech: techutils.Poetry, + output: poetryOutput, + expect: fmt.Sprintf(CurationErrorMsgToUserTemplate, techutils.Poetry), + }, + { + name: "poetry not pass through error", + isCurationCmd: true, + tech: techutils.Poetry, + output: "http error 401", + }, { name: "not a supported tech", isCurationCmd: true, diff --git a/sca/bom/buildinfo/technologies/python/python.go b/sca/bom/buildinfo/technologies/python/python.go index 46077c57b..88e30f620 100644 --- a/sca/bom/buildinfo/technologies/python/python.go +++ b/sca/bom/buildinfo/technologies/python/python.go @@ -5,36 +5,72 @@ import ( "errors" "fmt" + "net/http" + "net/url" + "github.com/jfrog/gofrog/version" biutils "github.com/jfrog/build-info-go/utils" "github.com/jfrog/build-info-go/utils/pythonutils" "github.com/jfrog/gofrog/datastructures" artifactoryutils "github.com/jfrog/jfrog-cli-artifactory/artifactory/commands/python" + rtUtils "github.com/jfrog/jfrog-cli-core/v2/artifactory/utils" "github.com/jfrog/jfrog-cli-core/v2/utils/config" "github.com/jfrog/jfrog-cli-core/v2/utils/coreutils" "github.com/jfrog/jfrog-cli-security/sca/bom/buildinfo/technologies" "github.com/jfrog/jfrog-cli-security/utils" "github.com/jfrog/jfrog-cli-security/utils/techutils" + "github.com/jfrog/jfrog-client-go/artifactory" 
"github.com/jfrog/jfrog-client-go/utils/errorutils" "github.com/jfrog/jfrog-client-go/utils/io/fileutils" + "github.com/jfrog/jfrog-client-go/utils/io/httputils" "github.com/jfrog/jfrog-client-go/utils/log" clientutils "github.com/jfrog/jfrog-client-go/xray/services/utils" "os" "os/exec" + "path" "path/filepath" + "regexp" "runtime" + "strconv" "strings" + + "github.com/spf13/viper" ) const ( PythonPackageTypeIdentifier = "pypi://" pythonReportFile = "report.json" + poetryLockFile = "poetry.lock" + + CurationPipMinimumVersion = "23.0.0" + PoetryNoInteractionFlag = "--no-interaction" + pyprojectToml = "pyproject.toml" + CurationPoetryMinimumVersion = "1.2.0" +) - CurationPipMinimumVersion = "23.0.0" +var ( + poetryLockFileEntry = regexp.MustCompile(`\{[^}]*\bfile\s*=\s*"([^"]+)"`) + simpleIndexHrefEntry = regexp.MustCompile(`]*href\s*=\s*"([^"]+)"`) + // poetryVersionRegex matches the canonical "Poetry (version X.Y.Z)" line + // emitted by `poetry --version`. Older Poetry releases (e.g. 1.2.x on macOS + // with a legacy ~/Library/Application Support/pypoetry config dir) prepend + // deprecation notices on stdout before this line, so we scan the full + // output rather than assuming a single-line response. + poetryVersionRegex = regexp.MustCompile(`Poetry \(?version\s+([^)\s]+)\)?`) ) +// parsePoetryVersion extracts the semantic version (e.g. "1.2.2") from the +// raw stdout of `poetry --version`. Returns "" if no version line is found. 
+func parsePoetryVersion(out string) string { + m := poetryVersionRegex.FindStringSubmatch(out) + if len(m) < 2 { + return "" + } + return strings.TrimSpace(m[1]) +} + func BuildDependencyTree(params technologies.BuildInfoBomGeneratorParams, technology techutils.Technology) (dependencyTree []*clientutils.GraphNode, uniqueDeps []string, downloadUrls map[string]string, err error) { rootDetected, dependenciesGraph, directDependenciesList, pipUrls, errGetTree := getDependencies(params, technology) if errGetTree != nil { @@ -85,7 +121,7 @@ func getRootNodes(directDependencies []*clientutils.GraphNode, rootDetected bool return } -func getDependencies(params technologies.BuildInfoBomGeneratorParams, technology techutils.Technology) (rootDetected bool, dependenciesGraph map[string][]string, directDependencies []string, pipUrls map[string]string, err error) { +func getDependencies(params technologies.BuildInfoBomGeneratorParams, technology techutils.Technology) (rootDetected bool, dependenciesGraph map[string][]string, directDependencies []string, downloadUrls map[string]string, err error) { wd, err := os.Getwd() if errorutils.CheckError(err) != nil { return @@ -96,6 +132,7 @@ func getDependencies(params technologies.BuildInfoBomGeneratorParams, technology if err != nil { return } + log.Debug(fmt.Sprintf("Python (%s): created temp working dir at %s", technology, tempDirPath)) err = os.Chdir(tempDirPath) if errorutils.CheckError(err) != nil { @@ -140,13 +177,30 @@ func getDependencies(params technologies.BuildInfoBomGeneratorParams, technology technologies.LogExecutableVersion("python") technologies.LogExecutableVersion(string(pythonTool)) } + if technology == techutils.Poetry { + log.Debug(fmt.Sprintf("Poetry: dependency tree built — %d nodes in graph, %d direct dependencies", len(dependenciesGraph), len(directDependencies))) + graphKeyByCanonicalName := make(map[string]string, len(dependenciesGraph)) + for k := range dependenciesGraph { + if name, _, ok := strings.Cut(k, 
":"); ok { + graphKeyByCanonicalName[NormalizePypiName(name)] = k + } + } + for i, d := range directDependencies { + name, _, _ := strings.Cut(d, ":") + if key, ok := graphKeyByCanonicalName[NormalizePypiName(name)]; ok { + directDependencies[i] = key + } + } + } if !params.IsCurationCmd { return } - pipUrls, errProcessed := processPipDownloadsUrlsFromReportFile() - if errProcessed != nil { - err = errProcessed - + switch technology { + case techutils.Pip: + downloadUrls, err = processPipDownloadsUrlsFromReportFile() + case techutils.Poetry: + downloadUrls, err = buildPoetryDownloadUrlsMap(params.ServerDetails, params.DependenciesRepository) + log.Debug(fmt.Sprintf("Poetry: curation download-URL map built — %d packages resolved", len(downloadUrls))) } return } @@ -204,6 +258,242 @@ type pypiMetaData struct { Version string `json:"version"` } +type poetryLockPackage struct { + Name string + Version string + Files []string +} + +func buildPoetryDownloadUrlsMap(serverDetails *config.ServerDetails, repository string) (map[string]string, error) { + if serverDetails == nil || serverDetails.GetArtifactoryUrl() == "" { + return nil, errorutils.CheckErrorf("server details with Artifactory URL are required for poetry curation") + } + if repository == "" { + return nil, errorutils.CheckErrorf("a poetry repository must be configured (run 'jf poetry-config') for poetry curation") + } + packages, err := readPoetryLockIfExists() + if err != nil { + return nil, err + } + log.Debug(fmt.Sprintf("Poetry: parsed %d package entries from poetry.lock", len(packages))) + rtAuth, err := serverDetails.CreateArtAuthConfig() + if err != nil { + return nil, err + } + rtManager, err := rtUtils.CreateServiceManager(serverDetails, 2, 0, false) + if err != nil { + return nil, err + } + httpClientDetails := rtAuth.CreateHttpClientDetails() + artiUrl := strings.TrimSuffix(serverDetails.GetArtifactoryUrl(), "/") + urls := map[string]string{} + skipped := 0 + for _, pkg := range packages { + if 
pkg.Name == "" || pkg.Version == "" || len(pkg.Files) == 0 { + skipped++ + continue + } + downloadUrl, lookupErr := buildPoetryDownloadUrl(rtManager, &httpClientDetails, artiUrl, repository, pkg) + if lookupErr != nil { + log.Debug(fmt.Sprintf("Poetry: could not resolve download URL for %s:%s: %v", pkg.Name, pkg.Version, lookupErr)) + continue + } + normalizedName := strings.ReplaceAll(strings.ToLower(strings.TrimSpace(pkg.Name)), "-", "_") + compId := PythonPackageTypeIdentifier + normalizedName + ":" + pkg.Version + urls[compId] = downloadUrl + } + log.Debug(fmt.Sprintf("Poetry: resolved %d download URLs (skipped %d entries with no files)", len(urls), skipped)) + return urls, nil +} + +// buildPoetryDownloadUrl is the Poetry equivalent of npm's buildNpmDownloadUrl: given a +// package, it returns the absolute Artifactory download URL that curation will HEAD against. +// It does so by fetching the package's simple-index HTML and matching one of the filenames +// recorded in poetry.lock against the listed s. 
+func buildPoetryDownloadUrl(rtManager artifactory.ArtifactoryServicesManager, clientDetails *httputils.HttpClientDetails, artiUrl, repository string, pkg poetryLockPackage) (string, error) { + normalized := NormalizePypiName(pkg.Name) + simpleIndexUrl := fmt.Sprintf("%s/api/pypi/%s/simple/%s/", artiUrl, repository, normalized) + log.Debug(fmt.Sprintf("Poetry: GET simple-index %s (matching against %d filenames)", simpleIndexUrl, len(pkg.Files))) + resp, body, _, err := rtManager.Client().SendGet(simpleIndexUrl, true, clientDetails) + if err != nil { + return "", err + } + if resp == nil || resp.StatusCode != http.StatusOK { + status := 0 + if resp != nil { + status = resp.StatusCode + } + return "", fmt.Errorf("simple-index GET returned status %d for %s", status, simpleIndexUrl) + } + + href := pickPoetryHrefByFilename(body, pkg.Files) + if href == "" { + return "", fmt.Errorf("no matching href found in simple index for any of %v", pkg.Files) + } + base, err := url.Parse(simpleIndexUrl) + if err != nil { + return "", err + } + target, err := url.Parse(href) + if err != nil { + return "", err + } + absolute := base.ResolveReference(target).String() + log.Debug(fmt.Sprintf("Poetry: resolved %s:%s -> %s", pkg.Name, pkg.Version, absolute)) + return absolute, nil +} + +// pickPoetryHrefByFilename scans the simple-index body for an whose filename +// (after stripping the optional "#sha256=..." fragment) matches one of the wanted filenames. +// Returns "" when no href matches. Mirrors the focused-helper style of npm's appendUniqueChild. 
+func pickPoetryHrefByFilename(body []byte, wantedFiles []string) string { + wanted := make(map[string]struct{}, len(wantedFiles)) + for _, f := range wantedFiles { + wanted[f] = struct{}{} + } + hrefMatches := simpleIndexHrefEntry.FindAllStringSubmatch(string(body), -1) + for _, m := range hrefMatches { + candidate, _, _ := strings.Cut(m[1], "#") + if _, ok := wanted[path.Base(candidate)]; ok { + return candidate + } + } + return "" +} + +func NormalizePypiName(name string) string { + name = strings.ToLower(name) + var b strings.Builder + prevSep := false + for _, r := range name { + if r == '-' || r == '_' || r == '.' { + if !prevSep { + b.WriteByte('-') + prevSep = true + } + continue + } + b.WriteRune(r) + prevSep = false + } + return b.String() +} + +func readPoetryLockIfExists() ([]poetryLockPackage, error) { + exists, err := fileutils.IsFileExists(poetryLockFile, false) + if err != nil { + return nil, errorutils.CheckError(err) + } + if !exists { + return nil, errorutils.CheckErrorf("process failed, %s wasn't found, can't process poetry curation command", poetryLockFile) + } + content, err := os.ReadFile(poetryLockFile) + if err != nil { + return nil, errorutils.CheckError(err) + } + log.Debug(fmt.Sprintf("Poetry: reading %s (%d bytes)", poetryLockFile, len(content))) + return parsePoetryLockPackages(content), nil +} + +func parsePoetryLockPackages(content []byte) []poetryLockPackage { + var packages []poetryLockPackage + var current *poetryLockPackage + nameToIdx := map[string]int{} + inMetadataFiles := false + currentMetaPkg := "" + lockVersion := "" + + flush := func() { + if current != nil { + key := strings.ToLower(current.Name) + if _, dup := nameToIdx[key]; dup { + log.Warn(fmt.Sprintf("Poetry lock: duplicate package name %q — keeping first entry, skipping index update", current.Name)) + } else { + nameToIdx[key] = len(packages) + } + packages = append(packages, *current) + current = nil + } + } + + for _, raw := range strings.Split(string(content), 
"\n") { + line := strings.TrimSpace(raw) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + + if line == "[[package]]" { + flush() + inMetadataFiles = false + current = &poetryLockPackage{} + continue + } + if lockVersion == "" && strings.HasPrefix(line, "lock-version") { + if v, ok := parsePoetryScalar(line, "lock-version"); ok { + lockVersion = v + } + } + if strings.HasPrefix(line, "[") { + flush() + inMetadataFiles = line == "[metadata.files]" + currentMetaPkg = "" + continue + } + // lock v1.x: files live in [metadata.files] as pkgname = [{file = "..."},] + if inMetadataFiles { + if strings.Contains(line, "= [") { + raw := strings.TrimSpace(strings.SplitN(line, "=", 2)[0]) + currentMetaPkg = strings.ToLower(strings.Trim(raw, `"`)) + } else if currentMetaPkg != "" { + for _, m := range poetryLockFileEntry.FindAllStringSubmatch(line, -1) { + if idx, ok := nameToIdx[currentMetaPkg]; ok { + packages[idx].Files = append(packages[idx].Files, m[1]) + } + } + } + continue + } + if current == nil { + continue + } + if current.Name == "" && strings.HasPrefix(line, "name") { + if v, ok := parsePoetryScalar(line, "name"); ok { + current.Name = v + continue + } + } + if current.Version == "" && strings.HasPrefix(line, "version") { + if v, ok := parsePoetryScalar(line, "version"); ok { + current.Version = v + continue + } + } + for _, m := range poetryLockFileEntry.FindAllStringSubmatch(line, -1) { + current.Files = append(current.Files, m[1]) + } + } + flush() + log.Debug(fmt.Sprintf("Poetry lock: done — %d packages parsed, lock version: %s", len(packages), lockVersion)) + return packages +} + +func parsePoetryScalar(line, key string) (string, bool) { + rest := strings.TrimSpace(strings.TrimPrefix(line, key)) + if !strings.HasPrefix(rest, "=") { + return "", false + } + rest = strings.TrimSpace(strings.TrimPrefix(rest, "=")) + if !strings.HasPrefix(rest, `"`) { + return "", false + } + rest = rest[1:] + end := strings.IndexByte(rest, '"') + if end < 0 { + 
return "", false + } + return rest[:end], true +} + func runPythonInstall(params technologies.BuildInfoBomGeneratorParams, tool pythonutils.PythonTool) (rootDetected bool, restoreEnv func() error, err error) { switch tool { case pythonutils.Pip: @@ -220,23 +510,245 @@ func installPoetryDeps(params technologies.BuildInfoBomGeneratorParams) (rootDet restoreEnv = func() error { return nil } + technologies.LogExecutableVersion("poetry") + + var poetryMajor int + if params.IsCurationCmd { + if poetryMajor, err = validateMinimumPoetryVersion(CurationPoetryMinimumVersion); err != nil { + return false, restoreEnv, err + } + } + // jf ca: check lock staleness BEFORE changing the source URL. + // Poetry 1.x stores the source URL in poetry.lock — swapping the URL first causes a + // false stale result even when no dependencies changed. + // lockNeedsGenerate = true → no lock file, generate fresh + // lockIsStale = true → lock exists but is out of sync with pyproject.toml + lockNeedsGenerate, lockIsStale := false, false + var lockCheckErr error + if params.IsCurationCmd { + lockExists, existErr := fileutils.IsFileExists(poetryLockFile, false) + if existErr != nil { + return false, restoreEnv, existErr + } + log.Debug(fmt.Sprintf("Poetry: poetry.lock exists in temp dir: %v", lockExists)) + if !lockExists { + lockNeedsGenerate = true + } else { + // `poetry check --lock` exits 0 when lock matches pyproject.toml (Poetry 1.8+/2.x). + // Older versions expose the same check via `poetry lock --check`. 
+ _, lockCheckErr = executeCommand("poetry", "check", "--lock") + if lockCheckErr != nil && strings.Contains(lockCheckErr.Error(), "does not exist") { + log.Debug("Poetry: 'poetry check --lock' not supported, falling back to 'poetry lock --check'") + _, lockCheckErr = executeCommand("poetry", "lock", "--check") + } + lockIsStale = lockCheckErr != nil + log.Debug(fmt.Sprintf("Poetry: stale check result: stale=%v", lockIsStale)) + } + } + if params.DependenciesRepository != "" { - rtUrl, username, password, err := artifactoryutils.GetPypiRepoUrlWithCredentials(params.ServerDetails, params.DependenciesRepository, false) + rtUrl, username, password, err := artifactoryutils.GetPypiRepoUrlWithCredentials(params.ServerDetails, params.DependenciesRepository, params.IsCurationCmd) if err != nil { return false, restoreEnv, err } - if password != "" { - err = artifactoryutils.ConfigPoetryRepo(rtUrl.Scheme+"://"+rtUrl.Host+rtUrl.Path, username, password, params.DependenciesRepository) - if err != nil { + baseUrl := rtUrl.Scheme + "://" + rtUrl.Host + rtUrl.Path + if params.IsCurationCmd { + // Overwrite [[tool.poetry.source]] in the temp pyproject.toml with the curation + // pass-through URL. + if err = setCurationSourceInPyproject(params.DependenciesRepository, baseUrl, poetryMajor); err != nil { return false, restoreEnv, err } } + if password != "" { + if params.IsCurationCmd { + if _, err = executeCommand("poetry", "config", "--local", "repositories."+params.DependenciesRepository, baseUrl); err != nil { + return false, restoreEnv, err + } + // poetry config --local http-basic. 
+ if _, err = executeCommand("poetry", "config", "--local", "http-basic."+params.DependenciesRepository, username, password); err != nil { + return false, restoreEnv, err + } + } else { + if err = artifactoryutils.ConfigPoetryRepo(baseUrl, username, password, params.DependenciesRepository); err != nil { + return false, restoreEnv, err + } + } + } + } + + if params.IsCurationCmd { + switch { + case lockNeedsGenerate: + // No lock file — generate fresh. + if _, lockErr := executeCommand("poetry", "lock", PoetryNoInteractionFlag); lockErr != nil { + return false, restoreEnv, wrapPoetryCurationErr(params.IsCurationCmd, lockErr) + } + log.Debug("Poetry: lock generated") + case lockIsStale: + // Lock exists but is out of sync — add new/changed deps without bumping locked versions. + // `--no-update` is Poetry 1.x; Poetry 2.x removed the flag (its default is no-update). + _, lockErr := executeCommand("poetry", "lock", "--no-update", PoetryNoInteractionFlag) + if lockErr != nil && strings.Contains(lockErr.Error(), "does not exist") { + log.Debug("Poetry: '--no-update' not supported (Poetry 2.x), running 'poetry lock --no-interaction'") + _, lockErr = executeCommand("poetry", "lock", PoetryNoInteractionFlag) + } + if lockErr != nil { + return false, restoreEnv, wrapPoetryCurationErr(params.IsCurationCmd, errors.Join(lockCheckErr, lockErr)) + } + log.Debug("Poetry: lock updated") + default: + log.Debug("Poetry: poetry.lock is up to date — skipping lock") + } + } else { + _, err = executeCommand("poetry", "install") } - // Run 'poetry install' - _, err = executeCommand("poetry", "install") return false, restoreEnv, err } +func wrapPoetryCurationErr(isCurationCmd bool, lockErr error) error { + if lockErr == nil { + return nil + } + if isCurationCmd && isCvsVersionFilteredOutput(lockErr.Error()) { + pins := parseCvsFailedPackages(lockErr.Error()) + lockErr = errors.Join(lockErr, errors.New(formatCvsBlockedRequirementsMessage(pins))) + } + if msgToUser := 
technologies.GetMsgToUserForCurationBlock(isCurationCmd, techutils.Poetry, lockErr.Error()); msgToUser != "" { + return errors.Join(lockErr, errors.New(msgToUser)) + } + return lockErr +} + +// setCurationSourceInPyproject rewrites [[tool.poetry.source]] in the temp +// pyproject.toml so that every dependency resolves through the curation +// pass-through endpoint. The source NAME(s) from the user's original +// pyproject.toml are preserved; only the URL is overwritten. +// +// Why preserve the name: poetry.lock records every package against its +// source NAME (not URL). If we renamed the source here, an existing lock +// would suddenly reference a source that no longer exists, Poetry would +// abort the relock with "Repository '' does not exist". +// Preserving the name keeps the lock valid and lets the normal post-lock +// pipeline (with HEAD probes against the wheel URLs) run as designed. +// +// If pyproject.toml has no [[tool.poetry.source]] at all, we fall back to +// adding a single entry named after the Artifactory repository so Poetry +// has somewhere to resolve from. 
+func setCurationSourceInPyproject(repoName, repoUrl string, majorVersion int) error { + currentDir, err := os.Getwd() + if err != nil { + return errorutils.CheckError(err) + } + absPath := filepath.Join(currentDir, pyprojectToml) + v := viper.New() + v.SetConfigType("toml") + v.SetConfigFile(absPath) + if err = v.ReadInConfig(); err != nil { + return errorutils.CheckErrorf("failed to read %s: %s", pyprojectToml, err) + } + + names := extractPoetrySourceNames(v.Get("tool.poetry.source")) + if len(names) == 0 { + names = []string{repoName} + } + raw, err := os.ReadFile(absPath) + if err != nil { + return errorutils.CheckError(err) + } + var buf strings.Builder + buf.WriteString(strings.TrimRight(stripPoetrySourceBlocks(string(raw)), "\n")) + setDefault := majorVersion < 2 + for i, n := range names { + buf.WriteString("\n\n[[tool.poetry.source]]\n") + buf.WriteString(fmt.Sprintf("name = %q\n", n)) + buf.WriteString(fmt.Sprintf("url = %q\n", repoUrl)) + if setDefault && i == 0 { + buf.WriteString("default = true\n") + } + log.Info(fmt.Sprintf("Configured tool.poetry.source name:%q url:%q for curation", n, repoUrl)) + } + if err = os.WriteFile(absPath, []byte(buf.String()), 0600); err != nil { + return errorutils.CheckErrorf("failed to write %s: %s", pyprojectToml, err) + } + return nil +} + +func stripPoetrySourceBlocks(content string) string { + lines := strings.Split(content, "\n") + out := make([]string, 0, len(lines)) + inSourceBlock := false + for _, line := range lines { + trimmed := strings.TrimSpace(line) + if strings.HasPrefix(trimmed, "[[tool.poetry.source]]") { + inSourceBlock = true + continue + } + if inSourceBlock && strings.HasPrefix(trimmed, "[") { + inSourceBlock = false + } + if !inSourceBlock { + out = append(out, line) + } + } + return strings.Join(out, "\n") +} + +// extractPoetrySourceNames returns the canonical list of source names from +// viper's view of `[[tool.poetry.source]]`. 
Entries without a name, or with +// duplicate names, are skipped. Returns nil when the key is missing or has +// an unexpected shape so callers can fall back to a default. +func extractPoetrySourceNames(v interface{}) []string { + arr, ok := v.([]interface{}) + if !ok { + return nil + } + names := make([]string, 0, len(arr)) + seen := map[string]struct{}{} + for _, e := range arr { + m, ok := e.(map[string]interface{}) + if !ok { + continue + } + n, _ := m["name"].(string) + n = strings.TrimSpace(n) + if n == "" { + continue + } + if _, dup := seen[n]; dup { + continue + } + seen[n] = struct{}{} + names = append(names, n) + } + return names +} + +func validateMinimumPoetryVersion(minVersion string) (int, error) { + out, err := executeCommand("poetry", "--version") + if err != nil { + log.Debug(fmt.Sprintf("Poetry is not installed or not on PATH: %s", err.Error())) + return 0, errorutils.CheckErrorf("JFrog CLI poetry curation requires Poetry %s or higher to be installed.", minVersion) + } + v := parsePoetryVersion(out) + if v == "" { + log.Debug(fmt.Sprintf("Could not parse Poetry version from output: %q", out)) + return 0, errorutils.CheckErrorf("JFrog CLI poetry curation requires Poetry %s or higher to be installed.", minVersion) + } + log.Debug(fmt.Sprintf("Poetry version: %s", v)) + if !version.NewVersion(v).AtLeast(minVersion) { + return 0, errorutils.CheckErrorf("JFrog CLI poetry curation requires Poetry %s or higher. 
The current version is: %s", minVersion, v) + } + dot := strings.IndexByte(v, '.') + if dot < 0 { + dot = len(v) + } + major, parseErr := strconv.Atoi(v[:dot]) + if parseErr != nil { + return 0, nil + } + return major, nil +} + func installPipenvDeps(params technologies.BuildInfoBomGeneratorParams) (rootDetected bool, restoreEnv func() error, err error) { // Set virtualenv path to venv dir err = os.Setenv("WORKON_HOME", ".jfrog") diff --git a/sca/bom/buildinfo/technologies/python/python_cvs_fallback.go b/sca/bom/buildinfo/technologies/python/python_cvs_fallback.go index 34c21f245..d8d6c1835 100644 --- a/sca/bom/buildinfo/technologies/python/python_cvs_fallback.go +++ b/sca/bom/buildinfo/technologies/python/python_cvs_fallback.go @@ -19,20 +19,26 @@ var pipFailedPinnedReqRegex = regexp.MustCompile( `(?:No matching distribution found for|satisfies the requirement)\s+` + `([A-Za-z0-9][A-Za-z0-9._-]*)(?:\[[^\]]*\])?==([^\s(,;]+)`) -// parseCvsFailedPackages extracts the pinned packages that pip explicitly -// reported as unresolvable from pip's error output. This ensures only the -// packages that actually caused the failure are listed, not every pin in the -// requirements file. -func parseCvsFailedPackages(pipOutput string) []pinnedRequirement { +// poetryCvsBlockedReqRegex extracts a pinned `name (version)` from poetry's +// "X (Y) which doesn't match any versions" error lines. Both `name (X.Y.Z)` +// and `name (==X.Y.Z)` notations are accepted; range specifiers +// (e.g. `name (>=1.0,<2.0)`) are skipped because they represent transitive +// constraints, not the user's direct pin. 
// isCvsVersionFilteredOutput reports whether output contains any of the
// phrases that signal an unavailable requested version: the two pip wordings
// ("No matching distribution found", "Could not find a version that
// satisfies the requirement") or Poetry's "doesn't match any versions".
// Matching is a case-sensitive substring search.
func isCvsVersionFilteredOutput(output string) bool {
	markers := [...]string{
		"No matching distribution found",
		"Could not find a version that satisfies the requirement",
		"doesn't match any versions",
	}
	for _, marker := range markers {
		if strings.Contains(output, marker) {
			return true
		}
	}
	return false
}
(4.87.1) which doesn't match any versions, version solving failed.", + want: []pinnedRequirement{{Name: "telnyx", Version: "4.87.1"}}, + }, + { + name: "poetry: range specifier is not captured", + output: "Because sample-poetry-project depends on bar (>=1.0,<2.0) which doesn't match any versions, version solving failed.", + want: nil, + }, } for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { @@ -52,8 +62,9 @@ func TestParseCvsFailedPackages(t *testing.T) { func TestIsCvsVersionFilteredOutput(t *testing.T) { cases := map[string]bool{ - "ERROR: No matching distribution found for deepagents==0.5.5": true, - "ERROR: Could not find a version that satisfies the requirement langchain-core<2.0.0,>=1.3.2": true, + "ERROR: No matching distribution found for deepagents==0.5.5": true, + "ERROR: Could not find a version that satisfies the requirement langchain-core<2.0.0,>=1.3.2": true, + "Because sample-poetry-project depends on telnyx (4.87.1) which doesn't match any versions, version solving failed.": true, "ERROR: 403 Forbidden": false, } for output, want := range cases { diff --git a/sca/bom/buildinfo/technologies/python/python_test.go b/sca/bom/buildinfo/technologies/python/python_test.go index 1714d4f98..9a8846bd0 100644 --- a/sca/bom/buildinfo/technologies/python/python_test.go +++ b/sca/bom/buildinfo/technologies/python/python_test.go @@ -1,12 +1,14 @@ package python import ( + "net/http" "os" "path/filepath" "strings" "testing" "github.com/jfrog/build-info-go/utils/pythonutils" + coreCommonTests "github.com/jfrog/jfrog-cli-core/v2/common/tests" "github.com/jfrog/jfrog-cli-core/v2/utils/config" "github.com/jfrog/jfrog-cli-security/sca/bom/buildinfo/technologies" "github.com/jfrog/jfrog-client-go/utils/log" @@ -306,3 +308,540 @@ func TestGetPipInstallArgs(t *testing.T) { assert.Equal(t, []string{"-m", "pip", "install", ".", "--cache-dir", filepath.Join("test", "path"), "--ignore-installed", "--report", "report.json"}, getPipInstallArgs("", "", 
filepath.Join("test", "path"), "report.json")) } + +// ============================================================================= +// Unit tests for Poetry curation helpers. +// These tests do not require poetry, pip, or a real Artifactory — they exercise +// the pure helpers and the filesystem-only branches added for `jf ca --poetry`. +// ============================================================================= + +func TestNormalizePypiName(t *testing.T) { + cases := []struct { + in, want string + }{ + {"Flask", "flask"}, + {"PyYAML", "pyyaml"}, + {"zope.interface", "zope-interface"}, + {"jaraco_classes", "jaraco-classes"}, + {"foo___bar.baz", "foo-bar-baz"}, + {"foo--bar", "foo-bar"}, + {"already-normalized", "already-normalized"}, + {"", ""}, + } + for _, c := range cases { + t.Run(c.in, func(t *testing.T) { + assert.Equal(t, c.want, NormalizePypiName(c.in)) + }) + } +} + +func TestParsePoetryScalar(t *testing.T) { + cases := []struct { + name string + line string + key string + wantVal string + wantOk bool + }{ + {"basic key value", `name = "flask"`, "name", "flask", true}, + {"extra whitespace around equals", `name = "flask"`, "name", "flask", true}, + {"empty quoted value is ok", `name = ""`, "name", "", true}, + {"wrong key returns false", `version = "1.0"`, "name", "", false}, + {"unquoted value returns false", `name = flask`, "name", "", false}, + {"single quotes not supported", `name = 'flask'`, "name", "", false}, + {"missing closing quote returns false", `name = "flask`, "name", "", false}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + gotVal, gotOk := parsePoetryScalar(c.line, c.key) + assert.Equal(t, c.wantOk, gotOk, "ok mismatch") + assert.Equal(t, c.wantVal, gotVal, "value mismatch") + }) + } +} + +func TestPickPoetryHrefByFilename(t *testing.T) { + body := []byte(` +Flask-2.0.0.tar.gz +Flask-2.0.0-py3-none-any.whl +`) + + t.Run("returns href without fragment when filename matches", func(t *testing.T) { + got := 
pickPoetryHrefByFilename(body, []string{"Flask-2.0.0-py3-none-any.whl"}) + assert.Equal(t, "packages/cc/dd/Flask-2.0.0-py3-none-any.whl", got) + }) + + t.Run("returns empty when no filename matches", func(t *testing.T) { + got := pickPoetryHrefByFilename(body, []string{"unrelated.whl"}) + assert.Equal(t, "", got) + }) + + t.Run("returns empty for empty body", func(t *testing.T) { + got := pickPoetryHrefByFilename(nil, []string{"Flask-2.0.0.tar.gz"}) + assert.Equal(t, "", got) + }) + + t.Run("matches first href when multiple wanted files are present", func(t *testing.T) { + got := pickPoetryHrefByFilename(body, []string{ + "Flask-2.0.0.tar.gz", + "Flask-2.0.0-py3-none-any.whl", + }) + // Both match; pickPoetryHrefByFilename returns the first hit in body order. + assert.Equal(t, "packages/aa/bb/Flask-2.0.0.tar.gz", got) + }) +} + +func TestParsePoetryLockPackages(t *testing.T) { + t.Run("v2 inline files format", func(t *testing.T) { + fixture := []byte(`# generated by poetry +[[package]] +name = "flask" +version = "2.0.0" +description = "Web framework" +files = [ + {file = "Flask-2.0.0.tar.gz", hash = "sha256:abc"}, + {file = "Flask-2.0.0-py3-none-any.whl", hash = "sha256:def"}, +] + +[[package]] +name = "click" +version = "8.0.1" +files = [ + {file = "click-8.0.1-py3-none-any.whl", hash = "sha256:ghi"}, +] + +[metadata] +lock-version = "2.0" +`) + got := parsePoetryLockPackages(fixture) + require.Len(t, got, 2) + + assert.Equal(t, "flask", got[0].Name) + assert.Equal(t, "2.0.0", got[0].Version) + assert.ElementsMatch(t, []string{ + "Flask-2.0.0.tar.gz", + "Flask-2.0.0-py3-none-any.whl", + }, got[0].Files) + + assert.Equal(t, "click", got[1].Name) + assert.Equal(t, "8.0.1", got[1].Version) + assert.ElementsMatch(t, []string{"click-8.0.1-py3-none-any.whl"}, got[1].Files) + }) + + t.Run("v1 metadata.files format", func(t *testing.T) { + fixture := []byte(`[[package]] +name = "flask" +version = "2.0.0" + +[[package]] +name = "click" +version = "8.0.1" + +[metadata] 
+lock-version = "1.1" + +[metadata.files] +flask = [ + {file = "Flask-2.0.0.tar.gz", hash = "sha256:abc"}, +] +click = [ + {file = "click-8.0.1-py3-none-any.whl", hash = "sha256:ghi"}, +] +`) + got := parsePoetryLockPackages(fixture) + require.Len(t, got, 2) + assert.Equal(t, "flask", got[0].Name) + assert.ElementsMatch(t, []string{"Flask-2.0.0.tar.gz"}, got[0].Files) + assert.Equal(t, "click", got[1].Name) + assert.ElementsMatch(t, []string{"click-8.0.1-py3-none-any.whl"}, got[1].Files) + }) + + t.Run("v1 quoted dotted key in metadata.files", func(t *testing.T) { + fixture := []byte(`[[package]] +name = "zope.interface" +version = "5.0.0" + +[metadata] +lock-version = "1.1" + +[metadata.files] +"zope.interface" = [ + {file = "zope.interface-5.0.0.tar.gz", hash = "sha256:aaa"}, + {file = "zope.interface-5.0.0-cp39-cp39-linux_x86_64.whl", hash = "sha256:bbb"}, +] +`) + got := parsePoetryLockPackages(fixture) + require.Len(t, got, 1) + assert.Equal(t, "zope.interface", got[0].Name) + assert.ElementsMatch(t, []string{ + "zope.interface-5.0.0.tar.gz", + "zope.interface-5.0.0-cp39-cp39-linux_x86_64.whl", + }, got[0].Files, + "files for a dotted package with a quoted key in [metadata.files] must be collected") + }) + + t.Run("empty content returns empty slice", func(t *testing.T) { + got := parsePoetryLockPackages(nil) + assert.Empty(t, got) + }) + + t.Run("comments only returns empty slice", func(t *testing.T) { + got := parsePoetryLockPackages([]byte("# only a comment\n# another\n")) + assert.Empty(t, got) + }) +} + +func TestBuildPoetryDownloadUrlsMapInputValidation(t *testing.T) { + t.Run("nil server details returns error", func(t *testing.T) { + _, err := buildPoetryDownloadUrlsMap(nil, "poetry-repo") + require.Error(t, err) + assert.Contains(t, err.Error(), "server details") + }) + + t.Run("empty artifactory url returns error", func(t *testing.T) { + _, err := buildPoetryDownloadUrlsMap(&config.ServerDetails{}, "poetry-repo") + require.Error(t, err) + 
assert.Contains(t, err.Error(), "server details") + }) + + t.Run("empty repository returns error", func(t *testing.T) { + sd := &config.ServerDetails{ArtifactoryUrl: "http://example.com/artifactory/"} + _, err := buildPoetryDownloadUrlsMap(sd, "") + require.Error(t, err) + assert.Contains(t, err.Error(), "repository must be configured") + }) +} + +func TestReadPoetryLockIfExists(t *testing.T) { + t.Run("returns error when poetry.lock is missing", func(t *testing.T) { + t.Chdir(t.TempDir()) + _, err := readPoetryLockIfExists() + require.Error(t, err) + assert.Contains(t, err.Error(), poetryLockFile) + }) + + t.Run("parses lock content when present", func(t *testing.T) { + dir := t.TempDir() + lockContent := []byte(`[[package]] +name = "flask" +version = "2.0.0" +files = [ + {file = "Flask-2.0.0.tar.gz", hash = "sha256:abc"}, +] + +[metadata] +lock-version = "2.0" +`) + require.NoError(t, os.WriteFile(filepath.Join(dir, poetryLockFile), lockContent, 0600)) + t.Chdir(dir) + + got, err := readPoetryLockIfExists() + require.NoError(t, err) + require.Len(t, got, 1) + assert.Equal(t, "flask", got[0].Name) + assert.Equal(t, "2.0.0", got[0].Version) + assert.ElementsMatch(t, []string{"Flask-2.0.0.tar.gz"}, got[0].Files) + }) +} + +// TestSetCurationSourceInPyproject covers the three source-handling cases: +// +// 1. pyproject.toml has no [[tool.poetry.source]] → a single entry named +// after the Artifactory repo (`repoName`) is added. +// 2. pyproject.toml has exactly one [[tool.poetry.source]] with a name +// that differs from the Artifactory repo → the user's name is +// preserved and only the URL is rewritten. This is the regression +// guard for the bug where renaming the source forced Poetry to abort +// the relock with "Repository '' does not exist" and push +// every `jf ca` run with a pre-existing lock into the no-lockfile +// probe path. +// 3. 
pyproject.toml has multiple [[tool.poetry.source]] entries → every +// name is preserved and every URL is rewritten to the curation +// pass-through. +func TestSetCurationSourceInPyproject(t *testing.T) { + const ( + repoName = "my-curation-repo" + repoURL = "https://example.com/artifactory/api/curation/audit/my-curation-repo" + ) + + t.Run("no existing source — falls back to repoName", func(t *testing.T) { + dir := t.TempDir() + initial := []byte(`[tool.poetry] +name = "test-project" +version = "0.1.0" +description = "fixture" +`) + pyprojectPath := filepath.Join(dir, pyprojectToml) + require.NoError(t, os.WriteFile(pyprojectPath, initial, 0600)) + t.Chdir(dir) + + require.NoError(t, setCurationSourceInPyproject(repoName, repoURL, 0)) + + written, err := os.ReadFile(pyprojectPath) + require.NoError(t, err) + out := string(written) + assert.Contains(t, out, repoName, "fallback name must be written when pyproject has no existing source") + assert.Contains(t, out, repoURL) + assert.True(t, strings.Contains(out, "tool.poetry.source") || strings.Contains(out, "[tool.poetry]"), + "expected pyproject.toml to retain a tool.poetry section, got:\n%s", out) + }) + + t.Run("existing single source with different name — name preserved, url rewritten", func(t *testing.T) { + dir := t.TempDir() + initial := []byte(`[tool.poetry] +name = "test-project" +version = "0.1.0" + +[[tool.poetry.source]] +name = "poetry-test" +url = "https://example.com/artifactory/api/pypi/my-curation-repo/simple" +`) + pyprojectPath := filepath.Join(dir, pyprojectToml) + require.NoError(t, os.WriteFile(pyprojectPath, initial, 0600)) + t.Chdir(dir) + + require.NoError(t, setCurationSourceInPyproject(repoName, repoURL, 0)) + + written, err := os.ReadFile(pyprojectPath) + require.NoError(t, err) + out := string(written) + + assert.Contains(t, out, `name = "poetry-test"`, + "user's source name must be preserved so poetry.lock stays in sync; got:\n%s", out) + assert.Contains(t, out, repoURL, "URL must be 
rewritten to the curation pass-through") + assert.NotContains(t, out, `name = "`+repoName+`"`, + "the Artifactory repo name must NOT replace the user's source name when one already exists; got:\n%s", out) + }) + + t.Run("existing multi-source — all names preserved, all urls rewritten", func(t *testing.T) { + dir := t.TempDir() + initial := []byte(`[tool.poetry] +name = "test-project" +version = "0.1.0" + +[[tool.poetry.source]] +name = "primary-mirror" +url = "https://example.com/artifactory/api/pypi/my-curation-repo/simple" + +[[tool.poetry.source]] +name = "secondary-mirror" +url = "https://example.com/artifactory/api/pypi/other-repo/simple" +`) + pyprojectPath := filepath.Join(dir, pyprojectToml) + require.NoError(t, os.WriteFile(pyprojectPath, initial, 0600)) + t.Chdir(dir) + + require.NoError(t, setCurationSourceInPyproject(repoName, repoURL, 0)) + + written, err := os.ReadFile(pyprojectPath) + require.NoError(t, err) + out := string(written) + + assert.Contains(t, out, `name = "primary-mirror"`, "first source name must be preserved; got:\n%s", out) + assert.Contains(t, out, `name = "secondary-mirror"`, "second source name must be preserved; got:\n%s", out) + assert.Contains(t, out, repoURL, "URLs must be rewritten to the curation pass-through") + assert.NotContains(t, out, "/api/pypi/my-curation-repo/simple", + "original non-curation URL on first source must be replaced") + assert.NotContains(t, out, "/api/pypi/other-repo/simple", + "original non-curation URL on second source must be replaced") + }) + + t.Run("dotted dependency name is not corrupted", func(t *testing.T) { + dir := t.TempDir() + initial := []byte(`[tool.poetry] +name = "test-project" +version = "0.1.0" + +[tool.poetry.dependencies] +python = "^3.11" +"zope.interface" = "5.0.0" +`) + pyprojectPath := filepath.Join(dir, pyprojectToml) + require.NoError(t, os.WriteFile(pyprojectPath, initial, 0600)) + t.Chdir(dir) + + require.NoError(t, setCurationSourceInPyproject(repoName, repoURL, 1)) + + 
written, err := os.ReadFile(pyprojectPath) + require.NoError(t, err) + out := string(written) + + assert.Contains(t, out, `"zope.interface" = "5.0.0"`, + "quoted dotted dependency key must survive the pyproject.toml rewrite; got:\n%s", out) + assert.Contains(t, out, repoURL) + }) +} + +func TestExtractPoetrySourceNames(t *testing.T) { + t.Run("nil returns nil", func(t *testing.T) { + assert.Nil(t, extractPoetrySourceNames(nil)) + }) + t.Run("wrong type returns nil", func(t *testing.T) { + assert.Nil(t, extractPoetrySourceNames("not-an-array")) + assert.Nil(t, extractPoetrySourceNames(map[string]interface{}{"name": "x"})) + }) + t.Run("entries without name are skipped", func(t *testing.T) { + got := extractPoetrySourceNames([]interface{}{ + map[string]interface{}{"url": "https://x"}, + map[string]interface{}{"name": "named", "url": "https://y"}, + map[string]interface{}{"name": " ", "url": "https://z"}, + }) + assert.Equal(t, []string{"named"}, got) + }) + t.Run("duplicate names are deduped, order preserved", func(t *testing.T) { + got := extractPoetrySourceNames([]interface{}{ + map[string]interface{}{"name": "a", "url": "https://1"}, + map[string]interface{}{"name": "b", "url": "https://2"}, + map[string]interface{}{"name": "a", "url": "https://3"}, + }) + assert.Equal(t, []string{"a", "b"}, got) + }) +} + +// TestBuildPoetryDownloadUrl_HTTP exercises the simple-index lookup that +// resolves a poetry.lock package to an absolute Artifactory download URL. +// The function: +// - GETs /api/pypi/{repo}/simple/{normalized-name}/ +// - scans the body for an whose basename matches one of pkg.Files +// - returns the href resolved against the simple-index URL +// +// The three cases below cover the happy path, the upstream-error path, and +// the listing-without-match path. 
+func TestBuildPoetryDownloadUrl_HTTP(t *testing.T) { + const repo = "pypi-curation" + pkg := poetryLockPackage{ + Name: "telnyx", + Version: "4.87.1", + Files: []string{"telnyx-4.87.1.tar.gz", "telnyx-4.87.1-py3-none-any.whl"}, + } + + t.Run("200 with matching filename returns absolute URL", func(t *testing.T) { + server, _, rtManager := coreCommonTests.CreateRtRestsMockServer(t, func(w http.ResponseWriter, r *http.Request) { + if strings.HasSuffix(r.URL.Path, "/simple/telnyx/") { + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(` +telnyx-4.87.1.tar.gz +`)) + return + } + t.Fatalf("unexpected request to %s", r.URL.Path) + }) + defer server.Close() + httpDetails := rtManager.GetConfig().GetServiceDetails().CreateHttpClientDetails() + + got, err := buildPoetryDownloadUrl(rtManager, &httpDetails, server.URL, repo, pkg) + require.NoError(t, err) + assert.Contains(t, got, "/packages/aa/bb/telnyx-4.87.1.tar.gz", "resolved URL must include the matched file path") + assert.True(t, strings.HasPrefix(got, server.URL), "resolved URL must be absolute against the simple-index base, got %q", got) + assert.NotContains(t, got, "#", "fragment must be stripped from the returned URL") + }) + + t.Run("non-200 from simple-index surfaces status code", func(t *testing.T) { + server, _, rtManager := coreCommonTests.CreateRtRestsMockServer(t, func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotFound) + }) + defer server.Close() + httpDetails := rtManager.GetConfig().GetServiceDetails().CreateHttpClientDetails() + + _, err := buildPoetryDownloadUrl(rtManager, &httpDetails, server.URL, repo, pkg) + require.Error(t, err) + assert.Contains(t, err.Error(), "404") + assert.Contains(t, err.Error(), "simple-index") + }) + + t.Run("200 with no matching filename returns error", func(t *testing.T) { + server, _, rtManager := coreCommonTests.CreateRtRestsMockServer(t, func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(` 
+telnyx-1.0.0.tar.gz +`)) + }) + defer server.Close() + httpDetails := rtManager.GetConfig().GetServiceDetails().CreateHttpClientDetails() + + _, err := buildPoetryDownloadUrl(rtManager, &httpDetails, server.URL, repo, pkg) + require.Error(t, err) + assert.Contains(t, err.Error(), "no matching href") + }) + + t.Run("uses normalized name in simple-index URL", func(t *testing.T) { + // PEP 503: the URL segment must be the normalized name. A package + // declared as "Flask_Babel" in poetry.lock must hit /simple/flask-babel/. + var seenPath string + server, _, rtManager := coreCommonTests.CreateRtRestsMockServer(t, func(w http.ResponseWriter, r *http.Request) { + seenPath = r.URL.Path + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`Flask_Babel-1.0.tar.gz`)) + }) + defer server.Close() + httpDetails := rtManager.GetConfig().GetServiceDetails().CreateHttpClientDetails() + + quirky := poetryLockPackage{Name: "Flask_Babel", Version: "1.0", Files: []string{"Flask_Babel-1.0.tar.gz"}} + _, err := buildPoetryDownloadUrl(rtManager, &httpDetails, server.URL, repo, quirky) + require.NoError(t, err) + assert.Contains(t, seenPath, "/simple/flask-babel/", "must use PEP 503 normalized name in the simple-index URL, got %q", seenPath) + }) +} + +func TestParsePoetryVersion(t *testing.T) { + tests := []struct { + in string + want string + }{ + {"Poetry (version 1.8.3)", "1.8.3"}, + {"Poetry version 1.5.0", "1.5.0"}, + {"Poetry (version 2.0.0)", "2.0.0"}, + {"Poetry version 1.2.0", "1.2.0"}, + {"", ""}, + {"some unrelated output", ""}, + } + for _, tt := range tests { + t.Run(tt.in, func(t *testing.T) { + assert.Equal(t, tt.want, parsePoetryVersion(tt.in)) + }) + } +} + +func TestInstallPoetryDepsLockCheckErrorSurfacedOnRelockFailure(t *testing.T) { + fakeDir := t.TempDir() + fakePoetry := filepath.Join(fakeDir, "poetry") + script := `#!/bin/sh +case "$*" in + *"--version"*) echo "Poetry (version 1.8.0)"; exit 0 ;; + *"check"*"--lock"*) echo "Error: SyntaxError in pyproject.toml 
at line 12" >&2; exit 1 ;; + *"lock"*) echo "Error: cannot resolve dependencies" >&2; exit 1 ;; + *) echo "unexpected call: $*" >&2; exit 2 ;; +esac +` + require.NoError(t, os.WriteFile(fakePoetry, []byte(script), 0755)) + require.NoError(t, os.Chmod(fakePoetry, 0755)) + t.Setenv("PATH", fakeDir+string(os.PathListSeparator)+os.Getenv("PATH")) + + dir := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(dir, poetryLockFile), []byte("# lock\n"), 0600)) + require.NoError(t, os.WriteFile(filepath.Join(dir, pyprojectToml), []byte("[tool.poetry]\nname=\"x\"\n"), 0600)) + t.Chdir(dir) + + _, _, err := installPoetryDeps(technologies.BuildInfoBomGeneratorParams{ + IsCurationCmd: true, + }) + require.Error(t, err) + assert.Contains(t, err.Error(), "SyntaxError", + "original check error must appear in the returned error chain") +} + +func TestInstallPoetryDepsNonCurationErrorPropagated(t *testing.T) { + fakeDir := t.TempDir() + fakePoetry := filepath.Join(fakeDir, "poetry") + require.NoError(t, os.WriteFile(fakePoetry, + []byte("#!/bin/sh\necho 'install failed' >&2\nexit 1\n"), 0755)) + t.Setenv("PATH", fakeDir+string(os.PathListSeparator)+os.Getenv("PATH")) + + _, _, err := installPoetryDeps(technologies.BuildInfoBomGeneratorParams{ + IsCurationCmd: false, + DependenciesRepository: "", + }) + + require.Error(t, err, "non-curation poetry install failure must propagate to the caller") +} +