Skip to content

Commit 0de9f65

Browse files
authored
Merge pull request #67 from nexB/metadata
Add metadata for packages
2 parents 5b1d45c + 93444f6 commit 0de9f65

14 files changed

+17883
-1320
lines changed
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
#
4+
# Copyright (c) nexB Inc. and others. All rights reserved.
5+
# ScanCode is a trademark of nexB Inc.
6+
# SPDX-License-Identifier: Apache-2.0
7+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
8+
# See https://github.com/nexB/python-inspector for support or download.
9+
# See https://aboutcode.org for more information about nexB OSS projects.
10+
#
11+
12+
from typing import List
13+
14+
from packageurl import PackageURL
15+
16+
from _packagedcode import models
17+
from _packagedcode.models import PackageData
18+
from python_inspector import utils_pypi
19+
from python_inspector.resolution import get_python_version_from_env_tag
20+
from python_inspector.utils_pypi import Environment
21+
from python_inspector.utils_pypi import PypiSimpleRepository
22+
23+
24+
def get_pypi_bugtracker_url(project_urls):
    """
    Return the bug tracker URL found in the ``project_urls`` mapping.

    The "Tracker", "Issue Tracker" and "Bug Tracker" labels are tried in that
    order and the first non-empty value wins. When none of them holds a
    non-empty value, the result is whatever the "Bug Tracker" lookup returned
    (typically None).
    """
    tracker_url = None
    for label in ("Tracker", "Issue Tracker", "Bug Tracker"):
        tracker_url = project_urls.get(label)
        if tracker_url:
            break
    return tracker_url
31+
32+
33+
def get_pypi_codeview_url(project_urls):
    """
    Return the source code browsing URL found in the ``project_urls`` mapping.

    The "Source", "Code" and "Source Code" labels are tried in that order and
    the first non-empty value wins. When none of them holds a non-empty value,
    the result is whatever the "Source Code" lookup returned (typically None).
    """
    return (
        project_urls.get("Source")
        or project_urls.get("Code")
        or project_urls.get("Source Code")
    )
40+
41+
42+
def get_wheel_download_urls(
    purl: PackageURL,
    repos: List[PypiSimpleRepository],
    environment: Environment,
    python_version: str,
) -> List[str]:
    """
    Yield the download URL of each wheel returned by
    ``utils_pypi.get_supported_and_valid_wheels`` for the name and version of
    ``purl``, querying every repository of ``repos`` in order.

    This is a generator: wrap the call in ``list()`` to get an actual list.
    """
    for repository in repos:
        matching_wheels = utils_pypi.get_supported_and_valid_wheels(
            repo=repository,
            name=purl.name,
            version=purl.version,
            environment=environment,
            python_version=python_version,
        )
        yield from (wheel.download_url for wheel in matching_wheels)
60+
61+
62+
def get_sdist_download_url(
    purl: PackageURL, repos: List[PypiSimpleRepository], python_version: str
) -> str:
    """
    Return the download URL of the first sdist that
    ``utils_pypi.get_valid_sdist`` finds for the name and version of ``purl``,
    querying every repository of ``repos`` in order.

    Return None when no repository provides one.
    """
    sdist_candidates = (
        utils_pypi.get_valid_sdist(
            repo=repository,
            name=purl.name,
            version=purl.version,
            python_version=python_version,
        )
        for repository in repos
    )
    # The generator is lazy: repositories after the first hit are not queried.
    for candidate in sdist_candidates:
        if candidate:
            return candidate.download_url
    return None
77+
78+
79+
def get_pypi_data_from_purl(
    purl: str, environment: Environment, repos: List[PypiSimpleRepository]
) -> PackageData:
    """
    Yield one package data mapping (the ``to_dict()`` form of a
    ``PackageData``) for each distribution of the PyPI package identified by
    the ``purl`` string.

    The metadata comes from the pypi.org JSON API. Only distributions whose
    URL is also reported by ``get_wheel_download_urls`` or
    ``get_sdist_download_url`` for the given ``environment`` and ``repos`` are
    yielded.

    This is a generator: nothing runs, and no exception is raised, until it is
    iterated.

    Raise an Exception if the ``purl`` has no version.
    """
    purl = PackageURL.from_string(purl)
    name = purl.name
    version = purl.version
    if not version:
        raise Exception("Version is not specified in the purl")
    base_path = "https://pypi.org/pypi"
    api_url = f"{base_path}/{name}/{version}/json"
    # NOTE(review): imported at function scope, presumably to avoid an import
    # cycle with python_inspector.resolution -- confirm before moving it.
    from python_inspector.resolution import get_response

    response = get_response(api_url)
    if not response:
        # NOTE(review): get_response is defined elsewhere; this assumes it can
        # return a falsy value (e.g. None) when the API has no data for this
        # package version. There is nothing to report in that case.
        return

    info = response.get("info") or {}
    homepage_url = info.get("home_page")
    declared_license = info.get("license")
    project_urls = info.get("project_urls") or {}
    code_view_url = get_pypi_codeview_url(project_urls)
    bug_tracking_url = get_pypi_bugtracker_url(project_urls)
    python_version = get_python_version_from_env_tag(python_version=environment.python_version)

    # A set gives constant-time membership tests in the loop below.
    valid_distribution_urls = set(
        get_wheel_download_urls(
            purl=purl,
            repos=repos,
            environment=environment,
            python_version=python_version,
        )
    )
    sdist_url = get_sdist_download_url(
        purl=purl,
        repos=repos,
        python_version=python_version,
    )
    # Only record a real sdist URL: storing None here would let API entries
    # that lack a "url" value slip through the membership test below.
    if sdist_url:
        valid_distribution_urls.add(sdist_url)

    for dist in response.get("urls") or []:
        dist_url = dist.get("url")
        if dist_url not in valid_distribution_urls:
            continue
        digests = dist.get("digests") or {}
        yield PackageData(
            primary_language="Python",
            description=info.get("description"),
            homepage_url=homepage_url,
            api_data_url=api_url,
            bug_tracking_url=bug_tracking_url,
            code_view_url=code_view_url,
            declared_license=declared_license,
            download_url=dist_url,
            size=dist.get("size"),
            # Fall back to the "md5_digest" field when "digests" has no md5.
            md5=digests.get("md5") or dist.get("md5_digest"),
            sha256=digests.get("sha256"),
            release_date=dist.get("upload_time"),
            keywords=info.get("keywords") or [],
            parties=[
                models.Party(
                    type=models.party_person,
                    name=info.get("author"),
                    role="author",
                    email=info.get("author_email"),
                ),
                models.Party(
                    type=models.party_person,
                    name=info.get("maintainer"),
                    role="maintainer",
                    email=info.get("maintainer_email"),
                ),
            ],
            **purl.to_dict(),
        ).to_dict()

src/python_inspector/resolution.py

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,10 @@ def fetch_and_extract_sdist(
185185
if not sdist:
186186
return
187187

188+
return get_sdist_file_path_from_filename(sdist)
189+
190+
191+
def get_sdist_file_path_from_filename(sdist):
188192
if sdist.endswith(".tar.gz"):
189193
sdist_file = sdist.rstrip(".tar.gz")
190194
with tarfile.open(os.path.join(utils_pypi.CACHE_THIRDPARTY_DIR, sdist)) as file:
@@ -625,8 +629,6 @@ def format_resolution(
625629
parent_children = dict(
626630
package=str(parent_purl),
627631
dependencies=dependencies,
628-
wheel_urls=list(dict.fromkeys(wheel_urls)),
629-
sdist_url=sdist_url,
630632
)
631633
as_parent_children.append(parent_children)
632634
as_parent_children.sort(key=lambda d: d["package"])
@@ -678,6 +680,31 @@ def format_pdt_tree(results):
678680
return dependencies
679681

680682

683+
def get_package_list(results):
    """
    Return a sorted list of unique "pypi" Package URL strings, one for each
    package of the ``results`` resolution: every key of ``results.mapping``
    and every child of these keys in ``results.graph``.
    """
    pinned = results.mapping
    dependency_graph = results.graph

    def as_purl_string(identifier):
        # The version comes from the candidate pinned for this identifier.
        return str(
            PackageURL(
                type="pypi",
                name=identifier,
                version=str(pinned[identifier].version),
            )
        )

    purls = set()
    for parent in pinned:
        purls.add(as_purl_string(parent))
        for child in dependency_graph.iter_children(parent):
            purls.add(as_purl_string(child))
    return sorted(purls)
706+
707+
681708
def get_resolved_dependencies(
682709
requirements: List[Requirement],
683710
environment: Environment = None,
@@ -701,10 +728,14 @@ def get_resolved_dependencies(
701728
reporter=BaseReporter(),
702729
)
703730
resolver_results = resolver.resolve(requirements=requirements, max_rounds=max_rounds)
731+
package_list = get_package_list(results=resolver_results)
704732
if pdt_output:
705-
return format_pdt_tree(resolver_results)
706-
return format_resolution(
707-
resolver_results, as_tree=as_tree, environment=environment, repos=repos
733+
return (format_pdt_tree(resolver_results), package_list)
734+
return (
735+
format_resolution(
736+
resolver_results, as_tree=as_tree, environment=environment, repos=repos
737+
),
738+
package_list,
708739
)
709740
except Exception as e:
710741
if verbose:

src/python_inspector/resolve_cli.py

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from python_inspector import utils
2626
from python_inspector import utils_pypi
2727
from python_inspector.cli_utils import FileOptionType
28+
from python_inspector.package_data import get_pypi_data_from_purl
2829
from python_inspector.resolution import get_environment_marker_from_environment
2930
from python_inspector.resolution import get_python_version_from_env_tag
3031
from python_inspector.resolution import get_resolved_dependencies
@@ -321,7 +322,7 @@ def resolve_dependencies(
321322
click.secho(f" {repo}")
322323

323324
# resolve dependencies proper
324-
requirements, resolved_dependencies = resolve(
325+
requirements, resolved_dependencies, purls = resolve(
325326
direct_dependencies=direct_dependencies,
326327
environment=environment,
327328
repos=repos,
@@ -354,12 +355,20 @@ def resolve_dependencies(
354355
errors=[],
355356
)
356357

358+
packages = []
359+
360+
for package in purls:
361+
packages.extend(
362+
list(get_pypi_data_from_purl(package, repos=repos, environment=environment)),
363+
)
364+
357365
if json_output:
358366
write_output(
359367
headers=headers,
360368
requirements=requirements,
361369
resolved_dependencies=resolved_dependencies,
362370
json_output=json_output,
371+
packages=packages,
363372
)
364373

365374
else:
@@ -368,6 +377,7 @@ def resolve_dependencies(
368377
requirements=requirements,
369378
resolved_dependencies=resolved_dependencies,
370379
json_output=pdt_output,
380+
packages=packages,
371381
pdt_output=True,
372382
)
373383

@@ -400,7 +410,7 @@ def resolve(
400410
)
401411
)
402412

403-
resolved_dependencies = get_resolved_dependencies(
413+
resolved_dependencies, packages = get_resolved_dependencies(
404414
requirements=requirements,
405415
environment=environment,
406416
repos=repos,
@@ -412,7 +422,7 @@ def resolve(
412422

413423
initial_requirements = [d.to_dict() for d in direct_dependencies]
414424

415-
return initial_requirements, resolved_dependencies
425+
return initial_requirements, resolved_dependencies, packages
416426

417427

418428
def get_requirements_from_direct_dependencies(
@@ -432,19 +442,26 @@ def get_requirements_from_direct_dependencies(
432442
yield req
433443

434444

435-
def write_output(headers, requirements, resolved_dependencies, json_output, pdt_output=False):
445+
def write_output(
    headers, requirements, resolved_dependencies, json_output, packages, pdt_output=False
):
    """
    Write the resolution results as indented JSON to the ``json_output``
    file-like object and return the data that was written.

    With ``pdt_output`` set, only ``resolved_dependencies`` and ``packages``
    are written. Otherwise ``headers`` and ``requirements`` are included too.
    """
    if pdt_output:
        payload = {
            "resolved_dependencies": resolved_dependencies,
            "packages": packages,
        }
    else:
        payload = {
            "headers": headers,
            "requirements": requirements,
            "resolved_dependencies": resolved_dependencies,
            "packages": packages,
        }

    json.dump(payload, json_output, indent=2)
    return payload

tests/data/default-url-expected.json

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,8 @@
2929
"resolved_dependencies": [
3030
{
3131
"package": "pkg:pypi/[email protected]",
32-
"dependencies": [],
33-
"wheel_urls": [],
34-
"sdist_url": null
32+
"dependencies": []
3533
}
36-
]
34+
],
35+
"packages": []
3736
}

0 commit comments

Comments
 (0)