Skip to content

Commit

Permalink
feat: Add author licensing config
Browse files Browse the repository at this point in the history
Restores the feature allowing for reporting licensing statistics based on author.

closes #88
  • Loading branch information
thehale committed Jan 11, 2025
1 parent 38347de commit c8c3164
Show file tree
Hide file tree
Showing 10 changed files with 253 additions and 84 deletions.
23 changes: 14 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,26 +47,31 @@ functionality), it does help you clearly identify who your contributors are and
the exact lines of code they wrote.


<!--
To support libraries undergoing re-licensing, `git-authorship` includes config
files for labelling the licenses under which contributors have shared their code.

## Other Features

### Author Licenses
If you want to include information about the OSS license offered by each
contributor, simply add a line for each author to `config/author-licenses.txt`
in the following format:

You can include OSS licensing information for each author via a `.csv` file.
The `author-name` will be matched to the values shown in the generated
authorship report.

_licensing.csv_
```
author-name|license-SPDX-id
author-name,license-SPDX-id
```

The `author-name` will be matched to the values shown in the generated
authorship report.
<sub>A list of SPDX license identifiers can be found at [spdx.org/licenses](https://spdx.org/licenses)</sub>

_A list of SPDX license identifiers can be found here:
https://spdx.org/licenses/_
Then pass the licensing file to the CLI:

```bash
git-authorship REPO_URL --author-licenses licensing.csv
```

<!--
### Pseudonyms
Expand Down
13 changes: 12 additions & 1 deletion git_authorship/_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,23 @@
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
from pathlib import Path
from typing import Dict
from typing import TypedDict

from typing_extensions import NotRequired


FilePath = Path
Author = str
LineCount = int
Authorship = Dict[Author, LineCount]
License = str


class AuthorshipInfo(TypedDict):
    """
    What is recorded about a single author's contribution to a file or folder.
    """

    # Number of lines attributed to the author.
    lines: LineCount
    # License label for the author's contributions. The key is absent (not
    # None) when no license is known for the author.
    license: NotRequired[License]


Authorship = Dict[Author, AuthorshipInfo]
RepoAuthorship = Dict[FilePath, Authorship]

# Every public alias/type defined in this module is exported, including the
# licensing-related `License` and `AuthorshipInfo`.
__all__ = [
    "FilePath",
    "Author",
    "LineCount",
    "License",
    "AuthorshipInfo",
    "Authorship",
    "RepoAuthorship",
]
62 changes: 46 additions & 16 deletions git_authorship/authorship.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,18 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
import csv
import json
import logging
from collections import defaultdict
from pathlib import Path
from typing import Optional

from git import Repo

from ._pathutils import iterfiles
from ._types import Authorship
from ._types import AuthorshipInfo
from ._types import RepoAuthorship
from git_authorship import export

Expand All @@ -20,7 +23,13 @@
log = logging.getLogger(__name__)


def for_repo(repo: Repo, cache_dir: Path = Path("build/cache")) -> RepoAuthorship:
def for_repo(
repo: Repo,
*,
license_file: Optional[Path] = None,
cache_dir: Path = Path("build/cache"),
use_cache: bool = True,
) -> RepoAuthorship:
"""
Calculates how many lines each author has contributed to the repo, with breakdowns
by folder and file.
Expand All @@ -41,24 +50,25 @@ def for_repo(repo: Repo, cache_dir: Path = Path("build/cache")) -> RepoAuthorshi
```
{
".": { "author1": 100, "author2": 200 },
"./folder1": { "author1": 50, "author2": 150 },
"./folder1/file1.txt": { "author1": 25, "author2": 150 },
"./folder1/file2.txt": { "author1": 25 },
"./folder2": { "author1": 50, "author2": 50 },
"./folder2/file1.txt": { "author1": 25, "author2": 25 },
"./folder2/file2.txt": { "author1": 25, "author2": 25 },
".": { "author1": {"lines": 100}, "author2": {"lines": 200} },
"./folder1": { "author1": {"lines": 50}, "author2": {"lines": 150} },
"./folder1/file1.txt": { "author1": {"lines": 25}, "author2": {"lines": 150} },
"./folder1/file2.txt": { "author1": {"lines": 25} },
"./folder2": { "author1": {"lines": 50}, "author2": {"lines": 50} },
"./folder2/file1.txt": { "author1": {"lines": 25}, "author2": {"lines": 25} },
"./folder2/file2.txt": { "author1": {"lines": 25}, "author2": {"lines": 25} },
}
```
"""
cache_key = cache_dir / f"{repo.head.commit.hexsha}.json"

if cache_key.exists():
if use_cache and cache_key.exists():
with open(cache_key, "r") as f:
return {Path(k): v for k, v in (json.load(f) or {}).items()}
else:
data = _compute_repo_authorship(repo)
data = _augment_author_licenses(data, license_file)
cache_key.parent.mkdir(exist_ok=True, parents=True)
export.as_json(data, cache_key)
return data
Expand All @@ -81,8 +91,8 @@ def for_file(repo: Repo, path: Path) -> Authorship:
The returned authorship would be:
```
{
"author1": 3,
"author2": 2,
"author1": {"lines": 3},
"author2": {"lines": 2},
}
```
"""
Expand All @@ -94,9 +104,9 @@ def for_file(repo: Repo, path: Path) -> Authorship:
for commit, lines in (raw_blame or [])
]

authorship: Authorship = defaultdict(int)
authorship: Authorship = defaultdict(_AuthorshipInfo)
for author, lines in blame:
authorship[author] += lines
authorship[author]["lines"] += lines
except FileNotFoundError as e:
log.warning(f"Failed to blame {path}: {e}")
authorship = {}
Expand All @@ -112,15 +122,35 @@ def _compute_repo_authorship(repo: Repo) -> RepoAuthorship:
]
file_authorships = {path: for_file(repo, path) for path in filepaths}

repo_authorship: RepoAuthorship = defaultdict(lambda: defaultdict(int))
repo_authorship: RepoAuthorship = defaultdict(lambda: defaultdict(_AuthorshipInfo))
for file, authorship in file_authorships.items():
parts = f"./{file}".split("/")
for i in range(len(parts)):
cur = "/".join(parts[: i + 1])
for author, lines in authorship.items():
repo_authorship[Path(cur)][author] += lines
for author, info in authorship.items():
repo_authorship[Path(cur)][author]["lines"] += info["lines"]

return repo_authorship


def _augment_author_licenses(
    repo_authorship: RepoAuthorship, licenses_path: Optional[Path] = None
) -> RepoAuthorship:
    """
    Labels each author in `repo_authorship` with the license listed for them.

    The licenses file is a CSV whose first column is the author name (matched
    exactly against the author keys of `repo_authorship`) and whose second
    column is the license identifier. Blank lines and rows with fewer than two
    columns are skipped instead of raising. If an author is listed more than
    once, the last row wins.

    `repo_authorship` is updated in place and is also returned. When no
    `licenses_path` is given, it is returned untouched.
    """
    if not licenses_path:
        return repo_authorship

    # `newline=""` lets the csv module handle quoted fields with embedded
    # newlines itself; the explicit encoding keeps non-ASCII author names from
    # depending on the platform's default locale.
    with open(licenses_path, "r", newline="", encoding="utf-8") as f:
        # csv.reader yields [] for blank lines, so guard before indexing.
        licenses = {row[0]: row[1] for row in csv.reader(f) if len(row) >= 2}

    for authorship in repo_authorship.values():
        for author, info in authorship.items():
            if author in licenses:
                info["license"] = licenses[author]

    return repo_authorship


def _AuthorshipInfo() -> AuthorshipInfo:
    """
    Builds the starting record for an author: zero lines and no license.

    A new dict is created on every call, so records are never shared between
    authors.
    """
    empty: AuthorshipInfo = {"lines": 0}
    return empty


# Entries must name objects that actually exist in this module; otherwise a
# star-import of the module raises AttributeError.
__all__ = ["for_file", "for_repo"]
37 changes: 35 additions & 2 deletions git_authorship/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import logging
from dataclasses import dataclass
from pathlib import Path
from typing import Optional

from git import Repo

Expand All @@ -21,6 +22,8 @@ class Args:
location: str
clone_to: str
branch: str
author_licenses_path: Optional[Path]
use_authorship_cache: bool = True


def parse_args(argv=None) -> Args:
Expand All @@ -34,9 +37,35 @@ def parse_args(argv=None) -> Args:
parser.add_argument(
"--branch", nargs="?", default=None, help="The branch/revision to checkout"
)
parser.add_argument(
"--author-licenses",
nargs="?",
default=None,
help="The path to a CSV file containing author licenses",
)
parser.add_argument(
"--no-authorship-cache", action="store_true", help="Ignore authorship cache"
)

args = parser.parse_args(argv)
return Args(args.location, args.clone_to, args.branch)

return Args(
args.location,
args.clone_to,
args.branch,
_parse_author_licenses_path(args.author_licenses),
not args.no_authorship_cache,
)


def _parse_author_licenses_path(arg: Optional[str] = None) -> Optional[Path]:
    """
    Converts the raw `--author-licenses` value into a path.

    Returns None when no value (or an empty one) was supplied. Raises
    FileNotFoundError, carrying the raw value, when nothing exists at the
    given location.
    """
    if not arg:
        return None

    candidate = Path(arg)
    if candidate.exists():
        return candidate

    raise FileNotFoundError(arg)


def clone_and_checkout(args: Args):
Expand All @@ -57,7 +86,11 @@ def clone_and_checkout(args: Args):

def run(args: Args):
repo = clone_and_checkout(args)
repo_authorship = authorship.for_repo(repo)
repo_authorship = authorship.for_repo(
repo,
license_file=args.author_licenses_path,
use_cache=args.use_authorship_cache,
)
export.as_treemap(repo_authorship)
export.as_json(repo_authorship)
export.as_csv(repo_authorship)
Expand Down
33 changes: 26 additions & 7 deletions git_authorship/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,15 @@
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
import csv
import json
from collections import defaultdict
from pathlib import Path
from typing import Dict

import plotly.graph_objects as go

from ._pathutils import io_handle
from ._pathutils import Writeable
from ._types import Authorship
from ._types import RepoAuthorship


Expand All @@ -29,11 +32,27 @@ def as_treemap(
parents = [
str(file.parent) if str(file) != "." else "" for file in authorship.keys()
]
values = [sum(authors.values()) for authors in authorship.values()]
values = [
sum(info["lines"] for info in authors.values())
for authors in authorship.values()
]
labels = [file.name for file in authorship.keys()]

def author_list(authorship: Authorship):
    # One "<author>: <line count>" bullet per author, in the mapping's own
    # order, joined with HTML line breaks under an "Authors:" heading.
    bullets = [f"{author}: {info['lines']}" for author, info in authorship.items()]
    heading = "<br>Authors:<br> - "
    return heading + "<br> - ".join(bullets)

def license_list(authorship: Authorship):
    # Total the line counts per license; authors with no "license" entry are
    # grouped under "Unknown". Licenses appear in first-seen order.
    totals: Dict[str, int] = {}
    for info in authorship.values():
        label = info.get("license", "Unknown")
        totals[label] = totals.get(label, 0) + info["lines"]

    bullets = [f"{label}: {count}" for label, count in totals.items()]
    return "<br>Licenses:<br> - " + "<br> - ".join(bullets)

descriptions = [
"<br>Authors:<br> - "
+ "<br> - ".join(f"{author}: {lines}" for author, lines in authorship.items())
f"{author_list(authorship)}<br>{license_list(authorship)}"
for authorship in authorship.values()
]

Expand Down Expand Up @@ -83,12 +102,12 @@ def as_csv(
"""
with io_handle(output) as f:
writer = csv.writer(f)
writer.writerow(["path", "author", "lines"])
writer.writerow(["path", "author", "lines", "license"])
for path, authors in sorted(authorship.items(), key=lambda x: x[0]):
for author, lines in sorted(
authors.items(), key=lambda x: x[1], reverse=True
for author, info in sorted(
authors.items(), key=lambda x: x[1]["lines"], reverse=True
):
writer.writerow([path, author, lines])
writer.writerow([path, author, info["lines"], info.get("license")])


__all__ = ["as_treemap", "as_json", "as_csv"]
1 change: 1 addition & 0 deletions test/fixtures/licensing.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Joseph Hale <[email protected]>,MPL-2.0
Loading

0 comments on commit c8c3164

Please sign in to comment.