Skip to content

Commit b4005c1

Browse files
committed
Add type annotations and mypy
1 parent f763df1 commit b4005c1

35 files changed

+79
-47
lines changed

.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
*.egg-info
22
pip-wheel-metadata
33
dist
4+
build
45

56
*coverage*
67
!.coveragerc
78

89
__pycache__
910
.pytest_cache
1011

12+
.mypy_cache
13+
1114
.tox

.pre-commit-config.yaml

+5
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,11 @@ repos:
2323
'-e', 'git+https://github.com/pycqa/pyflakes@master#egg=pyflakes',
2424
'-e', 'git+https://github.com/pycqa/pycodestyle@master#egg=pycodestyle',
2525
]
26+
- repo: https://github.com/pre-commit/mirrors-mypy
27+
rev: v0.770
28+
hooks:
29+
- id: mypy
30+
exclude: 'tests/data/'
2631
- repo: https://github.com/pre-commit/pre-commit-hooks
2732
rev: v2.5.0
2833
hooks:

mypy.ini

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
[mypy]
2+
mypy_path=src
3+
4+
warn_return_any = True
5+
warn_redundant_casts = True
6+
warn_unused_configs = True
7+
8+
[mypy-pytest.*]
9+
ignore_missing_imports = True

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "audiomatch"
3-
version = "0.2.0"
3+
version = "0.2.1"
44
description = "A small command-line tool to find similar audio files"
55
keywords = ["duplicate", "detection", "audio", "fingerprinting", "command-line"]
66
readme = "README.rst"

src/audiomatch/files.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,11 @@ def pair(
4141
)
4242

4343
if len(files) == 1 and len(files[0]) > 1:
44-
return itertools.combinations(*files, 2)
44+
return itertools.combinations(*files, 2) # type: ignore
4545
elif len(files) > 1:
4646
return itertools.chain.from_iterable(
47-
itertools.product(*group) for group in itertools.combinations(files, 2)
47+
itertools.product(*group) # type: ignore
48+
for group in itertools.combinations(files, 2)
4849
)
4950
else:
5051
raise NotEnoughFiles("Not enough input files.")

src/audiomatch/fingerprints.py

+10-6
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
1+
from __future__ import annotations
2+
13
import statistics
24
import subprocess
5+
from pathlib import Path
6+
from typing import Iterator, List, Tuple, Union
37

48
from audiomatch import popcount
59

@@ -8,7 +12,7 @@
812
SCORE_MEDIAN_DELTA = 0.04
913

1014

11-
def calc(path, length=120):
15+
def calc(path: Path, length: int = 120) -> List[int]:
1216
# TODO: Probably it would be better to parse json output
1317
fp = subprocess.run(
1418
["fpcalc", "-rate", "11025", "-raw", "-length", str(length), str(path)],
@@ -19,7 +23,7 @@ def calc(path, length=120):
1923
return []
2024

2125

22-
def compare(fp1, fp2):
26+
def compare(fp1: List[int], fp2: List[int]) -> float:
2327
# Take first 30 seconds of the shortest fingerprint and try to find it in a
2428
# longer one
2529
if len(fp1) > len(fp2):
@@ -28,7 +32,7 @@ def compare(fp1, fp2):
2832
return find_best_score(fp2, fp1[: seconds(30)])
2933

3034

31-
def find_best_score(fp1, fp2):
35+
def find_best_score(fp1: List[int], fp2: List[int]) -> float:
3236
# Fingerprints shorter than 10 seconds don't have enough data for analysis
3337
if len(fp1) > seconds(10) and len(fp2) > seconds(10):
3438
results = [correlation(_fp1, _fp2) for _fp1, _fp2 in cross(fp1, fp2)]
@@ -58,12 +62,12 @@ def find_best_score(fp1, fp2):
5862
return 0.0
5963

6064

61-
def correlation(fp1, fp2):
65+
def correlation(fp1: List[int], fp2: List[int]) -> float:
6266
error = sum(popcount.popcount(x ^ y) for x, y in zip(fp1, fp2))
6367
return 1.0 - error / 32.0 / min(len(fp1), len(fp2))
6468

6569

66-
def cross(fp1, fp2):
70+
def cross(fp1: List[int], fp2: List[int]) -> Iterator[Tuple[List[int], List[int]]]:
6771
length = min(len(fp1), len(fp2))
6872
span = min(length // 4, seconds(5))
6973
limit = max(len(fp1), len(fp2)) - length - span
@@ -75,5 +79,5 @@ def cross(fp1, fp2):
7579
yield fp1[offset:], fp2
7680

7781

78-
def seconds(x) -> int:
82+
def seconds(x: Union[int, float]) -> int:
7983
return round(x * 7)

src/audiomatch/match.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import functools
55
import itertools
66
from pathlib import Path
7-
from typing import Dict, Iterable, Optional, Tuple
7+
from typing import Dict, Iterable, List, Optional, Tuple
88

99
from audiomatch import files, fingerprints
1010
from audiomatch.constants import DEFAULT_LENGTH
@@ -36,12 +36,12 @@ def match(
3636
# Using multiprocessing.Pool.starmap method we can avoid writing wrapper to unpack
3737
# arguments. However, multiprocessing.Pool doesn't play nicely with coverage, and
3838
# requires an explicit call to 'pool.join'
39-
with concurrent.futures.ProcessPoolExecutor() as executor:
40-
scores = executor.map(_compare, ((fps[a], fps[b]) for a, b in pairs))
39+
with concurrent.futures.ProcessPoolExecutor() as pool:
40+
scores = pool.map(_compare, ((fps[a], fps[b]) for a, b in pairs))
4141

4242
return dict(zip(pairs, scores))
4343

4444

45-
def _compare(pair):
45+
def _compare(pair: Tuple[List[int], List[int]]) -> float:
4646
"""Just a wrapper for fingerprints.compare that unpacks its first argument"""
4747
return fingerprints.compare(*pair)

src/audiomatch/popcount/__init__.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
from __future__ import annotations
2+
3+
from typing import List
4+
15
try:
26
from audiomatch.popcount._popcount import popcount
37
except ImportError:
@@ -6,13 +10,13 @@
610
#
711
# This popcount version works slightly faster than 'bin(x).count("1")'
812

9-
def _popcount_table(size):
13+
def _popcount_table(size: int) -> List[int]:
1014
table = [0] * 2 ** size
1115
for i in range(len(table)):
1216
table[i] = (i & 1) + table[i >> 1]
1317
return table
1418

1519
_POPCOUNT_TABLE16 = _popcount_table(16)
1620

17-
def popcount(x):
21+
def popcount(x: int) -> int:
1822
return _POPCOUNT_TABLE16[x & 0xFFFF] + _POPCOUNT_TABLE16[(x >> 16) & 0xFFFF]

src/audiomatch/popcount/_popcount.pyi

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
def popcount(x: int) -> int:
2+
...
File renamed without changes.

src/audiomatch/reports.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
1+
from __future__ import annotations
2+
13
import sys
4+
from pathlib import Path
5+
from typing import Dict, Tuple
26

37

4-
def console(matches) -> None:
8+
def console(matches: Dict[Tuple[Path, Path], float]) -> None:
59
"""Print similar audio to standard output"""
610
similars = _join(_adjancency_list(matches))
711
lines = ["\n".join(str(node) for node in similar) for similar in sorted(similars)]
@@ -29,7 +33,7 @@ def _join(graph):
2933

3034

3135
def _adjancency_list(matches, score=0.61):
32-
"""Returns an adjacency list for matches"""
36+
"""Returns an adjacency list for matches with a given score or higher"""
3337
graph = {}
3438
for pair, score in matches.items():
3539
if score > 0.61:
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

tests/data/sample_2/__init__.py

Whitespace-only changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

tests/test_files.py

+11-11
Original file line numberDiff line numberDiff line change
@@ -6,23 +6,23 @@
66
from audiomatch.exceptions import NotEnoughFiles
77

88
SAMPLES_DIR = Path(__file__).parent.joinpath("data")
9-
SAMPLES_1 = [SAMPLES_DIR.joinpath(f"sample-1/take-{i}.log") for i in range(1, 4)]
10-
SAMPLES_2 = [SAMPLES_DIR.joinpath(f"sample-2/take-{i}.log") for i in range(1, 5)]
9+
SAMPLES_1 = [SAMPLES_DIR.joinpath(f"sample_1/take-{i}.log") for i in range(1, 4)]
10+
SAMPLES_2 = [SAMPLES_DIR.joinpath(f"sample_2/take-{i}.log") for i in range(1, 5)]
1111

1212

1313
def sort_sublist(pairs):
1414
return sorted(pair.__class__(sorted(pair)) for pair in pairs)
1515

1616

1717
def test_pair_two_files():
18-
file1 = SAMPLES_DIR.joinpath("sample-1/take-1.log")
19-
file2 = SAMPLES_DIR.joinpath("sample-1/take-2.log")
18+
file1 = SAMPLES_DIR.joinpath("sample_1/take-1.log")
19+
file2 = SAMPLES_DIR.joinpath("sample_1/take-2.log")
2020
pairs = files.pair(file1, file2)
2121
assert sort_sublist(pairs) == [(SAMPLES_1[0], SAMPLES_1[1])]
2222

2323

2424
def test_pair_files_in_a_directory():
25-
directory = SAMPLES_DIR.joinpath("sample-1")
25+
directory = SAMPLES_DIR.joinpath("sample_1")
2626
pairs = files.pair(directory, extensions=[".log"])
2727
assert sort_sublist(pairs) == [
2828
(SAMPLES_1[0], SAMPLES_1[1]),
@@ -32,8 +32,8 @@ def test_pair_files_in_a_directory():
3232

3333

3434
def test_pair_a_file_and_all_files_in_a_directory():
35-
file = SAMPLES_DIR.joinpath("sample-1/take-1.log")
36-
directory = SAMPLES_DIR.joinpath("sample-1")
35+
file = SAMPLES_DIR.joinpath("sample_1/take-1.log")
36+
directory = SAMPLES_DIR.joinpath("sample_1")
3737
pairs = files.pair(file, directory, extensions=[".log"])
3838
assert sort_sublist(pairs) == [
3939
(SAMPLES_1[0], SAMPLES_1[0]),
@@ -43,16 +43,16 @@ def test_pair_a_file_and_all_files_in_a_directory():
4343

4444

4545
def test_pair_directories():
46-
directory1 = SAMPLES_DIR.joinpath("sample-1")
47-
directory2 = SAMPLES_DIR.joinpath("sample-2")
46+
directory1 = SAMPLES_DIR.joinpath("sample_1")
47+
directory2 = SAMPLES_DIR.joinpath("sample_2")
4848
pairs = files.pair(directory1, directory2, extensions=[".log"])
4949
assert sort_sublist(pairs) == [
5050
(sample_1, sample_2) for sample_1 in SAMPLES_1 for sample_2 in SAMPLES_2
5151
]
5252

5353

5454
def test_pair_glob():
55-
wildcard = SAMPLES_DIR.joinpath("sample-1/*.log")
55+
wildcard = SAMPLES_DIR.joinpath("sample_1/*.log")
5656
pairs = files.pair(wildcard)
5757
assert sort_sublist(pairs) == [
5858
(SAMPLES_1[0], SAMPLES_1[1]),
@@ -62,7 +62,7 @@ def test_pair_glob():
6262

6363

6464
def test_pair_one_file():
65-
file = SAMPLES_DIR.joinpath("sample-1/take-1.log")
65+
file = SAMPLES_DIR.joinpath("sample_1/take-1.log")
6666
with pytest.raises(NotEnoughFiles) as excinfo:
6767
files.pair(file)
6868
assert str(excinfo.value) == "Not enough input files."

tests/test_fingerprints.py

+16-16
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,11 @@ def _load_fp(filepath: str) -> List[int]:
1515

1616

1717
def test_calc():
18-
fp = resources.read_binary("tests.data.sample-1", "take-1.log")
18+
fp = resources.read_binary("tests.data.sample_1", "take-1.log")
1919
fpcalc = mock.MagicMock(spec=CompletedProcess, stdout=fp)
2020
with mock.patch("subprocess.run", return_value=fpcalc) as fpcalc_mock:
2121
fp = fingerprints.calc("/path/to/audio", length=120)
22-
assert fp == _load_fp("sample-1/take-1.log")
22+
assert fp == _load_fp("sample_1/take-1.log")
2323
assert fpcalc_mock.called
2424

2525

@@ -35,12 +35,12 @@ def test_calc_empty_fingerprint():
3535
@pytest.mark.parametrize(
3636
["a", "b", "score"],
3737
[
38-
("sample-1/take-1.log", "sample-1/take-1.log", 1.00),
39-
("sample-1/take-1.log", "sample-1/take-2.log", 0.77),
40-
("sample-1/take-1.log", "sample-2/take-2.log", 0.00),
41-
("sample-1/take-3.log", "sample-2/take-4.log", 0.00),
42-
("sample-2/take-1.log", "sample-2/take-2.log", 0.66),
43-
("sample-2/take-4.log", "sample-2/take-1.log", 0.63),
38+
("sample_1/take-1.log", "sample_1/take-1.log", 1.00),
39+
("sample_1/take-1.log", "sample_1/take-2.log", 0.77),
40+
("sample_1/take-1.log", "sample_2/take-2.log", 0.00),
41+
("sample_1/take-3.log", "sample_2/take-4.log", 0.00),
42+
("sample_2/take-1.log", "sample_2/take-2.log", 0.66),
43+
("sample_2/take-4.log", "sample_2/take-1.log", 0.63),
4444
],
4545
)
4646
def test_compare(a, b, score):
@@ -50,26 +50,26 @@ def test_compare(a, b, score):
5050

5151

5252
def test_compare_add_correction_for_short_fingerprints():
53-
fp1 = _load_fp("edgecase-1/sample-1.log")
54-
fp2 = _load_fp("edgecase-1/sample-2.log")
53+
fp1 = _load_fp("edgecase_1/sample-1.log")
54+
fp2 = _load_fp("edgecase_1/sample-2.log")
5555
assert fingerprints.compare(fp1, fp2) == 0.0
5656

5757

5858
def test_compare_returns_immediately_for_score_greater_than_confidence_score():
59-
fp1 = _load_fp("edgecase-2/sample-1.log")
60-
fp2 = _load_fp("edgecase-2/sample-2.log")
59+
fp1 = _load_fp("edgecase_2/sample-1.log")
60+
fp2 = _load_fp("edgecase_2/sample-2.log")
6161
assert fingerprints.compare(fp1, fp2) > fingerprints.CONFIDENCE_SCORE
6262

6363

6464
def test_compare_requires_fingerprints_to_be_at_least_10_seconds_long():
65-
fp1 = _load_fp("edgecase-3/sample-1.log")
66-
fp2 = _load_fp("edgecase-3/sample-2.log")
65+
fp1 = _load_fp("edgecase_3/sample-1.log")
66+
fp2 = _load_fp("edgecase_3/sample-2.log")
6767
assert fingerprints.compare(fp1, fp2) == 0.0
6868

6969

7070
def test_compare_false_positive():
7171
# These two completely different fingerprints initially have a relatively good score.
7272
# 'compare' handles these cases by checking difference between median and max value.
73-
fp1 = _load_fp("edgecase-4/sample-1.log")
74-
fp2 = _load_fp("edgecase-4/sample-2.log")
73+
fp1 = _load_fp("edgecase_4/sample-1.log")
74+
fp2 = _load_fp("edgecase_4/sample-2.log")
7575
assert fingerprints.compare(fp1, fp2) == 0.0

tests/test_match.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ def fpcalc(filepath: Path, length):
2525

2626
@pytest.mark.slow
2727
def test_match():
28-
sample_1 = SAMPLES_DIR.joinpath("sample-1/take-1.log")
29-
sample_2 = SAMPLES_DIR.joinpath("sample-2/take-1.log")
28+
sample_1 = SAMPLES_DIR.joinpath("sample_1/take-1.log")
29+
sample_2 = SAMPLES_DIR.joinpath("sample_2/take-1.log")
3030
with mock.patch("audiomatch.fingerprints.calc", side_effect=fpcalc) as fpcalc_mock:
3131
matches = match.match(sample_1, sample_2, extensions=[".log"])
3232
assert sort_keys(matches) == {(sample_1, sample_2): 0.0}
@@ -35,7 +35,7 @@ def test_match():
3535

3636
@pytest.mark.slow
3737
def test_match_with_empty_fingerprint():
38-
sample_1 = SAMPLES_DIR.joinpath("sample-1/take-1.log")
38+
sample_1 = SAMPLES_DIR.joinpath("sample_1/take-1.log")
3939
empty = SAMPLES_DIR.joinpath("empty.log")
4040
with mock.patch("audiomatch.fingerprints.calc", side_effect=fpcalc) as fpcalc_mock:
4141
matches = match.match(sample_1, empty, extensions=[".log"])

0 commit comments

Comments
 (0)