Skip to content

unused_code: handle git grep no-matches and untracked #203

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
fdd052a
unused_code: handle git grep no-matches and untracked; detect pytest …
myakove Aug 9, 2025
c83ead3
tests(unused_code): use bytes for FakeCompleted stdout/stderr to matc…
myakove Aug 9, 2025
6fe4908
fix(unused_code): normalize _git_grep behavior; formatting after pre-…
myakove Aug 9, 2025
0e62ab5
fix(unused_code): parse git grep -n output with split(':', 2) to isol…
myakove Aug 9, 2025
b4cdf1f
tests(unused_code): add commented-usage test; fix mock to use text st…
myakove Aug 9, 2025
261388e
feat(jira_utils): prefer config values when present, fallback to CLI/…
myakove Aug 9, 2025
818fbbf
feat(unused_code): detect supported git grep regex flag (-P or -G) on…
myakove Aug 9, 2025
8189f2f
fix(unused_code): tighten call-site regex with word boundary; add por…
myakove Aug 9, 2025
9507035
feat(unused_code): aggregate processing errors instead of exiting on …
myakove Aug 9, 2025
8cd8c3a
feat(unused_code): enhance git grep reliability and cross-platform co…
myakove Aug 9, 2025
136b355
Add AI files to gitignore
myakove Aug 9, 2025
c421608
fix(unused_code): improve git grep output parsing and regex portability
myakove Aug 9, 2025
46adfac
refactor(jira_utils): simplify config precedence logic for url and token
myakove Aug 9, 2025
ec79dc8
fix(unused_code): filter documentation patterns to prevent false posi…
myakove Aug 11, 2025
b6d2ab7
Merge branch 'main' of github.com:RedHatQE/python-utility-scripts int…
myakove Aug 13, 2025
f96b127
refactor(unused_code): add unused_code_ prefix to test functions
myakove Aug 13, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,10 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/

# AI
.cursor/
CLAUDE.md
.agent-os/
.cursorrules
.claude/
Empty file removed 18
Empty file.
1 change: 1 addition & 0 deletions apps/jira_utils/jira_information.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ def process_jira_command_line_config_file(
config_dict = get_util_config(util_name="pyutils-jira", config_file_path=config_file_path)
url = url or config_dict.get("url", "")
token = token or config_dict.get("token", "")

if not (url and token):
LOGGER.error("Jira url and token are required.")
sys.exit(1)
Expand Down
278 changes: 261 additions & 17 deletions apps/unused_code/unused_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
import ast
import logging
import os
import re
import subprocess
import sys
from concurrent.futures import Future, ThreadPoolExecutor, as_completed
from functools import lru_cache
from typing import Any, Iterable

import click
Expand All @@ -17,6 +19,42 @@
LOGGER = get_logger(name=__name__)


@lru_cache(maxsize=1)
def _detect_supported_grep_flag() -> str:
"""Detect and cache a supported regex engine flag for git grep.

Prefer PCRE ("-P") for proper \b handling; fall back to basic regex ("-G").
Run a harmless grep to verify support and cache the first working flag.
Uses lru_cache for thread-safe caching that runs only once per process.
"""
candidate_flags = ["-P", "-G"]
for flag in candidate_flags:
try:
# Use a trivial pattern to minimize output. We only care that the flag is accepted.
# Discard stdout/stderr to prevent capturing large data in big repositories.
probe_cmd = [
"git",
"grep",
"-n",
"--no-color",
"--untracked",
"-I",
flag,
"^$", # match empty lines; success (rc=0) or no matches (rc=1) are both fine
]
result = subprocess.run(probe_cmd, check=False, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
if result.returncode in (0, 1):
return flag
except Exception:
# Try next candidate
pass

raise RuntimeError(
"git grep does not support '-P' (PCRE) or '-G' (basic regex) on this platform. "
"Please ensure a compatible git/grep is installed."
)


def is_fixture_autouse(func: ast.FunctionDef) -> bool:
deco_list: list[Any] = func.decorator_list
for deco in deco_list or []:
Expand All @@ -31,6 +69,151 @@ def is_fixture_autouse(func: ast.FunctionDef) -> bool:
return False


def is_pytest_fixture(func: ast.FunctionDef) -> bool:
"""Return True if the function is decorated with @pytest.fixture.

Detects any pytest fixture regardless of parameters (scope, autouse, etc.).
"""
decorators: list[Any] = func.decorator_list
for decorator in decorators or []:
# Case 1: @pytest.fixture(...)
if hasattr(decorator, "func"):
# e.g. @pytest.fixture(...)
if getattr(decorator.func, "attr", None) and getattr(decorator.func, "value", None):
if decorator.func.attr == "fixture" and getattr(decorator.func.value, "id", None) == "pytest":
return True
# e.g. from pytest import fixture; @fixture(...)
if isinstance(decorator.func, ast.Name) and decorator.func.id == "fixture":
return True
# Case 2: @pytest.fixture (no parentheses)
else:
# e.g. @pytest.fixture
if getattr(decorator, "attr", None) == "fixture" and getattr(decorator, "value", None):
if getattr(decorator.value, "id", None) == "pytest":
return True
# e.g. from pytest import fixture; @fixture
if isinstance(decorator, ast.Name) and decorator.id == "fixture":
return True
return False


def _build_call_pattern(function_name: str) -> str:
r"""Build a portable regex to match function call sites.

Uses word boundary semantics based on the detected grep engine.
The pattern is designed to match actual function calls while minimizing
false positives from documentation patterns.
- PCRE (-P): \bname\s*[(]
- Basic (-G): \<name\s*[(]
"""
flag = _detect_supported_grep_flag()
if flag == "-P":
return rf"\b{function_name}\s*[(]"
# -G basic regex: use word-start token \< and literal '('
return rf"\<{function_name}[[:space:]]*[(]"


def _build_fixture_param_pattern(function_name: str) -> str:
r"""Build a portable regex to find a parameter named `function_name` in a def signature."""
flag = _detect_supported_grep_flag()
if flag == "-P":
return rf"def\s+\w+\s*[(][^)]*\b{function_name}\b"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would find a fixture if it appears in the first line of the test or fixture definition. How about those who appears in a different line? e.g.

 @pytest.mark.usefixtures("sa_namespace", "service_account", "mr_access_role", "mr_access_role_binding")
    def test_service_account_access_granted(
        self: Self,
        sa_token: str,
        model_registry_instance_rest_endpoint: list[str],
    ):

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@myakove this is still valid. I think you need a test with a fixture that is like this

@pytest.fixture(scope="function")
def fixtureA():
    <does something>

def test_temp(
fixtureA
):
    <does something>

I did not see it getting caught.

Also how about fixtures being used via usefixtures()? I don't see (I might be missing something), that being handled either.
I did install your branch locally and tested it and both cases reported failures.

# For -G (basic regex), avoid PCRE tokens; use POSIX classes and literals
# def[space+][ident][ident*][space*]([^)]*\<name\>)
return rf"def[[:space:]]+[[:alnum:]_][[:alnum:]_]*[[:space:]]*[(][^)]*\<{function_name}\>"


def _is_documentation_pattern(line: str, function_name: str) -> bool:
"""Check if a line contains a documentation pattern rather than a function call.

Filters out common documentation patterns that include function names with parentheses:
- Parameter descriptions: 'param_name (type): description'
- Type annotations in docstrings
- Inline documentation patterns
- Lines within triple-quoted strings that aren't actual code

Args:
line: The line of code to check
function_name: The function name we're searching for

Returns:
True if this appears to be documentation, False if it might be a function call
"""
stripped_line = line.strip()

# First, exclude obvious code patterns that should never be considered documentation
# Skip control flow statements and common code patterns
code_prefixes = ["if ", "elif ", "while ", "for ", "with ", "assert ", "return ", "yield ", "raise "]
if any(stripped_line.startswith(prefix) for prefix in code_prefixes):
return False

# Skip function definitions (these are already filtered elsewhere but be extra safe)
if stripped_line.startswith("def "):
return False

# Pattern 1: Parameter description format "name (type): description"
# But be more specific - require either indentation or specific doc context
# This catches patterns like " namespace (str): The namespace of the pod."
if re.search(rf"^\s+{re.escape(function_name)}\s*\([^)]*\)\s*:\s+\w", stripped_line):
return True

# Pattern 2: Lines that look like type annotations in docstrings
# Must have descriptive text after the colon, not just code
type_annotation_pattern = rf"\b{re.escape(function_name)}\s*\([^)]*\)\s*:\s+[A-Z][a-z]"
if re.search(type_annotation_pattern, stripped_line):
return True

# Pattern 3: Lines that contain common documentation keywords near the function name
doc_keywords = ["Args:", "Arguments:", "Parameters:", "Returns:", "Return:", "Raises:", "Note:", "Example:"]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are typically not in the same line as the function names. Doc patterns typically includes one of these string and next line may have an arg name. So I am not sure if we will get to these lines.

for keyword in doc_keywords:
if keyword in stripped_line:
# If we find doc keywords and function name with parens, likely documentation
if f"{function_name}(" in stripped_line:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if the key word is in the line, the function name won't be found in the same line if they are following valid docstring patterns. We typically ask people to put a new line or add the details in next line.

return True
# Also catch cases where the keyword line itself contains the function name
if keyword.rstrip(":").lower() == function_name.lower():
return True

# Pattern 4: Check for common docstring patterns
# Lines that start with common documentation patterns
doc_starters = ['"""', "'''", "# ", "## ", "### ", "*", "-", "•"]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I am calling a member function, that would include . e.g. .wait_for_status(). I may have a resource class in my repo.

if any(stripped_line.startswith(starter) for starter in doc_starters):
if f"{function_name}(" in stripped_line:
return True

return False


def _git_grep(pattern: str) -> list[str]:
"""Run git grep with a pattern and return matching lines.

- Uses dynamically detected regex engine (prefers PCRE ``-P``, falls back to basic ``-G``).
- Includes untracked files so local changes are considered.
- Return an empty list when no matches are found (rc=1).
- Raise on other non-zero exit codes.
"""
cmd = [
"git",
"grep",
"-n", # include line numbers
"--no-color",
"--untracked",
"-I", # ignore binary files
_detect_supported_grep_flag(),
"-e", # safely handle patterns starting with dash
pattern,
]
result = subprocess.run(cmd, check=False, capture_output=True, text=True)
if result.returncode == 0:
return [line for line in result.stdout.splitlines() if line]
# rc=1 means no matches were found
if result.returncode == 1:
return []

error_message = result.stderr.strip() or "Unknown git grep error"
raise RuntimeError(f"git grep failed (rc={result.returncode}) for pattern {pattern!r}: {error_message}")


def _iter_functions(tree: ast.Module) -> Iterable[ast.FunctionDef]:
"""
Get all function from python file
Expand Down Expand Up @@ -61,6 +244,8 @@ def process_file(py_file: str, func_ignore_prefix: list[str], file_ignore_list:
with open(py_file) as fd:
tree = parse(source=fd.read())

unused_messages: list[str] = []

for func in _iter_functions(tree=tree):
if func_ignore_prefix and is_ignore_function_list(ignore_prefix_list=func_ignore_prefix, function=func):
LOGGER.debug(f"Skipping function: {func.name}")
Expand All @@ -75,25 +260,59 @@ def process_file(py_file: str, func_ignore_prefix: list[str], file_ignore_list:
continue

used = False
_func_grep_found = subprocess.check_output(["git", "grep", "-wE", f"{func.name}(.*)"], shell=False)

for entry in _func_grep_found.decode().splitlines():
_, _line = entry.split(":", 1)
# First, look for call sites: function_name(...)
for entry in _git_grep(pattern=_build_call_pattern(function_name=func.name)):
# git grep -n output format: path:line-number:line-content
# Use split to properly handle git grep format: first colon separates path from line number,
# second colon separates line number from content
parts = entry.split(":", 2)
if len(parts) != 3:
continue
_, _, _line = parts

# ignore its own definition
if f"def {func.name}" in _line:
continue

# ignore commented lines
if _line.strip().startswith("#"):
continue

# Filter out documentation patterns that aren't actual function calls
# This prevents false positives from docstrings, parameter descriptions, etc.
if _is_documentation_pattern(_line, func.name):
continue

if func.name in _line:
used = True
break

# If not found and it's a pytest fixture, also search for parameter usage in function definitions
if not used and is_pytest_fixture(func=func):
param_pattern = _build_fixture_param_pattern(function_name=func.name)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should also look for fixtures in usefixtures()

for entry in _git_grep(pattern=param_pattern):
# git grep -n output format: path:line-number:line-content
# Use split to properly handle git grep format: first colon separates path from line number,
# second colon separates line number from content
parts = entry.split(":", 2)
if len(parts) != 3:
continue
_path, _lineno, _line = parts

# ignore commented lines
if _line.strip().startswith("#"):
continue

used = True
break

if not used:
return f"{os.path.relpath(py_file)}:{func.name}:{func.lineno}:{func.col_offset} Is not used anywhere in the code."
unused_messages.append(
f"{os.path.relpath(py_file)}:{func.name}:{func.lineno}:{func.col_offset} Is not used anywhere in the code."
)

return ""
return "\n".join(unused_messages)


@click.command()
Expand Down Expand Up @@ -124,28 +343,53 @@ def get_unused_functions(
func_ignore_prefix = exclude_function_prefixes or unused_code_config.get("exclude_function_prefix", [])
file_ignore_list = exclude_files or unused_code_config.get("exclude_files", [])

jobs: list[Future] = []
jobs: dict[Future, str] = {}
if not os.path.exists(".git"):
LOGGER.error("Must be run from a git repository")
sys.exit(1)

# Pre-flight grep flag detection to fail fast with clear error if unsupported
try:
detected_flag = _detect_supported_grep_flag()
LOGGER.debug(f"Using git grep flag: {detected_flag}")
except RuntimeError as e:
LOGGER.error(str(e))
sys.exit(1)

# res = process_file(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You might want to remove these.

# py_file="tests/unused_code/unused_code_file_for_test.py",
# func_ignore_prefix=func_ignore_prefix,
# file_ignore_list=file_ignore_list,
# )
# __import__("ipdb").set_trace()
# return
with ThreadPoolExecutor() as executor:
for py_file in all_python_files():
jobs.append(
executor.submit(
process_file,
py_file=py_file,
func_ignore_prefix=func_ignore_prefix,
file_ignore_list=file_ignore_list,
)
future = executor.submit(
process_file,
py_file=py_file,
func_ignore_prefix=func_ignore_prefix,
file_ignore_list=file_ignore_list,
)
jobs[future] = py_file

processing_errors: list[str] = []
for future in as_completed(jobs):
try:
if unused_func := future.result():
unused_functions.append(unused_func)
except Exception as exc:
processing_errors.append(f"{jobs[future]}: {exc}")

for result in as_completed(jobs):
if unused_func := result.result():
unused_functions.append(unused_func)
if processing_errors:
joined = "\n".join(processing_errors)
LOGGER.error(f"One or more files failed to process:\n{joined}")
sys.exit(2)

if unused_functions:
click.echo("\n".join(unused_functions))
# Sort output for deterministic CI logs
sorted_output = sorted(unused_functions)
click.echo("\n".join(sorted_output))
sys.exit(1)


Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ skip_empty = true

[tool.coverage.html]
directory = ".tests_coverage"
show_contexts = true
show_contexts = false

[tool.uv]
default-groups = [ "dev", "test" ]
Expand Down
Loading