Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 119 additions & 1 deletion gittensor/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import bittensor as bt

from gittensor.constants import MIN_TOKEN_SCORE_FOR_BASE_SCORE
from gittensor.constants import MERGED_PR_BASE_SCORE, MIN_TOKEN_SCORE_FOR_BASE_SCORE
from gittensor.utils.utils import parse_repo_name

GITHUB_DOMAIN = 'https://github.com/'
Expand Down Expand Up @@ -488,6 +488,14 @@ def __add__(self, other: 'ScoreBreakdown') -> 'ScoreBreakdown':
)


class ScoringCategory(Enum):
    """How a scored file is bucketed when aggregating PR results.

    SOURCE   -- non-test code files scored via tree-sitter tree-diff.
    TEST     -- files identified as tests, regardless of scoring method.
    NON_CODE -- everything else (line-count scored, skipped, binary, etc.).
    """

    SOURCE = 'source'
    TEST = 'test'
    NON_CODE = 'non_code'


@dataclass
class FileScoreResult:
"""Result of scoring a single file."""
Expand All @@ -500,6 +508,14 @@ class FileScoreResult:
scoring_method: str # 'tree-diff', 'line-count', 'skipped-*'
breakdown: Optional[ScoreBreakdown] = None # Only populated for tree-diff scoring

@property
def category(self) -> ScoringCategory:
if self.is_test_file:
return ScoringCategory.TEST
if self.scoring_method == 'tree-diff':
return ScoringCategory.SOURCE
return ScoringCategory.NON_CODE


@dataclass
class PrScoringResult:
Expand All @@ -510,9 +526,111 @@ class PrScoringResult:

total_score: float
total_nodes_scored: int # Total AST nodes scored across all files
total_lines: int # Total lines changed across all files
file_results: List[FileScoreResult]
score_breakdown: Optional[ScoreBreakdown] = None # Aggregated breakdown across all files

@property
def density(self) -> float:
"""Code density (total_score / total_lines), capped at MAX_CODE_DENSITY_MULTIPLIER"""
from gittensor.constants import MAX_CODE_DENSITY_MULTIPLIER

if self.total_lines <= 0:
return 0.0
return min(self.total_score / self.total_lines, MAX_CODE_DENSITY_MULTIPLIER)


# Module-level sentinel for "no results in this category". Treated as
# immutable by convention -- callers must not mutate it (its file_results
# list is shared across every use).
_EMPTY_SCORING_RESULT = PrScoringResult(
    file_results=[],
    total_lines=0,
    total_nodes_scored=0,
    total_score=0.0,
)


@dataclass
class PrScoringResultCategorized:
    """Scoring results split by ScoringCategory, with aggregate totals.

    Attributes:
        total_score: Sum of file scores across every category.
        total_nodes_scored: Total AST nodes scored across every category.
        score_breakdown: Aggregated breakdown across all files, or None if
            no file produced one.
        by_category: Per-category results; categories with no files are
            absent from the mapping (use get() for a safe lookup).
    """

    total_score: float
    total_nodes_scored: int
    score_breakdown: Optional[ScoreBreakdown]
    by_category: Dict[ScoringCategory, PrScoringResult]

    @property
    def file_results(self) -> List[FileScoreResult]:
        """All file results, concatenated in category-group order."""
        results: List[FileScoreResult] = []
        for pr_result in self.by_category.values():
            results.extend(pr_result.file_results)
        return results

    def get(self, category: ScoringCategory) -> PrScoringResult:
        """Get results for a specific category; an empty result if none.

        A fresh empty PrScoringResult is built on each miss (instead of
        handing out a shared sentinel) so a caller that mutates the
        returned object -- e.g. appends to its file_results list -- cannot
        corrupt state seen by every later caller.
        """
        result = self.by_category.get(category)
        if result is not None:
            return result
        return PrScoringResult(
            total_score=0.0,
            total_nodes_scored=0,
            total_lines=0,
            file_results=[],
        )

    def calculate_initial_base_score(self) -> float:
        """Sum of per-category density-scaled base scores.

        Returns 0.0 when the aggregate token score is below
        MIN_TOKEN_SCORE_FOR_BASE_SCORE (PRs under the threshold earn no
        base score). Empty categories contribute 0 (their density is 0.0).
        """
        token_score = self.score_breakdown.total_score if self.score_breakdown else 0.0
        if token_score < MIN_TOKEN_SCORE_FOR_BASE_SCORE:
            return 0.0
        return sum(MERGED_PR_BASE_SCORE * self.get(cat).density for cat in ScoringCategory)

    def calculate_contribution_bonus(self) -> float:
        """Contribution bonus derived from the SOURCE category score only.

        Scales linearly with the SOURCE score, reaching MAX_CONTRIBUTION_BONUS
        at CONTRIBUTION_SCORE_FOR_FULL_BONUS; result rounded to 2 decimals.
        """
        # Deferred import keeps constants lazy, matching density's pattern.
        from gittensor.constants import CONTRIBUTION_SCORE_FOR_FULL_BONUS, MAX_CONTRIBUTION_BONUS

        source_score = self.get(ScoringCategory.SOURCE).total_score
        bonus_percent = min(1.0, source_score / CONTRIBUTION_SCORE_FOR_FULL_BONUS)
        return round(bonus_percent * MAX_CONTRIBUTION_BONUS, 2)

    @classmethod
    def from_file_results(
        cls,
        file_results: List[FileScoreResult],
    ) -> 'PrScoringResultCategorized':
        """Build a categorized result from file results in a single pass.

        Accumulation runs in input order so floating-point totals match a
        naive sequential sum over file_results.
        """
        # Per-category accumulators, keyed lazily as categories appear.
        cat_files: Dict[ScoringCategory, List[FileScoreResult]] = {}
        cat_score: Dict[ScoringCategory, float] = {}
        cat_nodes: Dict[ScoringCategory, int] = {}
        cat_lines: Dict[ScoringCategory, int] = {}
        cat_breakdowns: Dict[ScoringCategory, List[ScoreBreakdown]] = {}

        # Aggregate totals across every category.
        total_score = 0.0
        total_nodes = 0
        all_breakdowns: List[ScoreBreakdown] = []

        for f in file_results:
            cat = f.category
            cat_files.setdefault(cat, []).append(f)
            cat_score[cat] = cat_score.get(cat, 0.0) + f.score
            cat_nodes[cat] = cat_nodes.get(cat, 0) + f.nodes_scored
            cat_lines[cat] = cat_lines.get(cat, 0) + f.total_lines
            total_score += f.score
            total_nodes += f.nodes_scored
            if f.breakdown is not None:
                cat_breakdowns.setdefault(cat, []).append(f.breakdown)
                all_breakdowns.append(f.breakdown)

        by_category: Dict[ScoringCategory, PrScoringResult] = {}
        for cat, results in cat_files.items():
            bd = cat_breakdowns.get(cat)
            by_category[cat] = PrScoringResult(
                total_score=cat_score[cat],
                total_nodes_scored=cat_nodes[cat],
                total_lines=cat_lines[cat],
                file_results=results,
                # Only aggregate a breakdown when at least one file had one.
                score_breakdown=sum(bd, start=ScoreBreakdown()) if bd else None,
            )

        return cls(
            total_score=total_score,
            total_nodes_scored=total_nodes,
            score_breakdown=sum(all_breakdowns, start=ScoreBreakdown()) if all_breakdowns else None,
            by_category=by_category,
        )


@dataclass
class CachedEvaluation:
Expand Down
40 changes: 14 additions & 26 deletions gittensor/validator/oss_contributions/scoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,19 @@

import bittensor as bt

from gittensor.classes import Issue, MinerEvaluation, PrScoringResult, PRState, PullRequest
from gittensor.classes import (
Issue,
MinerEvaluation,
PrScoringResultCategorized,
PRState,
PullRequest,
)
from gittensor.constants import (
CONTRIBUTION_SCORE_FOR_FULL_BONUS,
EXCESSIVE_PR_PENALTY_BASE_THRESHOLD,
MAINTAINER_ASSOCIATIONS,
MAINTAINER_ISSUE_MULTIPLIER,
MAX_CODE_DENSITY_MULTIPLIER,
MAX_CONTRIBUTION_BONUS,
MAX_ISSUE_CLOSE_WINDOW_DAYS,
MAX_OPEN_PR_THRESHOLD,
MERGED_PR_BASE_SCORE,
MIN_TOKEN_SCORE_FOR_BASE_SCORE,
OPEN_PR_COLLATERAL_PERCENT,
OPEN_PR_THRESHOLD_TOKEN_SCORE,
Expand Down Expand Up @@ -152,8 +154,8 @@ def calculate_base_score(
token_config: TokenConfig,
file_contents: Dict[str, FileContentPair],
) -> float:
"""Calculate base score using code density scaling + contribution bonus."""
scoring_result: PrScoringResult = calculate_token_score_from_file_changes(
"""Calculate base score using per-category code density scaling + contribution bonus."""
scoring_result: PrScoringResultCategorized = calculate_token_score_from_file_changes(
pr.file_changes or [],
file_contents,
token_config,
Expand All @@ -168,25 +170,12 @@ def calculate_base_score(
pr.leaf_count = scoring_result.score_breakdown.leaf_count
pr.leaf_score = scoring_result.score_breakdown.leaf_score

# Calculate total lines changed across all files
total_lines = sum(f.total_lines for f in scoring_result.file_results)

# Check minimum token score threshold for base score. PRs below threshold get 0 base score
if pr.token_score < MIN_TOKEN_SCORE_FOR_BASE_SCORE:
code_density = 0.0
initial_base_score = 0.0
elif total_lines > 0:
code_density = min(pr.token_score / total_lines, MAX_CODE_DENSITY_MULTIPLIER)
initial_base_score = MERGED_PR_BASE_SCORE * code_density
else:
code_density = 0.0
initial_base_score = 0.0
initial_base_score = scoring_result.calculate_initial_base_score()

# Calculate contribution bonus, capped
bonus_percent = min(1.0, scoring_result.total_score / CONTRIBUTION_SCORE_FOR_FULL_BONUS)
contribution_bonus = round(bonus_percent * MAX_CONTRIBUTION_BONUS, 2)
# Calculate contribution bonus from SOURCE category only
contribution_bonus = scoring_result.calculate_contribution_bonus()

# Final base score = density-scaled base + contribution bonus
# Final base score = sum of per-category density bases + contribution bonus
base_score = round(initial_base_score + contribution_bonus, 2)

# Log with note if below token threshold
Expand All @@ -196,8 +185,7 @@ def calculate_base_score(
else ''
)
bt.logging.info(
f'Base score: {initial_base_score:.2f} (density {code_density:.2f}){threshold_note} + {contribution_bonus} bonus '
f'({bonus_percent * 100:.0f}% of max {MAX_CONTRIBUTION_BONUS}) = {base_score:.2f}'
f'Base score: {initial_base_score:.2f}{threshold_note} + {contribution_bonus} bonus = {base_score:.2f}'
)

return base_score
Expand Down
38 changes: 11 additions & 27 deletions gittensor/validator/utils/tree_sitter_scoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from gittensor.classes import (
FileScoreResult,
PrScoringResult,
PrScoringResultCategorized,
ScoreBreakdown,
)
from gittensor.constants import (
Expand Down Expand Up @@ -234,7 +234,7 @@ def calculate_token_score_from_file_changes(
file_contents: Dict[str, FileContentPair],
weights: TokenConfig,
programming_languages: Dict[str, LanguageConfig],
) -> PrScoringResult:
) -> PrScoringResultCategorized:
"""
Calculate contribution score using tree-sitter AST comparison.

Expand All @@ -245,18 +245,17 @@ def calculate_token_score_from_file_changes(
programming_languages: Language weight mapping (for fallback/documentation files)

Returns:
PrScoringResult with total score and per-file details
PrScoringResultCategorized with total score, per-file details, and per-category breakdowns
"""
if not file_changes:
return PrScoringResult(
return PrScoringResultCategorized(
total_score=0.0,
total_nodes_scored=0,
file_results=[],
score_breakdown=None,
by_category={},
)

file_results: List[FileScoreResult] = []
total_score = 0.0
total_nodes_scored = 0

for file in file_changes:
ext = file.file_extension or ''
Expand Down Expand Up @@ -284,8 +283,6 @@ def calculate_token_score_from_file_changes(
lang_weight = lang_config.weight if lang_config else DEFAULT_PROGRAMMING_LANGUAGE_WEIGHT
file_score = lang_weight * lines_to_score * file_weight

total_score += file_score

file_results.append(
FileScoreResult(
filename=file.short_name,
Expand Down Expand Up @@ -373,9 +370,6 @@ def calculate_token_score_from_file_changes(
# Track nodes scored for this file
nodes_scored = file_breakdown.added_count + file_breakdown.deleted_count

total_score += file_score
total_nodes_scored += nodes_scored

file_results.append(
FileScoreResult(
filename=file.short_name,
Expand All @@ -388,23 +382,13 @@ def calculate_token_score_from_file_changes(
)
)

# Compute total raw lines for logging
total_raw_lines = sum(f.total_lines for f in file_results)

# Compute aggregate breakdown from file_results
breakdowns = [r.breakdown for r in file_results if r.breakdown is not None]
total_breakdown = sum(breakdowns, start=ScoreBreakdown()) if breakdowns else None
result = PrScoringResultCategorized.from_file_results(file_results)

log_scoring_results(
file_results,
total_score,
total_raw_lines,
total_breakdown,
result.total_score,
sum(r.total_lines for r in result.by_category.values()),
result.score_breakdown,
)

return PrScoringResult(
total_score=total_score,
total_nodes_scored=total_nodes_scored,
file_results=file_results,
score_breakdown=total_breakdown,
)
return result
Loading