braintrustdata · ibolmo · Mar 25, 2025 · Mar 25, 2025 · ibolmo · Mar 25, 2025
diff --git a/py/autoevals/__init__.py b/py/autoevals/__init__.py
@@ -51,7 +51,7 @@
 ```python
 import os
 from openai import AsyncOpenAI
-from autoevals.llm import Correctness
+from autoevals.llm import Factuality
 
 # Configure client to use Braintrust AI Proxy
 client = AsyncOpenAI(
@@ -60,7 +60,7 @@
 )
 
 # Use with any evaluator
-evaluator = Correctness(client=client)
+evaluator = Factuality(client=client)
 ```
 
 **Braintrust integration**:

diff --git a/py/autoevals/llm.py b/py/autoevals/llm.py
@@ -723,106 +723,3 @@ class Translation(SpecFileClassifier):
     """
 
     pass
-
-
-class Correctness(SpecFileClassifier):
-    """Evaluate if a solution correctly solves a given problem.
-
-    This evaluator uses LLM-based analysis to determine if a solution correctly
-    addresses the given problem requirements, considering aspects like:
-    - Functional correctness
-    - Edge case handling
-    - Input validation
-    - Output format compliance
-    - Implementation completeness
-
-    Example:
-        ```python
-        from openai import OpenAI
-        from autoevals import Correctness
-
-        correctness = Correctness(client=OpenAI())
-        result = correctness.eval(
-            instructions='''
-                Write a function that takes a list of integers and returns their sum.
-                The function should handle empty lists by returning 0.
-            ''',
-            output='''
-                def sum_list(numbers):
-                    if not numbers:
-                        return 0
-                    return sum(numbers)
-            '''
-        )
-
-        print(result.score)  # 1 if correct, 0 if incorrect
-        print(result.metadata["rationale"])  # Detailed explanation
-        print(result.metadata["choice"])  # Selected choice (correct/incorrect)
-        ```
-
-    Args:
-        instructions: Problem description or task requirements to evaluate against
-        output: Solution to evaluate (code, text, or other content)
-
-    Returns:
-        Score object with:
-        - score: 1 if solution is correct, 0 if incorrect
-        - metadata.rationale: Detailed explanation of the evaluation
-        - metadata.choice: Selected choice (correct/incorrect)
-    """
-
-    pass
-
-
-class Complexity(SpecFileClassifier):
-    """Evaluate the complexity and efficiency of a solution.
-
-    This evaluator uses LLM-based analysis to assess various aspects of solution complexity:
-    - Time complexity (Big O notation)
-    - Space complexity
-    - Code readability and maintainability
-    - Implementation efficiency
-    - Resource utilization
-    - Algorithmic optimizations
-    - Design patterns and best practices
-
-    Example:
-        ```python
-        from autoevals import Complexity
-
-        complexity = Complexity(client=OpenAI())
-        result = complexity.eval(
-            instructions="Implement a function to find duplicates in a list",
-            output='''
-                def find_duplicates(arr):
-                    seen = set()
-                    duplicates = set()
-                    for x in arr:
-                        if x in seen:
-                            duplicates.add(x)
-                        seen.add(x)
-                    return list(duplicates)
-            '''
-        )
-
-        print(result.score)  # 1 if efficient, 0 if inefficient
-        print(result.metadata["rationale"])  # Detailed complexity analysis
-        print(result.metadata["choice"])  # Selected choice (efficient/inefficient)
-        print(result.metadata["time_complexity"])  # Estimated Big O notation
-        print(result.metadata["space_complexity"])  # Space usage analysis
-        ```
-
-    Args:
-        instructions: Problem description or requirements to evaluate against
-        output: Solution to analyze for complexity (code, algorithm, system design)
-
-    Returns:
-        Score object with:
-        - score: 1 if efficient, 0 if inefficient
-        - metadata.rationale: Detailed complexity analysis
-        - metadata.choice: Selected choice (efficient/inefficient)
-        - metadata.time_complexity: Time complexity analysis
-        - metadata.space_complexity: Space complexity analysis
-    """
-
-    pass
diff --git a/py/autoevals/version.py b/py/autoevals/version.py
@@ -1 +1 @@
-VERSION = "0.0.125"
+VERSION = "0.0.126"