diff --git a/py/autoevals/llm.py b/py/autoevals/llm.py
index 7759f09..03252b0 100644
--- a/py/autoevals/llm.py
+++ b/py/autoevals/llm.py
@@ -49,7 +49,6 @@
 import re
 from collections import defaultdict
 from dataclasses import dataclass
-from typing import Dict, List, Optional

 import chevron
 import yaml
@@ -126,9 +125,9 @@ def build_classification_tools(useCoT, choice_strings):
 class OpenAIScorer(ScorerWithPartial):
     def __init__(
         self,
-        api_key: Optional[str] = None,
-        base_url: Optional[str] = None,
-        client: Optional[Client] = None,
+        api_key: str | None = None,
+        base_url: str | None = None,
+        client: Client | None = None,
     ) -> None:
         self.extra_args = {}
         if api_key:
@@ -142,10 +141,10 @@ def __init__(
 class OpenAILLMScorer(OpenAIScorer):
     def __init__(
         self,
-        temperature: Optional[float] = None,
-        api_key: Optional[str] = None,
-        base_url: Optional[str] = None,
-        client: Optional[Client] = None,
+        temperature: float | None = None,
+        api_key: str | None = None,
+        base_url: str | None = None,
+        client: Client | None = None,
     ) -> None:
         super().__init__(
             api_key=api_key,
@@ -159,7 +158,7 @@ class OpenAILLMClassifier(OpenAILLMScorer):
     def __init__(
         self,
         name: str,
-        messages: List,
+        messages: list,
         model,
         choice_scores,
         classification_tools,
@@ -169,7 +168,7 @@ def __init__(
         engine=None,
         api_key=None,
         base_url=None,
-        client: Optional[Client] = None,
+        client: Client | None = None,
     ):
         super().__init__(
             client=client,
@@ -264,11 +263,11 @@ def _run_eval_sync(self, output, expected, **kwargs):
 @dataclass
 class ModelGradedSpec:
     prompt: str
-    choice_scores: Dict[str, float]
-    model: Optional[str] = None
-    engine: Optional[str] = None
-    use_cot: Optional[bool] = None
-    temperature: Optional[float] = None
+    choice_scores: dict[str, float]
+    model: str | None = None
+    engine: str | None = None
+    use_cot: bool | None = None
+    temperature: float | None = None


 class LLMClassifier(OpenAILLMClassifier):
@@ -316,7 +315,7 @@ class LLMClassifier(OpenAILLMClassifier):
         **extra_render_args: Additional template variables
     """

-    _SPEC_FILE_CONTENTS: Dict[str, str] = defaultdict(str)
+    _SPEC_FILE_CONTENTS: dict[str, str] = defaultdict(str)

     def __init__(
         self,
@@ -330,7 +329,7 @@ def __init__(
         engine=None,
         api_key=None,
         base_url=None,
-        client: Optional[Client] = None,
+        client: Client | None = None,
         **extra_render_args,
     ):
         choice_strings = list(choice_scores.keys())
@@ -359,11 +358,11 @@ def __init__(
         )

     @classmethod
-    def from_spec(cls, name: str, spec: ModelGradedSpec, client: Optional[Client] = None, **kwargs):
+    def from_spec(cls, name: str, spec: ModelGradedSpec, client: Client | None = None, **kwargs):
         return cls(name, spec.prompt, spec.choice_scores, client=client, **kwargs)

     @classmethod
-    def from_spec_file(cls, name: str, path: str, client: Optional[Client] = None, **kwargs):
+    def from_spec_file(cls, name: str, path: str, client: Client | None = None, **kwargs):
         if cls._SPEC_FILE_CONTENTS[name] == "":
             with open(path) as f:
                 cls._SPEC_FILE_CONTENTS[name] = f.read()
@@ -381,7 +380,7 @@ def __new__(
         temperature=None,
         api_key=None,
         base_url=None,
-        client: Optional[Client] = None,
+        client: Client | None = None,
     ):
         kwargs = {}
         if model is not None:
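Reviewer note: the changes above are mechanical — `Optional[X]` becomes `X | None` (PEP 604) and `Dict`/`List` become the builtin generics `dict`/`list` (PEP 585). Both spellings are evaluated when class bodies execute, so they require Python 3.10+ at runtime (hence the new classifier in `setup.py` below). A minimal sketch with a hypothetical `ExampleSpec` mirroring the `ModelGradedSpec` shape:

```python
from dataclasses import dataclass, field


@dataclass
class ExampleSpec:  # hypothetical stand-in for ModelGradedSpec
    prompt: str
    choice_scores: dict[str, float] = field(default_factory=dict)  # was Dict[str, float]
    model: str | None = None  # was Optional[str]


spec = ExampleSpec(prompt="Rate {{output}}", choice_scores={"Y": 1.0, "N": 0.0})
print(spec.model is None)  # True
```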
diff --git a/py/autoevals/moderation.py b/py/autoevals/moderation.py
index 76090ba..08ea9fe 100644
--- a/py/autoevals/moderation.py
+++ b/py/autoevals/moderation.py
@@ -1,5 +1,3 @@
-from typing import Optional
-
 from autoevals.llm import OpenAIScorer

 from .oai import Client, arun_cached_request, run_cached_request
@@ -50,7 +48,7 @@ def __init__(
         threshold=None,
         api_key=None,
         base_url=None,
-        client: Optional[Client] = None,
+        client: Client | None = None,
     ):
         """Initialize a Moderation scorer.
diff --git a/py/autoevals/oai.py b/py/autoevals/oai.py
index ff8b4f2..c439a09 100644
--- a/py/autoevals/oai.py
+++ b/py/autoevals/oai.py
@@ -4,9 +4,10 @@
 import textwrap
 import time
 import warnings
+from collections.abc import Callable
 from contextvars import ContextVar
 from dataclasses import dataclass
-from typing import Any, Callable, Dict, Optional, Protocol, Tuple, Type, TypeVar, Union, cast, runtime_checkable
+from typing import Any, Optional, Protocol, TypeVar, Union, cast, runtime_checkable

 PROXY_URL = "https://api.braintrust.dev/v1/proxy"
@@ -50,10 +51,10 @@ def moderations(self) -> Moderations: ...
     def api_key(self) -> str: ...

     @property
-    def organization(self) -> Optional[str]: ...
+    def organization(self) -> str | None: ...

     @property
-    def base_url(self) -> Union[str, Any, None]: ...
+    def base_url(self) -> str | Any | None: ...


 class AsyncOpenAI(OpenAI): ...
@@ -75,18 +76,18 @@ class Moderation(Protocol):
         acreate: Callable[..., Any]
         create: Callable[..., Any]

-    api_key: Optional[str]
-    api_base: Optional[str]
-    base_url: Optional[str]
+    api_key: str | None
+    api_base: str | None
+    base_url: str | None

     class error(Protocol):
         class RateLimitError(Exception): ...


-_openai_module: Optional[Union[OpenAIV1Module, OpenAIV0Module]] = None
+_openai_module: OpenAIV1Module | OpenAIV0Module | None = None


-def get_openai_module() -> Union[OpenAIV1Module, OpenAIV0Module]:
+def get_openai_module() -> OpenAIV1Module | OpenAIV0Module:
     global _openai_module

     if _openai_module is not None:
@@ -150,11 +151,11 @@ def complete(self, **kwargs):
         ```
     """

-    openai: Union[OpenAIV0Module, OpenAIV1Module.OpenAI]
+    openai: OpenAIV0Module | OpenAIV1Module.OpenAI
     complete: Callable[..., Any] = None  # type: ignore # Set in __post_init__
     embed: Callable[..., Any] = None  # type: ignore # Set in __post_init__
     moderation: Callable[..., Any] = None  # type: ignore # Set in __post_init__
-    RateLimitError: Type[Exception] = None  # type: ignore # Set in __post_init__
+    RateLimitError: type[Exception] = None  # type: ignore # Set in __post_init__
     is_async: bool = False
     _is_wrapped: bool = False
@@ -199,11 +200,11 @@ def is_wrapped(self) -> bool:
 T = TypeVar("T")

-_named_wrapper: Optional[Type[Any]] = None
-_wrap_openai: Optional[Callable[[Any], Any]] = None
+_named_wrapper: type[Any] | None = None
+_wrap_openai: Callable[[Any], Any] | None = None


-def get_openai_wrappers() -> Tuple[Type[Any], Callable[[Any], Any]]:
+def get_openai_wrappers() -> tuple[type[Any], Callable[[Any], Any]]:
     global _named_wrapper, _wrap_openai

     if _named_wrapper is not None and _wrap_openai is not None:
@@ -213,7 +214,7 @@ def get_openai_wrappers():
         from braintrust.oai import NamedWrapper as BraintrustNamedWrapper  # type: ignore
         from braintrust.oai import wrap_openai  # type: ignore

-        _named_wrapper = cast(Type[Any], BraintrustNamedWrapper)
+        _named_wrapper = cast(type[Any], BraintrustNamedWrapper)
     except ImportError:

         class NamedWrapper:
@@ -237,7 +238,7 @@ def resolve_client(client: Client, is_async: bool = False) -> LLMClient:
     return LLMClient(openai=client, is_async=is_async)


-def init(client: Optional[Client] = None, is_async: bool = False):
+def init(client: Client | None = None, is_async: bool = False):
     """Initialize Autoevals with an optional custom LLM client.

     This function sets up the global client context for Autoevals to use. If no client is provided,
@@ -259,10 +260,10 @@


 def prepare_openai(
-    client: Optional[Client] = None,
+    client: Client | None = None,
     is_async: bool = False,
-    api_key: Optional[str] = None,
-    base_url: Optional[str] = None,
+    api_key: str | None = None,
+    base_url: str | None = None,
 ):
     """Prepares and configures an OpenAI client for use with AutoEval.
@@ -348,7 +349,7 @@ def prepare_openai(
     return LLMClient(openai=openai_obj, is_async=is_async)


-def post_process_response(resp: Any) -> Dict[str, Any]:
+def post_process_response(resp: Any) -> dict[str, Any]:
     # This normalizes against craziness in OpenAI v0 vs. v1
     if hasattr(resp, "to_dict"):
         # v0
@@ -358,18 +359,18 @@
         return resp.dict()


-def set_span_purpose(kwargs: Dict[str, Any]) -> None:
+def set_span_purpose(kwargs: dict[str, Any]) -> None:
     kwargs.setdefault("span_info", {}).setdefault("span_attributes", {})["purpose"] = "scorer"


 def run_cached_request(
     *,
-    client: Optional[LLMClient] = None,
+    client: LLMClient | None = None,
     request_type: str = "complete",
-    api_key: Optional[str] = None,
-    base_url: Optional[str] = None,
+    api_key: str | None = None,
+    base_url: str | None = None,
     **kwargs: Any,
-) -> Dict[str, Any]:
+) -> dict[str, Any]:
     wrapper = prepare_openai(client=client, is_async=False, api_key=api_key, base_url=base_url)
     if wrapper.is_wrapped:
         set_span_purpose(kwargs)
@@ -393,12 +394,12 @@

 async def arun_cached_request(
     *,
-    client: Optional[LLMClient] = None,
+    client: LLMClient | None = None,
     request_type: str = "complete",
-    api_key: Optional[str] = None,
-    base_url: Optional[str] = None,
+    api_key: str | None = None,
+    base_url: str | None = None,
     **kwargs: Any,
-) -> Dict[str, Any]:
+) -> dict[str, Any]:
     wrapper = prepare_openai(client=client, is_async=True, api_key=api_key, base_url=base_url)
     if wrapper.is_wrapped:
         set_span_purpose(kwargs)
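Reviewer note: `init()` — whose signature changes above — installs a client into the global context that scorers later resolve, per its docstring. A minimal sketch of the intended call pattern, assuming the openai v1 SDK is installed and `OPENAI_API_KEY` is set in the environment:

```python
import openai

from autoevals.oai import init

# Share one client across all autoevals scorers; a base_url pointing at a
# proxy (such as PROXY_URL above) would also work here.
init(client=openai.OpenAI())
```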
diff --git a/py/autoevals/ragas.py b/py/autoevals/ragas.py
index bc9f81b..2e432fe 100644
--- a/py/autoevals/ragas.py
+++ b/py/autoevals/ragas.py
@@ -58,7 +58,6 @@
 import asyncio
 import json
-from typing import Optional

 import chevron
@@ -133,12 +132,12 @@ def extract_entities_request(text, **extra_args):
     )


-async def aextract_entities(*, text, client: Optional[Client] = None, **extra_args):
+async def aextract_entities(*, text, client: Client | None = None, **extra_args):
     response = await arun_cached_request(client=client, **extract_entities_request(text=text, **extra_args))
     return json.loads(response["choices"][0]["message"]["tool_calls"][0]["function"]["arguments"])


-def extract_entities(*, text, client: Optional[Client] = None, **extra_args):
+def extract_entities(*, text, client: Client | None = None, **extra_args):
     response = run_cached_request(client=client, **extract_entities_request(text=text, **extra_args))
     return json.loads(response["choices"][0]["message"]["tool_calls"][0]["function"]["arguments"])
@@ -169,7 +168,7 @@ class ContextEntityRecall(OpenAILLMScorer):
         context: The context document(s) to search for entities in
     """

-    def __init__(self, pairwise_scorer=None, model=DEFAULT_RAGAS_MODEL, client: Optional[Client] = None, **kwargs):
+    def __init__(self, pairwise_scorer=None, model=DEFAULT_RAGAS_MODEL, client: Client | None = None, **kwargs):
         super().__init__(client=client, **kwargs)

         self.extraction_model = model
@@ -313,7 +312,7 @@ class ContextRelevancy(OpenAILLMScorer):
         context: The context document(s) to evaluate
     """

-    def __init__(self, pairwise_scorer=None, model=DEFAULT_RAGAS_MODEL, client: Optional[Client] = None, **kwargs):
+    def __init__(self, pairwise_scorer=None, model=DEFAULT_RAGAS_MODEL, client: Client | None = None, **kwargs):
         super().__init__(client=client, **kwargs)

         self.model = model
@@ -473,7 +472,7 @@ class ContextRecall(OpenAILLMScorer):
         context: The context document(s) to evaluate
     """

-    def __init__(self, pairwise_scorer=None, model=DEFAULT_RAGAS_MODEL, client: Optional[Client] = None, **kwargs):
+    def __init__(self, pairwise_scorer=None, model=DEFAULT_RAGAS_MODEL, client: Client | None = None, **kwargs):
         super().__init__(client=client, **kwargs)

         self.model = model
@@ -633,7 +632,7 @@ class ContextPrecision(OpenAILLMScorer):
         context: The context document(s) to evaluate
     """

-    def __init__(self, pairwise_scorer=None, model=DEFAULT_RAGAS_MODEL, client: Optional[Client] = None, **kwargs):
+    def __init__(self, pairwise_scorer=None, model=DEFAULT_RAGAS_MODEL, client: Client | None = None, **kwargs):
         super().__init__(client=client, **kwargs)

         self.model = model
@@ -839,28 +838,28 @@ def extract_faithfulness_request(context, statements, **extra_args):
     )


-async def aextract_statements(question, answer, client: Optional[Client] = None, **extra_args):
+async def aextract_statements(question, answer, client: Client | None = None, **extra_args):
     response = await arun_cached_request(
         client=client, **extract_statements_request(question=question, answer=answer, **extra_args)
     )
     return load_function_call(response)


-def extract_statements(question, answer, client: Optional[Client] = None, **extra_args):
+def extract_statements(question, answer, client: Client | None = None, **extra_args):
     response = run_cached_request(
         client=client, **extract_statements_request(question=question, answer=answer, **extra_args)
     )
     return load_function_call(response)


-async def aextract_faithfulness(context, statements, client: Optional[Client] = None, **extra_args):
+async def aextract_faithfulness(context, statements, client: Client | None = None, **extra_args):
     response = await arun_cached_request(
         client=client, **extract_faithfulness_request(context=context, statements=statements, **extra_args)
     )
     return load_function_call(response)


-def extract_faithfulness(context, statements, client: Optional[Client] = None, **extra_args):
+def extract_faithfulness(context, statements, client: Client | None = None, **extra_args):
     response = run_cached_request(
         client=client, **extract_faithfulness_request(context=context, statements=statements, **extra_args)
     )
     return load_function_call(response)
@@ -895,7 +894,7 @@ class Faithfulness(OpenAILLMScorer):
         context: The context document(s) to evaluate against
     """

-    def __init__(self, model=DEFAULT_RAGAS_MODEL, client: Optional[Client] = None, **kwargs):
+    def __init__(self, model=DEFAULT_RAGAS_MODEL, client: Client | None = None, **kwargs):
         super().__init__(client=client, **kwargs)

         self.model = model
@@ -1061,7 +1060,7 @@ def __init__(
         strictness=3,
         temperature=0.5,
         embedding_model=DEFAULT_RAGAS_EMBEDDING_MODEL,
-        client: Optional[Client] = None,
+        client: Client | None = None,
         **kwargs,
     ):
         super().__init__(temperature=temperature, client=client, **kwargs)
@@ -1162,7 +1161,7 @@ def __init__(
         self,
         pairwise_scorer=None,
         model=DEFAULT_RAGAS_EMBEDDING_MODEL,
-        client: Optional[Client] = None,
+        client: Client | None = None,
         **kwargs,
     ):
         super().__init__(client=client, **kwargs)
@@ -1306,7 +1305,7 @@ def __init__(
         factuality_weight=0.75,
         answer_similarity_weight=0.25,
         answer_similarity=None,
-        client: Optional[Client] = None,
+        client: Client | None = None,
         **kwargs,
     ):
         super().__init__(client=client, **kwargs)
@@ -1387,9 +1386,9 @@ def load_function_call(response):
     return json.loads(response["choices"][0]["message"]["tool_calls"][0]["function"]["arguments"])


-async def aload_function_call_request(client: Optional[Client] = None, **kwargs):
+async def aload_function_call_request(client: Client | None = None, **kwargs):
     return load_function_call(await arun_cached_request(client=client, **kwargs))


-def load_function_call_request(client: Optional[Client] = None, **kwargs):
+def load_function_call_request(client: Client | None = None, **kwargs):
     return load_function_call(run_cached_request(client=client, **kwargs))
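Reviewer note: every RAGAS scorer touched above now takes `client: Client | None = None`. A rough usage sketch for `Faithfulness` — the keyword names follow the docstrings above, and the call issues real LLM requests, so it assumes a configured OpenAI key or a client passed via `init()`:

```python
from autoevals.ragas import Faithfulness

result = Faithfulness()(
    input="Where is the Eiffel Tower?",
    output="The Eiffel Tower is in Paris.",
    context="The Eiffel Tower is a landmark in Paris, France.",
)
print(result.score)  # a float in [0, 1], or None if the evaluation was skipped
```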
diff --git a/py/autoevals/score.py b/py/autoevals/score.py
index 067a47a..6025e9f 100644
--- a/py/autoevals/score.py
+++ b/py/autoevals/score.py
@@ -1,7 +1,7 @@
 import dataclasses
 import sys
 from abc import ABC, abstractmethod
-from typing import Any, Dict, Optional
+from typing import Any

 from .serializable_data_class import SerializableDataClass
@@ -13,14 +13,14 @@ class Score(SerializableDataClass):
     name: str
     """The name of the score. This should be a unique name for the scorer."""

-    score: Optional[float] = None
+    score: float | None = None
     """The score for the evaluation. This should be a float between 0 and 1. If the score is None, the evaluation is considered to be skipped."""

-    metadata: Dict[str, Any] = dataclasses.field(default_factory=dict)
+    metadata: dict[str, Any] = dataclasses.field(default_factory=dict)
     """Metadata for the score. This can be used to store additional information about the score."""

     # DEPRECATION_NOTICE: this field is deprecated, as errors are propagated up to the caller.
-    error: Optional[Exception] = None
+    error: Exception | None = None
     """Deprecated: The error field is deprecated, as errors are now propagated to the caller. The field will be removed in a future version of the library."""

     def as_dict(self):
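Reviewer note: `Score` is the return type of every scorer, so `score: float | None` is the most user-visible annotation change in this patch; `None` still means the evaluation was skipped, per the docstring above. For illustration:

```python
from autoevals.score import Score

s = Score(name="exact_match", score=1.0, metadata={"reason": "strings matched"})
print(s.as_dict())  # {"name": "exact_match", "score": 1.0, ...}
```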
diff --git a/py/autoevals/serializable_data_class.py b/py/autoevals/serializable_data_class.py
index 2fe62f6..8f9eeef 100644
--- a/py/autoevals/serializable_data_class.py
+++ b/py/autoevals/serializable_data_class.py
@@ -1,6 +1,6 @@
 import dataclasses
 import json
-from typing import Dict, Union, get_origin
+from typing import Union, get_origin


 class SerializableDataClass:
@@ -16,15 +16,15 @@ def __getitem__(self, item: str):
         return getattr(self, item)

     @classmethod
-    def from_dict(cls, d: Dict):
+    def from_dict(cls, d: dict):
         """Deserialize the object from a dictionary.
         This method is shallow and will not call from_dict() on nested objects."""
-        fields = set(f.name for f in dataclasses.fields(cls))
+        fields = {f.name for f in dataclasses.fields(cls)}
         filtered = {k: v for k, v in d.items() if k in fields}
         return cls(**filtered)

     @classmethod
-    def from_dict_deep(cls, d: Dict):
+    def from_dict_deep(cls, d: dict):
         """Deserialize the object from a dictionary.
         This method is deep and will call from_dict_deep() on nested objects."""
         fields = {f.name: f for f in dataclasses.fields(cls)}
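Reviewer note: the set-comprehension rewrite in `from_dict()` is behavior-preserving — unknown keys are still filtered out rather than raising `TypeError` in `cls(**filtered)`. A small sketch with a hypothetical subclass:

```python
from dataclasses import dataclass

from autoevals.serializable_data_class import SerializableDataClass


@dataclass
class Point(SerializableDataClass):  # hypothetical example class
    x: int
    y: int


# "z" is not a declared field, so from_dict() silently drops it.
print(Point.from_dict({"x": 1, "y": 2, "z": 3}))  # Point(x=1, y=2)
```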
diff --git a/py/autoevals/string.py b/py/autoevals/string.py
index 0dcba9a..a6dc39c 100644
--- a/py/autoevals/string.py
+++ b/py/autoevals/string.py
@@ -18,7 +18,6 @@
 """

 import threading
-from typing import Optional

 from polyleven import levenshtein as distance
@@ -120,7 +119,7 @@ def __init__(
         expected_min=0.7,
         api_key=None,
         base_url=None,
-        client: Optional[LLMClient] = None,
+        client: LLMClient | None = None,
     ):
         self.prefix = prefix
         self.expected_min = expected_min
diff --git a/py/autoevals/test_embeddings.py b/py/autoevals/test_embeddings.py
index 04ce76b..6df632a 100644
--- a/py/autoevals/test_embeddings.py
+++ b/py/autoevals/test_embeddings.py
@@ -1,7 +1,6 @@
 import asyncio

 from autoevals import EmbeddingSimilarity
-from autoevals.value import normalize_value

 SYNONYMS = [
     ("water", ["water", "H2O", "agua"]),
diff --git a/py/autoevals/test_oai.py b/py/autoevals/test_oai.py
index d1140f6..8f31479 100644
--- a/py/autoevals/test_oai.py
+++ b/py/autoevals/test_oai.py
@@ -1,5 +1,5 @@
 import sys
-from typing import Any, Union, cast
+from typing import Any, cast

 import openai
 import pytest
@@ -25,7 +25,7 @@
 )


-def unwrap_named_wrapper(obj: Union[NamedWrapper, OpenAIV1Module.OpenAI, OpenAIV0Module]) -> Any:
+def unwrap_named_wrapper(obj: NamedWrapper | OpenAIV1Module.OpenAI | OpenAIV0Module) -> Any:
     return getattr(obj, "_NamedWrapper__wrapped")
diff --git a/py/autoevals/test_ragas.py b/py/autoevals/test_ragas.py
index 0f0326d..a224a4d 100644
--- a/py/autoevals/test_ragas.py
+++ b/py/autoevals/test_ragas.py
@@ -1,8 +1,6 @@
 import asyncio
-from typing import cast

 import pytest
-from pytest import approx

 from autoevals.ragas import *
diff --git a/py/autoevals/test_serializable_data_class.py b/py/autoevals/test_serializable_data_class.py
index 0cade6a..e31e207 100644
--- a/py/autoevals/test_serializable_data_class.py
+++ b/py/autoevals/test_serializable_data_class.py
@@ -1,14 +1,13 @@
 import unittest
 from dataclasses import dataclass
-from typing import List, Optional

 from .serializable_data_class import SerializableDataClass


 @dataclass
 class PromptData(SerializableDataClass):
-    prompt: Optional[str] = None
-    options: Optional[dict] = None
+    prompt: str | None = None
+    options: dict | None = None


 @dataclass
@@ -18,9 +17,9 @@ class PromptSchema(SerializableDataClass):
     _xact_id: str
     name: str
     slug: str
-    description: Optional[str]
+    description: str | None
     prompt_data: PromptData
-    tags: Optional[List[str]]
+    tags: list[str] | None


 class TestSerializableDataClass(unittest.TestCase):
diff --git a/py/autoevals/test_values.py b/py/autoevals/test_values.py
index f808b2f..25bd6f2 100644
--- a/py/autoevals/test_values.py
+++ b/py/autoevals/test_values.py
@@ -1,4 +1,3 @@
-import pytest
 from pytest import approx

 from autoevals.list import ListContains
diff --git a/py/autoevals/version.py b/py/autoevals/version.py
index 5e70889..1cf6267 100644
--- a/py/autoevals/version.py
+++ b/py/autoevals/version.py
@@ -1 +1 @@
-VERSION = "0.0.130"
+VERSION = "0.1.0"
diff --git a/setup.py b/setup.py
index cc18d12..d8b7080 100644
--- a/setup.py
+++ b/setup.py
@@ -48,6 +48,7 @@
     },
     classifiers=[
        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.10",
        "Operating System :: OS Independent",
     ],
     package_dir={"": "py"},
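Reviewer note: the bump to 0.1.0 and the new `3.10` trove classifier reflect that `X | None` now appears in annotations evaluated at import time. The diff does not show a `python_requires` change, so a defensive guard like the following (hypothetical, not part of the package) may help downstream consumers pinning older interpreters:

```python
import sys

# autoevals 0.1.0 uses PEP 604/585 annotation syntax at import time,
# which raises a TypeError/SyntaxError on Python < 3.10.
if sys.version_info < (3, 10):
    raise RuntimeError("autoevals >= 0.1.0 requires Python 3.10 or newer")
```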