From b0658595fdca0fec3bf7b33b2584745f2d88af9a Mon Sep 17 00:00:00 2001 From: Rahul Bhatnagar Date: Sun, 24 Aug 2025 15:59:11 +0530 Subject: [PATCH 1/3] Move Utils / Exceptions module to main package and update imports --- src/ragas/exceptions.py | 73 +++++++ src/ragas/experimental/exceptions.py | 88 -------- src/ragas/experimental/utils.py | 310 --------------------------- src/ragas/utils.py | 59 +++++ 4 files changed, 132 insertions(+), 398 deletions(-) delete mode 100644 src/ragas/experimental/exceptions.py delete mode 100644 src/ragas/experimental/utils.py diff --git a/src/ragas/exceptions.py b/src/ragas/exceptions.py index 75688760a..7b65f3e17 100644 --- a/src/ragas/exceptions.py +++ b/src/ragas/exceptions.py @@ -49,3 +49,76 @@ class UploadException(RagasException): def __init__(self, status_code: int, message: str): self.status_code = status_code super().__init__(message) + + +# Exceptions migrated from experimental module +class RagasError(Exception): + """Base class for all Ragas-related exceptions.""" + + pass + + +class ValidationError(RagasError): + """Raised when field validation fails.""" + + pass + + +class DuplicateError(RagasError): + """Exception raised when a duplicate resource is created.""" + + pass + + +class NotFoundError(RagasError): + """Exception raised when a resource is not found.""" + + pass + + +class ResourceNotFoundError(NotFoundError): + """Exception raised when a resource doesn't exist.""" + + pass + + +class ProjectNotFoundError(ResourceNotFoundError): + """Exception raised when a project doesn't exist.""" + + pass + + +class DatasetNotFoundError(ResourceNotFoundError): + """Exception raised when a dataset doesn't exist.""" + + pass + + +class ExperimentNotFoundError(ResourceNotFoundError): + """Exception raised when an experiment doesn't exist.""" + + pass + + +class DuplicateResourceError(RagasError): + """Exception raised when multiple resources exist with the same identifier.""" + + pass + + +class DuplicateProjectError(DuplicateResourceError): + """Exception raised when multiple projects exist with the same name.""" + + pass + + +class DuplicateDatasetError(DuplicateResourceError): + """Exception raised when multiple datasets exist with the same name.""" + + pass + + +class DuplicateExperimentError(DuplicateResourceError): + """Exception raised when multiple experiments exist with the same name.""" + + pass diff --git a/src/ragas/experimental/exceptions.py b/src/ragas/experimental/exceptions.py deleted file mode 100644 index f7ec46a6d..000000000 --- a/src/ragas/experimental/exceptions.py +++ /dev/null @@ -1,88 +0,0 @@ -"""All the exceptions specific to the `ragas_experimental` project.""" - -__all__ = [ - "RagasError", - "ValidationError", - "DuplicateError", - "NotFoundError", - "ResourceNotFoundError", - "ProjectNotFoundError", - "DatasetNotFoundError", - "ExperimentNotFoundError", - "DuplicateResourceError", - "DuplicateProjectError", - "DuplicateDatasetError", - "DuplicateExperimentError", -] - - -class RagasError(Exception): - """Base class for all Ragas-related exceptions.""" - - pass - - -class ValidationError(RagasError): - """Raised when field validation fails.""" - - pass - - -class DuplicateError(RagasError): - """Raised when multiple items are found but only one was expected.""" - - pass - - -class NotFoundError(RagasError): - """Raised when an item is not found.""" - - pass - - -class ResourceNotFoundError(RagasError): - """Exception raised when a requested resource doesn't exist.""" - - pass - - -class ProjectNotFoundError(ResourceNotFoundError): - """Exception raised when a project doesn't exist.""" - - pass - - -class DatasetNotFoundError(ResourceNotFoundError): - """Exception raised when a dataset doesn't exist.""" - - pass - - -class ExperimentNotFoundError(ResourceNotFoundError): - """Exception raised when an experiment doesn't exist.""" - - pass - - -class DuplicateResourceError(RagasError): - """Exception raised when multiple resources exist with the same identifier.""" - - pass - - -class DuplicateProjectError(DuplicateResourceError): - """Exception raised when multiple projects exist with the same name.""" - - pass - - -class DuplicateDatasetError(DuplicateResourceError): - """Exception raised when multiple datasets exist with the same name.""" - - pass - - -class DuplicateExperimentError(DuplicateResourceError): - """Exception raised when multiple experiments exist with the same name.""" - - pass diff --git a/src/ragas/experimental/utils.py b/src/ragas/experimental/utils.py deleted file mode 100644 index a9299f592..000000000 --- a/src/ragas/experimental/utils.py +++ /dev/null @@ -1,310 +0,0 @@ -__all__ = [ - "create_nano_id", - "async_to_sync", - "get_test_directory", -] - -import asyncio -import functools -import os -import random -import string -import tempfile -import typing as t -import uuid -from pathlib import Path - -from rich.console import Console - -console = Console() - - -def create_nano_id(size=12): - # Define characters to use (alphanumeric) - alphabet = string.ascii_letters + string.digits - - # Generate UUID and convert to int - uuid_int = uuid.uuid4().int - - # Convert to base62 - result = "" - while uuid_int: - uuid_int, remainder = divmod(uuid_int, len(alphabet)) - result = alphabet[remainder] + result - - # Pad if necessary and return desired length - return result[:size] - - -def async_to_sync(async_func): - """Convert an async function to a sync function""" - - @functools.wraps(async_func) - def sync_wrapper(*args, **kwargs): - try: - loop = asyncio.get_event_loop() - if loop.is_running(): - import concurrent.futures - - with concurrent.futures.ThreadPoolExecutor() as executor: - future = executor.submit(asyncio.run, async_func(*args, **kwargs)) - return future.result() - else: - return loop.run_until_complete(async_func(*args, **kwargs)) - except RuntimeError: - return asyncio.run(async_func(*args, **kwargs)) - - return sync_wrapper - - -# Helper function for tests -def get_test_directory(): - """Create a test directory that will be cleaned up on process exit. - - Returns: - str: Path to test directory - """ - # Create a directory in the system temp directory - test_dir = os.path.join(tempfile.gettempdir(), f"ragas_test_{create_nano_id()}") - os.makedirs(test_dir, exist_ok=True) - - return test_dir - - -class MemorableNames: - """Generator for memorable, unique names for experiments and datasets.""" - - def __init__(self): - # List of adjectives (similar to what Docker uses) - self.adjectives = [ - "admiring", - "adoring", - "affectionate", - "agitated", - "amazing", - "angry", - "awesome", - "blissful", - "bold", - "boring", - "brave", - "busy", - "charming", - "clever", - "cool", - "compassionate", - "competent", - "condescending", - "confident", - "cranky", - "crazy", - "dazzling", - "determined", - "distracted", - "dreamy", - "eager", - "ecstatic", - "elastic", - "elated", - "elegant", - "eloquent", - "epic", - "fervent", - "festive", - "flamboyant", - "focused", - "friendly", - "frosty", - "gallant", - "gifted", - "goofy", - "gracious", - "happy", - "hardcore", - "heuristic", - "hopeful", - "hungry", - "infallible", - "inspiring", - "jolly", - "jovial", - "keen", - "kind", - "laughing", - "loving", - "lucid", - "magical", - "mystifying", - "modest", - "musing", - "naughty", - "nervous", - "nifty", - "nostalgic", - "objective", - "optimistic", - "peaceful", - "pedantic", - "pensive", - "practical", - "priceless", - "quirky", - "quizzical", - "relaxed", - "reverent", - "romantic", - "sad", - "serene", - "sharp", - "silly", - "sleepy", - "stoic", - "stupefied", - "suspicious", - "sweet", - "tender", - "thirsty", - "trusting", - "upbeat", - "vibrant", - "vigilant", - "vigorous", - "wizardly", - "wonderful", - "xenodochial", - "youthful", - "zealous", - "zen", - ] - - # List of influential computer scientists and tech entrepreneurs - self.scientists = [ - "turing", - "hopper", - "knuth", - "torvalds", - "ritchie", - "thompson", - "dijkstra", - "kay", - "wozniak", - "gates", - "jobs", - "musk", - "bezos", - "lovelace", - "berners_lee", - "cerf", - "gosling", - "kernighan", - "lamport", - "mccarthy", - "minsky", - "rossum", - "backus", - "engelbart", - "hamilton", - "chomsky", - "shannon", - "zuckerberg", - "page", - "brin", - "matsumoto", - "stallman", - "stroustrup", - "cook", - "neumann", - "babbage", - "tanenbaum", - "rivest", - "shamir", - "adleman", - "carmack", - "andreessen", - "ullman", - "postel", - "huffman", - "boole", - "curry", - "liskov", - "wing", - "goldwasser", - "hoare", - "milner", - "perlis", - "sutherland", - "tarjan", - "valiant", - "yao", - "hopcroft", - "naur", - "wilkes", - "codd", - "diffie", - "hellman", - "pearl", - "thiel", - "narayen", - "nadella", - "pichai", - "dorsey", - ] - - self.used_names = set() - - def generate_name(self): - """Generate a single memorable name.""" - adjective = random.choice(self.adjectives) - scientist = random.choice(self.scientists) - return f"{adjective}_{scientist}" - - def generate_unique_name(self): - """Generate a unique memorable name.""" - attempts = 0 - max_attempts = 100 # Prevent infinite loops - - while attempts < max_attempts: - name = self.generate_name() - if name not in self.used_names: - self.used_names.add(name) - return name - attempts += 1 - - # If we exhaust our combinations, add a random suffix - base_name = self.generate_name() - unique_name = f"{base_name}_{random.randint(1000, 9999)}" - self.used_names.add(unique_name) - return unique_name - - def generate_unique_names(self, count): - """Generate multiple unique memorable names.""" - return [self.generate_unique_name() for _ in range(count)] - - -# Global instance for easy access -memorable_names = MemorableNames() - - -def find_git_root(start_path: t.Union[str, Path, None] = None) -> Path: - """Find the root directory of a git repository by traversing up from the start path.""" - # Start from the current directory if no path is provided - if start_path is None: - start_path = Path.cwd() - else: - start_path = Path(start_path).resolve() - - # Check if the current directory is a git repository - current_path = start_path - while current_path != current_path.parent: # Stop at filesystem root - if (current_path / ".git").exists() and (current_path / ".git").is_dir(): - return current_path - - # Move up to the parent directory - current_path = current_path.parent - - # Final check for the root directory - if (current_path / ".git").exists() and (current_path / ".git").is_dir(): - return current_path - - # No git repository found - raise ValueError(f"No git repository found in or above {start_path}") diff --git a/src/ragas/utils.py b/src/ragas/utils.py index bdd923a65..36620db24 100644 --- a/src/ragas/utils.py +++ b/src/ragas/utils.py @@ -550,3 +550,62 @@ def find_git_root(start_path: t.Union[str, Path, None] = None) -> Path: # No git repository found raise ValueError(f"No git repository found in or above {start_path}") + + +def create_nano_id(size=12): + """Generate a short unique identifier.""" + import string + import uuid + + # Define characters to use (alphanumeric) + alphabet = string.ascii_letters + string.digits + + # Generate UUID and convert to int + uuid_int = uuid.uuid4().int + + # Convert to base62 + result = "" + while uuid_int: + uuid_int, remainder = divmod(uuid_int, len(alphabet)) + result = alphabet[remainder] + result + + # Pad if necessary and return desired length + return result[:size] + + +def async_to_sync(async_func): + """Convert an async function to a sync function""" + import asyncio + import functools + + @functools.wraps(async_func) + def sync_wrapper(*args, **kwargs): + try: + loop = asyncio.get_event_loop() + if loop.is_running(): + import concurrent.futures + + with concurrent.futures.ThreadPoolExecutor() as executor: + future = executor.submit(asyncio.run, async_func(*args, **kwargs)) + return future.result() + else: + return loop.run_until_complete(async_func(*args, **kwargs)) + except RuntimeError: + return asyncio.run(async_func(*args, **kwargs)) + + return sync_wrapper + + +def get_test_directory(): + """Create a test directory that will be cleaned up on process exit. + + Returns: + str: Path to test directory + """ + import tempfile + + # Create a directory in the system temp directory + test_dir = os.path.join(tempfile.gettempdir(), f"ragas_test_{create_nano_id()}") + os.makedirs(test_dir, exist_ok=True) + + return test_dir From 361f271c35bbb1c70b7694c111cf7b357df9073b Mon Sep 17 00:00:00 2001 From: Rahul Bhatnagar Date: Sun, 24 Aug 2025 16:46:08 +0530 Subject: [PATCH 2/3] Ruff --- tests/unit/test_utils.py | 140 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 139 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 087c6fc41..a6eb6c47b 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -1,6 +1,17 @@ +import asyncio +import os +import tempfile + import pytest -from ragas.utils import batched, check_if_sum_is_close, get_from_dict +from ragas.utils import ( + async_to_sync, + batched, + check_if_sum_is_close, + create_nano_id, + get_from_dict, + get_test_directory, +) @pytest.mark.parametrize( @@ -87,3 +98,130 @@ def test_batched_output_type(self, iterable, n, expected_type: type): result = list(batched(iterable, n)) for batch in result: assert all(isinstance(item, expected_type) for item in batch) + + +class TestCreateNanoId: + """Test cases for the create_nano_id function.""" + + def test_create_nano_id_default_size(self): + """Test that create_nano_id generates IDs of default size (12).""" + nano_id = create_nano_id() + assert len(nano_id) == 12 + assert nano_id.isalnum() + + def test_create_nano_id_custom_size(self): + """Test that create_nano_id respects custom size parameter.""" + for size in [5, 8, 16, 20]: + nano_id = create_nano_id(size=size) + assert len(nano_id) == size + assert nano_id.isalnum() + + def test_create_nano_id_uniqueness(self): + """Test that create_nano_id generates unique IDs.""" + ids = set() + for _ in range(100): + nano_id = create_nano_id() + assert nano_id not in ids, "Generated duplicate ID" + ids.add(nano_id) + + def test_create_nano_id_alphanumeric(self): + """Test that create_nano_id only uses alphanumeric characters.""" + nano_id = create_nano_id(size=50) # Larger size for better coverage + for char in nano_id: + assert char.isalnum(), f"Non-alphanumeric character found: {char}" + + +class TestAsyncToSync: + """Test cases for the async_to_sync function.""" + + def test_async_to_sync_basic(self): + """Test basic async to sync conversion.""" + + async def async_add(a, b): + await asyncio.sleep(0.001) # Small delay to make it truly async + return a + b + + sync_add = async_to_sync(async_add) + result = sync_add(3, 4) + assert result == 7 + + def test_async_to_sync_with_kwargs(self): + """Test async to sync conversion with keyword arguments.""" + + async def async_multiply(x, multiplier=2): + await asyncio.sleep(0.001) + return x * multiplier + + sync_multiply = async_to_sync(async_multiply) + result = sync_multiply(5, multiplier=3) + assert result == 15 + + def test_async_to_sync_exception_handling(self): + """Test that exceptions in async functions are properly propagated.""" + + async def async_error(): + await asyncio.sleep(0.001) + raise ValueError("Test error") + + sync_error = async_to_sync(async_error) + with pytest.raises(ValueError, match="Test error"): + sync_error() + + def test_async_to_sync_return_types(self): + """Test that return types are preserved.""" + + async def async_return_dict(): + await asyncio.sleep(0.001) + return {"key": "value", "number": 42} + + sync_return_dict = async_to_sync(async_return_dict) + result = sync_return_dict() + assert isinstance(result, dict) + assert result == {"key": "value", "number": 42} + + +class TestGetTestDirectory: + """Test cases for the get_test_directory function.""" + + def test_get_test_directory_exists(self): + """Test that get_test_directory creates a directory that exists.""" + test_dir = get_test_directory() + assert os.path.exists(test_dir) + assert os.path.isdir(test_dir) + + def test_get_test_directory_in_temp(self): + """Test that test directory is created in system temp directory.""" + test_dir = get_test_directory() + temp_root = tempfile.gettempdir() + assert test_dir.startswith(temp_root) + + def test_get_test_directory_unique(self): + """Test that get_test_directory creates unique directories.""" + dirs = set() + for _ in range(5): + test_dir = get_test_directory() + assert test_dir not in dirs, "Generated duplicate directory path" + dirs.add(test_dir) + + def test_get_test_directory_naming_pattern(self): + """Test that test directory follows expected naming pattern.""" + test_dir = get_test_directory() + dir_name = os.path.basename(test_dir) + assert dir_name.startswith("ragas_test_") + # The suffix should be the nano_id, which is alphanumeric + suffix = dir_name[len("ragas_test_") :] + assert suffix.isalnum() + + def test_get_test_directory_writable(self): + """Test that the created test directory is writable.""" + test_dir = get_test_directory() + # Try to create a file in the directory + test_file = os.path.join(test_dir, "test_file.txt") + with open(test_file, "w") as f: + f.write("test content") + + # Verify file was created and has correct content + assert os.path.exists(test_file) + with open(test_file, "r") as f: + content = f.read() + assert content == "test content" From 9ef29ddabc0e905d3541fc5bcd3747f53d3b6c6d Mon Sep 17 00:00:00 2001 From: Rahul Bhatnagar Date: Mon, 25 Aug 2025 10:49:26 +0530 Subject: [PATCH 3/3] PR --- examples/benchmark_llm/evals.py | 8 ++++---- examples/gdrive_append_example.py | 1 + src/ragas/utils.py | 5 ++--- tests/unit/test_utils.py | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/examples/benchmark_llm/evals.py b/examples/benchmark_llm/evals.py index 4f6f617b8..dbcaf4af7 100644 --- a/examples/benchmark_llm/evals.py +++ b/examples/benchmark_llm/evals.py @@ -7,12 +7,12 @@ from typing import List, Optional import pandas as pd -from ragas.experimental import Dataset +from prompt import DEFAULT_MODEL, run_prompt + from ragas import experiment -from ragas.experimental.metrics.result import MetricResult +from ragas.experimental import Dataset from ragas.experimental.metrics.discrete import discrete_metric - -from prompt import run_prompt, DEFAULT_MODEL +from ragas.experimental.metrics.result import MetricResult @discrete_metric(name="discount_accuracy", allowed_values=["correct", "incorrect"]) diff --git a/examples/gdrive_append_example.py b/examples/gdrive_append_example.py index 3e538b1e1..a808e3d46 100644 --- a/examples/gdrive_append_example.py +++ b/examples/gdrive_append_example.py @@ -5,6 +5,7 @@ """ from pydantic import BaseModel + from ragas.dataset import Dataset diff --git a/src/ragas/utils.py b/src/ragas/utils.py index 36620db24..ca8962995 100644 --- a/src/ragas/utils.py +++ b/src/ragas/utils.py @@ -5,7 +5,9 @@ import os import random import re +import string import typing as t +import uuid import warnings from datetime import datetime from functools import lru_cache @@ -554,9 +556,6 @@ def find_git_root(start_path: t.Union[str, Path, None] = None) -> Path: def create_nano_id(size=12): """Generate a short unique identifier.""" - import string - import uuid - # Define characters to use (alphanumeric) alphabet = string.ascii_letters + string.digits diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index a6eb6c47b..ff92fbbed 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -176,8 +176,8 @@ async def async_return_dict(): sync_return_dict = async_to_sync(async_return_dict) result = sync_return_dict() - assert isinstance(result, dict) - assert result == {"key": "value", "number": 42} + expected = {"key": "value", "number": 42} + assert isinstance(result, dict) and result == expected class TestGetTestDirectory: