diff --git a/Makefile b/Makefile index 234b24ddf..dde071a1a 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: help check-uv dev-setup test-setup test test-all test-unit test-integration clean ui-test ui-build ui-lint install-playwright +.PHONY: help check-uv dev-setup test-setup eval-setup test test-all test-eval test-eval-local test-eval-workflow test-eval-remote test-unit test-integration clean ui-test ui-build ui-lint install-playwright # Check if uv is installed check-uv: @@ -14,10 +14,14 @@ help: @echo " make install-playwright Install Playwright browsers" @echo "" @echo "Backend Tests:" - @echo " make test Run all tests (excluding stress/long_soak)" - @echo " make test-all Run all tests including stress tests" - @echo " make test-unit Run unit tests only" - @echo " make test-integration Run integration tests only" + @echo " make test Run all tests (excluding stress/long_soak)" + @echo " make test-all Run all tests including stress and evaluation tests" + @echo " make test-eval Run local evaluation tests (default)" + @echo " make test-eval-local Run local evaluation tests" + @echo " make test-eval-workflow Run workflow evaluation tests" + @echo " make test-eval-remote Run remote evaluation tests" + @echo " make test-unit Run unit tests only" + @echo " make test-integration Run integration tests only" @echo "" @echo "Frontend Tests:" @echo " make ui-test Run frontend linting and build" @@ -50,6 +54,13 @@ test-setup: check-uv uv run playwright install @echo "Test environment setup complete!" +# Setup eval environment +eval-setup: check-uv + @echo "Setting up evaluation test environment..." + UV_VENV_CLEAR=1 uv venv --python 3.12 + @echo "Installing Solace Agent Mesh..." + . .venv/bin/activate && uv pip install . + # Install Playwright browsers only install-playwright: check-uv @echo "Installing Playwright browsers..." @@ -65,6 +76,35 @@ test-all: @echo "Running all tests..." 
uv run pytest +# Run evaluation tests (default: local) +test-eval: test-eval-local + +# Run local evaluation tests +test-eval-local: eval-setup + @echo "Running local evaluation tests..." + @echo "Note: Ensure environment variables are set in .env file:" + @echo " - SOLACE_BROKER_URL, SOLACE_BROKER_USERNAME, SOLACE_BROKER_PASSWORD, SOLACE_BROKER_VPN" + @echo " - LLM_SERVICE_ENDPOINT, LLM_SERVICE_API_KEY" + @echo "" + . .venv/bin/activate && sam eval tests/evaluation/local_example.json + +# Run workflow evaluation tests +test-eval-workflow: eval-setup + @echo "Running workflow evaluation tests..." + @echo "Note: Ensure environment variables are set in .env file:" + @echo " - SOLACE_BROKER_URL, SOLACE_BROKER_USERNAME, SOLACE_BROKER_PASSWORD, SOLACE_BROKER_VPN" + @echo " - LLM_SERVICE_ENDPOINT, LLM_SERVICE_API_KEY" + @echo "" + . .venv/bin/activate && sam eval tests/evaluation/workflow_eval.json + +# Run remote evaluation tests +test-eval-remote: eval-setup + @echo "Running remote evaluation tests..." + @echo "Note: Ensure environment variables are set in .env file:" + @echo " - SOLACE_BROKER_URL, SOLACE_BROKER_USERNAME, SOLACE_BROKER_PASSWORD, SOLACE_BROKER_VPN" + @echo " - LLM_SERVICE_ENDPOINT, LLM_SERVICE_API_KEY" + @echo "" + . .venv/bin/activate && sam eval tests/evaluation/remote_example.json # Run unit tests only test-unit: diff --git a/evaluation/README.md b/evaluation/README.md new file mode 100644 index 000000000..da9b8cc86 --- /dev/null +++ b/evaluation/README.md @@ -0,0 +1,66 @@ +# Solace Agent Mesh - Evaluation Framework + +This directory contains the evaluation framework for the Solace Agent Mesh. It is designed to test the functionality and performance of Solace Agent Mesh. + +## How to Run Evaluations + +The evaluations are run using the `sam` command-line interface after installing the project. 
+ +### Quick Start with Make + +If you prefer an automated setup, you can use the provided Make command which handles environment setup and runs the evaluation. Run the command in the root of the project: + +```bash +make test-eval-local +``` + +This command will: +- Create a Python 3.12 virtual environment +- Install the project and its dependencies +- Run the local evaluation tests + +**Note:** Ensure your environment variables are exported before running this command (see [Environment Variables](#environment-variables) section below). + +### Manual Setup (alternative) + +Install the project and its dependencies by running the following command from the root of the project: + +```bash +pip install . +``` + +Install the REST gateway: + +```bash +pip install git+https://github.com/SolaceLabs/solace-agent-mesh-core-plugins#subdirectory=sam-rest-gateway +``` + +To run an evaluation test suite, use the `sam eval` command followed by the path to the test suite's JSON configuration file. For example, to run the local evaluation suite, execute the following command: + +```bash +sam eval tests/evaluation/local_example.json +``` + +## Environment Variables + +To run the evaluations successfully, you must configure the following environment variables. These are read when running `sam eval tests/evaluation/local_example.json` and must be exported to your environment. + +### Solace Broker Connection + +These variables are required to connect to the Solace message broker during the tests. 
+ +```bash +export SOLACE_BROKER_URL= +export SOLACE_BROKER_USERNAME= +export SOLACE_BROKER_PASSWORD= +export SOLACE_BROKER_VPN= +``` + +### LLM Evaluator Settings + +For evaluations that use an LLM to judge the response, the following variables are needed: + +```bash +export LLM_SERVICE_ENDPOINT= +export LLM_SERVICE_API_KEY= +``` diff --git a/evaluation/run.py b/evaluation/run.py index 43f9ca1a7..32cf91387 100644 --- a/evaluation/run.py +++ b/evaluation/run.py @@ -59,10 +59,33 @@ def _ensure_eval_backend_config_exists(): f"'{eval_backend_config_path.relative_to(project_root)}' not found. Creating it..." ) - if not (configs_dir / "shared_config.yaml").exists(): - _error_exit( - "Error: 'configs/shared_config.yaml' not found. Please run 'sam init' first." - ) + # Create configs directory if it doesn't exist + if not configs_dir.exists(): + click.echo(f"Creating '{configs_dir.relative_to(project_root)}' directory...") + configs_dir.mkdir(parents=True, exist_ok=True) + + # Copy shared_config.yaml from examples if it doesn't exist + shared_config_path = configs_dir / "shared_config.yaml" + if not shared_config_path.exists(): + example_shared_config = project_root / "examples" / "shared_config.yaml" + if example_shared_config.exists(): + click.echo( + f"Copying 'examples/shared_config.yaml' to '{shared_config_path.relative_to(project_root)}'..." + ) + try: + shutil.copy2(example_shared_config, shared_config_path) + click.echo( + click.style( + f"Successfully created '{shared_config_path.relative_to(project_root)}'.", + fg="green", + ) + ) + except Exception as e: + _error_exit(f"Failed to copy shared_config.yaml: {e}") + else: + _error_exit( + "Error: 'examples/shared_config.yaml' not found. Please run 'sam init' first or ensure the examples directory exists." + ) try: # This is a simplified way to get the template content.