From 553f1115075002050ee797299f581b8b6feadf93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Wed, 3 Jul 2024 15:53:44 +0200 Subject: [PATCH 01/34] rename benchmark to benchmarks --- {benchmark => benchmarks}/dbally_benchmark/__init__.py | 0 {benchmark => benchmarks}/dbally_benchmark/config.py | 0 {benchmark => benchmarks}/dbally_benchmark/constants.py | 0 .../dbally_benchmark/dataset/bird_dataset.py | 0 {benchmark => benchmarks}/dbally_benchmark/e2e_benchmark.py | 0 {benchmark => benchmarks}/dbally_benchmark/evaluate.py | 0 .../dbally_benchmark/experiment_config/config.yaml | 0 .../dbally_benchmark/experiment_config/e2e/superhero.yaml | 0 .../dbally_benchmark/experiment_config/iql/superhero.yaml | 0 .../dbally_benchmark/experiment_config/text2sql/superhero.yaml | 0 {benchmark => benchmarks}/dbally_benchmark/iql/iql_result.py | 0 .../dbally_benchmark/iql/method_call_visitor.py | 0 {benchmark => benchmarks}/dbally_benchmark/iql/metrics.py | 0 {benchmark => benchmarks}/dbally_benchmark/iql_benchmark.py | 0 {benchmark => benchmarks}/dbally_benchmark/paths.py | 0 {benchmark => benchmarks}/dbally_benchmark/text2sql/metrics.py | 0 .../dbally_benchmark/text2sql/prompt_template.py | 0 .../dbally_benchmark/text2sql/text2sql_result.py | 0 {benchmark => benchmarks}/dbally_benchmark/text2sql_benchmark.py | 0 {benchmark => benchmarks}/dbally_benchmark/utils.py | 0 {benchmark => benchmarks}/dbally_benchmark/views/superhero.py | 0 {benchmark => benchmarks}/tests/unit/test_iql_metrics.py | 0 {benchmark => benchmarks}/tests/unit/test_main_evaluate.py | 0 {benchmark => benchmarks}/tests/unit/test_method_call_visitor.py | 0 24 files changed, 0 insertions(+), 0 deletions(-) rename {benchmark => benchmarks}/dbally_benchmark/__init__.py (100%) rename {benchmark => benchmarks}/dbally_benchmark/config.py (100%) rename {benchmark => benchmarks}/dbally_benchmark/constants.py (100%) rename {benchmark => benchmarks}/dbally_benchmark/dataset/bird_dataset.py (100%) rename {benchmark => benchmarks}/dbally_benchmark/e2e_benchmark.py (100%) rename {benchmark => benchmarks}/dbally_benchmark/evaluate.py (100%) rename {benchmark => benchmarks}/dbally_benchmark/experiment_config/config.yaml (100%) rename {benchmark => benchmarks}/dbally_benchmark/experiment_config/e2e/superhero.yaml (100%) rename {benchmark => benchmarks}/dbally_benchmark/experiment_config/iql/superhero.yaml (100%) rename {benchmark => benchmarks}/dbally_benchmark/experiment_config/text2sql/superhero.yaml (100%) rename {benchmark => benchmarks}/dbally_benchmark/iql/iql_result.py (100%) rename {benchmark => benchmarks}/dbally_benchmark/iql/method_call_visitor.py (100%) rename {benchmark => benchmarks}/dbally_benchmark/iql/metrics.py (100%) rename {benchmark => benchmarks}/dbally_benchmark/iql_benchmark.py (100%) rename {benchmark => benchmarks}/dbally_benchmark/paths.py (100%) rename {benchmark => benchmarks}/dbally_benchmark/text2sql/metrics.py (100%) rename {benchmark => benchmarks}/dbally_benchmark/text2sql/prompt_template.py (100%) rename {benchmark => benchmarks}/dbally_benchmark/text2sql/text2sql_result.py (100%) rename {benchmark => benchmarks}/dbally_benchmark/text2sql_benchmark.py (100%) rename {benchmark => benchmarks}/dbally_benchmark/utils.py (100%) rename {benchmark => benchmarks}/dbally_benchmark/views/superhero.py (100%) rename {benchmark => benchmarks}/tests/unit/test_iql_metrics.py (100%) rename {benchmark => benchmarks}/tests/unit/test_main_evaluate.py (100%) rename {benchmark => 
benchmarks}/tests/unit/test_method_call_visitor.py (100%) diff --git a/benchmark/dbally_benchmark/__init__.py b/benchmarks/dbally_benchmark/__init__.py similarity index 100% rename from benchmark/dbally_benchmark/__init__.py rename to benchmarks/dbally_benchmark/__init__.py diff --git a/benchmark/dbally_benchmark/config.py b/benchmarks/dbally_benchmark/config.py similarity index 100% rename from benchmark/dbally_benchmark/config.py rename to benchmarks/dbally_benchmark/config.py diff --git a/benchmark/dbally_benchmark/constants.py b/benchmarks/dbally_benchmark/constants.py similarity index 100% rename from benchmark/dbally_benchmark/constants.py rename to benchmarks/dbally_benchmark/constants.py diff --git a/benchmark/dbally_benchmark/dataset/bird_dataset.py b/benchmarks/dbally_benchmark/dataset/bird_dataset.py similarity index 100% rename from benchmark/dbally_benchmark/dataset/bird_dataset.py rename to benchmarks/dbally_benchmark/dataset/bird_dataset.py diff --git a/benchmark/dbally_benchmark/e2e_benchmark.py b/benchmarks/dbally_benchmark/e2e_benchmark.py similarity index 100% rename from benchmark/dbally_benchmark/e2e_benchmark.py rename to benchmarks/dbally_benchmark/e2e_benchmark.py diff --git a/benchmark/dbally_benchmark/evaluate.py b/benchmarks/dbally_benchmark/evaluate.py similarity index 100% rename from benchmark/dbally_benchmark/evaluate.py rename to benchmarks/dbally_benchmark/evaluate.py diff --git a/benchmark/dbally_benchmark/experiment_config/config.yaml b/benchmarks/dbally_benchmark/experiment_config/config.yaml similarity index 100% rename from benchmark/dbally_benchmark/experiment_config/config.yaml rename to benchmarks/dbally_benchmark/experiment_config/config.yaml diff --git a/benchmark/dbally_benchmark/experiment_config/e2e/superhero.yaml b/benchmarks/dbally_benchmark/experiment_config/e2e/superhero.yaml similarity index 100% rename from benchmark/dbally_benchmark/experiment_config/e2e/superhero.yaml rename to benchmarks/dbally_benchmark/experiment_config/e2e/superhero.yaml diff --git a/benchmark/dbally_benchmark/experiment_config/iql/superhero.yaml b/benchmarks/dbally_benchmark/experiment_config/iql/superhero.yaml similarity index 100% rename from benchmark/dbally_benchmark/experiment_config/iql/superhero.yaml rename to benchmarks/dbally_benchmark/experiment_config/iql/superhero.yaml diff --git a/benchmark/dbally_benchmark/experiment_config/text2sql/superhero.yaml b/benchmarks/dbally_benchmark/experiment_config/text2sql/superhero.yaml similarity index 100% rename from benchmark/dbally_benchmark/experiment_config/text2sql/superhero.yaml rename to benchmarks/dbally_benchmark/experiment_config/text2sql/superhero.yaml diff --git a/benchmark/dbally_benchmark/iql/iql_result.py b/benchmarks/dbally_benchmark/iql/iql_result.py similarity index 100% rename from benchmark/dbally_benchmark/iql/iql_result.py rename to benchmarks/dbally_benchmark/iql/iql_result.py diff --git a/benchmark/dbally_benchmark/iql/method_call_visitor.py b/benchmarks/dbally_benchmark/iql/method_call_visitor.py similarity index 100% rename from benchmark/dbally_benchmark/iql/method_call_visitor.py rename to benchmarks/dbally_benchmark/iql/method_call_visitor.py diff --git a/benchmark/dbally_benchmark/iql/metrics.py b/benchmarks/dbally_benchmark/iql/metrics.py similarity index 100% rename from benchmark/dbally_benchmark/iql/metrics.py rename to benchmarks/dbally_benchmark/iql/metrics.py diff --git a/benchmark/dbally_benchmark/iql_benchmark.py b/benchmarks/dbally_benchmark/iql_benchmark.py similarity index 
100% rename from benchmark/dbally_benchmark/iql_benchmark.py rename to benchmarks/dbally_benchmark/iql_benchmark.py diff --git a/benchmark/dbally_benchmark/paths.py b/benchmarks/dbally_benchmark/paths.py similarity index 100% rename from benchmark/dbally_benchmark/paths.py rename to benchmarks/dbally_benchmark/paths.py diff --git a/benchmark/dbally_benchmark/text2sql/metrics.py b/benchmarks/dbally_benchmark/text2sql/metrics.py similarity index 100% rename from benchmark/dbally_benchmark/text2sql/metrics.py rename to benchmarks/dbally_benchmark/text2sql/metrics.py diff --git a/benchmark/dbally_benchmark/text2sql/prompt_template.py b/benchmarks/dbally_benchmark/text2sql/prompt_template.py similarity index 100% rename from benchmark/dbally_benchmark/text2sql/prompt_template.py rename to benchmarks/dbally_benchmark/text2sql/prompt_template.py diff --git a/benchmark/dbally_benchmark/text2sql/text2sql_result.py b/benchmarks/dbally_benchmark/text2sql/text2sql_result.py similarity index 100% rename from benchmark/dbally_benchmark/text2sql/text2sql_result.py rename to benchmarks/dbally_benchmark/text2sql/text2sql_result.py diff --git a/benchmark/dbally_benchmark/text2sql_benchmark.py b/benchmarks/dbally_benchmark/text2sql_benchmark.py similarity index 100% rename from benchmark/dbally_benchmark/text2sql_benchmark.py rename to benchmarks/dbally_benchmark/text2sql_benchmark.py diff --git a/benchmark/dbally_benchmark/utils.py b/benchmarks/dbally_benchmark/utils.py similarity index 100% rename from benchmark/dbally_benchmark/utils.py rename to benchmarks/dbally_benchmark/utils.py diff --git a/benchmark/dbally_benchmark/views/superhero.py b/benchmarks/dbally_benchmark/views/superhero.py similarity index 100% rename from benchmark/dbally_benchmark/views/superhero.py rename to benchmarks/dbally_benchmark/views/superhero.py diff --git a/benchmark/tests/unit/test_iql_metrics.py b/benchmarks/tests/unit/test_iql_metrics.py similarity index 100% rename from benchmark/tests/unit/test_iql_metrics.py rename to benchmarks/tests/unit/test_iql_metrics.py diff --git a/benchmark/tests/unit/test_main_evaluate.py b/benchmarks/tests/unit/test_main_evaluate.py similarity index 100% rename from benchmark/tests/unit/test_main_evaluate.py rename to benchmarks/tests/unit/test_main_evaluate.py diff --git a/benchmark/tests/unit/test_method_call_visitor.py b/benchmarks/tests/unit/test_method_call_visitor.py similarity index 100% rename from benchmark/tests/unit/test_method_call_visitor.py rename to benchmarks/tests/unit/test_method_call_visitor.py From b1b84a515feb4fc164ac6d615faefeff35777c69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Wed, 3 Jul 2024 16:02:37 +0200 Subject: [PATCH 02/34] add README --- benchmarks/README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 benchmarks/README.md diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 00000000..390d53de --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1,13 @@ +# db-ally benchmarks + +This folder contains scripts that produce reproducible timings and evaluation metrics of various db-ally features. + +## Setup environment + +tbd + +## Benchmark list + +Please refer to each subfolder to discover each benchmark suite. 
Links are provided where descriptions exist: + +tbd From d499c42286fd5710cf15bc479e70bfb03928ca79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Fri, 5 Jul 2024 14:24:00 +0200 Subject: [PATCH 03/34] mv everything to sql benchmark --- benchmarks/{dbally_benchmark => sql}/__init__.py | 0 benchmarks/{dbally_benchmark => sql}/config.py | 2 +- .../{dbally_benchmark => sql}/constants.py | 2 +- .../dataset/bird_dataset.py | 2 +- .../{dbally_benchmark => sql}/e2e_benchmark.py | 14 +++++++------- benchmarks/{dbally_benchmark => sql}/evaluate.py | 8 ++++---- .../experiment_config/config.yaml | 0 .../experiment_config/e2e/superhero.yaml | 0 .../experiment_config/iql/superhero.yaml | 0 .../experiment_config/text2sql/superhero.yaml | 0 .../{dbally_benchmark => sql}/iql/iql_result.py | 0 .../iql/method_call_visitor.py | 0 .../{dbally_benchmark => sql}/iql/metrics.py | 4 ++-- .../{dbally_benchmark => sql}/iql_benchmark.py | 14 +++++++------- benchmarks/{dbally_benchmark => sql}/paths.py | 4 ++-- .../text2sql/metrics.py | 4 ++-- .../text2sql/prompt_template.py | 0 .../text2sql/text2sql_result.py | 0 .../text2sql_benchmark.py | 16 ++++++++-------- benchmarks/{dbally_benchmark => sql}/utils.py | 0 .../{dbally_benchmark => sql}/views/superhero.py | 2 +- benchmarks/tests/unit/test_iql_metrics.py | 4 ++-- benchmarks/tests/unit/test_main_evaluate.py | 2 +- .../tests/unit/test_method_call_visitor.py | 2 +- 24 files changed, 40 insertions(+), 40 deletions(-) rename benchmarks/{dbally_benchmark => sql}/__init__.py (100%) rename benchmarks/{dbally_benchmark => sql}/config.py (90%) rename benchmarks/{dbally_benchmark => sql}/constants.py (87%) rename benchmarks/{dbally_benchmark => sql}/dataset/bird_dataset.py (97%) rename benchmarks/{dbally_benchmark => sql}/e2e_benchmark.py (91%) rename benchmarks/{dbally_benchmark => sql}/evaluate.py (81%) rename benchmarks/{dbally_benchmark => sql}/experiment_config/config.yaml (100%) rename benchmarks/{dbally_benchmark => sql}/experiment_config/e2e/superhero.yaml (100%) rename benchmarks/{dbally_benchmark => sql}/experiment_config/iql/superhero.yaml (100%) rename benchmarks/{dbally_benchmark => sql}/experiment_config/text2sql/superhero.yaml (100%) rename benchmarks/{dbally_benchmark => sql}/iql/iql_result.py (100%) rename benchmarks/{dbally_benchmark => sql}/iql/method_call_visitor.py (100%) rename benchmarks/{dbally_benchmark => sql}/iql/metrics.py (97%) rename benchmarks/{dbally_benchmark => sql}/iql_benchmark.py (92%) rename benchmarks/{dbally_benchmark => sql}/paths.py (69%) rename benchmarks/{dbally_benchmark => sql}/text2sql/metrics.py (98%) rename benchmarks/{dbally_benchmark => sql}/text2sql/prompt_template.py (100%) rename benchmarks/{dbally_benchmark => sql}/text2sql/text2sql_result.py (100%) rename benchmarks/{dbally_benchmark => sql}/text2sql_benchmark.py (90%) rename benchmarks/{dbally_benchmark => sql}/utils.py (100%) rename benchmarks/{dbally_benchmark => sql}/views/superhero.py (99%) diff --git a/benchmarks/dbally_benchmark/__init__.py b/benchmarks/sql/__init__.py similarity index 100% rename from benchmarks/dbally_benchmark/__init__.py rename to benchmarks/sql/__init__.py diff --git a/benchmarks/dbally_benchmark/config.py b/benchmarks/sql/config.py similarity index 90% rename from benchmarks/dbally_benchmark/config.py rename to benchmarks/sql/config.py index fd27027f..50a23172 100644 --- a/benchmarks/dbally_benchmark/config.py +++ b/benchmarks/sql/config.py @@ -1,5 +1,5 @@ -from dbally_benchmark.paths import PATH_PACKAGE from 
pydantic.v1 import BaseSettings +from sql.paths import PATH_PACKAGE class BenchmarkConfig(BaseSettings): diff --git a/benchmarks/dbally_benchmark/constants.py b/benchmarks/sql/constants.py similarity index 87% rename from benchmarks/dbally_benchmark/constants.py rename to benchmarks/sql/constants.py index ac492b13..b53b5863 100644 --- a/benchmarks/dbally_benchmark/constants.py +++ b/benchmarks/sql/constants.py @@ -1,7 +1,7 @@ from enum import Enum from typing import Dict, Type -from dbally_benchmark.views.superhero import SuperheroCountByPowerView, SuperheroView +from sql.views.superhero import SuperheroCountByPowerView, SuperheroView from dbally.views.sqlalchemy_base import SqlAlchemyBaseView diff --git a/benchmarks/dbally_benchmark/dataset/bird_dataset.py b/benchmarks/sql/dataset/bird_dataset.py similarity index 97% rename from benchmarks/dbally_benchmark/dataset/bird_dataset.py rename to benchmarks/sql/dataset/bird_dataset.py index 007572d5..58ca509a 100644 --- a/benchmarks/dbally_benchmark/dataset/bird_dataset.py +++ b/benchmarks/sql/dataset/bird_dataset.py @@ -4,8 +4,8 @@ from pathlib import Path from typing import Iterator -from dbally_benchmark.utils import load_data from pydantic import BaseModel, RootModel +from sql.utils import load_data class DifficultyLevel(str, enum.Enum): diff --git a/benchmarks/dbally_benchmark/e2e_benchmark.py b/benchmarks/sql/e2e_benchmark.py similarity index 91% rename from benchmarks/dbally_benchmark/e2e_benchmark.py rename to benchmarks/sql/e2e_benchmark.py index f122a1ea..17a3a92f 100644 --- a/benchmarks/dbally_benchmark/e2e_benchmark.py +++ b/benchmarks/sql/e2e_benchmark.py @@ -7,17 +7,17 @@ import hydra import neptune -from dbally_benchmark.config import BenchmarkConfig -from dbally_benchmark.constants import VIEW_REGISTRY, EvaluationType, ViewName -from dbally_benchmark.dataset.bird_dataset import BIRDDataset, BIRDExample -from dbally_benchmark.paths import PATH_EXPERIMENTS -from dbally_benchmark.text2sql.metrics import calculate_dataset_metrics -from dbally_benchmark.text2sql.text2sql_result import Text2SQLResult -from dbally_benchmark.utils import batch, get_datetime_str, set_up_gitlab_metadata from hydra.utils import instantiate from loguru import logger from neptune.utils import stringify_unsupported from omegaconf import DictConfig +from sql.config import BenchmarkConfig +from sql.constants import VIEW_REGISTRY, EvaluationType, ViewName +from sql.dataset.bird_dataset import BIRDDataset, BIRDExample +from sql.paths import PATH_EXPERIMENTS +from sql.text2sql.metrics import calculate_dataset_metrics +from sql.text2sql.text2sql_result import Text2SQLResult +from sql.utils import batch, get_datetime_str, set_up_gitlab_metadata from sqlalchemy import create_engine import dbally diff --git a/benchmarks/dbally_benchmark/evaluate.py b/benchmarks/sql/evaluate.py similarity index 81% rename from benchmarks/dbally_benchmark/evaluate.py rename to benchmarks/sql/evaluate.py index 5290a558..ebed3679 100644 --- a/benchmarks/dbally_benchmark/evaluate.py +++ b/benchmarks/sql/evaluate.py @@ -2,11 +2,11 @@ from typing import Callable, Dict import hydra -from dbally_benchmark.constants import EvaluationType -from dbally_benchmark.e2e_benchmark import evaluate as e2e_evaluate -from dbally_benchmark.iql_benchmark import evaluate as iql_evaluate -from dbally_benchmark.text2sql_benchmark import evaluate as text2sql_evaluate from omegaconf import DictConfig +from sql.constants import EvaluationType +from sql.e2e_benchmark import evaluate as e2e_evaluate +from 
sql.iql_benchmark import evaluate as iql_evaluate +from sql.text2sql_benchmark import evaluate as text2sql_evaluate async def evaluate(cfg: DictConfig) -> None: diff --git a/benchmarks/dbally_benchmark/experiment_config/config.yaml b/benchmarks/sql/experiment_config/config.yaml similarity index 100% rename from benchmarks/dbally_benchmark/experiment_config/config.yaml rename to benchmarks/sql/experiment_config/config.yaml diff --git a/benchmarks/dbally_benchmark/experiment_config/e2e/superhero.yaml b/benchmarks/sql/experiment_config/e2e/superhero.yaml similarity index 100% rename from benchmarks/dbally_benchmark/experiment_config/e2e/superhero.yaml rename to benchmarks/sql/experiment_config/e2e/superhero.yaml diff --git a/benchmarks/dbally_benchmark/experiment_config/iql/superhero.yaml b/benchmarks/sql/experiment_config/iql/superhero.yaml similarity index 100% rename from benchmarks/dbally_benchmark/experiment_config/iql/superhero.yaml rename to benchmarks/sql/experiment_config/iql/superhero.yaml diff --git a/benchmarks/dbally_benchmark/experiment_config/text2sql/superhero.yaml b/benchmarks/sql/experiment_config/text2sql/superhero.yaml similarity index 100% rename from benchmarks/dbally_benchmark/experiment_config/text2sql/superhero.yaml rename to benchmarks/sql/experiment_config/text2sql/superhero.yaml diff --git a/benchmarks/dbally_benchmark/iql/iql_result.py b/benchmarks/sql/iql/iql_result.py similarity index 100% rename from benchmarks/dbally_benchmark/iql/iql_result.py rename to benchmarks/sql/iql/iql_result.py diff --git a/benchmarks/dbally_benchmark/iql/method_call_visitor.py b/benchmarks/sql/iql/method_call_visitor.py similarity index 100% rename from benchmarks/dbally_benchmark/iql/method_call_visitor.py rename to benchmarks/sql/iql/method_call_visitor.py diff --git a/benchmarks/dbally_benchmark/iql/metrics.py b/benchmarks/sql/iql/metrics.py similarity index 97% rename from benchmarks/dbally_benchmark/iql/metrics.py rename to benchmarks/sql/iql/metrics.py index 665475ae..e22cd95a 100644 --- a/benchmarks/dbally_benchmark/iql/metrics.py +++ b/benchmarks/sql/iql/metrics.py @@ -1,9 +1,9 @@ import ast from typing import Dict, List, Tuple -from dbally_benchmark.iql.iql_result import IQLResult -from dbally_benchmark.iql.method_call_visitor import MethodCallVisitor from loguru import logger +from sql.iql.iql_result import IQLResult +from sql.iql.method_call_visitor import MethodCallVisitor from dbally.iql._exceptions import IQLError, IQLUnsupportedSyntaxError from dbally.iql._query import IQLQuery diff --git a/benchmarks/dbally_benchmark/iql_benchmark.py b/benchmarks/sql/iql_benchmark.py similarity index 92% rename from benchmarks/dbally_benchmark/iql_benchmark.py rename to benchmarks/sql/iql_benchmark.py index d5b6b2ef..90e4ca41 100644 --- a/benchmarks/dbally_benchmark/iql_benchmark.py +++ b/benchmarks/sql/iql_benchmark.py @@ -6,17 +6,17 @@ import hydra import neptune -from dbally_benchmark.config import BenchmarkConfig -from dbally_benchmark.constants import VIEW_REGISTRY, EvaluationType, ViewName -from dbally_benchmark.dataset.bird_dataset import BIRDDataset, BIRDExample -from dbally_benchmark.iql.iql_result import IQLResult -from dbally_benchmark.iql.metrics import calculate_dataset_metrics -from dbally_benchmark.paths import PATH_EXPERIMENTS -from dbally_benchmark.utils import batch, get_datetime_str, set_up_gitlab_metadata from hydra.utils import instantiate from loguru import logger from neptune.utils import stringify_unsupported from omegaconf import DictConfig +from sql.config 
import BenchmarkConfig +from sql.constants import VIEW_REGISTRY, EvaluationType, ViewName +from sql.dataset.bird_dataset import BIRDDataset, BIRDExample +from sql.iql.iql_result import IQLResult +from sql.iql.metrics import calculate_dataset_metrics +from sql.paths import PATH_EXPERIMENTS +from sql.utils import batch, get_datetime_str, set_up_gitlab_metadata from sqlalchemy import create_engine from dbally.audit.event_tracker import EventTracker diff --git a/benchmarks/dbally_benchmark/paths.py b/benchmarks/sql/paths.py similarity index 69% rename from benchmarks/dbally_benchmark/paths.py rename to benchmarks/sql/paths.py index b87bbfcc..fb9fd49b 100644 --- a/benchmarks/dbally_benchmark/paths.py +++ b/benchmarks/sql/paths.py @@ -1,8 +1,8 @@ from pathlib import Path -import dbally_benchmark +import sql -PATH_PACKAGE = Path(dbally_benchmark.__file__).parent +PATH_PACKAGE = Path(sql.__file__).parent PATH_ROOT = PATH_PACKAGE.parent.parent PATH_EXPERIMENTS = PATH_ROOT / "experiments" diff --git a/benchmarks/dbally_benchmark/text2sql/metrics.py b/benchmarks/sql/text2sql/metrics.py similarity index 98% rename from benchmarks/dbally_benchmark/text2sql/metrics.py rename to benchmarks/sql/text2sql/metrics.py index dece18df..8e879621 100644 --- a/benchmarks/dbally_benchmark/text2sql/metrics.py +++ b/benchmarks/sql/text2sql/metrics.py @@ -3,8 +3,8 @@ from typing import Any, Dict, List import pandas as pd -from dbally_benchmark.text2sql.text2sql_result import Text2SQLResult -from dbally_benchmark.utils import batch +from sql.text2sql.text2sql_result import Text2SQLResult +from sql.utils import batch from sqlalchemy import Engine, text diff --git a/benchmarks/dbally_benchmark/text2sql/prompt_template.py b/benchmarks/sql/text2sql/prompt_template.py similarity index 100% rename from benchmarks/dbally_benchmark/text2sql/prompt_template.py rename to benchmarks/sql/text2sql/prompt_template.py diff --git a/benchmarks/dbally_benchmark/text2sql/text2sql_result.py b/benchmarks/sql/text2sql/text2sql_result.py similarity index 100% rename from benchmarks/dbally_benchmark/text2sql/text2sql_result.py rename to benchmarks/sql/text2sql/text2sql_result.py diff --git a/benchmarks/dbally_benchmark/text2sql_benchmark.py b/benchmarks/sql/text2sql_benchmark.py similarity index 90% rename from benchmarks/dbally_benchmark/text2sql_benchmark.py rename to benchmarks/sql/text2sql_benchmark.py index 5e4c5860..11cd5659 100644 --- a/benchmarks/dbally_benchmark/text2sql_benchmark.py +++ b/benchmarks/sql/text2sql_benchmark.py @@ -6,18 +6,18 @@ import hydra import neptune -from dbally_benchmark.config import BenchmarkConfig -from dbally_benchmark.constants import EvaluationType -from dbally_benchmark.dataset.bird_dataset import BIRDDataset, BIRDExample -from dbally_benchmark.paths import PATH_EXPERIMENTS, PATH_SCHEMAS -from dbally_benchmark.text2sql.metrics import calculate_dataset_metrics -from dbally_benchmark.text2sql.prompt_template import TEXT2SQL_PROMPT_TEMPLATE -from dbally_benchmark.text2sql.text2sql_result import Text2SQLResult -from dbally_benchmark.utils import batch, get_datetime_str, set_up_gitlab_metadata from hydra.utils import instantiate from loguru import logger from neptune.utils import stringify_unsupported from omegaconf import DictConfig +from sql.config import BenchmarkConfig +from sql.constants import EvaluationType +from sql.dataset.bird_dataset import BIRDDataset, BIRDExample +from sql.paths import PATH_EXPERIMENTS, PATH_SCHEMAS +from sql.text2sql.metrics import calculate_dataset_metrics +from 
sql.text2sql.prompt_template import TEXT2SQL_PROMPT_TEMPLATE +from sql.text2sql.text2sql_result import Text2SQLResult +from sql.utils import batch, get_datetime_str, set_up_gitlab_metadata from sqlalchemy import create_engine from dbally.audit.event_tracker import EventTracker diff --git a/benchmarks/dbally_benchmark/utils.py b/benchmarks/sql/utils.py similarity index 100% rename from benchmarks/dbally_benchmark/utils.py rename to benchmarks/sql/utils.py diff --git a/benchmarks/dbally_benchmark/views/superhero.py b/benchmarks/sql/views/superhero.py similarity index 99% rename from benchmarks/dbally_benchmark/views/superhero.py rename to benchmarks/sql/views/superhero.py index 827981f6..829b8948 100644 --- a/benchmarks/dbally_benchmark/views/superhero.py +++ b/benchmarks/sql/views/superhero.py @@ -1,7 +1,7 @@ # pylint: disable=missing-docstring, missing-return-doc, missing-param-doc import sqlalchemy -from dbally_benchmark.config import config +from sql.config import config from sqlalchemy import create_engine from sqlalchemy.dialects.postgresql import ARRAY from sqlalchemy.ext.automap import automap_base diff --git a/benchmarks/tests/unit/test_iql_metrics.py b/benchmarks/tests/unit/test_iql_metrics.py index 083bc07b..2732884b 100644 --- a/benchmarks/tests/unit/test_iql_metrics.py +++ b/benchmarks/tests/unit/test_iql_metrics.py @@ -1,5 +1,5 @@ -from dbally_benchmark.iql.iql_result import IQLResult -from dbally_benchmark.iql.metrics import ( +from sql.iql.iql_result import IQLResult +from sql.iql.metrics import ( _count_hallucinated_methods_for_single_example, calculate_hallucinated_filters_for_dataset, calculate_syntax_errors, diff --git a/benchmarks/tests/unit/test_main_evaluate.py b/benchmarks/tests/unit/test_main_evaluate.py index aeffe9f2..c677a12c 100644 --- a/benchmarks/tests/unit/test_main_evaluate.py +++ b/benchmarks/tests/unit/test_main_evaluate.py @@ -1,8 +1,8 @@ from unittest.mock import call, patch import pytest -from dbally_benchmark.evaluate import evaluate from omegaconf import DictConfig +from sql.evaluate import evaluate @patch("dbally_benchmark.evaluate.e2e_evaluate") diff --git a/benchmarks/tests/unit/test_method_call_visitor.py b/benchmarks/tests/unit/test_method_call_visitor.py index 78da73cd..33e38934 100644 --- a/benchmarks/tests/unit/test_method_call_visitor.py +++ b/benchmarks/tests/unit/test_method_call_visitor.py @@ -1,7 +1,7 @@ import ast import pytest -from dbally_benchmark.iql.method_call_visitor import MethodCallVisitor +from sql.iql.method_call_visitor import MethodCallVisitor @pytest.fixture From d5ce69195b4c521eb686da61a86d73dc32257b2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Fri, 5 Jul 2024 14:54:35 +0200 Subject: [PATCH 04/34] add docs for sql benchmarks --- benchmarks/sql/README.md | 15 +++++++++++++++ .../{ => sql}/tests/unit/test_iql_metrics.py | 0 .../{ => sql}/tests/unit/test_main_evaluate.py | 0 .../tests/unit/test_method_call_visitor.py | 0 4 files changed, 15 insertions(+) create mode 100644 benchmarks/sql/README.md rename benchmarks/{ => sql}/tests/unit/test_iql_metrics.py (100%) rename benchmarks/{ => sql}/tests/unit/test_main_evaluate.py (100%) rename benchmarks/{ => sql}/tests/unit/test_method_call_visitor.py (100%) diff --git a/benchmarks/sql/README.md b/benchmarks/sql/README.md new file mode 100644 index 00000000..39a2f037 --- /dev/null +++ b/benchmarks/sql/README.md @@ -0,0 +1,15 @@ +# SQL benchmarks + +This folder contains benchmarks for querying SQL databases with db-ally. 
This suite evaluates the following tasks: + +- `E2E` - measures correctness of rows returned from the database by db-ally. +- `Text2IQL` - measures correctness of IQL queries generated by structured views. +- `Text2SQL` - measures correctness of SQL queries generated by freeform views. + +## Run benchmarks + +tbd + +## Run tests + +tbd diff --git a/benchmarks/tests/unit/test_iql_metrics.py b/benchmarks/sql/tests/unit/test_iql_metrics.py similarity index 100% rename from benchmarks/tests/unit/test_iql_metrics.py rename to benchmarks/sql/tests/unit/test_iql_metrics.py diff --git a/benchmarks/tests/unit/test_main_evaluate.py b/benchmarks/sql/tests/unit/test_main_evaluate.py similarity index 100% rename from benchmarks/tests/unit/test_main_evaluate.py rename to benchmarks/sql/tests/unit/test_main_evaluate.py diff --git a/benchmarks/tests/unit/test_method_call_visitor.py b/benchmarks/sql/tests/unit/test_method_call_visitor.py similarity index 100% rename from benchmarks/tests/unit/test_method_call_visitor.py rename to benchmarks/sql/tests/unit/test_method_call_visitor.py From 940ce4e712392317ff35833542210e94211197f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Fri, 5 Jul 2024 14:59:20 +0200 Subject: [PATCH 05/34] update benchmark list --- benchmarks/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index 390d53de..e39d9efc 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -10,4 +10,4 @@ tbd Please refer to each subfolder to discover each benchmark suite. Links are provided where descriptions exist: -tbd +* [SQL](sql/README.md) From 58710321ba87bb578debd6bb99f97bb54062e74a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Fri, 5 Jul 2024 15:06:52 +0200 Subject: [PATCH 06/34] fix imports --- benchmarks/sql/config.py | 2 +- benchmarks/sql/constants.py | 2 +- benchmarks/sql/dataset/bird_dataset.py | 2 +- benchmarks/sql/e2e_benchmark.py | 14 +++++++------- benchmarks/sql/evaluate.py | 8 ++++---- benchmarks/sql/iql/metrics.py | 4 ++-- benchmarks/sql/iql_benchmark.py | 14 +++++++------- benchmarks/sql/paths.py | 4 ++-- benchmarks/sql/tests/unit/test_iql_metrics.py | 4 ++-- benchmarks/sql/tests/unit/test_main_evaluate.py | 2 +- .../sql/tests/unit/test_method_call_visitor.py | 2 +- benchmarks/sql/text2sql/metrics.py | 4 ++-- benchmarks/sql/text2sql_benchmark.py | 16 ++++++++-------- benchmarks/sql/views/superhero.py | 2 +- 14 files changed, 40 insertions(+), 40 deletions(-) diff --git a/benchmarks/sql/config.py b/benchmarks/sql/config.py index 50a23172..caf3aaaa 100644 --- a/benchmarks/sql/config.py +++ b/benchmarks/sql/config.py @@ -1,5 +1,5 @@ +from paths import PATH_PACKAGE from pydantic.v1 import BaseSettings -from sql.paths import PATH_PACKAGE class BenchmarkConfig(BaseSettings): diff --git a/benchmarks/sql/constants.py b/benchmarks/sql/constants.py index b53b5863..321378dc 100644 --- a/benchmarks/sql/constants.py +++ b/benchmarks/sql/constants.py @@ -1,7 +1,7 @@ from enum import Enum from typing import Dict, Type -from sql.views.superhero import SuperheroCountByPowerView, SuperheroView +from views.superhero import SuperheroCountByPowerView, SuperheroView from dbally.views.sqlalchemy_base import SqlAlchemyBaseView diff --git a/benchmarks/sql/dataset/bird_dataset.py b/benchmarks/sql/dataset/bird_dataset.py index 58ca509a..67a009cf 100644 --- a/benchmarks/sql/dataset/bird_dataset.py +++ b/benchmarks/sql/dataset/bird_dataset.py @@ -5,7 +5,7 @@ from typing import 
Iterator from pydantic import BaseModel, RootModel -from sql.utils import load_data +from utils import load_data class DifficultyLevel(str, enum.Enum): diff --git a/benchmarks/sql/e2e_benchmark.py b/benchmarks/sql/e2e_benchmark.py index 17a3a92f..2eb4f1fd 100644 --- a/benchmarks/sql/e2e_benchmark.py +++ b/benchmarks/sql/e2e_benchmark.py @@ -7,18 +7,18 @@ import hydra import neptune +from config import BenchmarkConfig +from constants import VIEW_REGISTRY, EvaluationType, ViewName +from dataset.bird_dataset import BIRDDataset, BIRDExample from hydra.utils import instantiate from loguru import logger from neptune.utils import stringify_unsupported from omegaconf import DictConfig -from sql.config import BenchmarkConfig -from sql.constants import VIEW_REGISTRY, EvaluationType, ViewName -from sql.dataset.bird_dataset import BIRDDataset, BIRDExample -from sql.paths import PATH_EXPERIMENTS -from sql.text2sql.metrics import calculate_dataset_metrics -from sql.text2sql.text2sql_result import Text2SQLResult -from sql.utils import batch, get_datetime_str, set_up_gitlab_metadata +from paths import PATH_EXPERIMENTS from sqlalchemy import create_engine +from text2sql.metrics import calculate_dataset_metrics +from text2sql.text2sql_result import Text2SQLResult +from utils import batch, get_datetime_str, set_up_gitlab_metadata import dbally from dbally.collection import Collection diff --git a/benchmarks/sql/evaluate.py b/benchmarks/sql/evaluate.py index ebed3679..b32a5078 100644 --- a/benchmarks/sql/evaluate.py +++ b/benchmarks/sql/evaluate.py @@ -2,11 +2,11 @@ from typing import Callable, Dict import hydra +from constants import EvaluationType +from e2e_benchmark import evaluate as e2e_evaluate +from iql_benchmark import evaluate as iql_evaluate from omegaconf import DictConfig -from sql.constants import EvaluationType -from sql.e2e_benchmark import evaluate as e2e_evaluate -from sql.iql_benchmark import evaluate as iql_evaluate -from sql.text2sql_benchmark import evaluate as text2sql_evaluate +from text2sql_benchmark import evaluate as text2sql_evaluate async def evaluate(cfg: DictConfig) -> None: diff --git a/benchmarks/sql/iql/metrics.py b/benchmarks/sql/iql/metrics.py index e22cd95a..b88c4c19 100644 --- a/benchmarks/sql/iql/metrics.py +++ b/benchmarks/sql/iql/metrics.py @@ -1,9 +1,9 @@ import ast from typing import Dict, List, Tuple +from iql.iql_result import IQLResult +from iql.method_call_visitor import MethodCallVisitor from loguru import logger -from sql.iql.iql_result import IQLResult -from sql.iql.method_call_visitor import MethodCallVisitor from dbally.iql._exceptions import IQLError, IQLUnsupportedSyntaxError from dbally.iql._query import IQLQuery diff --git a/benchmarks/sql/iql_benchmark.py b/benchmarks/sql/iql_benchmark.py index 90e4ca41..768d9fbb 100644 --- a/benchmarks/sql/iql_benchmark.py +++ b/benchmarks/sql/iql_benchmark.py @@ -6,18 +6,18 @@ import hydra import neptune +from config import BenchmarkConfig +from constants import VIEW_REGISTRY, EvaluationType, ViewName +from dataset.bird_dataset import BIRDDataset, BIRDExample from hydra.utils import instantiate +from iql.iql_result import IQLResult +from iql.metrics import calculate_dataset_metrics from loguru import logger from neptune.utils import stringify_unsupported from omegaconf import DictConfig -from sql.config import BenchmarkConfig -from sql.constants import VIEW_REGISTRY, EvaluationType, ViewName -from sql.dataset.bird_dataset import BIRDDataset, BIRDExample -from sql.iql.iql_result import IQLResult -from sql.iql.metrics 
import calculate_dataset_metrics -from sql.paths import PATH_EXPERIMENTS -from sql.utils import batch, get_datetime_str, set_up_gitlab_metadata +from paths import PATH_EXPERIMENTS from sqlalchemy import create_engine +from utils import batch, get_datetime_str, set_up_gitlab_metadata from dbally.audit.event_tracker import EventTracker from dbally.iql_generator.iql_generator import IQLGenerator diff --git a/benchmarks/sql/paths.py b/benchmarks/sql/paths.py index fb9fd49b..6df53cb0 100644 --- a/benchmarks/sql/paths.py +++ b/benchmarks/sql/paths.py @@ -1,8 +1,8 @@ from pathlib import Path -import sql +# import sql -PATH_PACKAGE = Path(sql.__file__).parent +PATH_PACKAGE = Path(__file__).parent PATH_ROOT = PATH_PACKAGE.parent.parent PATH_EXPERIMENTS = PATH_ROOT / "experiments" diff --git a/benchmarks/sql/tests/unit/test_iql_metrics.py b/benchmarks/sql/tests/unit/test_iql_metrics.py index 2732884b..5cd639cd 100644 --- a/benchmarks/sql/tests/unit/test_iql_metrics.py +++ b/benchmarks/sql/tests/unit/test_iql_metrics.py @@ -1,5 +1,5 @@ -from sql.iql.iql_result import IQLResult -from sql.iql.metrics import ( +from iql.iql_result import IQLResult +from iql.metrics import ( _count_hallucinated_methods_for_single_example, calculate_hallucinated_filters_for_dataset, calculate_syntax_errors, diff --git a/benchmarks/sql/tests/unit/test_main_evaluate.py b/benchmarks/sql/tests/unit/test_main_evaluate.py index c677a12c..d20318be 100644 --- a/benchmarks/sql/tests/unit/test_main_evaluate.py +++ b/benchmarks/sql/tests/unit/test_main_evaluate.py @@ -1,8 +1,8 @@ from unittest.mock import call, patch import pytest +from evaluate import evaluate from omegaconf import DictConfig -from sql.evaluate import evaluate @patch("dbally_benchmark.evaluate.e2e_evaluate") diff --git a/benchmarks/sql/tests/unit/test_method_call_visitor.py b/benchmarks/sql/tests/unit/test_method_call_visitor.py index 33e38934..12b845cc 100644 --- a/benchmarks/sql/tests/unit/test_method_call_visitor.py +++ b/benchmarks/sql/tests/unit/test_method_call_visitor.py @@ -1,7 +1,7 @@ import ast import pytest -from sql.iql.method_call_visitor import MethodCallVisitor +from iql.method_call_visitor import MethodCallVisitor @pytest.fixture diff --git a/benchmarks/sql/text2sql/metrics.py b/benchmarks/sql/text2sql/metrics.py index 8e879621..770a44ea 100644 --- a/benchmarks/sql/text2sql/metrics.py +++ b/benchmarks/sql/text2sql/metrics.py @@ -3,9 +3,9 @@ from typing import Any, Dict, List import pandas as pd -from sql.text2sql.text2sql_result import Text2SQLResult -from sql.utils import batch from sqlalchemy import Engine, text +from text2sql.text2sql_result import Text2SQLResult +from utils import batch @dataclass diff --git a/benchmarks/sql/text2sql_benchmark.py b/benchmarks/sql/text2sql_benchmark.py index 11cd5659..3799ba4f 100644 --- a/benchmarks/sql/text2sql_benchmark.py +++ b/benchmarks/sql/text2sql_benchmark.py @@ -6,19 +6,19 @@ import hydra import neptune +from config import BenchmarkConfig +from constants import EvaluationType +from dataset.bird_dataset import BIRDDataset, BIRDExample from hydra.utils import instantiate from loguru import logger from neptune.utils import stringify_unsupported from omegaconf import DictConfig -from sql.config import BenchmarkConfig -from sql.constants import EvaluationType -from sql.dataset.bird_dataset import BIRDDataset, BIRDExample -from sql.paths import PATH_EXPERIMENTS, PATH_SCHEMAS -from sql.text2sql.metrics import calculate_dataset_metrics -from sql.text2sql.prompt_template import TEXT2SQL_PROMPT_TEMPLATE -from 
sql.text2sql.text2sql_result import Text2SQLResult -from sql.utils import batch, get_datetime_str, set_up_gitlab_metadata +from paths import PATH_EXPERIMENTS, PATH_SCHEMAS from sqlalchemy import create_engine +from text2sql.metrics import calculate_dataset_metrics +from text2sql.prompt_template import TEXT2SQL_PROMPT_TEMPLATE +from text2sql.text2sql_result import Text2SQLResult +from utils import batch, get_datetime_str, set_up_gitlab_metadata from dbally.audit.event_tracker import EventTracker from dbally.llms.litellm import LiteLLM diff --git a/benchmarks/sql/views/superhero.py b/benchmarks/sql/views/superhero.py index 829b8948..22f7e837 100644 --- a/benchmarks/sql/views/superhero.py +++ b/benchmarks/sql/views/superhero.py @@ -1,7 +1,7 @@ # pylint: disable=missing-docstring, missing-return-doc, missing-param-doc import sqlalchemy -from sql.config import config +from config import config from sqlalchemy import create_engine from sqlalchemy.dialects.postgresql import ARRAY from sqlalchemy.ext.automap import automap_base From 4807cfce46a13422748a06277395ddbaaf517055 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Fri, 5 Jul 2024 15:18:34 +0200 Subject: [PATCH 07/34] rename bench script --- benchmarks/sql/{evaluate.py => bench.py} | 0 benchmarks/sql/tests/unit/test_main_evaluate.py | 3 ++- 2 files changed, 2 insertions(+), 1 deletion(-) rename benchmarks/sql/{evaluate.py => bench.py} (100%) diff --git a/benchmarks/sql/evaluate.py b/benchmarks/sql/bench.py similarity index 100% rename from benchmarks/sql/evaluate.py rename to benchmarks/sql/bench.py diff --git a/benchmarks/sql/tests/unit/test_main_evaluate.py b/benchmarks/sql/tests/unit/test_main_evaluate.py index d20318be..08c0341e 100644 --- a/benchmarks/sql/tests/unit/test_main_evaluate.py +++ b/benchmarks/sql/tests/unit/test_main_evaluate.py @@ -1,9 +1,10 @@ from unittest.mock import call, patch import pytest -from evaluate import evaluate from omegaconf import DictConfig +from benchmarks.sql.bench import evaluate + @patch("dbally_benchmark.evaluate.e2e_evaluate") @patch("dbally_benchmark.evaluate.text2sql_evaluate") From 51330ed37b32c36886e674c92711e94f8a96f7b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Wed, 10 Jul 2024 09:42:29 +0200 Subject: [PATCH 08/34] add hf integration --- benchmarks/sql/config.py | 2 +- benchmarks/sql/e2e_benchmark.py | 8 +- benchmarks/sql/experiment_config/config.yaml | 12 ++- .../sql/experiment_config/iql/superhero.yaml | 8 +- benchmarks/sql/iql/iql_result.py | 3 +- benchmarks/sql/iql/metrics.py | 35 ++++++-- benchmarks/sql/iql_benchmark.py | 90 ++++++++++++------- benchmarks/sql/text2sql_benchmark.py | 8 +- benchmarks/sql/views/superhero.py | 22 ++++- 9 files changed, 133 insertions(+), 55 deletions(-) diff --git a/benchmarks/sql/config.py b/benchmarks/sql/config.py index caf3aaaa..4ed166ff 100644 --- a/benchmarks/sql/config.py +++ b/benchmarks/sql/config.py @@ -5,7 +5,7 @@ class BenchmarkConfig(BaseSettings): """db-ally Benchmark configuration.""" - pg_connection_string: str = "" + pg_connection_string: str = "sqlite://" openai_api_key: str = "" neptune_project: str = "deepsense-ai/db-ally" diff --git a/benchmarks/sql/e2e_benchmark.py b/benchmarks/sql/e2e_benchmark.py index 2eb4f1fd..4d0b8eec 100644 --- a/benchmarks/sql/e2e_benchmark.py +++ b/benchmarks/sql/e2e_benchmark.py @@ -83,11 +83,11 @@ async def evaluate(cfg: DictConfig) -> Any: engine = create_engine(benchmark_cfg.pg_connection_string + f"/{cfg.db_name}") - if 
cfg.model_name.startswith("local/"): - llm = LocalLLM(api_key=benchmark_cfg.hf_api_key, model_name=cfg.model_name.split("/", 1)[1]) + if cfg.llm.model_name.startswith("local/"): + llm = LocalLLM(api_key=benchmark_cfg.hf_api_key, model_name=cfg.llm.model_name.split("/", 1)[1]) else: llm = LiteLLM( - model_name=cfg.model_name, + model_name=cfg.llm.model_name, api_key=benchmark_cfg.openai_api_key, ) @@ -104,7 +104,7 @@ async def evaluate(cfg: DictConfig) -> Any: api_token=benchmark_cfg.neptune_api_token, ) run["config"] = stringify_unsupported(cfg) - tags = list(cfg.neptune.get("tags", [])) + [EvaluationType.END2END.value, cfg.model_name, cfg.db_name] + tags = list(cfg.neptune.get("tags", [])) + [EvaluationType.END2END.value, cfg.llm.model_name, cfg.db_name] run["sys/tags"].add(tags) if "CI_MERGE_REQUEST_IID" in os.environ: diff --git a/benchmarks/sql/experiment_config/config.yaml b/benchmarks/sql/experiment_config/config.yaml index 334ea3ce..80501175 100644 --- a/benchmarks/sql/experiment_config/config.yaml +++ b/benchmarks/sql/experiment_config/config.yaml @@ -1,10 +1,14 @@ # Provide list of test configurations you want to execute defaults: - - e2e: [superhero] - - iql: [] + - e2e: [] + - iql: [superhero] - text2sql: [] -# Attributes common to all tests -model_name: "gpt-3.5-turbo" +llm: + model_name: "gpt-3.5-turbo" + api_key: "" + neptune: + project: "deepsense-ai/db-ally" + api_token: "" log: False diff --git a/benchmarks/sql/experiment_config/iql/superhero.yaml b/benchmarks/sql/experiment_config/iql/superhero.yaml index 0318ab97..c7195c45 100644 --- a/benchmarks/sql/experiment_config/iql/superhero.yaml +++ b/benchmarks/sql/experiment_config/iql/superhero.yaml @@ -1,6 +1,8 @@ superhero: + dataset_path: "micpst/bird-dev-iql" + split: "dev" + db_ids: ["superhero"] + difficulties: ["simple"] output_path: "iql_evaluation" - dataset_path: "data/superhero.json" - db_name: "superhero" view_name: "SuperheroView" - difficulty_levels: ["simple"] + db_url: "sqlite://" diff --git a/benchmarks/sql/iql/iql_result.py b/benchmarks/sql/iql/iql_result.py index 01a757f2..0cfa8fec 100644 --- a/benchmarks/sql/iql/iql_result.py +++ b/benchmarks/sql/iql/iql_result.py @@ -9,5 +9,6 @@ class IQLResult(BaseModel): """ question: str - iql_filters: str + ground_truth_iql: str + predicted_iql: str exception_raised: Optional[bool] = None diff --git a/benchmarks/sql/iql/metrics.py b/benchmarks/sql/iql/metrics.py index b88c4c19..0b14edd5 100644 --- a/benchmarks/sql/iql/metrics.py +++ b/benchmarks/sql/iql/metrics.py @@ -45,7 +45,7 @@ def calculate_hallucinated_filters_for_dataset(dataset: List[IQLResult], filter_ for example in dataset: hallucinated_filters, total_filters = _count_hallucinated_methods_for_single_example( - example.iql_filters, allowed_filters + example.predicted_iql, allowed_filters ) hallucinated_filters_count += hallucinated_filters total_filters_count += total_filters @@ -73,14 +73,36 @@ async def calculate_valid_iql(dataset: List[IQLResult], filter_list: List[Expose for example in dataset: try: - await IQLQuery.parse(example.iql_filters, filter_list) + await IQLQuery.parse(example.predicted_iql, filter_list) valid_iql += 1 except Exception as exc: # pylint: disable=broad-exception-caught - logger.warning(f"Error while parsing IQL: {example.iql_filters}\n{exc}") + logger.warning(f"Error while parsing IQL: {example.predicted_iql}\n{exc}") return valid_iql / len(dataset) +def calculate_exact_match(dataset: List[IQLResult]) -> float: + """ + For a dataset, it calculates the ratio of predicated queries 
that are identical + to the ground truth ones. + + Args: + dataset: List containing Text2SQLResult objects that + represents (ground truth query, predicted query). + + Returns: + The ratio of predicated queries that are identical to the ground truth ones. + """ + + exact_query_matches = 0 + + for example in dataset: + if example.ground_truth_iql == example.predicted_iql: + exact_query_matches += 1 + + return exact_query_matches / len(dataset) + + async def calculate_syntax_errors(dataset: List[IQLResult], filter_list: List[ExposedFunction]) -> float: """ Calculates the ratio of syntax errors for a given dataset. @@ -96,16 +118,16 @@ async def calculate_syntax_errors(dataset: List[IQLResult], filter_list: List[Ex syntax_errors = 0 - filtered_dataset = [example for example in dataset if example.iql_filters != "UNSUPPORTED_QUERY"] + filtered_dataset = [example for example in dataset if example.predicted_iql != "UNSUPPORTED_QUERY"] for example in filtered_dataset: try: - await IQLQuery.parse(example.iql_filters, filter_list) + await IQLQuery.parse(example.predicted_iql, filter_list) except (IQLError, IQLUnsupportedSyntaxError, SyntaxError): syntax_errors += 1 except Exception as exc: # pylint: disable=broad-exception-caught # I haven't figured out yet how to handle it better :( - logger.warning(f"Error while parsing IQL: {example.iql_filters}\n{exc}") + logger.warning(f"Error while parsing IQL: {example.predicted_iql}\n{exc}") return syntax_errors / len(filtered_dataset) @@ -126,6 +148,7 @@ async def calculate_dataset_metrics(dataset: List[IQLResult], filter_list: List[ metrics = { "valid_iql": await calculate_valid_iql(dataset, filter_list), + "exact_match": calculate_exact_match(dataset), "hallucinated_filters": calculate_hallucinated_filters_for_dataset(dataset, filter_list), "syntax_errors": await calculate_syntax_errors(dataset, filter_list), } diff --git a/benchmarks/sql/iql_benchmark.py b/benchmarks/sql/iql_benchmark.py index 768d9fbb..c44ec9b7 100644 --- a/benchmarks/sql/iql_benchmark.py +++ b/benchmarks/sql/iql_benchmark.py @@ -1,14 +1,13 @@ import asyncio import json import os -from pathlib import Path +from ast import Dict from typing import Any, List import hydra import neptune -from config import BenchmarkConfig from constants import VIEW_REGISTRY, EvaluationType, ViewName -from dataset.bird_dataset import BIRDDataset, BIRDExample +from datasets import Dataset, load_dataset from hydra.utils import instantiate from iql.iql_result import IQLResult from iql.metrics import calculate_dataset_metrics @@ -17,7 +16,7 @@ from omegaconf import DictConfig from paths import PATH_EXPERIMENTS from sqlalchemy import create_engine -from utils import batch, get_datetime_str, set_up_gitlab_metadata +from utils import get_datetime_str, set_up_gitlab_metadata from dbally.audit.event_tracker import EventTracker from dbally.iql_generator.iql_generator import IQLGenerator @@ -28,25 +27,46 @@ async def _run_iql_for_single_example( - example: BIRDExample, view: BaseStructuredView, iql_generator: IQLGenerator + example: Dict, + view: BaseStructuredView, + iql_generator: IQLGenerator, ) -> IQLResult: - filter_list = view.list_filters() + filters = view.list_filters() event_tracker = EventTracker() try: - iql_filters = await iql_generator.generate_iql( - question=example.question, - filters=filter_list, + predicted_iql = await iql_generator.generate_iql( + question=example["question"], + filters=filters, event_tracker=event_tracker, ) except UnsupportedQueryError: - return 
IQLResult(question=example.question, iql_filters="UNSUPPORTED_QUERY", exception_raised=True) + return IQLResult( + question=example["question"], + ground_truth_iql=example["iql"], + predicted_iql="UNSUPPORTED_QUERY", + exception_raised=True, + ) + except (SyntaxError, ValueError): + return IQLResult( + question=example["question"], + ground_truth_iql=example["iql"], + predicted_iql="", + exception_raised=True, + ) - return IQLResult(question=example.question, iql_filters=str(iql_filters), exception_raised=False) + return IQLResult( + question=example["question"], + ground_truth_iql=example["iql"], + predicted_iql=str(predicted_iql), + exception_raised=False, + ) async def run_iql_for_dataset( - dataset: BIRDDataset, view: BaseStructuredView, iql_generator: IQLGenerator + dataset: Dataset, + view: BaseStructuredView, + iql_generator: IQLGenerator, ) -> List[IQLResult]: """ Runs IQL predictions for a dataset. @@ -59,14 +79,11 @@ async def run_iql_for_dataset( Returns: A list of IQLResult objects representing the predictions. """ - results: List[IQLResult] = [] - for group in batch(dataset, 5): - current_results = await asyncio.gather( - *[_run_iql_for_single_example(example, view, iql_generator) for example in group] - ) - results = [*current_results, *results] + for example in dataset: + result = await _run_iql_for_single_example(example, view, iql_generator) + results.append(result) return results @@ -83,35 +100,45 @@ async def evaluate(cfg: DictConfig) -> Any: ValueError: If model_name is not supported (at the moment only OpenAI's model are supported). """ + cfg = instantiate(cfg) output_dir = PATH_EXPERIMENTS / cfg.output_path / get_datetime_str() output_dir.mkdir(exist_ok=True, parents=True) - cfg = instantiate(cfg) - benchmark_cfg = BenchmarkConfig() view_name = cfg.view_name allowed_views = [view.value for view in ViewName] if view_name not in allowed_views: raise ValueError(f"View {view_name} not supported. 
Available views: {allowed_views}") - engine = create_engine(benchmark_cfg.pg_connection_string + f"/{cfg.db_name}") + engine = create_engine(cfg.db_url) view = VIEW_REGISTRY[ViewName(view_name)](engine) - if cfg.model_name.startswith("local/"): - llm = LocalLLM(model_name=cfg.model_name.split("/", 1)[1], api_key=benchmark_cfg.hf_api_key) + if cfg.llm.model_name.startswith("local/"): + llm = LocalLLM( + model_name=cfg.llm.model_name.split("/", 1)[1], + api_key=cfg.llm.api_key, + ) else: - llm = LiteLLM(api_key=benchmark_cfg.openai_api_key, model_name=cfg.model_name) + llm = LiteLLM( + model_name=cfg.llm.model_name, + api_key=cfg.llm.api_key, + ) iql_generator = IQLGenerator(llm=llm) run = None if cfg.neptune.log: run = neptune.init_run( - project=benchmark_cfg.neptune_project, - api_token=benchmark_cfg.neptune_api_token, + project=cfg.neptune.project, + api_token=cfg.neptune.api_token, ) run["config"] = stringify_unsupported(cfg) - tags = list(cfg.neptune.get("tags", [])) + [EvaluationType.IQL.value, view_name, cfg.model_name, cfg.db_name] + tags = list(cfg.neptune.get("tags", [])) + [ + EvaluationType.IQL.value, + view_name, + cfg.llm.model_name, + cfg.db_name, + ] run["sys/tags"].add(tags) if "CI_MERGE_REQUEST_IID" in os.environ: @@ -120,10 +147,11 @@ async def evaluate(cfg: DictConfig) -> Any: metrics_file_name, results_file_name = "metrics.json", "eval_results.json" logger.info(f"Running IQL predictions for dataset: {cfg.dataset_path} and view: {view_name}") - evaluation_dataset = BIRDDataset.from_json_file( - Path(cfg.dataset_path), difficulty_levels=cfg.get("difficulty_levels") - ) - dbally_results = await run_iql_for_dataset(dataset=evaluation_dataset, view=view, iql_generator=iql_generator) + + dataset = load_dataset(cfg.dataset_path, split=cfg.split) + dataset = dataset.filter(lambda x: x["db_id"] in cfg.db_ids and x["difficulty"] in cfg.difficulties) + + dbally_results = await run_iql_for_dataset(dataset=dataset, view=view, iql_generator=iql_generator) valid_dbally_results = [result for result in dbally_results if not result.exception_raised] unsupported_query_error = (len(dbally_results) - len(valid_dbally_results)) / len(dbally_results) diff --git a/benchmarks/sql/text2sql_benchmark.py b/benchmarks/sql/text2sql_benchmark.py index 3799ba4f..178a8332 100644 --- a/benchmarks/sql/text2sql_benchmark.py +++ b/benchmarks/sql/text2sql_benchmark.py @@ -85,12 +85,12 @@ async def evaluate(cfg: DictConfig) -> Any: engine = create_engine(benchmark_cfg.pg_connection_string + f"/{cfg.db_name}") - if cfg.model_name.startswith("local/"): - llm = LocalLLM(model_name=cfg.model_name.split("/", 1)[1], api_key=benchmark_cfg.hf_api_key) + if cfg.llm.model_name.startswith("local/"): + llm = LocalLLM(model_name=cfg.llm.model_name.split("/", 1)[1], api_key=benchmark_cfg.hf_api_key) else: llm = LiteLLM( api_key=benchmark_cfg.openai_api_key, - model_name=cfg.model_name, + model_name=cfg.llm.model_name, ) run = None @@ -100,7 +100,7 @@ async def evaluate(cfg: DictConfig) -> Any: api_token=benchmark_cfg.neptune_api_token, ) run["config"] = stringify_unsupported(cfg) - tags = list(cfg.neptune.get("tags", [])) + [EvaluationType.TEXT2SQL.value, cfg.db_name, cfg.model_name] + tags = list(cfg.neptune.get("tags", [])) + [EvaluationType.TEXT2SQL.value, cfg.db_name, cfg.llm.model_name] run["sys/tags"].add(tags) if "CI_MERGE_REQUEST_IID" in os.environ: diff --git a/benchmarks/sql/views/superhero.py b/benchmarks/sql/views/superhero.py index 22f7e837..10dd47c1 100644 --- a/benchmarks/sql/views/superhero.py +++ 
b/benchmarks/sql/views/superhero.py @@ -9,7 +9,7 @@ from dbally import SqlAlchemyBaseView, decorators -engine = create_engine(config.pg_connection_string + "/superhero") +engine = create_engine(config.pg_connection_string + "/superhero.sqlite") SuperheroModel = automap_base() SuperheroModel.prepare(autoload_with=engine, reflect=True) @@ -43,6 +43,14 @@ class SuperheroDBSchema: class SuperheroFilterMixin: + @decorators.view_filter() + def filter_by_name(self, name: str) -> sqlalchemy.ColumnElement: + return SuperheroModel.classes.superhero.name == name + + @decorators.view_filter() + def filter_by_full_name(self, full_name: str) -> sqlalchemy.ColumnElement: + return SuperheroModel.classes.superhero.full_name == full_name + @decorators.view_filter() def filter_by_superhero_name(self, name: str) -> sqlalchemy.ColumnElement: return SuperheroModel.classes.superhero.superhero_name == name @@ -97,6 +105,14 @@ def filter_by_gender(self, gender: str) -> sqlalchemy.ColumnElement: sqlalchemy.select(SuperheroModel.classes.gender.id).where(SuperheroModel.classes.gender.gender == gender) ) + @decorators.view_filter() + def filter_by_missing_weight(self) -> sqlalchemy.ColumnElement: + return SuperheroModel.classes.superhero.weight_kg == 0 or SuperheroModel.classes.superhero.weight_kg is None + + @decorators.view_filter() + def filter_by_weight(self, weight: float) -> sqlalchemy.ColumnElement: + return SuperheroModel.classes.superhero.weight_kg == weight + @decorators.view_filter() def heavier_than(self, weight: float) -> sqlalchemy.ColumnElement: return SuperheroModel.classes.superhero.weight_kg > weight @@ -105,6 +121,10 @@ def heavier_than(self, weight: float) -> sqlalchemy.ColumnElement: def lighter_than(self, weight: float) -> sqlalchemy.ColumnElement: return SuperheroModel.classes.superhero.weight_kg < weight + @decorators.view_filter() + def filter_by_height(self, height: float) -> sqlalchemy.ColumnElement: + return SuperheroModel.classes.superhero.height_cm == height + @decorators.view_filter() def taller_than(self, height: float) -> sqlalchemy.ColumnElement: return SuperheroModel.classes.superhero.height_cm > height From db2207cf82e27599a37715da7ed8097d99485b99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Wed, 10 Jul 2024 10:29:17 +0200 Subject: [PATCH 09/34] add setup docs --- benchmarks/README.md | 8 ++++++-- benchmarks/sql/README.md | 10 ++++++++-- benchmarks/sql/tests/unit/test_main_evaluate.py | 3 +-- setup.cfg | 3 ++- 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index e39d9efc..a34616be 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -4,10 +4,14 @@ This folder contains scripts that produce reproducible timings and evaluation me ## Setup environment -tbd +Install dependencies with: + +```bash +pip install dbally[benchmarks] +``` ## Benchmark list Please refer to each subfolder to discover each benchmark suite. Links are provided where descriptions exist: -* [SQL](sql/README.md) +- [`SQL`](sql/README.md) diff --git a/benchmarks/sql/README.md b/benchmarks/sql/README.md index 39a2f037..9f05e28f 100644 --- a/benchmarks/sql/README.md +++ b/benchmarks/sql/README.md @@ -8,8 +8,14 @@ This folder contains benchmarks for querying SQL databases with db-ally. 
This su ## Run benchmarks -tbd +Run evaluation suite: + +```bash +python bench.py +``` ## Run tests -tbd +```bash +python -m pytest +``` diff --git a/benchmarks/sql/tests/unit/test_main_evaluate.py b/benchmarks/sql/tests/unit/test_main_evaluate.py index 08c0341e..9c303a58 100644 --- a/benchmarks/sql/tests/unit/test_main_evaluate.py +++ b/benchmarks/sql/tests/unit/test_main_evaluate.py @@ -1,10 +1,9 @@ from unittest.mock import call, patch import pytest +from bench import evaluate from omegaconf import DictConfig -from benchmarks.sql.bench import evaluate - @patch("dbally_benchmark.evaluate.e2e_evaluate") @patch("dbally_benchmark.evaluate.text2sql_evaluate") diff --git a/setup.cfg b/setup.cfg index 7a162c5b..ee3d66fe 100644 --- a/setup.cfg +++ b/setup.cfg @@ -54,8 +54,9 @@ examples = pydantic~=2.6.0 pydantic_settings~=2.1.0 psycopg2-binary~=2.9.9 -benchmark = +benchmarks = asyncpg~=0.28.0 + datasets~=2.20.0 eval-type-backport~=0.1.3 hydra-core~=1.3.2 loguru~=0.7.0 From 507b75c4f8d5e6c7b2cabc0ffde774af71f63c1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Thu, 11 Jul 2024 10:38:49 +0200 Subject: [PATCH 10/34] eval for iql --- .../sql/experiment_config/iql/superhero.yaml | 2 +- benchmarks/sql/iql/iql_result.py | 14 -- benchmarks/sql/iql/metrics.py | 77 +++---- benchmarks/sql/iql_benchmark.py | 198 +++--------------- benchmarks/sql/pipeline.py | 139 ++++++++++++ benchmarks/sql/results.py | 24 +++ benchmarks/sql/saving.py | 54 +++++ benchmarks/sql/tests/unit/test_iql_metrics.py | 33 ++- .../sql/tests/unit/test_main_evaluate.py | 6 +- benchmarks/sql/views/superhero.py | 3 +- src/dbally/iql_generator/iql_generator.py | 9 +- 11 files changed, 306 insertions(+), 253 deletions(-) delete mode 100644 benchmarks/sql/iql/iql_result.py create mode 100644 benchmarks/sql/pipeline.py create mode 100644 benchmarks/sql/results.py create mode 100644 benchmarks/sql/saving.py diff --git a/benchmarks/sql/experiment_config/iql/superhero.yaml b/benchmarks/sql/experiment_config/iql/superhero.yaml index c7195c45..209bc245 100644 --- a/benchmarks/sql/experiment_config/iql/superhero.yaml +++ b/benchmarks/sql/experiment_config/iql/superhero.yaml @@ -5,4 +5,4 @@ superhero: difficulties: ["simple"] output_path: "iql_evaluation" view_name: "SuperheroView" - db_url: "sqlite://" + db_url: "sqlite:///superhero.db" diff --git a/benchmarks/sql/iql/iql_result.py b/benchmarks/sql/iql/iql_result.py deleted file mode 100644 index 0cfa8fec..00000000 --- a/benchmarks/sql/iql/iql_result.py +++ /dev/null @@ -1,14 +0,0 @@ -from typing import Optional - -from pydantic import BaseModel - - -class IQLResult(BaseModel): - """ - Represents a single IQL result. 
- """ - - question: str - ground_truth_iql: str - predicted_iql: str - exception_raised: Optional[bool] = None diff --git a/benchmarks/sql/iql/metrics.py b/benchmarks/sql/iql/metrics.py index 0b14edd5..35146421 100644 --- a/benchmarks/sql/iql/metrics.py +++ b/benchmarks/sql/iql/metrics.py @@ -1,9 +1,9 @@ import ast -from typing import Dict, List, Tuple +from typing import List, Tuple -from iql.iql_result import IQLResult from iql.method_call_visitor import MethodCallVisitor from loguru import logger +from results import TextToIQLResult from dbally.iql._exceptions import IQLError, IQLUnsupportedSyntaxError from dbally.iql._query import IQLQuery @@ -25,13 +25,12 @@ def _count_hallucinated_methods_for_single_example(iql: str, allowed_methods: Li return 0, 0 -def calculate_hallucinated_filters_for_dataset(dataset: List[IQLResult], filter_list: List[ExposedFunction]) -> float: +def calculate_hallucinated_filters(results: List[TextToIQLResult], filter_list: List[ExposedFunction]) -> float: """ - Calculates the ratio of hallucinated filters for a given dataset. + Calculates the ratio of hallucinated filters for a given results. Args: - dataset: List containing IQLResult objects that - represents predicted filters. + results: List containing TextToIQLResult objects that represents predicted filters. filter_list: List of allowed filters. Returns: @@ -43,7 +42,7 @@ def calculate_hallucinated_filters_for_dataset(dataset: List[IQLResult], filter_ allowed_filters = [filter.name for filter in filter_list] - for example in dataset: + for example in results: hallucinated_filters, total_filters = _count_hallucinated_methods_for_single_example( example.predicted_iql, allowed_filters ) @@ -56,13 +55,12 @@ def calculate_hallucinated_filters_for_dataset(dataset: List[IQLResult], filter_ return hallucinated_filters_count / total_filters_count -async def calculate_valid_iql(dataset: List[IQLResult], filter_list: List[ExposedFunction]) -> float: +async def calculate_valid_iql(results: List[TextToIQLResult], filter_list: List[ExposedFunction]) -> float: """ - Calculates the ratio of valid IQL queries for a given dataset. + Calculates the ratio of valid IQL queries for a given results. Args: - dataset: List containing IQLResult objects that - represents predicted filters. + results: List containing TextToIQLResult objects that represents predicted filters. filter_list: List of allowed filters. Returns: @@ -71,86 +69,77 @@ async def calculate_valid_iql(dataset: List[IQLResult], filter_list: List[Expose valid_iql = 0 - for example in dataset: + for example in results: try: await IQLQuery.parse(example.predicted_iql, filter_list) valid_iql += 1 except Exception as exc: # pylint: disable=broad-exception-caught logger.warning(f"Error while parsing IQL: {example.predicted_iql}\n{exc}") - return valid_iql / len(dataset) + return valid_iql / len(results) -def calculate_exact_match(dataset: List[IQLResult]) -> float: +def calculate_exact_match(results: List[TextToIQLResult]) -> float: """ - For a dataset, it calculates the ratio of predicated queries that are identical + For a results, it calculates the ratio of predicated queries that are identical to the ground truth ones. Args: - dataset: List containing Text2SQLResult objects that - represents (ground truth query, predicted query). + results: List containing Text2SQLResult objects that represents ground truth query, predicted query. Returns: The ratio of predicated queries that are identical to the ground truth ones. 
""" - exact_query_matches = 0 - for example in dataset: + for example in results: if example.ground_truth_iql == example.predicted_iql: exact_query_matches += 1 - return exact_query_matches / len(dataset) + return exact_query_matches / len(results) -async def calculate_syntax_errors(dataset: List[IQLResult], filter_list: List[ExposedFunction]) -> float: +async def calculate_invalid_iql(results: List[TextToIQLResult], filter_list: List[ExposedFunction]) -> float: """ - Calculates the ratio of syntax errors for a given dataset. + Calculates the ratio of syntax errors for a given results. Args: - dataset: List containing IQLResult objects that - represents predicted filters. + results: List containing TextToIQLResult objects that represents predicted filters. filter_list: List of allowed filters. Returns: Syntax errors ratio. """ - syntax_errors = 0 - filtered_dataset = [example for example in dataset if example.predicted_iql != "UNSUPPORTED_QUERY"] + filtered_results = [result for result in results if result.predicted_iql != "UNSUPPORTED_QUERY"] - for example in filtered_dataset: + for result in filtered_results: try: - await IQLQuery.parse(example.predicted_iql, filter_list) + await IQLQuery.parse(result.predicted_iql, filter_list) except (IQLError, IQLUnsupportedSyntaxError, SyntaxError): syntax_errors += 1 except Exception as exc: # pylint: disable=broad-exception-caught # I haven't figured out yet how to handle it better :( - logger.warning(f"Error while parsing IQL: {example.predicted_iql}\n{exc}") + logger.warning(f"Error while parsing IQL: {result.predicted_iql}\n{exc}") - return syntax_errors / len(filtered_dataset) + return syntax_errors / len(filtered_results) -async def calculate_dataset_metrics(dataset: List[IQLResult], filter_list: List[ExposedFunction]) -> Dict[str, float]: +def calculate_unsupported_iql(results: List[TextToIQLResult]) -> float: """ - Calculates metrics for a given dataset. The following metrics are being calculated: valid IQL, - ratio of hallucinated filters and ratio of IQLs contained syntax error. + Calculates the ratio of unsupported queries for a given results. Args: - dataset: List containing IQLResult objects that - represents predicted filters. - filter_list: List of allowed filters. + results: List containingTextToTextToIQLResult objects that represents predicted filters. Returns: - Dictionary containing metrics. + Unsupported queries ratio. 
""" + unsupported_queries = 0 - metrics = { - "valid_iql": await calculate_valid_iql(dataset, filter_list), - "exact_match": calculate_exact_match(dataset), - "hallucinated_filters": calculate_hallucinated_filters_for_dataset(dataset, filter_list), - "syntax_errors": await calculate_syntax_errors(dataset, filter_list), - } + for result in results: + if result.predicted_iql == "UNSUPPORTED_QUERY": + unsupported_queries += 1 - return metrics + return unsupported_queries / len(results) diff --git a/benchmarks/sql/iql_benchmark.py b/benchmarks/sql/iql_benchmark.py index c44ec9b7..ae72684e 100644 --- a/benchmarks/sql/iql_benchmark.py +++ b/benchmarks/sql/iql_benchmark.py @@ -1,193 +1,51 @@ import asyncio -import json -import os -from ast import Dict -from typing import Any, List import hydra import neptune -from constants import VIEW_REGISTRY, EvaluationType, ViewName -from datasets import Dataset, load_dataset -from hydra.utils import instantiate -from iql.iql_result import IQLResult -from iql.metrics import calculate_dataset_metrics +from constants import EvaluationType +from datasets import load_dataset from loguru import logger from neptune.utils import stringify_unsupported from omegaconf import DictConfig -from paths import PATH_EXPERIMENTS -from sqlalchemy import create_engine -from utils import get_datetime_str, set_up_gitlab_metadata +from pipeline import TextToIQLEvaluationPipeline +from saving import save -from dbally.audit.event_tracker import EventTracker -from dbally.iql_generator.iql_generator import IQLGenerator -from dbally.iql_generator.prompt import IQL_GENERATION_TEMPLATE, UnsupportedQueryError -from dbally.llms.litellm import LiteLLM -from dbally.llms.local import LocalLLM -from dbally.views.structured import BaseStructuredView - -async def _run_iql_for_single_example( - example: Dict, - view: BaseStructuredView, - iql_generator: IQLGenerator, -) -> IQLResult: - filters = view.list_filters() - event_tracker = EventTracker() - - try: - predicted_iql = await iql_generator.generate_iql( - question=example["question"], - filters=filters, - event_tracker=event_tracker, - ) - except UnsupportedQueryError: - return IQLResult( - question=example["question"], - ground_truth_iql=example["iql"], - predicted_iql="UNSUPPORTED_QUERY", - exception_raised=True, - ) - except (SyntaxError, ValueError): - return IQLResult( - question=example["question"], - ground_truth_iql=example["iql"], - predicted_iql="", - exception_raised=True, - ) - - return IQLResult( - question=example["question"], - ground_truth_iql=example["iql"], - predicted_iql=str(predicted_iql), - exception_raised=False, - ) - - -async def run_iql_for_dataset( - dataset: Dataset, - view: BaseStructuredView, - iql_generator: IQLGenerator, -) -> List[IQLResult]: - """ - Runs IQL predictions for a dataset. - - Args: - dataset: The dataset containing questions to be transformed into IQL queries. - view: The view used to generate IQL. - iql_generator: IQL generator. - - Returns: - A list of IQLResult objects representing the predictions. - """ - results: List[IQLResult] = [] - - for example in dataset: - result = await _run_iql_for_single_example(example, view, iql_generator) - results.append(result) - - return results - - -async def evaluate(cfg: DictConfig) -> Any: +@hydra.main(version_base=None, config_path="experiment_config", config_name="evaluate_iql_config") +async def evaluate(config: DictConfig) -> None: """ Runs IQL evaluation for a single dataset defined in hydra config. 
Args: - cfg: hydra config, loads automatically from path passed on to the decorator - - Raises: - ValueError: If view_name defined in hydra config is not supported. - ValueError: If model_name is not supported (at the - moment only OpenAI's model are supported). + config: hydra config, loads automatically from path passed on to the decorator. """ - cfg = instantiate(cfg) - - output_dir = PATH_EXPERIMENTS / cfg.output_path / get_datetime_str() - output_dir.mkdir(exist_ok=True, parents=True) - - view_name = cfg.view_name - allowed_views = [view.value for view in ViewName] - if view_name not in allowed_views: - raise ValueError(f"View {view_name} not supported. Available views: {allowed_views}") - - engine = create_engine(cfg.db_url) - view = VIEW_REGISTRY[ViewName(view_name)](engine) + logger.info(f"Running IQL predictions for dataset: {config.dataset_path} and view: {config.view_name}.") + dataset = load_dataset(config.dataset_path, split=config.split) + dataset = dataset.filter(lambda x: x["db_id"] in config.db_ids and x["difficulty"] in config.difficulties) - if cfg.llm.model_name.startswith("local/"): - llm = LocalLLM( - model_name=cfg.llm.model_name.split("/", 1)[1], - api_key=cfg.llm.api_key, - ) - else: - llm = LiteLLM( - model_name=cfg.llm.model_name, - api_key=cfg.llm.api_key, - ) + pipe = TextToIQLEvaluationPipeline(config) + metrics, results = await pipe(dataset) - iql_generator = IQLGenerator(llm=llm) + output_file = save("./evals/", metrics=metrics, results=results) + logger.info(f"IQL evaluation metrics and predictions saved under directory: {output_file}.") - run = None - if cfg.neptune.log: + if config.neptune.log: run = neptune.init_run( - project=cfg.neptune.project, - api_token=cfg.neptune.api_token, + project=config.neptune.project, + api_token=config.neptune.api_token, ) - run["config"] = stringify_unsupported(cfg) - tags = list(cfg.neptune.get("tags", [])) + [ - EvaluationType.IQL.value, - view_name, - cfg.llm.model_name, - cfg.db_name, - ] - run["sys/tags"].add(tags) - - if "CI_MERGE_REQUEST_IID" in os.environ: - run = set_up_gitlab_metadata(run) - - metrics_file_name, results_file_name = "metrics.json", "eval_results.json" - - logger.info(f"Running IQL predictions for dataset: {cfg.dataset_path} and view: {view_name}") - - dataset = load_dataset(cfg.dataset_path, split=cfg.split) - dataset = dataset.filter(lambda x: x["db_id"] in cfg.db_ids and x["difficulty"] in cfg.difficulties) - - dbally_results = await run_iql_for_dataset(dataset=dataset, view=view, iql_generator=iql_generator) - valid_dbally_results = [result for result in dbally_results if not result.exception_raised] - unsupported_query_error = (len(dbally_results) - len(valid_dbally_results)) / len(dbally_results) - - with open(output_dir / results_file_name, "w", encoding="utf-8") as outfile: - json.dump([result.model_dump() for result in dbally_results], outfile, indent=4) - - logger.info("Calculating metrics") - metrics = await calculate_dataset_metrics(dbally_results, view.list_filters()) - metrics = {**metrics, "unsupported_query_error": unsupported_query_error} - - with open(output_dir / metrics_file_name, "w", encoding="utf-8") as outfile: - json.dump(metrics, outfile, indent=4) - - logger.info(f"IQL predictions saved under directory: {output_dir}") - - if run: - run["config/iql_prompt_template"] = stringify_unsupported(IQL_GENERATION_TEMPLATE.chat) - run[f"evaluation/{metrics_file_name}"].upload((output_dir / metrics_file_name).as_posix()) - run[f"evaluation/{results_file_name}"].upload((output_dir / 
results_file_name).as_posix()) + run["sys/tags"].add( + [ + EvaluationType.IQL.value, + config.view_name, + config.llm.model_name, + config.db_name, + ] + ) + run["config"] = stringify_unsupported(config) run["evaluation/metrics"] = stringify_unsupported(metrics) - logger.info(f"Evaluation results logged to neptune at {run.get_url()}") - - -@hydra.main(version_base=None, config_path="experiment_config", config_name="evaluate_iql_config") -def main(cfg: DictConfig): - """ - Runs IQL evaluation for a single dataset defined in hydra config. - The following metrics are calculated during evaluation: valid IQL, - ratio of hallucinated filters and ratio of IQLs contained syntax error. - - Args: - cfg: hydra config, loads automatically from path passed on to the decorator. - """ - - asyncio.run(evaluate(cfg)) + logger.info(f"Evaluation results logged to neptune at {run.get_url()}.") if __name__ == "__main__": - main() # pylint: disable=E1120 + asyncio.run(evaluate()) # pylint: disable=E1120 diff --git a/benchmarks/sql/pipeline.py b/benchmarks/sql/pipeline.py new file mode 100644 index 00000000..d2b96b2a --- /dev/null +++ b/benchmarks/sql/pipeline.py @@ -0,0 +1,139 @@ +from dataclasses import asdict +from typing import Dict, List, Tuple + +from constants import VIEW_REGISTRY, ViewName +from datasets import Dataset +from iql.metrics import ( + calculate_exact_match, + calculate_hallucinated_filters, + calculate_invalid_iql, + calculate_unsupported_iql, + calculate_valid_iql, +) +from results import TextToIQLResult +from sqlalchemy import create_engine + +from dbally.iql._exceptions import IQLError +from dbally.iql_generator.iql_generator import IQLGenerator +from dbally.iql_generator.prompt import UnsupportedQueryError +from dbally.llms.litellm import LiteLLM +from dbally.llms.local import LocalLLM +from dbally.views.sqlalchemy_base import SqlAlchemyBaseView + + +class TextToIQLEvaluationPipeline: + """ + Pipeline for evaluating IQL predictions. + """ + + def __init__(self, config: Dict) -> None: + """ + Constructs the pipeline for evaluating IQL predictions. + + Args: + config: The configuration for the. + + Raises: + ValueError: If the view name is not supported. + """ + self.engine = create_engine(config.db_url) + self.view = self.get_view(config.view_name) + self.iql_generator = self.get_iql_generator(config.llm) + + def get_view(self, view_name: str) -> SqlAlchemyBaseView: + """ + Returns the view object based on the view name. + + Args: + view_name: The name of the view. + + Returns: + The view object. + + Raises: + ValueError: If the view name is not supported + """ + allowed_views = [view.value for view in ViewName] + if view_name not in allowed_views: + raise ValueError(f"View {view_name} not supported. Available views: {allowed_views}") + return VIEW_REGISTRY[ViewName(view_name)](self.engine) + + def get_iql_generator(self, llm_config: Dict) -> IQLGenerator: + """ + Returns the IQL generator based on the LLM configuration. + + Args: + llm_config: The LLM configuration. + + Returns: + The IQL generator. + """ + if llm_config.model_name.startswith("local/"): + llm = LocalLLM( + model_name=llm_config.model_name.split("/", 1)[1], + api_key=llm_config.api_key, + ) + else: + llm = LiteLLM( + model_name=llm_config.model_name, + api_key=llm_config.api_key, + ) + return IQLGenerator(llm) + + async def compute_metrics(self, results: List[TextToIQLResult]) -> Dict[str, float]: + """ + Computes the metrics for IQL predictions. + + Args: + results: The list of IQL predictions. 
+ + Returns: + The metrics for the IQL predictions. + """ + filters = self.view.list_filters() + + return { + "exact_match": calculate_exact_match(results), + "valid_iql": await calculate_valid_iql(results, filters), + "invalid_iql": await calculate_invalid_iql(results, filters), + "unsupported_iql": calculate_unsupported_iql(results), + "hallucinated_iql": calculate_hallucinated_filters(results, filters), + } + + async def __call__(self, dataset: Dataset) -> Tuple[Dict[str, float], List[Dict[str, str]]]: + """ + Runs the pipeline for evaluating IQL predictions. + + Args: + dataset: The dataset containing the questions and ground truth IQL queries. + + Returns: + The list of IQL predictions. + """ + filters = self.view.list_filters() + examples = self.view.list_few_shots() + results = [] + + for data in dataset: + try: + predicted_iql = await self.iql_generator.generate_iql( + question=data["question"], + filters=filters, + examples=examples, + n_retries=0, + ) + except UnsupportedQueryError: + result = "UNSUPPORTED_QUERY" + except IQLError as exc: + result = exc.source + else: + result = str(predicted_iql) + + results.append( + TextToIQLResult(question=data["question"], ground_truth_iql=data["iql"], predicted_iql=result) + ) + + metrics = await self.compute_metrics(results) + results = [asdict(result) for result in results] + + return metrics, results diff --git a/benchmarks/sql/results.py b/benchmarks/sql/results.py new file mode 100644 index 00000000..ac638e89 --- /dev/null +++ b/benchmarks/sql/results.py @@ -0,0 +1,24 @@ +from dataclasses import dataclass + + +@dataclass +class TextToIQLResult: + """ + Represents a single TextToIQL result. + """ + + question: str + ground_truth_iql: str + predicted_iql: str + + +@dataclass +class TextToSQLResult: + """ + Represents a single TextToSQL result. + """ + + db_id: str + question: str + ground_truth_sql: str + predicted_sql: str diff --git a/benchmarks/sql/saving.py b/benchmarks/sql/saving.py new file mode 100644 index 00000000..a2ab81d9 --- /dev/null +++ b/benchmarks/sql/saving.py @@ -0,0 +1,54 @@ +import json +import os +import sys +from datetime import datetime +from pathlib import Path +from typing import Any + +from datasets.utils.filelock import FileLock + + +def save(path_or_file: str, **data: Any) -> Path: + """ + Saves results to a JSON file. Also saves system information such as current time and Python system information. + + Args: + path_or_file: Path or file to store the file. If only a folder is provided + the results file will be saved in the format `"result-%Y_%m_%d-%H_%M_%S.json"`. + **data: The data to save. + + Returns: + The path to the saved file. 
+ """ + current_time = datetime.now() + print(type(current_time)) + + file_path = _setup_path(path_or_file, current_time) + + data["_timestamp"] = current_time.isoformat() + data["_python_version"] = sys.version + data["_interpreter_path"] = sys.executable + + with FileLock(str(file_path) + ".lock"): + with open(file_path, "w", encoding="utf8") as f: + json.dump(data, f) + + try: + os.remove(str(file_path) + ".lock") + except FileNotFoundError: + pass + + return file_path + + +def _setup_path(path_or_file: str, current_time: datetime) -> Path: + path_or_file = Path(path_or_file) + is_file = len(path_or_file.suffix) > 0 + if is_file: + folder = path_or_file.parent + file_name = path_or_file.name + else: + folder = path_or_file + file_name = "result-" + current_time.strftime("%Y_%m_%d-%H_%M_%S") + ".json" + folder.mkdir(parents=True, exist_ok=True) + return folder / file_name diff --git a/benchmarks/sql/tests/unit/test_iql_metrics.py b/benchmarks/sql/tests/unit/test_iql_metrics.py index 5cd639cd..871fbb1c 100644 --- a/benchmarks/sql/tests/unit/test_iql_metrics.py +++ b/benchmarks/sql/tests/unit/test_iql_metrics.py @@ -1,10 +1,10 @@ -from iql.iql_result import IQLResult from iql.metrics import ( _count_hallucinated_methods_for_single_example, - calculate_hallucinated_filters_for_dataset, - calculate_syntax_errors, + calculate_hallucinated_filters, + calculate_invalid_iql, calculate_valid_iql, ) +from results import TextToIQLResult from dbally.views.exposed_functions import ExposedFunction, MethodParamWithTyping @@ -21,7 +21,7 @@ IQL_WITH_SYNTAX_ERROR = "filter_by_name('Cody Brown'" -def test_count_hallucinated_methods_for_single_example(): +def test_count_hallucinated_methods_for_single_example() -> None: hallucinated_methods, total_methods = _count_hallucinated_methods_for_single_example( IQL_WITH_HALLUCINATED_FILTERS, [method.name for method in ALLOWED_METHODS] ) @@ -35,31 +35,30 @@ def test_count_hallucinated_methods_for_single_example(): assert total_methods == 2 -def test_calculate_hallucinated_filters_for_dataset(): +def test_calculate_hallucinated_filters() -> None: dataset = [ - IQLResult(question="", iql_filters=IQL_WITH_HALLUCINATED_FILTERS), - IQLResult(question="", iql_filters=VALID_IQL), + TextToIQLResult(question="", ground_truth_iql="", predicted_iql=IQL_WITH_HALLUCINATED_FILTERS), + TextToIQLResult(question="", ground_truth_iql="", predicted_iql=VALID_IQL), ] - - hallucinated_filters_ratio = calculate_hallucinated_filters_for_dataset(dataset, ALLOWED_METHODS) + hallucinated_filters_ratio = calculate_hallucinated_filters(dataset, ALLOWED_METHODS) assert hallucinated_filters_ratio == 0.25 -async def test_calculate_syntax_errors(): +async def test_calculate_invalid_iql() -> None: dataset = [ - IQLResult(question="", iql_filters=IQL_WITH_SYNTAX_ERROR), - IQLResult(question="", iql_filters=VALID_IQL), + TextToIQLResult(question="", ground_truth_iql="", predicted_iql=IQL_WITH_SYNTAX_ERROR), + TextToIQLResult(question="", ground_truth_iql="", predicted_iql=VALID_IQL), ] - syntax_errors_ratio = await calculate_syntax_errors(dataset, ALLOWED_METHODS) + syntax_errors_ratio = await calculate_invalid_iql(dataset, ALLOWED_METHODS) assert syntax_errors_ratio == 0.5 -async def test_calculate_valid_iql(): +async def test_calculate_valid_iql() -> None: dataset = [ - IQLResult(question="", iql_filters=IQL_WITH_SYNTAX_ERROR), - IQLResult(question="", iql_filters=VALID_IQL), - IQLResult(question="", iql_filters=IQL_WITH_HALLUCINATED_FILTERS), + TextToIQLResult(question="", ground_truth_iql="", 
predicted_iql=IQL_WITH_SYNTAX_ERROR), + TextToIQLResult(question="", ground_truth_iql="", predicted_iql=VALID_IQL), + TextToIQLResult(question="", ground_truth_iql="", predicted_iql=IQL_WITH_HALLUCINATED_FILTERS), ] valid_iql_ratio = await calculate_valid_iql(dataset, ALLOWED_METHODS) diff --git a/benchmarks/sql/tests/unit/test_main_evaluate.py b/benchmarks/sql/tests/unit/test_main_evaluate.py index 9c303a58..7a6a0de2 100644 --- a/benchmarks/sql/tests/unit/test_main_evaluate.py +++ b/benchmarks/sql/tests/unit/test_main_evaluate.py @@ -5,9 +5,9 @@ from omegaconf import DictConfig -@patch("dbally_benchmark.evaluate.e2e_evaluate") -@patch("dbally_benchmark.evaluate.text2sql_evaluate") -@patch("dbally_benchmark.evaluate.iql_evaluate") +@patch("bench.e2e_evaluate") +@patch("bench.text2sql_evaluate") +@patch("bench.iql_evaluate") @pytest.mark.asyncio async def test_evaluate(iql_mock, text2sql_mock, e2e_mock) -> None: cfg = DictConfig( diff --git a/benchmarks/sql/views/superhero.py b/benchmarks/sql/views/superhero.py index 10dd47c1..69c2e82e 100644 --- a/benchmarks/sql/views/superhero.py +++ b/benchmarks/sql/views/superhero.py @@ -1,7 +1,6 @@ # pylint: disable=missing-docstring, missing-return-doc, missing-param-doc import sqlalchemy -from config import config from sqlalchemy import create_engine from sqlalchemy.dialects.postgresql import ARRAY from sqlalchemy.ext.automap import automap_base @@ -9,7 +8,7 @@ from dbally import SqlAlchemyBaseView, decorators -engine = create_engine(config.pg_connection_string + "/superhero.sqlite") +engine = create_engine("sqlite:///superhero.db") SuperheroModel = automap_base() SuperheroModel.prepare(autoload_with=engine, reflect=True) diff --git a/src/dbally/iql_generator/iql_generator.py b/src/dbally/iql_generator/iql_generator.py index 7eeb9154..26a8b0c0 100644 --- a/src/dbally/iql_generator/iql_generator.py +++ b/src/dbally/iql_generator/iql_generator.py @@ -38,7 +38,7 @@ async def generate_iql( self, question: str, filters: List[ExposedFunction], - event_tracker: EventTracker, + event_tracker: Optional[EventTracker] = None, examples: Optional[List[FewShotExample]] = None, llm_options: Optional[LLMOptions] = None, n_retries: int = 3, @@ -56,6 +56,9 @@ async def generate_iql( Returns: Generated IQL query. + + Raises: + IQLError: If the generated IQL is not valid. 
""" prompt_format = IQLGenerationPromptFormat( question=question, @@ -64,7 +67,7 @@ async def generate_iql( ) formatted_prompt = self._prompt_template.format_prompt(prompt_format) - for _ in range(n_retries + 1): + for retry in range(n_retries + 1): try: response = await self._llm.generate_text( prompt=formatted_prompt, @@ -80,5 +83,7 @@ async def generate_iql( event_tracker=event_tracker, ) except IQLError as exc: + if retry == n_retries: + raise exc formatted_prompt = formatted_prompt.add_assistant_message(response) formatted_prompt = formatted_prompt.add_user_message(ERROR_MESSAGE.format(error=exc)) From 6344cf59fd2449cb1234a45635513ea14ae3f3fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Fri, 12 Jul 2024 02:15:08 +0200 Subject: [PATCH 11/34] update README --- benchmarks/README.md | 4 ++-- benchmarks/sql/README.md | 32 ++++++++++++++++++++++++++++++-- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index a34616be..9bda6e91 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -4,10 +4,10 @@ This folder contains scripts that produce reproducible timings and evaluation me ## Setup environment -Install dependencies with: +From the root directory of the project, install the dependencies: ```bash -pip install dbally[benchmarks] +pip install -e '.[benchmarks]' ``` ## Benchmark list diff --git a/benchmarks/sql/README.md b/benchmarks/sql/README.md index 9f05e28f..d6f5abf4 100644 --- a/benchmarks/sql/README.md +++ b/benchmarks/sql/README.md @@ -6,12 +6,22 @@ This folder contains benchmarks for querying SQL databases with db-ally. This su - `Text2IQL` - measures correctness of IQL queries generated by structured views. - `Text2SQL` - measures correctness of SQL queries generated by freeform views. +All benchmarks are run on a dev split of the [BIRD](https://bird-bench.github.io/) dataset. For now, only one configuration is available to run the suite against the `superhero` database. We plan to extend it to all databases in the set to cover all cases. + +Any new PRs adding support for new databases from BIRD are welcome. + ## Run benchmarks -Run evaluation suite: +Run the whole suite on the `superhero` database: ```bash -python bench.py +python bench.py e2e=superhero iql=superhero sql=superhero +``` + +You can also run each evaluation separately or in subgroups: + +```bash +python bench.py e2e=superhero ... ``` ## Run tests @@ -19,3 +29,21 @@ python bench.py ```bash python -m pytest ``` + +## Metrics + +Each task computes following metrics: + +tbd + +## Custom evaluation dataset + +In order to run this suite against you own dataset, upload your dataset to [Hugging Face](https://huggingface.co) and make sure the data is in the format expected by the evaluation pipeline. 
+ +Evaluation dataset required fields: + +- `question` - natural langugage SQL prompt +- `sql` - SQL corresponding to the SQL prompt +- `iql` - IQL corresponding to the SQL prompt +- `difficulty` - SQL code difficulty label +- `db_id` - database identifier From 18022d5ffba1ef1481da553439a143790437b859 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Fri, 12 Jul 2024 04:34:14 +0200 Subject: [PATCH 12/34] brand new world --- benchmarks/sql/bench.py | 124 ++++++-- benchmarks/sql/config.py | 22 -- benchmarks/sql/config/config.yaml | 12 + .../e2e/superhero.yaml | 0 .../iql/superhero.yaml | 0 benchmarks/sql/config/sql/superhero.yaml | 8 + benchmarks/sql/constants.py | 27 -- benchmarks/sql/e2e_benchmark.py | 157 --------- benchmarks/sql/experiment_config/config.yaml | 14 - .../experiment_config/text2sql/superhero.yaml | 5 - benchmarks/sql/iql_benchmark.py | 51 --- benchmarks/sql/paths.py | 10 - benchmarks/sql/pipeline.py | 139 -------- benchmarks/sql/pipelines.py | 285 +++++++++++++++++ benchmarks/sql/results.py | 1 - benchmarks/sql/text2sql_benchmark.py | 151 --------- benchmarks/sql/views/__init__.py | 15 + benchmarks/sql/views/freeform/__init__.py | 0 benchmarks/sql/views/freeform/superhero.py | 301 ++++++++++++++++++ .../sql/views/{ => structured}/superhero.py | 0 20 files changed, 724 insertions(+), 598 deletions(-) delete mode 100644 benchmarks/sql/config.py create mode 100644 benchmarks/sql/config/config.yaml rename benchmarks/sql/{experiment_config => config}/e2e/superhero.yaml (100%) rename benchmarks/sql/{experiment_config => config}/iql/superhero.yaml (100%) create mode 100644 benchmarks/sql/config/sql/superhero.yaml delete mode 100644 benchmarks/sql/constants.py delete mode 100644 benchmarks/sql/e2e_benchmark.py delete mode 100644 benchmarks/sql/experiment_config/config.yaml delete mode 100644 benchmarks/sql/experiment_config/text2sql/superhero.yaml delete mode 100644 benchmarks/sql/iql_benchmark.py delete mode 100644 benchmarks/sql/paths.py delete mode 100644 benchmarks/sql/pipeline.py create mode 100644 benchmarks/sql/pipelines.py delete mode 100644 benchmarks/sql/text2sql_benchmark.py create mode 100644 benchmarks/sql/views/__init__.py create mode 100644 benchmarks/sql/views/freeform/__init__.py create mode 100644 benchmarks/sql/views/freeform/superhero.py rename benchmarks/sql/views/{ => structured}/superhero.py (100%) diff --git a/benchmarks/sql/bench.py b/benchmarks/sql/bench.py index b32a5078..e56ed488 100644 --- a/benchmarks/sql/bench.py +++ b/benchmarks/sql/bench.py @@ -1,41 +1,123 @@ import asyncio +from enum import Enum from typing import Callable, Dict import hydra -from constants import EvaluationType -from e2e_benchmark import evaluate as e2e_evaluate -from iql_benchmark import evaluate as iql_evaluate +import neptune +from datasets import load_dataset +from loguru import logger +from neptune.utils import stringify_unsupported from omegaconf import DictConfig -from text2sql_benchmark import evaluate as text2sql_evaluate +from pipelines import TextToIQLEvaluationPipeline, TextToSQLEvaluationPipeline +from saving import save -async def evaluate(cfg: DictConfig) -> None: - """Function running evaluation for all datasets and evaluation tasks defined in hydra config. +class EvaluationType(Enum): + """ + Enum representing the type of evaluation. + """ + + E2E = "e2e" + SQL = "sql" + IQL = "iql" + + +async def bench_iql(config: DictConfig) -> None: + """ + Runs IQL evaluation for a single dataset defined in hydra config. 
Args: - cfg (DictConfig): Hydra config + config: hydra config, loads automatically from path passed on to the decorator. + """ + logger.info(f"Running IQL predictions for dataset: {config.dataset_path} and view: {config.view_name}.") + + dataset = load_dataset(config.dataset_path, split=config.split) + dataset = dataset.filter(lambda x: x["db_id"] in config.db_ids and x["difficulty"] in config.difficulties) + dataset = dataset.select(range(1)) + + pipe = TextToIQLEvaluationPipeline(config) + metrics, results = await pipe(dataset) + + output_file = save("./evals/", metrics=metrics, results=results) + logger.info(f"IQL evaluation metrics and predictions saved under directory: {output_file}.") + + if config.neptune.run: + run = neptune.init_run(project=config.neptune.project) + run["sys/tags"].add( + [ + EvaluationType.IQL.value, + config.view_name, + config.llm.model_name, + *config.db_ids, + ] + ) + run["config"] = stringify_unsupported(config) + run["evaluation/metrics"] = stringify_unsupported(metrics) + logger.info(f"Evaluation results logged to neptune at {run.get_url()}.") + + +async def bench_sql(config: DictConfig) -> None: + """ + Runs Text2SQL evaluation for a single dataset defined in hydra config. + + Args: + config: hydra config, loads automatically from path passed on to the decorator + """ + logger.info(f"Running SQL predictions for dataset: {config.dataset_path} and view: {config.view_name}.") + + dataset = load_dataset(config.dataset_path, split=config.split) + dataset = dataset.filter(lambda x: x["db_id"] in config.db_ids and x["difficulty"] in config.difficulties) + dataset = dataset.select(range(1)) + + pipe = TextToSQLEvaluationPipeline(config) + metrics, results = await pipe(dataset) + + output_file = save("./evals/", metrics=metrics, results=results) + logger.info(f"IQL evaluation metrics and predictions saved under directory: {output_file}.") + + if config.neptune.run: + run = neptune.init_run(project=config.neptune.project) + run["sys/tags"].add( + [ + EvaluationType.SQL.value, + config.view_name, + config.llm.model_name, + *config.db_ids, + ] + ) + run["config"] = stringify_unsupported(config) + run["evaluation/metrics"] = stringify_unsupported(metrics) + logger.info(f"Evaluation results logged to neptune at {run.get_url()}.") + + +async def bench(config: DictConfig) -> None: + """ + Function running evaluation for all datasets and evaluation tasks defined in hydra config. + + Args: + config: Hydra configuration. """ evaluators_factory: Dict[str, Callable] = { - EvaluationType.END2END.value: e2e_evaluate, - EvaluationType.TEXT2SQL.value: text2sql_evaluate, - EvaluationType.IQL.value: iql_evaluate, + EvaluationType.SQL.value: bench_sql, + EvaluationType.IQL.value: bench_iql, } - - common_cfg = {k: v for k, v in cfg.items() if k not in evaluators_factory} + common_config = {k: v for k, v in config.items() if k not in evaluators_factory} for evaluation_type, eval_func in evaluators_factory.items(): - if evaluation_type in cfg: - for dataset_cfg in cfg[evaluation_type].values(): - await eval_func(DictConfig({**common_cfg, **dataset_cfg})) + if evaluation_type in config: + for dataset_config in config[evaluation_type].values(): + await eval_func(DictConfig({**common_config, **dataset_config})) -@hydra.main(version_base=None, config_path="experiment_config", config_name="config") -def main(cfg: DictConfig): - """Function running evaluation for all datasets and evaluation tasks defined in hydra config. 
+@hydra.main(version_base=None, config_path="config", config_name="config") +def main(config: DictConfig) -> None: + """ + Function running evaluation for all datasets and evaluation tasks defined in hydra config. Args: - cfg (DictConfig): Hydra config""" - asyncio.run(evaluate(cfg)) + config: Hydra configuration. + """ + asyncio.run(bench(config)) if __name__ == "__main__": - main() # pylint: disable=E1120 + main() # pylint: disable=no-value-for-parameter diff --git a/benchmarks/sql/config.py b/benchmarks/sql/config.py deleted file mode 100644 index 4ed166ff..00000000 --- a/benchmarks/sql/config.py +++ /dev/null @@ -1,22 +0,0 @@ -from paths import PATH_PACKAGE -from pydantic.v1 import BaseSettings - - -class BenchmarkConfig(BaseSettings): - """db-ally Benchmark configuration.""" - - pg_connection_string: str = "sqlite://" - openai_api_key: str = "" - - neptune_project: str = "deepsense-ai/db-ally" - neptune_api_token: str = "" - - class Config: - """Config for env class.""" - - env_file = str(PATH_PACKAGE / ".env") - env_file_encoding = "utf-8" - extra = "allow" - - -config = BenchmarkConfig() diff --git a/benchmarks/sql/config/config.yaml b/benchmarks/sql/config/config.yaml new file mode 100644 index 00000000..99df9b2d --- /dev/null +++ b/benchmarks/sql/config/config.yaml @@ -0,0 +1,12 @@ +defaults: + - e2e: + - iql: + - sql: + - _self_ + +llm: + model_name: "gpt-3.5-turbo" + +neptune: + project: "deepsense-ai/db-ally" + run: False diff --git a/benchmarks/sql/experiment_config/e2e/superhero.yaml b/benchmarks/sql/config/e2e/superhero.yaml similarity index 100% rename from benchmarks/sql/experiment_config/e2e/superhero.yaml rename to benchmarks/sql/config/e2e/superhero.yaml diff --git a/benchmarks/sql/experiment_config/iql/superhero.yaml b/benchmarks/sql/config/iql/superhero.yaml similarity index 100% rename from benchmarks/sql/experiment_config/iql/superhero.yaml rename to benchmarks/sql/config/iql/superhero.yaml diff --git a/benchmarks/sql/config/sql/superhero.yaml b/benchmarks/sql/config/sql/superhero.yaml new file mode 100644 index 00000000..ad22bbf7 --- /dev/null +++ b/benchmarks/sql/config/sql/superhero.yaml @@ -0,0 +1,8 @@ +superhero: + dataset_path: "micpst/bird-dev-iql" + split: "dev" + db_ids: ["superhero"] + difficulties: ["simple"] + output_path: "iql_evaluation" + view_name: "SuperheroFreeformView" + db_url: "sqlite:///superhero.db" diff --git a/benchmarks/sql/constants.py b/benchmarks/sql/constants.py deleted file mode 100644 index 321378dc..00000000 --- a/benchmarks/sql/constants.py +++ /dev/null @@ -1,27 +0,0 @@ -from enum import Enum -from typing import Dict, Type - -from views.superhero import SuperheroCountByPowerView, SuperheroView - -from dbally.views.sqlalchemy_base import SqlAlchemyBaseView - - -class ViewName(Enum): - """Enum representing the name of the view.""" - - SUPERHERO_VIEW = "SuperheroView" - SUPERHERO_COUNT_BY_POWER_VIEW = "SuperheroCountByPowerView" - - -class EvaluationType(Enum): - """Enum representing the type of evaluation.""" - - END2END = "e2e" - TEXT2SQL = "text2sql" - IQL = "iql" - - -VIEW_REGISTRY: Dict[ViewName, Type[SqlAlchemyBaseView]] = { - ViewName.SUPERHERO_VIEW: SuperheroView, - ViewName.SUPERHERO_COUNT_BY_POWER_VIEW: SuperheroCountByPowerView, -} diff --git a/benchmarks/sql/e2e_benchmark.py b/benchmarks/sql/e2e_benchmark.py deleted file mode 100644 index 4d0b8eec..00000000 --- a/benchmarks/sql/e2e_benchmark.py +++ /dev/null @@ -1,157 +0,0 @@ -import asyncio -import json -import os -from functools import partial -from pathlib import Path 
-from typing import Any, List - -import hydra -import neptune -from config import BenchmarkConfig -from constants import VIEW_REGISTRY, EvaluationType, ViewName -from dataset.bird_dataset import BIRDDataset, BIRDExample -from hydra.utils import instantiate -from loguru import logger -from neptune.utils import stringify_unsupported -from omegaconf import DictConfig -from paths import PATH_EXPERIMENTS -from sqlalchemy import create_engine -from text2sql.metrics import calculate_dataset_metrics -from text2sql.text2sql_result import Text2SQLResult -from utils import batch, get_datetime_str, set_up_gitlab_metadata - -import dbally -from dbally.collection import Collection -from dbally.collection.exceptions import NoViewFoundError -from dbally.iql_generator.prompt import IQL_GENERATION_TEMPLATE, UnsupportedQueryError -from dbally.llms.litellm import LiteLLM -from dbally.llms.local import LocalLLM -from dbally.view_selection.prompt import VIEW_SELECTION_TEMPLATE - - -async def _run_dbally_for_single_example(example: BIRDExample, collection: Collection) -> Text2SQLResult: - try: - result = await collection.ask(example.question, dry_run=True) - sql = result.context["sql"] - except UnsupportedQueryError: - sql = "UnsupportedQueryError" - except NoViewFoundError: - sql = "NoViewFoundError" - except Exception: # pylint: disable=broad-exception-caught - sql = "Error" - - return Text2SQLResult( - db_id=example.db_id, question=example.question, ground_truth_sql=example.SQL, predicted_sql=sql - ) - - -async def run_dbally_for_dataset(dataset: BIRDDataset, collection: Collection) -> List[Text2SQLResult]: - """ - Transforms questions into SQL queries using a IQL approach. - - Args: - dataset: The dataset containing questions to be transformed into SQL queries. - collection: Container for a set of views used by db-ally. - - Returns: - A list of Text2SQLResult objects representing the predictions. - """ - - results: List[Text2SQLResult] = [] - - for group in batch(dataset, 5): - current_results = await asyncio.gather( - *[_run_dbally_for_single_example(example, collection) for example in group] - ) - results = [*current_results, *results] - - return results - - -async def evaluate(cfg: DictConfig) -> Any: - """ - Runs db-ally evaluation for a single dataset defined in hydra config. 
- - Args: - cfg: hydra config, loads automatically from path passed on to the decorator - """ - - output_dir = PATH_EXPERIMENTS / cfg.output_path / get_datetime_str() - output_dir.mkdir(exist_ok=True, parents=True) - cfg = instantiate(cfg) - benchmark_cfg = BenchmarkConfig() - - engine = create_engine(benchmark_cfg.pg_connection_string + f"/{cfg.db_name}") - - if cfg.llm.model_name.startswith("local/"): - llm = LocalLLM(api_key=benchmark_cfg.hf_api_key, model_name=cfg.llm.model_name.split("/", 1)[1]) - else: - llm = LiteLLM( - model_name=cfg.llm.model_name, - api_key=benchmark_cfg.openai_api_key, - ) - - db = dbally.create_collection(cfg.db_name, llm) - - for view_name in cfg.view_names: - view = VIEW_REGISTRY[ViewName(view_name)] - db.add(view, partial(view, engine)) - - run = None - if cfg.neptune.log: - run = neptune.init_run( - project=benchmark_cfg.neptune_project, - api_token=benchmark_cfg.neptune_api_token, - ) - run["config"] = stringify_unsupported(cfg) - tags = list(cfg.neptune.get("tags", [])) + [EvaluationType.END2END.value, cfg.llm.model_name, cfg.db_name] - run["sys/tags"].add(tags) - - if "CI_MERGE_REQUEST_IID" in os.environ: - run = set_up_gitlab_metadata(run) - - metrics_file_name, results_file_name = "metrics.json", "eval_results.json" - - logger.info(f"Running db-ally predictions for dataset {cfg.dataset_path}") - evaluation_dataset = BIRDDataset.from_json_file( - Path(cfg.dataset_path), difficulty_levels=cfg.get("difficulty_levels") - ) - dbally_results = await run_dbally_for_dataset(dataset=evaluation_dataset, collection=db) - - with open(output_dir / results_file_name, "w", encoding="utf-8") as outfile: - json.dump([result.model_dump() for result in dbally_results], outfile, indent=4) - - logger.info("Calculating metrics") - metrics = calculate_dataset_metrics(dbally_results, engine) - - with open(output_dir / metrics_file_name, "w", encoding="utf-8") as outfile: - json.dump(metrics, outfile, indent=4) - - logger.info(f"db-ally predictions saved under directory: {output_dir}") - - if run: - run["config/iql_prompt_template"] = stringify_unsupported(IQL_GENERATION_TEMPLATE.chat) - run["config/view_selection_prompt_template"] = stringify_unsupported(VIEW_SELECTION_TEMPLATE.chat) - run["config/iql_prompt_template"] = stringify_unsupported(IQL_GENERATION_TEMPLATE) - run[f"evaluation/{metrics_file_name}"].upload((output_dir / metrics_file_name).as_posix()) - run[f"evaluation/{results_file_name}"].upload((output_dir / results_file_name).as_posix()) - run["evaluation/metrics"] = stringify_unsupported(metrics) - logger.info(f"Evaluation results logged to neptune at {run.get_url()}") - - -@hydra.main(version_base=None, config_path="experiment_config", config_name="evaluate_e2e_config") -def main(cfg: DictConfig): - """ - Runs db-ally evaluation for a single dataset defined in hydra config. - The following metrics are calculated during evaluation: exact match, valid SQL, - execution accuracy and valid efficiency score. - - Args: - cfg: hydra config, loads automatically from path passed on to the decorator. 
- """ - - asyncio.run(evaluate(cfg)) - - -if __name__ == "__main__": - main() # pylint: disable=E1120 diff --git a/benchmarks/sql/experiment_config/config.yaml b/benchmarks/sql/experiment_config/config.yaml deleted file mode 100644 index 80501175..00000000 --- a/benchmarks/sql/experiment_config/config.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# Provide list of test configurations you want to execute -defaults: - - e2e: [] - - iql: [superhero] - - text2sql: [] - -llm: - model_name: "gpt-3.5-turbo" - api_key: "" - -neptune: - project: "deepsense-ai/db-ally" - api_token: "" - log: False diff --git a/benchmarks/sql/experiment_config/text2sql/superhero.yaml b/benchmarks/sql/experiment_config/text2sql/superhero.yaml deleted file mode 100644 index 89b0117d..00000000 --- a/benchmarks/sql/experiment_config/text2sql/superhero.yaml +++ /dev/null @@ -1,5 +0,0 @@ -superhero: - output_path: "text2sql_evaluation" - dataset_path: "data/superhero.json" - difficulty_levels: ["simple"] - db_name: "superhero" diff --git a/benchmarks/sql/iql_benchmark.py b/benchmarks/sql/iql_benchmark.py deleted file mode 100644 index ae72684e..00000000 --- a/benchmarks/sql/iql_benchmark.py +++ /dev/null @@ -1,51 +0,0 @@ -import asyncio - -import hydra -import neptune -from constants import EvaluationType -from datasets import load_dataset -from loguru import logger -from neptune.utils import stringify_unsupported -from omegaconf import DictConfig -from pipeline import TextToIQLEvaluationPipeline -from saving import save - - -@hydra.main(version_base=None, config_path="experiment_config", config_name="evaluate_iql_config") -async def evaluate(config: DictConfig) -> None: - """ - Runs IQL evaluation for a single dataset defined in hydra config. - - Args: - config: hydra config, loads automatically from path passed on to the decorator. 
- """ - logger.info(f"Running IQL predictions for dataset: {config.dataset_path} and view: {config.view_name}.") - dataset = load_dataset(config.dataset_path, split=config.split) - dataset = dataset.filter(lambda x: x["db_id"] in config.db_ids and x["difficulty"] in config.difficulties) - - pipe = TextToIQLEvaluationPipeline(config) - metrics, results = await pipe(dataset) - - output_file = save("./evals/", metrics=metrics, results=results) - logger.info(f"IQL evaluation metrics and predictions saved under directory: {output_file}.") - - if config.neptune.log: - run = neptune.init_run( - project=config.neptune.project, - api_token=config.neptune.api_token, - ) - run["sys/tags"].add( - [ - EvaluationType.IQL.value, - config.view_name, - config.llm.model_name, - config.db_name, - ] - ) - run["config"] = stringify_unsupported(config) - run["evaluation/metrics"] = stringify_unsupported(metrics) - logger.info(f"Evaluation results logged to neptune at {run.get_url()}.") - - -if __name__ == "__main__": - asyncio.run(evaluate()) # pylint: disable=E1120 diff --git a/benchmarks/sql/paths.py b/benchmarks/sql/paths.py deleted file mode 100644 index 6df53cb0..00000000 --- a/benchmarks/sql/paths.py +++ /dev/null @@ -1,10 +0,0 @@ -from pathlib import Path - -# import sql - -PATH_PACKAGE = Path(__file__).parent - -PATH_ROOT = PATH_PACKAGE.parent.parent -PATH_EXPERIMENTS = PATH_ROOT / "experiments" -PATH_DATA = PATH_ROOT / "data" -PATH_SCHEMAS = PATH_DATA / "schemas" diff --git a/benchmarks/sql/pipeline.py b/benchmarks/sql/pipeline.py deleted file mode 100644 index d2b96b2a..00000000 --- a/benchmarks/sql/pipeline.py +++ /dev/null @@ -1,139 +0,0 @@ -from dataclasses import asdict -from typing import Dict, List, Tuple - -from constants import VIEW_REGISTRY, ViewName -from datasets import Dataset -from iql.metrics import ( - calculate_exact_match, - calculate_hallucinated_filters, - calculate_invalid_iql, - calculate_unsupported_iql, - calculate_valid_iql, -) -from results import TextToIQLResult -from sqlalchemy import create_engine - -from dbally.iql._exceptions import IQLError -from dbally.iql_generator.iql_generator import IQLGenerator -from dbally.iql_generator.prompt import UnsupportedQueryError -from dbally.llms.litellm import LiteLLM -from dbally.llms.local import LocalLLM -from dbally.views.sqlalchemy_base import SqlAlchemyBaseView - - -class TextToIQLEvaluationPipeline: - """ - Pipeline for evaluating IQL predictions. - """ - - def __init__(self, config: Dict) -> None: - """ - Constructs the pipeline for evaluating IQL predictions. - - Args: - config: The configuration for the. - - Raises: - ValueError: If the view name is not supported. - """ - self.engine = create_engine(config.db_url) - self.view = self.get_view(config.view_name) - self.iql_generator = self.get_iql_generator(config.llm) - - def get_view(self, view_name: str) -> SqlAlchemyBaseView: - """ - Returns the view object based on the view name. - - Args: - view_name: The name of the view. - - Returns: - The view object. - - Raises: - ValueError: If the view name is not supported - """ - allowed_views = [view.value for view in ViewName] - if view_name not in allowed_views: - raise ValueError(f"View {view_name} not supported. Available views: {allowed_views}") - return VIEW_REGISTRY[ViewName(view_name)](self.engine) - - def get_iql_generator(self, llm_config: Dict) -> IQLGenerator: - """ - Returns the IQL generator based on the LLM configuration. - - Args: - llm_config: The LLM configuration. - - Returns: - The IQL generator. 
- """ - if llm_config.model_name.startswith("local/"): - llm = LocalLLM( - model_name=llm_config.model_name.split("/", 1)[1], - api_key=llm_config.api_key, - ) - else: - llm = LiteLLM( - model_name=llm_config.model_name, - api_key=llm_config.api_key, - ) - return IQLGenerator(llm) - - async def compute_metrics(self, results: List[TextToIQLResult]) -> Dict[str, float]: - """ - Computes the metrics for IQL predictions. - - Args: - results: The list of IQL predictions. - - Returns: - The metrics for the IQL predictions. - """ - filters = self.view.list_filters() - - return { - "exact_match": calculate_exact_match(results), - "valid_iql": await calculate_valid_iql(results, filters), - "invalid_iql": await calculate_invalid_iql(results, filters), - "unsupported_iql": calculate_unsupported_iql(results), - "hallucinated_iql": calculate_hallucinated_filters(results, filters), - } - - async def __call__(self, dataset: Dataset) -> Tuple[Dict[str, float], List[Dict[str, str]]]: - """ - Runs the pipeline for evaluating IQL predictions. - - Args: - dataset: The dataset containing the questions and ground truth IQL queries. - - Returns: - The list of IQL predictions. - """ - filters = self.view.list_filters() - examples = self.view.list_few_shots() - results = [] - - for data in dataset: - try: - predicted_iql = await self.iql_generator.generate_iql( - question=data["question"], - filters=filters, - examples=examples, - n_retries=0, - ) - except UnsupportedQueryError: - result = "UNSUPPORTED_QUERY" - except IQLError as exc: - result = exc.source - else: - result = str(predicted_iql) - - results.append( - TextToIQLResult(question=data["question"], ground_truth_iql=data["iql"], predicted_iql=result) - ) - - metrics = await self.compute_metrics(results) - results = [asdict(result) for result in results] - - return metrics, results diff --git a/benchmarks/sql/pipelines.py b/benchmarks/sql/pipelines.py new file mode 100644 index 00000000..8feeca5b --- /dev/null +++ b/benchmarks/sql/pipelines.py @@ -0,0 +1,285 @@ +import json +from abc import ABC, abstractmethod +from dataclasses import asdict +from typing import Dict, List, Tuple + +from datasets import Dataset +from iql.metrics import calculate_exact_match as calculate_iql_exact_match +from iql.metrics import ( + calculate_hallucinated_filters, + calculate_invalid_iql, + calculate_unsupported_iql, + calculate_valid_iql, +) +from results import TextToIQLResult, TextToSQLResult +from sqlalchemy import create_engine, text +from text2sql.metrics import calculate_exact_match as calculate_sql_exact_match +from text2sql.metrics import calculate_exec_acc, calculate_undefined_error_ratio, calculate_valid_sql, calculate_ves +from views import FREEFORM_VIEW_REGISTRY, STRUCTURED_VIEW_REGISTRY + +from dbally.iql._exceptions import IQLError +from dbally.iql_generator.iql_generator import IQLGenerator +from dbally.iql_generator.prompt import UnsupportedQueryError +from dbally.llms.base import LLM +from dbally.llms.litellm import LiteLLM +from dbally.llms.local import LocalLLM +from dbally.views.freeform.text2sql.prompt import SQL_GENERATION_TEMPLATE, SQLGenerationPromptFormat +from dbally.views.freeform.text2sql.view import BaseText2SQLView, SQLParameterOption +from dbally.views.sqlalchemy_base import SqlAlchemyBaseView + +Metrics = Dict[str, float] +Results = List[Dict[str, str]] + + +class EvaluationPipeline(ABC): + """ + Evaluation pipeline base class. 
+ """ + + def __init__(self, config: Dict) -> None: + self.engine = create_engine(config.db_url) + + def get_llm(self, llm_config: Dict) -> LLM: + """ + Returns the LLM based on the configuration. + + Args: + llm_config: The LLM configuration. + + Returns: + The LLM object. + """ + if llm_config.model_name.startswith("local/"): + return LocalLLM(llm_config.model_name.split("/", 1)[1]) + return LiteLLM(llm_config.model_name) + + @abstractmethod + async def __call__(self, dataset: Dataset) -> Tuple[Metrics, Results]: + """ + Runs the evaluation pipeline. + + Args: + dataset: The dataset containing the questions and ground truth IQL queries. + + Returns: + The list of IQL predictions. + """ + + +class TextToIQLEvaluationPipeline(EvaluationPipeline): + """ + Pipeline for evaluating IQL predictions. + """ + + def __init__(self, config: Dict) -> None: + """ + Constructs the pipeline for evaluating IQL predictions. + + Args: + config: The configuration for the pipeline. + + Raises: + ValueError: If the view name is not supported. + """ + super().__init__(config) + self.view = self.get_view(config.view_name) + self.iql_generator = self.get_iql_generator(config.llm) + + def get_view(self, view_name: str) -> SqlAlchemyBaseView: + """ + Returns the view object based on the view name. + + Args: + view_name: The name of the view. + + Returns: + The view object. + + Raises: + ValueError: If the view name is not supported + """ + if view_name not in STRUCTURED_VIEW_REGISTRY: + raise ValueError(f"View {view_name} not supported. Available views: {STRUCTURED_VIEW_REGISTRY}.") + return STRUCTURED_VIEW_REGISTRY[view_name](self.engine) + + def get_iql_generator(self, llm_config: Dict) -> IQLGenerator: + """ + Returns the IQL generator based on the LLM configuration. + + Args: + llm_config: The LLM configuration. + + Returns: + The IQL generator. + """ + llm = self.get_llm(llm_config) + return IQLGenerator(llm) + + async def compute_metrics(self, results: List[TextToIQLResult]) -> Dict[str, float]: + """ + Computes the metrics for IQL predictions. + + Args: + results: The list of IQL predictions. + + Returns: + The metrics for the IQL predictions. + """ + filters = self.view.list_filters() + + return { + "exact_match": calculate_iql_exact_match(results), + "valid_iql": await calculate_valid_iql(results, filters), + "invalid_iql": await calculate_invalid_iql(results, filters), + "unsupported_iql": calculate_unsupported_iql(results), + "hallucinated_iql": calculate_hallucinated_filters(results, filters), + } + + async def __call__(self, dataset: Dataset) -> Tuple[Metrics, Results]: + """ + Runs the pipeline for evaluating IQL predictions. + + Args: + dataset: The dataset containing the questions and ground truth IQL queries. + + Returns: + The list of IQL predictions. 
+ """ + filters = self.view.list_filters() + examples = self.view.list_few_shots() + results = [] + + for data in dataset: + try: + predicted_iql = await self.iql_generator.generate_iql( + question=data["question"], + filters=filters, + examples=examples, + n_retries=0, + ) + except UnsupportedQueryError: + result = "UNSUPPORTED_QUERY" + except IQLError as exc: + result = exc.source + else: + result = str(predicted_iql) + + results.append( + TextToIQLResult(question=data["question"], ground_truth_iql=data["iql"], predicted_iql=result) + ) + + metrics = await self.compute_metrics(results) + results = [asdict(result) for result in results] + + return metrics, results + + +class TextToSQLEvaluationPipeline(EvaluationPipeline): + """ + Pipeline for evaluating SQL predictions. + """ + + def __init__(self, config: Dict) -> None: + """ + Constructs the pipeline for evaluating SQL predictions. + + Args: + config: The configuration for the pipeline. + + Raises: + ValueError: If the view name is not supported. + """ + super().__init__(config) + self.view = self.get_view(config.view_name) + self.sql_generator = self.get_sql_generator(config.llm) + + def get_view(self, view_name: str) -> BaseText2SQLView: + """ + Returns the view object based on the view name. + + Args: + view_name: The name of the view. + + Returns: + The view object. + + Raises: + ValueError: If the view name is not supported + """ + if view_name not in FREEFORM_VIEW_REGISTRY: + raise ValueError(f"View {view_name} not supported. Available views: {FREEFORM_VIEW_REGISTRY}.") + return FREEFORM_VIEW_REGISTRY[view_name](self.engine) + + def get_sql_generator(self, llm_config: Dict) -> LLM: + """ + Returns the IQL generator based on the LLM configuration. + + Args: + llm_config: The LLM configuration. + + Returns: + The IQL generator. + """ + # TODO: Implement SQL generator + return self.get_llm(llm_config) + + async def compute_metrics(self, results: List[TextToSQLResult]) -> Dict[str, float]: + """ + Computes the metrics for IQL predictions. + + Args: + results: The list of IQL predictions. + + Returns: + The metrics for the IQL predictions. + """ + return { + "valid_sql": calculate_valid_sql(results, self.engine), + "undefined_error": calculate_undefined_error_ratio(results), + "exact_match": calculate_sql_exact_match(results), + "execution_accuracy": calculate_exec_acc(results, self.engine), + "valid_efficiency_score": calculate_ves(results, self.engine), + } + + async def __call__(self, dataset: Dataset) -> Tuple[Metrics, Results]: + """ + Runs the pipeline for evaluating IQL predictions. + + Args: + dataset: The dataset containing the questions and ground truth IQL queries. + + Returns: + The list of IQL predictions. 
+ """ + tables = self.view.get_tables() + examples = self.view.list_few_shots() + results = [] + + for data in dataset: + try: + # TODO: Refactor this once the SQL generator is implemented + prompt_format = SQLGenerationPromptFormat( + question=data["question"], + dialect=self.engine.dialect.name, + tables=tables, + examples=examples, + ) + formatted_prompt = SQL_GENERATION_TEMPLATE.format_prompt(prompt_format) + response = await self.sql_generator.generate_text(formatted_prompt) + response = json.loads(response) + params = [SQLParameterOption.from_dict(param) for param in response.get("parameters", [])] + params = {param.name: param.value for param in params} + stmt = text(response.get("sql", "")) + stmt = stmt.bindparams(**params) + result = str(stmt.compile(compile_kwargs={"literal_binds": True})) + except Exception: # pylint: disable=broad-except + result = "" + + results.append( + TextToSQLResult(question=data["question"], ground_truth_sql=data["sql"], predicted_sql=result) + ) + + metrics = await self.compute_metrics(results) + results = [asdict(result) for result in results] + + return metrics, results diff --git a/benchmarks/sql/results.py b/benchmarks/sql/results.py index ac638e89..7eea01e0 100644 --- a/benchmarks/sql/results.py +++ b/benchmarks/sql/results.py @@ -18,7 +18,6 @@ class TextToSQLResult: Represents a single TextToSQL result. """ - db_id: str question: str ground_truth_sql: str predicted_sql: str diff --git a/benchmarks/sql/text2sql_benchmark.py b/benchmarks/sql/text2sql_benchmark.py deleted file mode 100644 index 178a8332..00000000 --- a/benchmarks/sql/text2sql_benchmark.py +++ /dev/null @@ -1,151 +0,0 @@ -import asyncio -import json -import os -from pathlib import Path -from typing import Any, List, Optional - -import hydra -import neptune -from config import BenchmarkConfig -from constants import EvaluationType -from dataset.bird_dataset import BIRDDataset, BIRDExample -from hydra.utils import instantiate -from loguru import logger -from neptune.utils import stringify_unsupported -from omegaconf import DictConfig -from paths import PATH_EXPERIMENTS, PATH_SCHEMAS -from sqlalchemy import create_engine -from text2sql.metrics import calculate_dataset_metrics -from text2sql.prompt_template import TEXT2SQL_PROMPT_TEMPLATE -from text2sql.text2sql_result import Text2SQLResult -from utils import batch, get_datetime_str, set_up_gitlab_metadata - -from dbally.audit.event_tracker import EventTracker -from dbally.llms.litellm import LiteLLM -from dbally.llms.local import LocalLLM - - -def _load_db_schema(db_name: str, encoding: Optional[str] = None) -> str: - db_schema_filename = db_name + ".sql" - db_schema_path = PATH_SCHEMAS / db_schema_filename - - with open(db_schema_path, encoding=encoding) as file_handle: - db_schema = file_handle.read() - - return db_schema - - -async def _run_text2sql_for_single_example(example: BIRDExample, llm: LiteLLM) -> Text2SQLResult: - event_tracker = EventTracker() - - db_schema = _load_db_schema(example.db_id) - - response = await llm.generate_text( - TEXT2SQL_PROMPT_TEMPLATE, {"schema": db_schema, "question": example.question}, event_tracker=event_tracker - ) - - return Text2SQLResult( - db_id=example.db_id, question=example.question, ground_truth_sql=example.SQL, predicted_sql=response - ) - - -async def run_text2sql_for_dataset(dataset: BIRDDataset, llm: LiteLLM) -> List[Text2SQLResult]: - """ - Transforms questions into SQL queries using a Text2SQL model. 
- - Args: - dataset: The dataset containing questions to be transformed into SQL queries. - llm: LLM client. - - Returns: - A list of Text2SQLResult objects representing the predictions. - """ - - results: List[Text2SQLResult] = [] - - for group in batch(dataset, 5): - current_results = await asyncio.gather(*[_run_text2sql_for_single_example(example, llm) for example in group]) - results = [*current_results, *results] - - return results - - -async def evaluate(cfg: DictConfig) -> Any: - """ - Runs Text2SQL evaluation for a single dataset defined in hydra config. - - Args: - cfg: hydra config, loads automatically from path passed on to the decorator - """ - - output_dir = PATH_EXPERIMENTS / cfg.output_path / get_datetime_str() - output_dir.mkdir(exist_ok=True, parents=True) - cfg = instantiate(cfg) - benchmark_cfg = BenchmarkConfig() - - engine = create_engine(benchmark_cfg.pg_connection_string + f"/{cfg.db_name}") - - if cfg.llm.model_name.startswith("local/"): - llm = LocalLLM(model_name=cfg.llm.model_name.split("/", 1)[1], api_key=benchmark_cfg.hf_api_key) - else: - llm = LiteLLM( - api_key=benchmark_cfg.openai_api_key, - model_name=cfg.llm.model_name, - ) - - run = None - if cfg.neptune.log: - run = neptune.init_run( - project=benchmark_cfg.neptune_project, - api_token=benchmark_cfg.neptune_api_token, - ) - run["config"] = stringify_unsupported(cfg) - tags = list(cfg.neptune.get("tags", [])) + [EvaluationType.TEXT2SQL.value, cfg.db_name, cfg.llm.model_name] - run["sys/tags"].add(tags) - - if "CI_MERGE_REQUEST_IID" in os.environ: - run = set_up_gitlab_metadata(run) - - metrics_file_name, results_file_name = "metrics.json", "eval_results.json" - - logger.info(f"Running Text2SQ predictions for dataset {cfg.dataset_path}") - evaluation_dataset = BIRDDataset.from_json_file( - Path(cfg.dataset_path), difficulty_levels=cfg.get("difficulty_levels") - ) - text2sql_results = await run_text2sql_for_dataset(dataset=evaluation_dataset, llm=llm) - - with open(output_dir / results_file_name, "w", encoding="utf-8") as outfile: - json.dump([result.model_dump() for result in text2sql_results], outfile, indent=4) - - logger.info("Calculating metrics") - metrics = calculate_dataset_metrics(text2sql_results, engine) - - with open(output_dir / metrics_file_name, "w", encoding="utf-8") as outfile: - json.dump(metrics, outfile, indent=4) - - logger.info(f"Text2SQL predictions saved under directory: {output_dir}") - - if run: - run["config/prompt_template"] = stringify_unsupported(TEXT2SQL_PROMPT_TEMPLATE.chat) - run[f"evaluation/{metrics_file_name}"].upload((output_dir / metrics_file_name).as_posix()) - run[f"evaluation/{results_file_name}"].upload((output_dir / results_file_name).as_posix()) - run["evaluation/metrics"] = stringify_unsupported(metrics) - logger.info(f"Evaluation results logged to neptune at {run.get_url()}") - - -@hydra.main(version_base=None, config_path="experiment_config", config_name="evaluate_text2sql_config") -def main(cfg: DictConfig): - """ - Runs Text2SQL evaluation for a single dataset defined in hydra config. - The following metrics are calculated during evaluation: exact match, valid SQL, - execution accuracy and valid efficiency score. - - Args: - cfg: hydra config, loads automatically from path passed on to the decorator. 
- """ - - asyncio.run(evaluate(cfg)) - - -if __name__ == "__main__": - main() # pylint: disable=E1120 diff --git a/benchmarks/sql/views/__init__.py b/benchmarks/sql/views/__init__.py new file mode 100644 index 00000000..9a8488a0 --- /dev/null +++ b/benchmarks/sql/views/__init__.py @@ -0,0 +1,15 @@ +from typing import Dict, Type + +from dbally.views.freeform.text2sql.view import BaseText2SQLView +from dbally.views.sqlalchemy_base import SqlAlchemyBaseView + +from .freeform.superhero import SuperheroFreeformView +from .structured.superhero import SuperheroView + +STRUCTURED_VIEW_REGISTRY: Dict[str, Type[SqlAlchemyBaseView]] = { + SuperheroView.__name__: SuperheroView, +} + +FREEFORM_VIEW_REGISTRY: Dict[str, Type[BaseText2SQLView]] = { + SuperheroFreeformView.__name__: SuperheroFreeformView, +} diff --git a/benchmarks/sql/views/freeform/__init__.py b/benchmarks/sql/views/freeform/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/sql/views/freeform/superhero.py b/benchmarks/sql/views/freeform/superhero.py new file mode 100644 index 00000000..1634b30b --- /dev/null +++ b/benchmarks/sql/views/freeform/superhero.py @@ -0,0 +1,301 @@ +from typing import List + +from faiss.swigfaiss import IndexFlatL2 + +from dbally.embeddings.litellm import LiteLLMEmbeddingClient +from dbally.similarity.faiss_store import FaissStore +from dbally.similarity.index import SimilarityIndex +from dbally.views.freeform.text2sql.config import ColumnConfig, TableConfig +from dbally.views.freeform.text2sql.view import BaseText2SQLView + + +class SuperheroFreeformView(BaseText2SQLView): + """ + Freeform view for the Superhero SQLite database. + """ + + def get_tables(self) -> List[TableConfig]: + """ + Get the tables used by the view. + + Returns: + The list of tables used by the view. + """ + return [ + TableConfig( + name="alignment", + columns=[ + ColumnConfig( + name="alignment", + data_type="TEXT", + description=None, + similarity_index=SimilarityIndex( + fetcher=self._create_default_fetcher("alignment", "alignment"), + store=FaissStore( + index_dir=".", + index_name="alignment_alignment_index", + embedding_client=LiteLLMEmbeddingClient( + model="text-embedding-3-small", + options={}, + api_base=None, + api_key=None, + api_version=None, + ), + max_distance=None, + index_type=IndexFlatL2, + ), + ), + ), + ], + description="""The 'alignment' table is a simple reference table storing different types of + moral alignments (such as Good, Bad, Neutral or N/A). Each alignment type is + assigned a unique identifier.""", + ), + TableConfig( + name="attribute", + columns=[ + ColumnConfig( + name="attribute_name", + data_type="TEXT", + description=None, + similarity_index=SimilarityIndex( + fetcher=self._create_default_fetcher("attribute", "attribute_name"), + store=FaissStore( + index_dir=".", + index_name="attribute_attribute_name_index", + embedding_client=LiteLLMEmbeddingClient( + model="text-embedding-3-small", + options={}, + api_base=None, + api_key=None, + api_version=None, + ), + max_distance=None, + index_type=IndexFlatL2, + ), + ), + ), + ], + description="""The table named 'attribute' is used to store various attributes like + intelligence, strength, speed, durability, power, etc. 
Each attribute is + uniquely identified by an ID.""", + ), + TableConfig( + name="colour", + columns=[ + ColumnConfig( + name="colour", + data_type="TEXT", + description=None, + similarity_index=SimilarityIndex( + fetcher=self._create_default_fetcher("colour", "colour"), + store=FaissStore( + index_dir=".", + index_name="colour_colour_index", + embedding_client=LiteLLMEmbeddingClient( + model="text-embedding-3-small", + options={}, + api_base=None, + api_key=None, + api_version=None, + ), + max_distance=None, + index_type=IndexFlatL2, + ), + ), + ), + ], + description="""The "colour" table in the SQLite database is a simple lookup table consisting of + colour names. Each entry in the table has a unique identifier and associated + text representing different colours.""", + ), + TableConfig( + name="gender", + columns=[ + ColumnConfig( + name="gender", + data_type="TEXT", + description=None, + similarity_index=SimilarityIndex( + fetcher=self._create_default_fetcher("gender", "gender"), + store=FaissStore( + index_dir=".", + index_name="gender_gender_index", + embedding_client=LiteLLMEmbeddingClient( + model="text-embedding-3-small", + options={}, + api_base=None, + api_key=None, + api_version=None, + ), + max_distance=None, + index_type=IndexFlatL2, + ), + ), + ), + ], + description="""The table named 'gender' is designed to store gender information. Each record + consists of an integer identifier and its corresponding gender which may be + 'Male', 'Female' or 'N/A'.""", + ), + TableConfig( + name="publisher", + columns=[ + ColumnConfig( + name="publisher_name", + data_type="TEXT", + description=None, + similarity_index=SimilarityIndex( + fetcher=self._create_default_fetcher("publisher", "publisher_name"), + store=FaissStore( + index_dir=".", + index_name="publisher_publisher_name_index", + embedding_client=LiteLLMEmbeddingClient( + model="text-embedding-3-small", + options={}, + api_base=None, + api_key=None, + api_version=None, + ), + max_distance=None, + index_type=IndexFlatL2, + ), + ), + ), + ], + description="""The "publisher" table stores information pertaining to various publishers. Each + entry consists of a unique ID and the name of the publisher. 
Some entries + may not have a publisher name.""", + ), + TableConfig( + name="race", + columns=[ + ColumnConfig( + name="race", + data_type="TEXT", + description=None, + similarity_index=SimilarityIndex( + fetcher=self._create_default_fetcher("race", "race"), + store=FaissStore( + index_dir=".", + index_name="race_race_index", + embedding_client=LiteLLMEmbeddingClient( + model="text-embedding-3-small", + options={}, + api_base=None, + api_key=None, + api_version=None, + ), + max_distance=None, + index_type=IndexFlatL2, + ), + ), + ), + ], + description="""The 'race' table associates a unique ID to different races which can include + various categories including clarifications such as "-" or specific types + like "Alien", "Alpha", "Amazon", "Android" etc.""", + ), + TableConfig( + name="superpower", + columns=[ + ColumnConfig( + name="power_name", + data_type="TEXT", + description=None, + similarity_index=SimilarityIndex( + fetcher=self._create_default_fetcher("superpower", "power_name"), + store=FaissStore( + index_dir=".", + index_name="superpower_power_name_index", + embedding_client=LiteLLMEmbeddingClient( + model="text-embedding-3-small", + options={}, + api_base=None, + api_key=None, + api_version=None, + ), + max_distance=None, + index_type=IndexFlatL2, + ), + ), + ), + ], + description="""The "superpower" table stores a list of different superpowers. Each superpower + has a unique identifier and a name.""", + ), + TableConfig( + name="superhero", + columns=[ + ColumnConfig( + name="superhero_name", + data_type="TEXT", + description=None, + similarity_index=SimilarityIndex( + fetcher=self._create_default_fetcher("superhero", "superhero_name"), + store=FaissStore( + index_dir=".", + index_name="superhero_superhero_name_index", + embedding_client=LiteLLMEmbeddingClient( + model="text-embedding-3-small", + options={}, + api_base=None, + api_key=None, + api_version=None, + ), + max_distance=None, + index_type=IndexFlatL2, + ), + ), + ), + ColumnConfig( + name="full_name", + data_type="TEXT", + description=None, + similarity_index=SimilarityIndex( + fetcher=self._create_default_fetcher("superhero", "full_name"), + store=FaissStore( + index_dir=".", + index_name="superhero_full_name_index", + embedding_client=LiteLLMEmbeddingClient( + model="text-embedding-3-small", + options={}, + api_base=None, + api_key=None, + api_version=None, + ), + max_distance=None, + index_type=IndexFlatL2, + ), + ), + ), + ], + description="""The "superhero" table holds information about various superheroes. This includes + their superhero name, their full name, and other descriptive characteristics + like their gender, eye, hair, and skin color, race, height, and weight. Each + of these descriptive characteristics has an associated id that corresponds + to a particular description. The alignment and publisher of the superhero + are also included in this table. The table is linked with multiple other + tables such as 'alignment', 'colour', 'gender', 'publisher', 'race' via + foreign key constraints.""", + ), + TableConfig( + name="hero_attribute", + columns=[], + description="""The "hero_attribute" table is a join table that connects superheroes and their + attributes. It has foreign keys referring to the ID of the hero from the + "superhero" table and the ID of the attribute from the "attribute" table. 
It + also stores the values for these hero attributes.""", + ), + TableConfig( + name="hero_power", + columns=[], + description="""The 'hero_power' table is a relational junction table that links superheros + ('hero_id') with their corresponding superpowers ('power_id'). This table + exhibits a many-to-many relationship between superheros and superpowers, + indicating that a single superhero can have multiple superpowers, and + similarly, a single superpower can be attributed to multiple superheros. The + 'hero_id' and 'power_id' are foreign keys that reference the primary keys in + the 'superhero' and 'superpower' tables respectively.""", + ), + ] diff --git a/benchmarks/sql/views/superhero.py b/benchmarks/sql/views/structured/superhero.py similarity index 100% rename from benchmarks/sql/views/superhero.py rename to benchmarks/sql/views/structured/superhero.py From c4242b675ed1c3c90aac2463f2f11eca7f2ee3c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Mon, 15 Jul 2024 11:53:04 +0200 Subject: [PATCH 13/34] great refactor --- benchmarks/sql/README.md | 27 +- benchmarks/sql/bench.py | 158 +++++----- .../sql/{views/freeform => bench}/__init__.py | 0 benchmarks/sql/bench/evaluator.py | 129 ++++++++ benchmarks/sql/bench/metrics/__init__.py | 16 + benchmarks/sql/bench/metrics/iql.py | 61 ++++ benchmarks/sql/bench/metrics/sql.py | 108 +++++++ benchmarks/sql/bench/pipelines/__init__.py | 12 + benchmarks/sql/bench/pipelines/base.py | 68 +++++ benchmarks/sql/bench/pipelines/e2e.py | 118 ++++++++ benchmarks/sql/bench/pipelines/iql.py | 96 ++++++ benchmarks/sql/bench/pipelines/sql.py | 86 ++++++ benchmarks/sql/bench/utils.py | 37 +++ benchmarks/sql/{ => bench}/views/__init__.py | 0 .../sql/bench/views/freeform/__init__.py | 0 .../{ => bench}/views/freeform/superhero.py | 0 .../{ => bench}/views/structured/superhero.py | 59 +--- benchmarks/sql/config/config.yaml | 13 +- benchmarks/sql/config/data/superhero.yaml | 11 + benchmarks/sql/config/e2e/superhero.yaml | 6 - benchmarks/sql/config/iql/superhero.yaml | 8 - benchmarks/sql/config/llm/claude.yaml | 1 + benchmarks/sql/config/llm/gpt.yaml | 1 + benchmarks/sql/config/sql/superhero.yaml | 8 - benchmarks/sql/config/task/e2e.yaml | 1 + benchmarks/sql/config/task/iql.yaml | 1 + benchmarks/sql/config/task/sql.yaml | 1 + benchmarks/sql/data/.gitkeep | 0 benchmarks/sql/data/superhero.db | Bin 0 -> 237568 bytes benchmarks/sql/dataset/bird_dataset.py | 63 ---- benchmarks/sql/iql/method_call_visitor.py | 68 ----- benchmarks/sql/iql/metrics.py | 145 --------- benchmarks/sql/pipelines.py | 285 ------------------ benchmarks/sql/results.py | 23 -- benchmarks/sql/saving.py | 54 ---- benchmarks/sql/tests/unit/test_iql_metrics.py | 65 ---- .../sql/tests/unit/test_main_evaluate.py | 26 -- .../tests/unit/test_method_call_visitor.py | 17 -- benchmarks/sql/text2sql/metrics.py | 268 ---------------- benchmarks/sql/text2sql/prompt_template.py | 19 -- benchmarks/sql/text2sql/text2sql_result.py | 10 - benchmarks/sql/utils.py | 73 ----- 42 files changed, 856 insertions(+), 1286 deletions(-) rename benchmarks/sql/{views/freeform => bench}/__init__.py (100%) create mode 100644 benchmarks/sql/bench/evaluator.py create mode 100644 benchmarks/sql/bench/metrics/__init__.py create mode 100644 benchmarks/sql/bench/metrics/iql.py create mode 100644 benchmarks/sql/bench/metrics/sql.py create mode 100644 benchmarks/sql/bench/pipelines/__init__.py create mode 100644 benchmarks/sql/bench/pipelines/base.py create mode 100644 benchmarks/sql/bench/pipelines/e2e.py 
create mode 100644 benchmarks/sql/bench/pipelines/iql.py create mode 100644 benchmarks/sql/bench/pipelines/sql.py create mode 100644 benchmarks/sql/bench/utils.py rename benchmarks/sql/{ => bench}/views/__init__.py (100%) create mode 100644 benchmarks/sql/bench/views/freeform/__init__.py rename benchmarks/sql/{ => bench}/views/freeform/superhero.py (100%) rename benchmarks/sql/{ => bench}/views/structured/superhero.py (80%) create mode 100644 benchmarks/sql/config/data/superhero.yaml delete mode 100644 benchmarks/sql/config/e2e/superhero.yaml delete mode 100644 benchmarks/sql/config/iql/superhero.yaml create mode 100644 benchmarks/sql/config/llm/claude.yaml create mode 100644 benchmarks/sql/config/llm/gpt.yaml delete mode 100644 benchmarks/sql/config/sql/superhero.yaml create mode 100644 benchmarks/sql/config/task/e2e.yaml create mode 100644 benchmarks/sql/config/task/iql.yaml create mode 100644 benchmarks/sql/config/task/sql.yaml create mode 100644 benchmarks/sql/data/.gitkeep create mode 100644 benchmarks/sql/data/superhero.db delete mode 100644 benchmarks/sql/dataset/bird_dataset.py delete mode 100644 benchmarks/sql/iql/method_call_visitor.py delete mode 100644 benchmarks/sql/iql/metrics.py delete mode 100644 benchmarks/sql/pipelines.py delete mode 100644 benchmarks/sql/results.py delete mode 100644 benchmarks/sql/saving.py delete mode 100644 benchmarks/sql/tests/unit/test_iql_metrics.py delete mode 100644 benchmarks/sql/tests/unit/test_main_evaluate.py delete mode 100644 benchmarks/sql/tests/unit/test_method_call_visitor.py delete mode 100644 benchmarks/sql/text2sql/metrics.py delete mode 100644 benchmarks/sql/text2sql/prompt_template.py delete mode 100644 benchmarks/sql/text2sql/text2sql_result.py delete mode 100644 benchmarks/sql/utils.py diff --git a/benchmarks/sql/README.md b/benchmarks/sql/README.md index d6f5abf4..f3f39fb3 100644 --- a/benchmarks/sql/README.md +++ b/benchmarks/sql/README.md @@ -3,25 +3,32 @@ This folder contains benchmarks for querying SQL databases with db-ally. This suite evaluates the following tasks: - `E2E` - measures correctness of rows returned from the database by db-ally. -- `Text2IQL` - measures correctness of IQL queries generated by structured views. -- `Text2SQL` - measures correctness of SQL queries generated by freeform views. +- `IQL` - measures correctness of IQL queries generated by structured views. +- `SQL` - measures correctness of SQL queries generated by freeform views. All benchmarks are run on a dev split of the [BIRD](https://bird-bench.github.io/) dataset. For now, only one configuration is available to run the suite against the `superhero` database. We plan to extend it to all databases in the set to cover all cases. -Any new PRs adding support for new databases from BIRD are welcome. +Any new PRs adding support for new databases from BIRD or SPIDER are welcome. ## Run benchmarks Run the whole suite on the `superhero` database: ```bash -python bench.py e2e=superhero iql=superhero sql=superhero +python bench.py task=iql,sql,e2e data=superhero ``` You can also run each evaluation separately or in subgroups: ```bash -python bench.py e2e=superhero ... 
+python bench.py task=e2e
+python bench.py task=iql,sql
+```
+
+Compare IQL generation performance on multiple LLMs:
+
+```bash
+python bench.py --multirun task=iql llm=gpt,claude
 ```
 
 ## Run tests
@@ -32,11 +39,12 @@ python -m pytest
 
 ## Metrics
 
-Each task computes following metrics:
+This suite computes the following metrics:
 
-tbd
+- `exact_match` - ratio of predicted queries that are identical to the ground truth ones.
+- `execution_accuracy` - ratio of predictions whose execution results match those of the ground truth queries.
 
-## Custom evaluation dataset
+## Add new dataset
 
 In order to run this suite against you own dataset, upload your dataset to [Hugging Face](https://huggingface.co)
 and make sure the data is in the format expected by the evaluation pipeline.
@@ -47,3 +55,6 @@ Evaluation dataset required fields:
 - `iql` - IQL corresponding to the SQL prompt
 - `difficulty` - SQL code difficulty label
 - `db_id` - database identifier
+
+
+Additionally, you need to create appropriate structured and freeform views for the downstream tasks.
\ No newline at end of file
diff --git a/benchmarks/sql/bench.py b/benchmarks/sql/bench.py
index e56ed488..b0db9c0c 100644
--- a/benchmarks/sql/bench.py
+++ b/benchmarks/sql/bench.py
@@ -1,15 +1,29 @@
 import asyncio
+import logging
 from enum import Enum
 from typing import Callable, Dict
 
 import hydra
 import neptune
+from bench.evaluator import Evaluator
+from bench.metrics import (
+    exact_match_iql,
+    exact_match_sql,
+    execution_accuracy,
+    invalid_iql,
+    invalid_sql,
+    unsupported_iql,
+    valid_efficiency_score,
+    valid_iql,
+    valid_sql,
+)
+from bench.pipelines import EndToEndEvaluationPipeline, EvaluationPipeline, IQLEvaluationPipeline, SQLEvaluationPipeline
 from datasets import load_dataset
-from loguru import logger
 from neptune.utils import stringify_unsupported
 from omegaconf import DictConfig
-from pipelines import TextToIQLEvaluationPipeline, TextToSQLEvaluationPipeline
-from saving import save
+
+logging.getLogger("LiteLLM").setLevel(logging.ERROR)
+logging.getLogger("httpx").setLevel(logging.ERROR)
 
 
 class EvaluationType(Enum):
@@ -17,66 +31,73 @@ class EvaluationType(Enum):
     Enum representing the type of evaluation.
     """
 
-    E2E = "e2e"
-    SQL = "sql"
-    IQL = "iql"
-
-
-async def bench_iql(config: DictConfig) -> None:
-    """
-    Runs IQL evaluation for a single dataset defined in hydra config.
-
-    Args:
-        config: hydra config, loads automatically from path passed on to the decorator.
-    """
-    logger.info(f"Running IQL predictions for dataset: {config.dataset_path} and view: {config.view_name}.")
-
-    dataset = load_dataset(config.dataset_path, split=config.split)
-    dataset = dataset.filter(lambda x: x["db_id"] in config.db_ids and x["difficulty"] in config.difficulties)
-    dataset = dataset.select(range(1))
-
-    pipe = TextToIQLEvaluationPipeline(config)
-    metrics, results = await pipe(dataset)
+    E2E = "E2E"
+    SQL = "SQL"
+    IQL = "IQL"
+
+
+EVALUATION_PIPELINES: Dict[str, EvaluationPipeline] = {
+    EvaluationType.SQL.value: SQLEvaluationPipeline,
+    EvaluationType.IQL.value: IQLEvaluationPipeline,
+    EvaluationType.E2E.value: EndToEndEvaluationPipeline,
+}
+
+EVALUATION_METRICS: Dict[str, Callable] = {
+    EvaluationType.IQL.value: {
+        "em_iql": exact_match_iql,
+        "valid_iql": valid_iql,
+        "invalid_iql": invalid_iql,
+        "unsupported_iql": unsupported_iql,
+        "em_sql": exact_match_sql,
+        "valid_sql": valid_sql,
+        "invalid_sql": invalid_sql,
+        "ex": execution_accuracy,
+        "ves": valid_efficiency_score,
+    },
+    EvaluationType.SQL.value: {
+        "em_sql": exact_match_sql,
+        "valid_sql": valid_sql,
+        "invalid_sql": invalid_sql,
+        "ex": execution_accuracy,
+        "ves": valid_efficiency_score,
+    },
+    EvaluationType.E2E.value: {
+        "em_iql": exact_match_iql,
+        "valid_iql": valid_iql,
+        "invalid_iql": invalid_iql,
+        "unsupported_iql": unsupported_iql,
+        "em_sql": exact_match_sql,
+        "valid_sql": valid_sql,
+        "invalid_sql": invalid_sql,
+        "ex": execution_accuracy,
+        "ves": valid_efficiency_score,
+    },
+}
 
-    output_file = save("./evals/", metrics=metrics, results=results)
-    logger.info(f"IQL evaluation metrics and predictions saved under directory: {output_file}.")
-
-    if config.neptune.run:
-        run = neptune.init_run(project=config.neptune.project)
-        run["sys/tags"].add(
-            [
-                EvaluationType.IQL.value,
-                config.view_name,
-                config.llm.model_name,
-                *config.db_ids,
-            ]
-        )
-        run["config"] = stringify_unsupported(config)
-        run["evaluation/metrics"] = stringify_unsupported(metrics)
-        logger.info(f"Evaluation results logged to neptune at {run.get_url()}.")
-
-
-async def bench_sql(config: DictConfig) -> None:
+async def bench(config: DictConfig) -> None:
     """
-    Runs Text2SQL evaluation for a single dataset defined in hydra config.
+    Function running evaluation for all datasets and evaluation tasks defined in hydra config.
 
     Args:
-        config: hydra config, loads automatically from path passed on to the decorator
+        config: Hydra configuration.
""" - logger.info(f"Running SQL predictions for dataset: {config.dataset_path} and view: {config.view_name}.") - - dataset = load_dataset(config.dataset_path, split=config.split) - dataset = dataset.filter(lambda x: x["db_id"] in config.db_ids and x["difficulty"] in config.difficulties) - dataset = dataset.select(range(1)) - - pipe = TextToSQLEvaluationPipeline(config) - metrics, results = await pipe(dataset) - - output_file = save("./evals/", metrics=metrics, results=results) - logger.info(f"IQL evaluation metrics and predictions saved under directory: {output_file}.") - - if config.neptune.run: - run = neptune.init_run(project=config.neptune.project) + dataset = load_dataset(config.data.path, split=config.data.split) + dataset = dataset.filter(lambda x: x["db_id"] in config.data.db_ids and x["difficulty"] in config.data.difficulties) + dataset = dataset.select(range(2)) + + pipeline = EVALUATION_PIPELINES[config.task.type](config) + metrics = EVALUATION_METRICS[config.task.type] + + evaluator = Evaluator(task=config.task.type) + results = await evaluator.compute( + pipe=pipeline, + data=dataset, + metrics=metrics, + ) + + if config.neptune: + run = neptune.init_run() run["sys/tags"].add( [ EvaluationType.SQL.value, @@ -86,29 +107,10 @@ async def bench_sql(config: DictConfig) -> None: ] ) run["config"] = stringify_unsupported(config) - run["evaluation/metrics"] = stringify_unsupported(metrics) - logger.info(f"Evaluation results logged to neptune at {run.get_url()}.") - + run["evaluation/metrics"] = stringify_unsupported(results["metrics"]) -async def bench(config: DictConfig) -> None: - """ - Function running evaluation for all datasets and evaluation tasks defined in hydra config. - Args: - config: Hydra configuration. - """ - evaluators_factory: Dict[str, Callable] = { - EvaluationType.SQL.value: bench_sql, - EvaluationType.IQL.value: bench_iql, - } - common_config = {k: v for k, v in config.items() if k not in evaluators_factory} - for evaluation_type, eval_func in evaluators_factory.items(): - if evaluation_type in config: - for dataset_config in config[evaluation_type].values(): - await eval_func(DictConfig({**common_config, **dataset_config})) - - -@hydra.main(version_base=None, config_path="config", config_name="config") +@hydra.main(config_path="config", config_name="config", version_base="1.3.2") def main(config: DictConfig) -> None: """ Function running evaluation for all datasets and evaluation tasks defined in hydra config. diff --git a/benchmarks/sql/views/freeform/__init__.py b/benchmarks/sql/bench/__init__.py similarity index 100% rename from benchmarks/sql/views/freeform/__init__.py rename to benchmarks/sql/bench/__init__.py diff --git a/benchmarks/sql/bench/evaluator.py b/benchmarks/sql/bench/evaluator.py new file mode 100644 index 00000000..318cd8da --- /dev/null +++ b/benchmarks/sql/bench/evaluator.py @@ -0,0 +1,129 @@ +import time +from typing import Any, Callable, Dict, List, Tuple + +from datasets import Dataset +from sqlalchemy import create_engine + +from .pipelines.base import EvaluationPipeline, EvaluationResult +from .utils import avarage_execution_time, execute_query + + +class Evaluator: + """ + Evaluator class. + """ + + def __init__(self, task: str) -> None: + """ + Constructs the evaluator. + + Args: + task: The task for the evaluator. + """ + self.task = task + + async def compute( + self, + pipe: Callable, + data: Dataset, + metrics: Dict[str, Callable], + ) -> Dict[str, Any]: + """ + Compute the evaluation results for the given pipeline and data. 
+ + Args: + pipe: The pipeline to be evaluated. + data: The evaluation data. + metrics: The metrics to be computed. + + Returns: + The evaluation results. + """ + results, perf_results = await self.call_pipeline(pipe, data) + results = self.results_processor(results) + metrics = self.compute_metrics(metrics, results["results"]) + + result = {} + result.update(metrics) + result.update(perf_results) + result.update(results) + return result + + async def call_pipeline( + self, pipe: EvaluationPipeline, data: Dataset + ) -> Tuple[List[EvaluationResult], Dict[str, Any]]: + """ + Call the pipeline with the given data. + + Args: + pipe: The pipeline to be called. + data: The evaluation data. + + Returns: + The evaluation results and performance metrics. + """ + start_time = time.perf_counter() + pipe_output = await pipe(data) + end_time = time.perf_counter() + return pipe_output, self._compute_time_perf(start_time, end_time, len(pipe_output)) + + def results_processor(self, results: List[EvaluationResult]) -> Dict[str, Any]: + """ + Process the results. + + Args: + results: The evaluation results. + + Returns: + The processed results. + """ + for result in results: + if result.db_url is not None: + engine = create_engine(result.db_url) + if result.reference.sql is not None: + result.reference.results, _ = execute_query(result.reference.sql, engine) + result.reference.execution_time = avarage_execution_time(result.reference.sql, engine, 10) + + if result.prediction.sql is not None: + result.prediction.results, _ = execute_query(result.prediction.sql, engine) + result.prediction.execution_time = avarage_execution_time(result.prediction.sql, engine, 10) + + return { + "results": results, + } + + def compute_metrics(self, metrics: Dict[str, Callable], results: List[EvaluationResult]) -> Dict[str, Any]: + """ + Compute a metric using the given inputs. + + Args: + metrics: The metrics to be computed. + results: The evaluation results. + + Returns: + The computed metric. + """ + return {"metrics": {metric_name: metric(results) for metric_name, metric in metrics.items()}} + + @staticmethod + def _compute_time_perf(start_time: float, end_time: float, num_samples: int) -> Dict[str, Any]: + """ + Compute the performance metrics. + + Args: + start_time: The start time. + end_time: The end time. + num_samples: The number of samples. + + Returns: + The performance metrics. 
+ """ + latency = end_time - start_time + throughput = num_samples / latency + latency_sample = 1.0 / throughput + + return { + "total_time_in_seconds": latency, + "samples_per_second": throughput, + "latency_in_seconds": latency_sample, + } diff --git a/benchmarks/sql/bench/metrics/__init__.py b/benchmarks/sql/bench/metrics/__init__.py new file mode 100644 index 00000000..2e107a73 --- /dev/null +++ b/benchmarks/sql/bench/metrics/__init__.py @@ -0,0 +1,16 @@ +from .iql import exact_match as exact_match_iql +from .iql import invalid_iql, unsupported_iql, valid_iql +from .sql import exact_match as exact_match_sql +from .sql import execution_accuracy, invalid_sql, valid_efficiency_score, valid_sql + +__all__ = [ + "exact_match_iql", + "exact_match_sql", + "valid_iql", + "valid_sql", + "invalid_iql", + "invalid_sql", + "unsupported_iql", + "execution_accuracy", + "valid_efficiency_score", +] diff --git a/benchmarks/sql/bench/metrics/iql.py b/benchmarks/sql/bench/metrics/iql.py new file mode 100644 index 00000000..ab45c1cf --- /dev/null +++ b/benchmarks/sql/bench/metrics/iql.py @@ -0,0 +1,61 @@ +from typing import List + +from dbally.iql._exceptions import IQLError, IQLUnsupportedSyntaxError + +from ..pipelines.base import EvaluationResult + + +def exact_match(results: List[EvaluationResult]) -> float: + """ + Computes the ratio of predicated queries that are identical to the ground truth ones. + + Args: + results: List of evaluation results. + + Returns: + Ratio of predicated queries that are identical to the ground truth ones. + """ + return sum(result.prediction.iql == result.reference.iql for result in results) / len(results) + + +def valid_iql(results: List[EvaluationResult]) -> float: + """ + Calculates the ratio of valid IQL queries for a given results. + + Args: + results: List of evaluation results. + + Returns: + Valid IQL queries ratio. + """ + return sum( + not isinstance(result.prediction.exception, (IQLError, IQLUnsupportedSyntaxError, SyntaxError)) + for result in results + ) / len(results) + + +def invalid_iql(results: List[EvaluationResult]) -> float: + """ + Calculates the ratio of invalid IQL queries for a given results. + + Args: + results: List of evaluation results. + + Returns: + Invalid IQL queries ratio. + """ + + return sum(isinstance(result.prediction.exception, (IQLError, SyntaxError)) for result in results) / len(results) + + +def unsupported_iql(results: List[EvaluationResult]) -> float: + """ + Calculates the ratio of unsupported queries for a given results. + + Args: + results: List of evaluation results. + + Returns: + Unsupported queries ratio. + """ + return sum(isinstance(result.prediction.exception, IQLUnsupportedSyntaxError) for result in results) / len(results) diff --git a/benchmarks/sql/bench/metrics/sql.py b/benchmarks/sql/bench/metrics/sql.py new file mode 100644 index 00000000..23bc27d0 --- /dev/null +++ b/benchmarks/sql/bench/metrics/sql.py @@ -0,0 +1,108 @@ +from typing import List + +import pandas as pd +from sqlalchemy.exc import SQLAlchemyError + +from dbally.iql._exceptions import IQLError, IQLUnsupportedSyntaxError + +from ..pipelines.base import EvaluationResult + + +def exact_match(results: List[EvaluationResult]) -> float: + """ + Computes the ratio of predicated queries that are identical to the ground truth ones. + + Args: + results: List of evaluation results. + + Returns: + Ratio of predicated queries that are identical to the ground truth ones. 
+    """
+    return sum(result.prediction.sql == result.reference.sql for result in results) / len(results)
+
+
+def valid_sql(results: List[EvaluationResult]) -> float:
+    """
+    Calculates the ratio of valid SQL queries for the given results.
+
+    Args:
+        results: List of evaluation results.
+
+    Returns:
+        Valid SQL queries ratio.
+    """
+    return sum(
+        not isinstance(result.prediction.exception, (IQLError, IQLUnsupportedSyntaxError, SyntaxError, SQLAlchemyError))
+        for result in results
+    ) / len(results)
+
+
+def invalid_sql(results: List[EvaluationResult]) -> float:
+    """
+    Calculates the ratio of invalid SQL queries for the given results.
+
+    Args:
+        results: List of evaluation results.
+
+    Returns:
+        Invalid SQL queries ratio.
+    """
+
+    return sum(
+        isinstance(result.prediction.exception, (IQLError, IQLUnsupportedSyntaxError, SyntaxError, SQLAlchemyError))
+        for result in results
+    ) / len(results)
+
+
+def _execution_accuracy(result: EvaluationResult) -> bool:
+    reference = pd.DataFrame(result.reference.results)
+    prediction = pd.DataFrame(result.prediction.results)
+
+    # If filtering works correctly, the number of rows will be the same
+    # TODO: Sometimes a different number of rows is okay, e.g. if df has aggregated values that are expanded in gt
+    if reference.shape[0] != prediction.shape[0]:
+        return False
+    # Returned view may have the same columns, or more columns than the ground truth
+    if not reference.columns.isin(prediction.columns).all():
+        return False
+    # Check for dataframe equality, disregarding indexing and order
+    # commented out way is also ok but slower. Leaving it here just in case
+    # return df_gt.merge(df[df_gt.columns], how='outer', on=df_gt.columns.tolist(),
+    #   indicator='indicator').indicator.drop_duplicates().values.tolist() == ['both']
+    prediction = prediction[reference.columns].sort_values(by=reference.columns.tolist()).reset_index(drop=True)
+    reference = reference.sort_values(by=reference.columns.tolist()).reset_index(drop=True)
+    return prediction.equals(reference)
+
+
+def execution_accuracy(results: List[EvaluationResult]) -> float:
+    """
+    Calculates execution accuracy score i.e. the proportion of examples in the evaluation set for
+    which the executed results of both the predicted and ground-truth SQLs are identical.
+
+    Args:
+        results: List of evaluation results.
+
+    Returns:
+        Execution accuracy score.
+    """
+    return sum(_execution_accuracy(result) for result in results) / len(results)
+
+
+def _valid_efficiency_score(result: EvaluationResult) -> float:
+    if _execution_accuracy(result) is False:
+        return 0
+    return (result.reference.execution_time / result.prediction.execution_time) ** 0.5
+
+
+def valid_efficiency_score(results: List[EvaluationResult]) -> float:
+    """
+    Calculates valid efficiency score that measures the efficiency of valid SQLs generated
+    by models. More details about this metric can be found here: https://arxiv.org/pdf/2305.03111.pdf.
+
+    Args:
+        results: List of evaluation results.
+
+    Returns:
+        Valid efficiency score.
+ """ + return sum(_valid_efficiency_score(result) for result in results) / len(results) diff --git a/benchmarks/sql/bench/pipelines/__init__.py b/benchmarks/sql/bench/pipelines/__init__.py new file mode 100644 index 00000000..76feca79 --- /dev/null +++ b/benchmarks/sql/bench/pipelines/__init__.py @@ -0,0 +1,12 @@ +from .base import EvaluationPipeline, EvaluationResult +from .e2e import EndToEndEvaluationPipeline +from .iql import IQLEvaluationPipeline +from .sql import SQLEvaluationPipeline + +__all__ = [ + "EvaluationPipeline", + "EndToEndEvaluationPipeline", + "SQLEvaluationPipeline", + "IQLEvaluationPipeline", + "EvaluationResult", +] diff --git a/benchmarks/sql/bench/pipelines/base.py b/benchmarks/sql/bench/pipelines/base.py new file mode 100644 index 00000000..7fa85cb3 --- /dev/null +++ b/benchmarks/sql/bench/pipelines/base.py @@ -0,0 +1,68 @@ +from abc import ABC, abstractmethod +from dataclasses import asdict, dataclass, field +from typing import Any, Dict, List, Optional + +from datasets import Dataset + +from dbally.llms.base import LLM +from dbally.llms.litellm import LiteLLM +from dbally.llms.local import LocalLLM + + +@dataclass +class ExecutionResult: + """ + Represents the result of a single query execution. + """ + + sql: Optional[str] = None + iql: Optional[str] = None + results: List[Dict[str, Any]] = field(default_factory=list) + exception: Optional[Exception] = None + execution_time: Optional[float] = None + + +@dataclass +class EvaluationResult: + """ + Represents the result of a single evaluation. + """ + + question: str + reference: ExecutionResult + prediction: ExecutionResult + db_url: Optional[str] = None + + dict = asdict + + +class EvaluationPipeline(ABC): + """ + Evaluation pipeline base class. + """ + + def get_llm(self, config: Dict) -> LLM: + """ + Returns the LLM based on the configuration. + + Args: + config: The LLM configuration. + + Returns: + The LLM object. + """ + if config.model_name.startswith("local/"): + return LocalLLM(config.model_name.split("/", 1)[1]) + return LiteLLM(config.model_name) + + @abstractmethod + async def __call__(self, dataset: Dataset) -> List[EvaluationResult]: + """ + Runs the evaluation pipeline. + + Args: + dataset: The evaluation dataset. + + Returns: + The list of evaluation results. + """ diff --git a/benchmarks/sql/bench/pipelines/e2e.py b/benchmarks/sql/bench/pipelines/e2e.py new file mode 100644 index 00000000..84cb035a --- /dev/null +++ b/benchmarks/sql/bench/pipelines/e2e.py @@ -0,0 +1,118 @@ +from typing import Dict, List + +from datasets import Dataset +from sqlalchemy import create_engine +from tqdm import tqdm + +import dbally +from dbally.collection.collection import Collection +from dbally.collection.exceptions import NoViewFoundError +from dbally.iql._exceptions import IQLError +from dbally.iql_generator.prompt import UnsupportedQueryError +from dbally.views.sqlalchemy_base import SqlAlchemyBaseView + +from ..views import FREEFORM_VIEW_REGISTRY, STRUCTURED_VIEW_REGISTRY +from .base import EvaluationPipeline, EvaluationResult, ExecutionResult + + +class EndToEndEvaluationPipeline(EvaluationPipeline): + """ + Pipeline for evaluating IQL predictions. + """ + + def __init__(self, config: Dict) -> None: + """ + Constructs the pipeline for evaluating IQL predictions. + + Args: + config: The configuration for the pipeline. + + Raises: + ValueError: If the view name is not supported. 
+ """ + self.llm = self.get_llm(config.llm) + self.collection = self.get_collection(config.data.views) + + def get_collection(self, config: Dict) -> Collection: + """ + Returns the view object based on the view name. + + Args: + config: The view configuration. + + Returns: + The view object. + + Raises: + ValueError: If the view name is not supported. + """ + if not config.structured and not config.freeform: + raise ValueError("No structured and freeform views found in the configuration.") + + collection = dbally.create_collection("bench", self.llm) + + for view_name, db_url in config.structured.items(): + if view_cls := STRUCTURED_VIEW_REGISTRY.get(view_name): + collection.add(view_cls, lambda: view_cls(create_engine(db_url))) # pylint: disable=cell-var-from-loop + else: + raise ValueError(f"View {view_name} not supported. Available views: {STRUCTURED_VIEW_REGISTRY}.") + + for view_name, db_url in config.freeform.items(): + if view_cls := FREEFORM_VIEW_REGISTRY.get(view_name): + collection.add(view_cls, lambda: view_cls(create_engine(db_url))) # pylint: disable=cell-var-from-loop + else: + raise ValueError(f"View {view_name} not supported. Available views: {FREEFORM_VIEW_REGISTRY}.") + + return collection + + async def __call__(self, dataset: Dataset) -> List[EvaluationResult]: + """ + Runs the pipeline for evaluating IQL predictions. + + Args: + dataset: The dataset containing the questions and ground truth IQL queries. + + Returns: + The list of IQL predictions. + """ + results = [] + + for data in tqdm(dataset, desc="E2E evaluation"): + try: + result = await self.collection.ask( + question=data["question"], + dry_run=True, + return_natural_response=False, + ) + except NoViewFoundError as exc: + prediction = ExecutionResult(exception=exc) + db_url = None + except (IQLError, SyntaxError, UnsupportedQueryError) as exc: + query = "UNSUPPORTED_QUERY" if isinstance(exc, UnsupportedQueryError) else exc.source + prediction = ExecutionResult(iql=query, exception=exc) + db_url = None + else: + prediction = ExecutionResult( + iql=result.context.get("iql", None), + sql=result.context.get("sql", None), + ) + used_view = self.collection.get(result.view_name) + db_url = ( + used_view._sqlalchemy_engine.url + if isinstance(used_view, SqlAlchemyBaseView) + else used_view._engine.url + ) + reference = ExecutionResult( + iql=data["iql"], + sql=data["sql"], + ) + results.append( + EvaluationResult( + question=data["question"], + reference=reference, + prediction=prediction, + db_url=db_url, + ), + ) + + return results diff --git a/benchmarks/sql/bench/pipelines/iql.py b/benchmarks/sql/bench/pipelines/iql.py new file mode 100644 index 00000000..e75f5065 --- /dev/null +++ b/benchmarks/sql/bench/pipelines/iql.py @@ -0,0 +1,96 @@ +from typing import Dict, List + +from datasets import Dataset +from sqlalchemy import create_engine +from tqdm import tqdm + +from dbally.iql._exceptions import IQLError +from dbally.iql_generator.prompt import UnsupportedQueryError +from dbally.views.sqlalchemy_base import SqlAlchemyBaseView + +from ..views import STRUCTURED_VIEW_REGISTRY +from .base import EvaluationPipeline, EvaluationResult, ExecutionResult + + +class IQLEvaluationPipeline(EvaluationPipeline): + """ + Pipeline for evaluating IQL predictions. + """ + + def __init__(self, config: Dict) -> None: + """ + Constructs the pipeline for evaluating IQL predictions. + + Args: + config: The configuration for the pipeline. + + Raises: + ValueError: If the view name is not supported. 
+ """ + self.llm = self.get_llm(config.llm) + self.view = self.get_view(config.data.views) + + def get_view(self, config: Dict) -> SqlAlchemyBaseView: + """ + Returns the view object based on the view name. + + Args: + config: The view configuration. + + Returns: + The view object. + + Raises: + ValueError: If the view name is not supported. + """ + if not config.structured: + raise ValueError("No structured views found in the configuration.") + + view_name, db_url = list(config.structured.items())[0] + if view_cls := STRUCTURED_VIEW_REGISTRY.get(view_name): + return view_cls(create_engine(db_url)) + + raise ValueError(f"View {view_name} not supported. Available views: {STRUCTURED_VIEW_REGISTRY}.") + + async def __call__(self, dataset: Dataset) -> List[EvaluationResult]: + """ + Runs the pipeline for evaluating IQL predictions. + + Args: + dataset: The dataset containing the questions and ground truth IQL queries. + + Returns: + The list of IQL predictions. + """ + results = [] + + for data in tqdm(dataset, desc="IQL evaluation"): + try: + result = await self.view.ask( + query=data["question"], + llm=self.llm, + n_retries=0, + dry_run=True, + ) + except (IQLError, UnsupportedQueryError) as exc: + query = "UNSUPPORTED_QUERY" if isinstance(exc, UnsupportedQueryError) else exc.source + prediction = ExecutionResult(iql=query, exception=exc) + else: + prediction = ExecutionResult( + iql=result.context.get("iql", None), + sql=result.context.get("sql", None), + ) + reference = ExecutionResult( + iql=data["iql"], + sql=data["sql"], + ) + results.append( + EvaluationResult( + db_url=self.view._sqlalchemy_engine.url, + question=data["question"], + reference=reference, + prediction=prediction, + ), + ) + + return results diff --git a/benchmarks/sql/bench/pipelines/sql.py b/benchmarks/sql/bench/pipelines/sql.py new file mode 100644 index 00000000..f1b82b6d --- /dev/null +++ b/benchmarks/sql/bench/pipelines/sql.py @@ -0,0 +1,86 @@ +from typing import Dict, List + +from datasets import Dataset +from sqlalchemy import create_engine +from tqdm import tqdm + +from dbally.views.freeform.text2sql.view import BaseText2SQLView + +from ..views import FREEFORM_VIEW_REGISTRY +from .base import EvaluationPipeline, EvaluationResult, ExecutionResult + + +class SQLEvaluationPipeline(EvaluationPipeline): + """ + Pipeline for evaluating SQL predictions. + """ + + def __init__(self, config: Dict) -> None: + """ + Constructs the pipeline for evaluating SQL predictions. + + Args: + config: The configuration for the pipeline. + + Raises: + ValueError: If the view name is not supported. + """ + self.view = self.get_view(config.data.views) + self.llm = self.get_llm(config.llm) + + def get_view(self, config: Dict) -> BaseText2SQLView: + """ + Returns the view object based on the view name. + + Args: + config: The view configuration. + + Returns: + The view object. + + Raises: + ValueError: If the view name is not supported + """ + if not config.freeform: + raise ValueError("No freeform views found in the configuration.") + + view_name, db_url = list(config.freeform.items())[0] + if view_cls := FREEFORM_VIEW_REGISTRY.get(view_name): + return view_cls(create_engine(db_url)) + + raise ValueError(f"View {view_name} not supported. Available views: {FREEFORM_VIEW_REGISTRY}.") + + async def __call__(self, dataset: Dataset) -> List[EvaluationResult]: + """ + Runs the pipeline for evaluating IQL predictions. + + Args: + dataset: The dataset containing the questions and ground truth IQL queries. 
+
+        Returns:
+            The list of SQL predictions.
+        """
+        results = []
+
+        for data in tqdm(dataset, desc="SQL evaluation"):
+            result = await self.view.ask(
+                query=data["question"],
+                llm=self.llm,
+                n_retries=0,
+                dry_run=True,
+            )
+            prediction = ExecutionResult(sql=result.context["sql"])
+            reference = ExecutionResult(
+                iql=data["iql"],
+                sql=data["sql"],
+            )
+            results.append(
+                EvaluationResult(
+                    db_url=self.view._engine.url,
+                    question=data["question"],
+                    reference=reference,
+                    prediction=prediction,
+                ),
+            )
+
+        return results
diff --git a/benchmarks/sql/bench/utils.py b/benchmarks/sql/bench/utils.py
new file mode 100644
index 00000000..ef85e5a1
--- /dev/null
+++ b/benchmarks/sql/bench/utils.py
@@ -0,0 +1,37 @@
+import time
+from typing import Any, Dict, List, Tuple
+
+from sqlalchemy import Engine, text
+
+
+def execute_query(query: str, engine: Engine) -> Tuple[List[Dict[str, Any]], float]:
+    """
+    Execute the given query on the database.
+
+    Args:
+        query: The query to be executed.
+        engine: The database engine.
+
+    Returns:
+        The query results and the execution time in seconds.
+    """
+    with engine.connect() as connection:
+        start_time = time.perf_counter()
+        rows = connection.execute(text(query)).fetchall()
+        execution_time = time.perf_counter() - start_time
+        return [dict(row._mapping) for row in rows], execution_time  # pylint: disable=protected-access
+
+
+def avarage_execution_time(query: str, engine: Engine, n: int) -> float:
+    """
+    Execute the given query on the database n times and return the average execution time.
+
+    Args:
+        query: The query to be executed.
+        engine: The database engine.
+        n: The number of times to execute the query.
+
+    Returns:
+        The average execution time.
+    """
+    return sum(execute_query(query, engine)[1] for _ in range(n)) / n
diff --git a/benchmarks/sql/views/__init__.py b/benchmarks/sql/bench/views/__init__.py
similarity index 100%
rename from benchmarks/sql/views/__init__.py
rename to benchmarks/sql/bench/views/__init__.py
diff --git a/benchmarks/sql/bench/views/freeform/__init__.py b/benchmarks/sql/bench/views/freeform/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/benchmarks/sql/views/freeform/superhero.py b/benchmarks/sql/bench/views/freeform/superhero.py
similarity index 100%
rename from benchmarks/sql/views/freeform/superhero.py
rename to benchmarks/sql/bench/views/freeform/superhero.py
diff --git a/benchmarks/sql/views/structured/superhero.py b/benchmarks/sql/bench/views/structured/superhero.py
similarity index 80%
rename from benchmarks/sql/views/structured/superhero.py
rename to benchmarks/sql/bench/views/structured/superhero.py
index 69c2e82e..d7a20026 100644
--- a/benchmarks/sql/views/structured/superhero.py
+++ b/benchmarks/sql/bench/views/structured/superhero.py
@@ -8,7 +8,7 @@
 
 from dbally import SqlAlchemyBaseView, decorators
 
-engine = create_engine("sqlite:///superhero.db")
+engine = create_engine("sqlite:///data/superhero.db")
 
 SuperheroModel = automap_base()
 SuperheroModel.prepare(autoload_with=engine, reflect=True)
@@ -86,7 +86,7 @@ def filter_by_race(self, race: str) -> sqlalchemy.ColumnElement:
     def filter_by_publisher(self, publisher: str) -> sqlalchemy.ColumnElement:
         return SuperheroModel.classes.superhero.publisher_id.in_(
             sqlalchemy.select(SuperheroModel.classes.publisher.id).where(
-                SuperheroModel.classes.publisher.publisher == publisher
+                SuperheroModel.classes.publisher.publisher_name == publisher
             )
         )
 
@@ -104,6 +104,10 @@ def filter_by_gender(self, gender: str) -> sqlalchemy.ColumnElement:
sqlalchemy.select(SuperheroModel.classes.gender.id).where(SuperheroModel.classes.gender.gender == gender) ) + @decorators.view_filter() + def filter_by_power(self, power: str) -> sqlalchemy.ColumnElement: + return SuperheroModel.classes.superpower.power_name == power + @decorators.view_filter() def filter_by_missing_weight(self) -> sqlalchemy.ColumnElement: return SuperheroModel.classes.superhero.weight_kg == 0 or SuperheroModel.classes.superhero.weight_kg is None @@ -135,7 +139,6 @@ class SuperheroView(SqlAlchemyBaseView, SuperheroFilterMixin): """ def __init__(self, sqlalchemy_engine: sqlalchemy.engine.Engine) -> None: - self._inner = sqlalchemy.select() super().__init__(sqlalchemy_engine) def get_select(self) -> sqlalchemy.Select: @@ -194,53 +197,3 @@ def get_select(self) -> sqlalchemy.Select: SuperheroDBSchema.skin_color, ) ) - - @decorators.view_filter() - def has_power(self, power: str) -> sqlalchemy.ColumnElement: - return self._inner.c.powers.contains([power]) - - # - # - # @decorators.view_filter() - # def power_higher_than(self, power_level: int) -> sqlalchemy.ColumnElement: - # return self._inner.c.attributes["Power"] < power_level # TODO: this does not work for some reason - # - # - # @decorators.view_filter() - # def combat_higher_than(self, combat_level: int) -> sqlalchemy.ColumnElement: - # return self._inner.c.attributes["Combat"] < combat_level # TODO: this does not work for some reason - - -# todo: sometimes I use classes, sometimes metadata.tables, because some classes aren't automapped correctly. -# at some point we should either fix the automap or use metadata.tables everywhere -class SuperheroCountByPowerView(SqlAlchemyBaseView, SuperheroFilterMixin): - """ - View used to count the number of superheroes with a specific power. - """ - - def __init__(self, sqlalchemy_engine: sqlalchemy.engine.Engine) -> None: - self._superhero_count = sqlalchemy.func.count(SuperheroModel.classes.superhero.id).label("superhero_count") - self._hero_power = SuperheroModel.metadata.tables["hero_power"] - - super().__init__(sqlalchemy_engine) - - def get_select(self) -> sqlalchemy.Select: - """ - Creates the initial SqlAlchemy select object, which will be used to build the query. 
diff --git a/benchmarks/sql/config/config.yaml b/benchmarks/sql/config/config.yaml
index 99df9b2d..dc6784c3 100644
--- a/benchmarks/sql/config/config.yaml
+++ b/benchmarks/sql/config/config.yaml
@@ -1,12 +1,7 @@
 defaults:
-  - e2e:
-  - iql:
-  - sql:
+  - task: iql
+  - data: superhero
+  - llm: gpt
   - _self_
 
-llm:
-  model_name: "gpt-3.5-turbo"
-
-neptune:
-  project: "deepsense-ai/db-ally"
-  run: False
+neptune: False
diff --git a/benchmarks/sql/config/data/superhero.yaml b/benchmarks/sql/config/data/superhero.yaml
new file mode 100644
index 00000000..7273d98d
--- /dev/null
+++ b/benchmarks/sql/config/data/superhero.yaml
@@ -0,0 +1,11 @@
+path: "micpst/bird-dev-iql"
+split: "dev"
+db_ids: ["superhero"]
+difficulties: ["simple"]
+views:
+  structured: {
+    "SuperheroView": "sqlite:///data/superhero.db",
+  }
+  freeform: {
+    "SuperheroFreeformView": "sqlite:///data/superhero.db"
+  }
diff --git a/benchmarks/sql/config/e2e/superhero.yaml b/benchmarks/sql/config/e2e/superhero.yaml
deleted file mode 100644
index da957a77..00000000
--- a/benchmarks/sql/config/e2e/superhero.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-superhero:
-  output_path: "dbally_evaluation"
-  dataset_path: "data/superhero.json"
-  db_name: "superhero"
-  view_names: ["SuperheroView", "SuperheroCountByPowerView"]
-  difficulty_levels: ["simple"]
diff --git a/benchmarks/sql/config/iql/superhero.yaml b/benchmarks/sql/config/iql/superhero.yaml
deleted file mode 100644
index 209bc245..00000000
--- a/benchmarks/sql/config/iql/superhero.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-superhero:
-  dataset_path: "micpst/bird-dev-iql"
-  split: "dev"
-  db_ids: ["superhero"]
-  difficulties: ["simple"]
-  output_path: "iql_evaluation"
-  view_name: "SuperheroView"
-  db_url: "sqlite:///superhero.db"
diff --git a/benchmarks/sql/config/llm/claude.yaml b/benchmarks/sql/config/llm/claude.yaml
new file mode 100644
index 00000000..2eee59c7
--- /dev/null
+++ b/benchmarks/sql/config/llm/claude.yaml
@@ -0,0 +1 @@
+model_name: "claude-3-5-sonnet-20240620"
diff --git a/benchmarks/sql/config/llm/gpt.yaml b/benchmarks/sql/config/llm/gpt.yaml
new file mode 100644
index 00000000..7c8fe609
--- /dev/null
+++ b/benchmarks/sql/config/llm/gpt.yaml
@@ -0,0 +1 @@
+model_name: "gpt-4-turbo"
diff --git a/benchmarks/sql/config/sql/superhero.yaml b/benchmarks/sql/config/sql/superhero.yaml
deleted file mode 100644
index ad22bbf7..00000000
--- a/benchmarks/sql/config/sql/superhero.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-superhero:
-  dataset_path: "micpst/bird-dev-iql"
-  split: "dev"
-  db_ids: ["superhero"]
-  difficulties: ["simple"]
-  output_path: "iql_evaluation"
-  view_name: "SuperheroFreeformView"
-  db_url: "sqlite:///superhero.db"
diff --git a/benchmarks/sql/config/task/e2e.yaml b/benchmarks/sql/config/task/e2e.yaml
new file mode 100644
index 00000000..f8d43329
--- /dev/null
+++ b/benchmarks/sql/config/task/e2e.yaml
@@ -0,0 +1 @@
+type: "E2E"
diff --git a/benchmarks/sql/config/task/iql.yaml b/benchmarks/sql/config/task/iql.yaml
new file mode 100644
index 00000000..3d385f11
--- /dev/null
+++ b/benchmarks/sql/config/task/iql.yaml
@@ -0,0 +1 @@
+type: "IQL"
diff --git a/benchmarks/sql/config/task/sql.yaml b/benchmarks/sql/config/task/sql.yaml
new file mode 100644
index 00000000..dfa246b1
--- /dev/null
+++ b/benchmarks/sql/config/task/sql.yaml
@@ -0,0 +1 @@
+type: "SQL"
diff --git a/benchmarks/sql/data/.gitkeep b/benchmarks/sql/data/.gitkeep
new file mode 100644
index 00000000..e69de29b
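The config groups above follow Hydra's defaults-list convention (task/data/llm plus _self_). As an illustration only (not part of the patch), composing the benchmark config and overriding groups from Python could look like the sketch below, assuming hydra-core 1.2+ and a working directory of benchmarks/sql:

    from hydra import compose, initialize

    # "config" matches the directory restructured in this patch; the relative path is an assumption.
    with initialize(config_path="config", version_base=None):
        cfg = compose(config_name="config", overrides=["task=sql", "llm=claude"])

    print(cfg.task.type)       # "SQL"
    print(cfg.llm.model_name)  # "claude-3-5-sonnet-20240620"
    print(cfg.data.db_ids)     # ["superhero"]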
diff --git a/benchmarks/sql/data/superhero.db b/benchmarks/sql/data/superhero.db
new file mode 100644
index 0000000000000000000000000000000000000000..4cfbee0ba8b6033a7415184caff90d363fe444c2
GIT binary patch
literal 237568
[base85-encoded binary contents of superhero.db omitted]
zpsO)>N&#Jk)+hyZC8nEFK<~sTCaGfDwngsxKxXlVYD z&a8GGwn!(!b5Tkupf{qFQb2D&X?q$J0#XX-b!dW8K(EEvC!j@B~gxeKDrYjJOduA6o9%fu~J|z46GEG zgKAa^%tjwvR+|Oe0O3pw-b#TP2$Yoq(_tW7DQq+i&a4!e3TIXdOhLm|3QQ8eo2(R= zCEtHMzk|@4<==s@wfs8((DHA87+C)8hteDlzAw6N z`L_>BE&ukyh%Nv2L?0~wrlaBItky&3ZIk8S?g)wH-)^X8`L{DlE&q0ct>xd2D7F0C z0nRM{wnwSu-*)JO<=-^)*Ya;0K+C_agiAJA{%whBmVaBw?%rhiw>dmm{%wY8mVcX~ zHOs$^F=ETVjnH4qzYy=6EdMruf#u)&D7E|x@xICOZ#_WEzj4_Yn=JoEQEK@&BA3NY zmVd)=X8AXS)-3-9VQcxS`Paa}@~_4Wk$)8= z{U-9S`U6oX|Ek{s$-nA17?6L}ujm>1S3$gQBLAwN(H-)y`U%}3|Ehlil7H2YuqFSh zA5cpERo|lt@~`?1e#yV;TR`%!`UaCl{#9RN6y#q8NxzBwtGJv-=`B!}`OuLEvt3JXiA^)lm(KGU|`T*VO%xdq$FZox!hic?s z^)7lw{#Eb5FZox!jR_$CDyaKSIKAs{Hvg&Hj#hTb8trfRnNed{HvY$=>VL<*>PoR|is~*RI z$iM0_tUmIudK68Nf7QbnJo#5WghfmKRSybJZi+Ba55O7uSKW^$$iM17l#+kdy;xD? zUv(6->9X2gFbELd2}u4`N5FFAUv&pc$-n9_I!XRj5Ja2Ezv@pL zR!sg?Hv^J?)lGopUj;!_O#W4MGN5Aeuew2IzL@;0u9Kh^lYiAU()D8Uui7ttC?@}^ zt7P>RlYiAd7?6L}Wq{;gwHHm0f7PWZCI6~DC?)@@OVAqmS6vLh6*4WwNy2S1Z^h(a1(8!s z{#9Gh2lB5flYLQ4{#7ODI{8->!%l7CeZnjrtG^G&*2%;yx(Gt=B+ zKBsuDndlbtImL}8rNbW87k92gtolx0u-hOUxp-nArhx`_e6DcEDmY z&@E8iBik%|&06N85#_GpBc@yA|(a+^MfS03B$z6auqBlmbie4=5 z0^AfmN1g=8ik>O=0VYIGiw=nPkhcLgjmG3zfPY24l{*1%MqY?K7P&{>2Y7wta(NnH zYvcmC7myptjLeHnlQ#kmiS&`@0a`~IMnVx4{we%r_amb;W_e5z=-f5xg*da+&o+_9FX?{ejoZ&o(gy|^n~0KxII)C+83&oHv?W2 zS})H9EDJ4=y8`1v!$SQ+-Q?YXjYAQ6GT@is*K%Lr_26^CM}kKsh=2e69f7|i@OK3M zj=WM3z%ZcC)TQo$p%(3N4-Bzr zhkM{uppWqBDM0Tl`E)SQt0)@;^rX7NJuuLsz3zblKu@5oKhR@9{VbAC`vN_xcDo1q z0NoESy)6>Yy?~CO#>o~*jh;YvDCtW&&;fP1d!UC!GNSH4*Qm?f1KljzQxW2igE#f@rk1NZM@$ zbTMkQv`9W}VUc{=+#>PP3}`p%Hnm7TZDNsp+8C%7pEj~cK5Yn8jTvkJbdi#{)Cal{ z=p>*Efa(ElL^R?SN!=LGIf!Z0BI!L|sbmT(Qb}K^63homu0r<&H{Q6DBB|GeHPHY z@Vwk2`E;2@@@W$2C_c>uI)dIWwMcru#3E@e0d$MRv~Drbwdlbji=_8w0$qW!g+O~? zw*cs3^ku$9(%L+rO4)gJX8>(cQulPAY}n1UNbKeSWujNJffB0Lt(yh35M?ug7NEuq zi=@VMpn0;w>ZVyFv6^a;v@r!}mJ++kK$Fx?w{8;9aP)a1&>&eQbrUR-8smXFqo?D5 z+N17Rpq6NNj73s58mI~SH_9UMGSVVxV+2qGv;qDTBxT?~9asC@I`E$kVMO3RLE;(w zr+q-+KS9z4_)qKfi(gX0HdJH`U|Eb$h2L4l5BjVscK@xHBpSnub zxOLz^RgD_pKUD(H;6HT%S_A*7B6UTeuA@9=y+rK~)O8@(6R2x1V3xWpP}fdC7qvG~ z*H%D#7^e~J#rc1+dR@g|j9(E?$r-K3wc-m#liW(9>IwBwfBN| zi{wO{?hWzT4!f^6By{c}TuB-XpnaQ%z_6i+)|-r8nuBdVp@BewP9KcYkdcM|F#~ zLHW~KzLj4*B0H72GLbtYQJ6L(k(-&wX%uL=C@QPbnrNjK!6bchA~$XP_@+_Q?98Zc z_1C9o9{*H6YOye?+r~9(itmN|XhbTJoty59_FfRx9s9~RI6*P+yagk&^K!G7rp?Wh z)+Z!ZW+&6;CUbJ~(`M$aT$xPe8`p{UogdZBhN6npF?#-p?A%1EaAYEtmyd=5z^P;k=&<7czQu9V+=j0ToP0Y?+n#j%!nU-hB;NtdE>BbrO zIBi@awIV5`D*RQA#pX=C#8*Gf0tVAQDnV>``_ z>NZ{BOgiX!b74Lzk(pUIDzP#zS86UzB@>y2UiUd-*Op@!w`LCPMkG?13H~EZ+$Uwc za*tdcRVA%wOMv5ul5|79IT(?hJ|daOFGzR%QT379+iVs;N1JGL)`(bgWX^IWuBj!3X|s~KxrO<8xlWVmV$JAE z=R9xvh-9)LCz&2KeWpnS`br^!VDvPOC~ZVu;n+lKS#~lfQ-YORkv1uBc_KF!i#2uB zfl)oDMs<_k$LpA>tTQf|n<`A3mX)28y((?;(vf+EuIqM_iC4EZof6fJ6r#fAGDYlH zIBlZprr~WS%feJP{z0QAk4O}(#kxzIV`5FDt%E%#i9zS%Rm~(ZkbdVEBvO--D-)@- zae4VGvUBr;PE%jfwJ`-g4 z{o$WACY-1-BYSCV^ax4(bk#TX}rX;{fRo`ZJiORHR675qA)i}yQ7PG zj+5SW$KW{ZabmYTJC&0*XZ*~9m;IKRN~DdH?QCKg*8YCIN6Tg!e9V>SjXq&Ne*Cy(PBK>( zUtWG9ZA3Cvkew^rCR>^q^&d@)`m2iUg(V4TU~FD0Gf$ep#9TkP*GSosmNu|(Mjo@5 z)l!%?IwyO5Vo9OdJ$#y2Ce!7sAN8a< z)NHtf)P4qT8!n&8e`HOhH*Gdd7IBjk`)63jdRbKbhja2!ZY{egJa1^m`cxvBGZ`x) znIlu2pOu}vyg;_+Bxx!r5pO+&yG!Or{)`@yu|BarnUgNh9LWw-he!54RX!gmpV?J* zYQ~yWUMev`#v)ynh+!cl6Kj@aCsRT<*fhi2og(9J1AAEvo<1d`Aa7;b_&GBQG5)N) z?4aX}YdY96K5dj5Y;DGmZ{l=kLpmc8t${DNd|u!#)xaI$B=zsx0e{r*ShZcx6hQ;(e6qOaHs z3mi1!BAEHlFzf_8F7YGqz(Ty}n9MPAzYrCHf4 zWJk$5zOql7o?_VKgkjGNnd}_?!!&Lr3;opQ>0G|(6s{`i8L4D`dN3GOM|7-ZkK><- z17mvdvnEa>$ETiC`&xI$s*_LTPxe#yj8vvvhto})1`!ig_b*BBCd=a(|41;pWu%to zWG~MuupGTAAy;(SfXhwm#?xt3r{&&^8} 
z9Nz%KtZo1-e|S`nE)qeu#@=+vNF|nLJUWmDuGLV0i~*)7~_^ZD+A=bbJ;~ z=Zw{Buq~&ivUA;l=cprUXWLF<(&V^_?gW#ZM5bJq3bI^BY!05)t)q`5W=r7PS(KT!gD zS%-E~x2@$Cx!Sg4-I=QrYei7WbtG+WVQy~7v}r5COO~Tdy!@%Qw3;m9s~|7cRu=ov zu4!~5@nlHT#Ne3Ro4GWvpaArj=iMsD`XM2={JT)B4S@(MX3 zu_Bogx56beo3`YV#K<8tT4rGV$dU}1Q(8oI_rWqw`9v0L3w|PT&pA7hnU_lbU;b4# zW!9Sf6$aOWD+hFKE}dwDx{O0}YdmUAe!-QlWpGg#K(ht-J3*p*RJTf)bzY28M&ExGS2)&7Wn=@|1I94@qgWz zx^c$Jm3irMC6JgMO}1<#jR->_2C~u`WvooB&r5G&zq~)treRd4ACtjCLJjTmnZ70= zyJMwX!kcwyAl2KT8?4?SBPTnVdpZ_gUTRgABd0(0qq=TP{ir^9G6KSw)X&ISCR#Xc z8U){3?9gKcv$90?CfDSrjTBj#{4bwdF*C07NwNq#o*4E?89B*yvc>aq@^cFRD~_LB zPb5WOh6htyj~`A>rj{mCfTBT?sk8|_(`-@GW~OyqX3erKW;~vOZNh~MxpQb<^O(et zpJSXlmT^{Moy@m{@cwz-qEeR-))0%=X8s3iLz(X zP9$TwFiqOLWGc_Dj=WW}vqic@)xBA5!?NrfoQPW3+K$LuBL58rg_#ac?-mkH`HR^@ z)}(jenX=er>k5-u8jGp}L(_u{-tkEYW@O5BVkQLq^5lQT+#N?wZ{_ibBj#LU9!+G! zr}C2NZXn*ylZo^)KSt^3!v4ah!kVFw_y5Xe}PN~}xdANxPoi9}7h zUs@5OJGT7%8B6lyK*00F=#|+yX(Mv-a`RUd##)NQC-srmj*J(pRc1lL=VZu9Lk@AN zbhpVs;o*65LL>tm``nkYBq`f=m8?lLnuP)L-(0t?Pu-i*+Xw>;wyn$M%w(jG$XmJO zxSF1vos*TfCO;_>AUB!cRst?P=%%B=7U>w1R5QA7`Y7mp;;+g(|F+Bb{C(xkeh4MT{6^lq;K@P9drp1>FV`F6#oZs= zr{uTo=DPje27y-twSgs)ryldaCBI3(N!~`+L~_rcajtbrPPu(#xCy||kAjReWKmJ87-tqoL1H_)!T&x$U+w6ku z^`Z|%-6eCT@Ir$PIxZd2fnMSY`2q`WjLMPVj<`3ES&$2pJC+kmNr?AJgKu2MmD^J@}>d3i+UsaSpQ@qb?~o>qmoNR zsb{5)l2fTvGcne=eqef0FX?XAzv|p_tj;tU%~DZ}a>A1Hf{Dp>(TEJ}QFOAjaZEd- zb2npM%f(Sl^{L&ha*?Z*E}ACBo))I&yjDuRy5 z*6ILUh~Ss)n^%w}bU6BKkh+|l^5yE33g}c|J3e(#Nx65sgMEjzyvfOxwfaIXAADNW! zilm&8EKKJ6Ae65k)~%>Z)U;}EX9Iq&8+q(T!c0Uskv6d~N9GP^Z(Y<;I{w$iOIZ;>b)8t~A zEJ$w>YZPr;)J|M+=ZKNa+JEwZa_jEG1sjGVz3_#p)|p^>26Yrd zVqm%Hy9#yI$D@5)Sy3X)cT6#(Hx;|`orU`A5)m0eh|2p+Vioz>#2y>D zEIo~*g{wqKE^@_*eJ|U;cjnBQ9t56MX;$pwqSkD^ef)aXbfh-~cRd7? z1&Knq*%|L=3aKc6CR(4ag|)bxQyzWSdT@n!tS993>!}^}X*9e>7b1~dNPYhWlv|g-@Gn`sNEOeIhI0=?nkY-w zCj`4%JwH8(*PZ6PguLz>)}?^B;V4_d7FCzH>0x6uY!e&Cg9#~MsmjltUZj@)i{E)z$zbz`U9%)G<@HV-#QwHy~ zOzsiszTIq|@%UyL{DF9&EkQ3UK zjD75~h3P68S6xt|i&(EIRNb9G6qI^?^b)YsTbt6Au*IM=1!R)EdR_c5h*M{%FCseY z=J0_7NZzpAO5dzBF#C)NT={mb+tE=X3;B`Q+<~ z8GWQMkpDh!1*-M#MK>eXD_i1<^75M}T);VtOe>I)1Z|@F!kehuKs{Z}=^KJYicmX!4L|RvA%2nZprm%b~?`7)tKw4-Ba9t&=K2GgW)7wH-fD7i^WH1 zY*E)cvJUY0i16f&jy9MxClH&Er?mF*&go;StvC%XFCQyZMiVnj4x>2B1eSIh`!L_d z1`%LcL19KK(Vt=-8~^!JWHPLpHj1KBkOGW7Dz*C?5IT;w%{)YlnBBUHE$h&_Xb50B zDO0J$Qvc8YKN!r+^|vMve%-&#!wJ!F zo38`LD2VgMzD{d^VfMvG>Ch`3@PA!68b<(@#d_?rhGFMzT^2e#;R@p~f#V!oy(o@m zs{6S1Y}~1`ovjE?wW|?MkH9PN2RySO#_d=vGNX8-m{XVc7zqVccR6$Qk|7y!86|z` zp+c3Rp3tsPY>ZVK2AM-%{}Ne0B=|(9jC8#VB&?&Rc#5-xN|RR38TOlxo+6GNI08* z3iWf0CclOH<%DYvv+~-7eO%&hFpOcJbBV|+yV%5n@`3XQrrUM1dudTYYqP9gw`NZd z_#{}WS6YP$Cg`iN{l40hWE%dgp==tI-85Si7Nhk>o&pn57a*az-kj zMYVYB2GRfbT3@tkepvJAntO=zAN2nXYyP+R!~RCUAN~Jv-}2kr|U5|HJsX z?oPx3`9`<2*9B*0SoXDY2*6sR_n9EZaH?F$!Fi zjdWBHY(%Awjs`gpx_d=S2C0WbQJw5^&q?k@9UWkBurW5WR9`#_d)kAzB*5?Pi!um4 zvx=IcT~oMb;)5JGS|8^C#(QUEkblNu0n4)e&UMaNacG9E2KYHrRT^UQB5jzHB$ zpbrGt1ZQm>JDuIfJv8%5=bX7}QHGsHcFe+{RgPccG;3!n**l0xsXi7uxnJVQNQUTd>%qvw zM@rOQO~DpVUHUNZTC@hT{<;A?tbZ(MymQl|KxoQMWR}?0%Nt^_o7f18BFXo1$aD0P zMd?F4xq(-4c#id#4MRi7!wH1>;wP0ZPP7KoDc)Vzi%u5n&l|v>`#J6C6X9lONZ3eQ zq$f5er^lza2ht;jCg?n{{y89FRRLp(o9qgde#*U8O`)7KT=&$<5}HZY|}B4WIu z))O)r5&|EVSVvdYrH9dVCd>488vOeW{c-+hjjTz>eWpdgaOo??^F_Aj!vTaQ#$L{z z$2dE2jw+eL!y?19#Z&z}tkyq`HEu6&PRC$S%iKGlSNyImA>B5>X*<&b2Bl-(C0Z1s zOucu8XI^@UsVOR-vd3 zI~Wa*30&9Dwg&X@Avl)hCk&ZPHl#0&p7_2zj-+n0t@(XOFG0A zMdLE-Cv961VMoQ(%2hVEhDIHOD)-Md&FTF#ZNeh^aT~|3Set;+59%QXCWIr2G0yOW zH_ItsefnA*b!X|&uxH1Y1<eHHaxr9z(bePQ5;rXw4p@v%OH?on)l=yevv>8oa^_fZ?kSXQCb{$7FI zBgmE@V>hL*geetu6zjWEfoZ%BHp&nJ)WrG}NIkW0ae6Oxi~5E2owm@CBOHoxHY{8N 
zshhnY<|tCcin{b3U<}<#Su8B3XWBw2Ukw$d7X;J0`KqvkvKxN84Gl*A0U$)d6;(nK z_QpglIWl8;`U?71l`nY<%!%3GumRakuOh>Qm=J;P!Ld<2nr-j_g@7bAq<6W4#x=r$ zzUf@li4+!`TUSG^V@uQBbdP)HA`AK()EQK}J}l^vBarSA@k1Q!SIA2pT{SJeQ~Fcc zY=xX=d|fYy^kKz5FTI2BrZ`+uTKP5oE*$B*g0sJX*JF`vt<*QPS z9^W1wlO7jgHj4UGSJ#~G;)X??$$DC9%Jx1H;M^XU%|n)%bsCIy8*^DTA%{bKg-b9_ zWuy1jQbPu@lbujcMQACH|8iSs7z>$zbDwj5F=lpps|v>4jM}{KmsohQG31swR!9|P z-p2G6SfZ;%OAmgrEz}R|MuYCdu9UWkT_H%vistlYYAQ&|t*H6b`Y}Oh-MuEzst4OU z;Q-+=1@MlyElPLDgcDZoyv-NZkM&05!%4^R#6>;zxaaKN=5#wU;{=5L$@OELnjtZ! zXe#~di2amFx)3D8W17;NXwvxP$H3gV#ARXK3S848d&? z!%=^aE)S;H!<|JAlfI|J9$lY^2)7KhjbS|)Z$-Lg_dr*RbNqC2U}<_C=N$Sr)Tf1? zE7V_~z#5vRvNMimFU*ZIMDHx$@^q`Stekz-wm!QaeK{T*0?HX!m%{K$RwF00L-d)$ zEl>3>ORwdfl+QV>dIT9Z$-k5i#LG53i38kM)K|XgUI&nq@`D zk{pfI)?>s~!ddR>^ySR_4TXV6dgYWPo=m2a+avLjM8G+2j4-Vcv9MrH4ySmhnPPnN z6YIw_)0gR_)g>mA_A}1aAu#Xwm}hqS(sSqZr}>)x-xKOb0`*mR>(i^@98=_SXQ
X64ebcJ{jOG8` zdw&JZs#k;bi5$n(&m!Br` z-3U4FmXqo3SKg<*Z}x`07ka0Zy;S!3ve%U*$yV1`<|R7aiPBrK{GU$*`X3T?{&vr` zSOCe&qlT!e~Xngb_TXtB^M0biJ z*%n5%<8fkcd*6ycKJK#|$VXc<@d`E#I9>Pbe9ULzsG#eIH*=~mpIQB~uV34cAM#n) zIB^4=%6rAE4XHiM`Br!gos47J`S~dAb}?rb;oji#3k4J7Fc$;vu)u?hebzgdFUt@5 zEDW~__u4atY|K_4>pu{WkDU9PfFh5krsNO$ta0eqvi2aP@P57(`3P@TG-;Ncdi8(&V{LmOD{L&#JyPn#-BtPJ@9PvtC#<-X9M%{1~X18NMngkBRi{QhT<@@QJ zB5QK1i^1*saa-;U;|A2-u=C^)yARBK5~kf3F3unDS@q+rna4#rPlm5-=1eb%LHuY` z4gq9E-fP{uBp;@BlMKDvxafgo56gdNNVcACBw&#p0D66P%>K@dd|zRz>A>uX9HG1W z6ivFLaZbLMt}gCzbh6CWZccB2UUefpo|>QU@maHUm*p1tUPPjO2arx2WD@31!9|_pmB-9r60|LQyua_LNG5ST~6M zuISUiAJ%&p=6CaaCZTjQ@3T7<#cTtcH$5BQ1t;UaQuk!n{QMQ%*FEB(yQ#`a?52=o z#^>%=*h(+T?}8^3wffxvcQL_~0-GbF;-Nz9Qew$hFGkb|%CB#YlRVJmzd_Mt&1xM61Uy>&IFv=xI66SVuc%<~M4u7xoa= zAIAV76X49&eO-_!6_<9J$*)?*kl_lY(ngZ-^QzD@S4LQR{Ru*nC^4%(oV&9P8(bC_+oDb|$#1^<{g@{QO!*!L_h}v&h#WL6mkSQ^9@kFm3PA$o%{o z+JX&?axgqsN8`bba6+1U2+*XBRS2s%wR1-Pa_v2LhTFTqX5%ryk~#2)-OKWqv90Uo zDcvLA4s#HtJ1o!Knu;f4_;#cc{c$su6SJeMr{^zq?ng%n=i~~`gT#deOY~X!)w0~2 z5#}(HFR>ee_bZ%nYDZK46+UZ%YO@R8EjUjilu9Pg`Nh;s%U{B5DQ+UW;)k*MXX-H} z7Ufs5Fl6G%56_8*`Z}3yoSi2EyW(T9o^aSx;TKW$_}0mJvO!J2H+Lbc#Jo-UmGqx( zkKBe^a0ZO=Spuhr7|wd|K^9_#z+3NIxj26jchG}4rGM+}3P zD#5EeOu_i$z?AeD$2E!C%HXRCcTbX*S(W(`8maW(VfvTNq&NMW^X<_pojPvb06e zIlg&jzFGHcolA)0?#*SVpptJ%ei<*+B%}K8_fW653A-2@M0kFFDU)EDyb%BM{sSOt zBDU-jOB-2lULI<_ette!*z3mYY27Z|Q}hj}$14Q$OXz)Gou@nZLuMO4=fbie-or3N zhOm)BsI-1OE04SfVyO#5W+*V~c7SPd>ytCs6?yjB`9&N^zF*Hq)Q9Nix@<6)Fb)^E zWKjLKz+u8u@=db%<+l7tCy7ik9B$4NY^-%0dg66Mv3N4Dup@RrTrtNmw?J5D-)IWf za*k&`X@6=?zL7P=nSWu1=q1jDwNu?nE%^n^i=rpQuJF(>ilOpWKsnbR0)1(M14JM{ zpB-SF1KGPmNpK695qJv8d^^*7X?~v1n!HClll5zM1^1#-i#e!>dFa03A@Dche96Gx zz%KN1u?deS%uK>ESZ`UApUYreizUrdp|MmuaH{lkwa)-w{@^GHo_{2Lj{||R) zo_L2qCxS>6_th4?*MsZx9C1y%hVf87z{|8W{|`5lFDJBTCV?TCFmGIt5746G{O%s( zFk=J4GV*c2IV_TOa&mJZk27km>jc5f>K?;rq#&Wd3C3znQ+_s8kYbz`+1;a|#Lz}~ z?va4tt(};TB^ny!aOlxxO?kErwZNCwbGuQ%eICg4##5pf5uGElSRb~X9y24)u%Q-vywOQ3^D~&6tDexp0kFgqA{G-#kekrDf19!baG#BDMxhH$~Wh^*!vqV7JgnDEhYg?3u(G}%nULlqoj*k?)KHEa;5%94c zVeLN9S)ZReVTsclOwb{=QGvXIbz*2G`u{c753QOn*L%`$0jes$UwIV!|FxCVDxRzO zSjDX%04uQl|6ck1bOHTJdk65C7D z|A(Iid0YZ(i#o5qe;2-@cp|05O~k0} zUDceQ?6ZPFrmXOH=Ha;4v4c4N`|qZVX?{KpaPeMvJXOL`-3%NIuLF?=!QO)ddN!Cp zkEvVKGP+0bG3nhM;rufi=sFbXE1VcGxXgq#OY@VMJ9=y^{GK+;Q7Kw)oH>s|@7UvD z^v1;G3HM#zn6Kdi@%!S(y+AZcY~o<)eT|~EPF{)i?55hhpPH_Su5XHfhI?4hw=@rU zU%#xlf_NyB8wcuYJ>VRy?nt4cd&^kiXS$~5tC_C_yD-b5m54HXjc;1MimoZjE#1Q$ zWhgj(s(X6AlJDGAEG@;ia1h(T`U`+>=HJ{?ZG?(!~6+TYQk+d z0|FuxVq6*?73+AUoYfcRVTlbF7dyOi%2AF=)r8F$($6?$Qrg-M%ZeZ{m3O#1j&$E?47>F*iT0G}fT3@%{vM6^w z?+efN>eaod3M0VE>;BJvYr;n?~yGJiIW}xuc(hr7%8*tCS zLS=d`RahGGCZ>BZ#0tc;4`}~nb8dt$U0Fz$MkFy7V{nk_aPWXHV9O?;7|#Bpi=h{; zn3e+!6V%Eu*?bVFSjbsGrFq zc_h3v2VS}uPF@(TNEB~+XAbi20*j*2ZcZin(~+@3WHEwZ zh=wB|)g$30xgJ)$zLzsW#A-uyI}t7q!CDZ-y2Y!b0ukx#-UYc3Z>FHQ^u5gs#rlKm z2O{Gi)TZe~S1<<-T3W#%3 zcqL7qnGADLLCT7 z=7izgF)<9p_n$E1w>`wtXjD{g(~{fE!ejar=31{f zsmOT&An+cfiVz5adm8Aa`#7pz-ICkmvzCn?R@&X}HO}6{`rD$j7)} z3>E@nc<|wt9JuO)Xg-{}VvEV1>Ne0d1KEP@dvB6cd2p{a({g~T6T~`nW1)vjThZ1+zW6W*VK4CcJ6N#6=l97 z(}KAU`sEUbIdde(x{3G3hbOqv>rqZ;3V(n0^)m}|0IZIwCv73tQ(SzscXzB8K8lYB zXLfQP#tW4+<~DKb;#_sBwuOdd!4;zu-M(OXZX=yl3*ww|7nKgxrCR*eQ7^a@%WPfu` zU|9DM9RYyG5IZay>Myj)ajnxo;Yb6Lcf}<+5ZT&}!m>0wQRL%{)nGCl33Ns<3yI)O zq3*Y1BS}uiG5@W=FVT9`UR#&Doc*-0(=yWGu^?=@uGDUtkprTguto30l8wVwB)13) zZbt4>`BvVA2V2%oOi8c?NWg76DiNwyvv~i@ZwV`s1B`V}UzxbZa4Fa+W@LSFe7z*H zN>lCgDe5B?mxd7f(^H$;myqbIfH`N>_Qec!= z0sCwA)$FR-MqYq*HJ8?`tZ61Uz`UB-HFY)TlOLdrNP_?2e+hen=lnk*rr@`*EBJ!{ zQU7QB50NY2e*Xvj_xj&KzJS+}Gce=7ft&%u{)7HL|0~HGu+zVl?15{^9k9Z`)W6U_ 
zhx`Fk{geEa%-nzaUiSUL_X4&F&-%XW`-bn!z9+~faK`ru--EtmCa^Di)0$;#4-FE>w22d2MUna}IbJfpQKSPv* zC#oN=eyIAv>Z4>DxU2fk>f5SsA=kig^+5Ih>b+zeXs=#dy{fvId;@{%y6Q>Q<<(Zz z%T+H{Jy-QCIR~D`H{#){hsZi`wCdifyQ=Oa@4ziphl!LpK<0tHL`-O}T1)PM=BkBN z0rC+}sw%ItDqpUAk^BSCRz8D8#S>&8c&PG0FqM1BL2zf~ZI!oF9wrOHK;{0*y_MU^ zL$J1TRb_MKLNXE5k)g1>(jphZi(oI$Ry;#Cf+s2-CQspm6-O)Xt+=b=&WhVAZmBq2 zFf>lXs%dT5vZuEm{d_-VU@pJ{$lxa<P+nI)sl1#lhA(?x^gic(*87b2 zY3~!>hrJJZA0(IIz23XLcY1I0-r_y%9rg}*_j~t}(XidS*1O8v>|N*$c&vWLnZEIUee!@J7vEW54jma@ZT!({_y`^)y0Z70X!+Okz; z&1DPA0%dh&lgi4=tkRcDUo3sD^x4v9N}n!$qV(a?he{tTJz9D%`3~*_m*x4X`CX0VLB&5<98nWg=EQM>fP~N}euxqU7O{ zhe{qSIa+dW$z3IPk`wWklEWp#4p3FL-~O0=!i!0ZeZu~z(vR8?Dt*vCuJpM55v3oo zHBPVnfPKIIe!u-;r60DBDLrO?Na=^{qe_q3A5{85`vXcpVBe?oKKuPj-*3N9>HF;W zDt)j09;NTG?^SxQ{cff2w(n7TkA1h&yX|)=eV6@CrSG)gq4XX0+m*iEzDwy{_S=-c z&3>!Wx7u$}`WE}mO5be1N$H#HH!6Lj{RX9PuP^mX=YmA=-#L+Ks% zYm~mmeznq9+qWyd-M&rfZT787Z?#{g^i_6VY2MB$&DmL{Sv#XNW2cp-?OT-IV&AOv zX8R_kH`zBTz0tlw=?(VvO0Tz%C_Q42DIK#9D?MzFDjl^CDLrJTl&0(vr6YDyY0^$8 zP1x5dz0Mw1I&8<4#%*lpnIAEGNa>IrRT{Mil@8hml^(PsN+b4w(gC|)X}^6y=>a>e zG;H@N?X!E8_S!v4d+dV1! z)!w3Xi@jOtX1haahuyBU-QJ{hlf6;tMtg(O4R)K-HhaC&_4YcY>+Dvgt@c`_Ywa~k z*VvaUz1+S`>1Fn%N-wonD_w2BLg_2)OO#$>uTr|ozF6tS_DZEI?TeINWUo-V!oE=H zh4yl#%k37WEq1fgW_y{^W%g2~OYNZ2puI%t5__@I#r7hli|i()P4+^i3++awjrIbi z3+(wy=iBp?&a>w#oomlgI>&BM+F%Ei2JCvJ_4aI~v+Y?*XW28A&a`JJoncQ`I^CY8 zbedhKw9c+oT5C^LI@O+{bc%g}(hKa#N+;XrD?Q&nPw9E~B&Cz=8l^S1U#Z{rDfQXa zN~`TErB!yN(n`BRX@y;`lw$^^Ub{>whYLzeZI4n?)+#0Ru~OWZlp+u+wXA;$B|h{& zmHyNEhthvoe^>f<>u*Z`X8niK|FHh5^sm;-N?*4AqVzA;pOyaE`jgT>SuZJl$@-(x zKU#lK`UmUxN`G&?sPskacS?U}{Z{F3t=}m9jrD@k7pz|^{k8SH(&w#TDgBl8oYLp4 zUn>2j^$VrHuzs%e=hn}Z{>=KR(w|yCQHq7B(jQyTDt*@akjz4IV0~Zd z_pR?K{hsw*rQfx_qx3u0GfJPazOD4z*0+>?%lf9$Z(83_`VH&rO22M>P3hOHuPXhj z^|aEbt*LQ$E?pQ z{k-+4(nqb&DgB)FS*4$~9#Q&;^{~>1t+PtcTAxw+8SB$ZKW&{+ddB*c(ob2RRQgHl zw9?boLrNdAPANTQeM0FctdA@GxOGzLN$X=uKW3d!dcyjs(vMmXDt*v8uJpL|5v3oo z9#Hy#b-&X4tq&{xuysu7G3!H0KV%(MdX$LsGJij4eL(34toxMSN0f8<`}>KzFZ6xZ zdzHSIh~x76dx)ej^j@N?3w<~7$A#WQ%yFT26LDPVyNI7J^qoXB7y1t3j|+V}G24aS zMJ#inZzF2C(6eIr>QgucPLQ|X=7>y^IVdY#hOTCY`l zhjoY2*I2Jn`fBUdN^iGrS9+Uuo6=jYTa~`bdX>_=l~;uChYm@ZN=Z2~B4siuWg;Qv zb=OHbJS=5AE@dnx<EM;Gxl)b%D_Vh>@ z3Q4(tzm(TrE9EO+DdjcSNO|?uQeJhHl>7EcdF7Q-?%gZpo;_0T-Yw-7S4g>Qmz3Sz zQtsR-<&GUvZr?6tSC^FAwn^F9DdpC!Qf}EI<>t*&c63PD-Y(^)O;T>$DCLF?Qns~8 zxqiKr>()ux+A8JRwNkEGBjx3nOL^I4QeJwgl&e=u`HELadC4VGu39DK#TQGta;20P zT_oj-6;fV!p_I#)OWD#QWplHX%a%#Gbg7iVpp;9NNV#~il#3Qg+0-QE!i7>cHcGi* zft2&-OF3_zlym1wIcJWP4GmHT0#er3OF4VCl(S|@ITOL#JwuegATK}ty{{KrCEg2xb!rqeYCG90^YhDKV zd9LPJaG$4Zo&fcEsOCX1pL@ywcxTORHMfBG4A%^R_Ux_M4%V}_W)(=!Lb5>C)l8}> z2jzL${~{RAv;Jp5c%JY-48HRqxghWL-{rp(Z08nyNrwFc{{7%O+x_jJI;+SCxzHc* z*MaDi`z`RC7k$rx<~-wj8Z75w@D#n zIjA&hB1#9%fYO7eUunb~P O8dzaD!q5W^ePRT9;JOIq_o%US1LJ3rC!LqQt5tk zjnZq))kbEVRKX0OsK%^szD&2FW8$jvJE+ii9!y~1=W-DP$v?KV4< z?j)P5)Z1aYlx{cMly;d;rQ6I_rJdw;m3mvvW~Ez9htkcaU1^8eq_myf-%ayJ%mnprRoUn4g%gkz}mzq~7U2QH=`Uyb0ELVD=X;He|G%IZ}%ak^orAn8P4Oi~B)GSdN zG>erkF^iNgHcd(wkrP(xHJL`G3(W$hjb^^m1!kVo`DD43dh^U2rE^V#(m5uew87LX z4Uq3v>eZWBN@tsyN@tlFN@tqsN@tj9N~fDTrPEBU(mFF$X|0)}bSl|!<$hDlWTh9F z^Oa6E=P5nkOj3FtnQx`uB;!|FV|+^erdlb1xs+Cu4Oi+_nF^(qrd(--@hUAhWlFuK zRB4&3`UN zRr*)^Wu-6Me^L4u`_D@MZ2w8=pX`^EzGVMV=^yPsDE))|d!@g(UsU>{{X3<FYm=?nI+mHyg(Ug`7puay4EeopCg_AizG(*A|gU)Voa`g8kdN`GemROwIc zpD6u_{bQv+wx3n{toWf}zS8g8-&6WM`@2fNYkx=SckE}BK4X7d z>9_4~DgBoHO{L$ozoGOS_Scnu-Ts==ui0N!`c?aBrBB;mQTi48%Syj&e@W?=>@O<) zqWzT8r|d5%{eu0Z(kJaFls;iUuJm#HF{O{$pI7>M`%$Hj+MiSUIs3CpKWjgt^bz}E zr4QR@m7cXfqx3WOr5dRzGjqvnXek6} 
z{L6gBDE~5FGRnWq7me~S^ORBkWximPf0-wZ@-Op*QT}BfH_E@vV@COx`Mgp7Wga!k zzs%>1@-OpQqx{P}Vw8WGhmGpYkojUhVLqz#3!y2rSCWIR{B13kJ9&=yOqAj zyi4i5=ABC4ZQh~u9`kmkcbmJEzRSE#={wC^mA=EgMd{nko0Z;W-lX(xM)?<(>~GND z-(v1m`eyTbrEfB?Q~E~pTBUC=cPPEnyhiEk&8wBZ&fKo_wdOXZcbHq1zQ(*t>8nj% z>Fp+`^fr@KdaKDOeU(xEWpd^g$Nv8-R?Syyj@P7XHr7n_|HA*c{{jDN(bz8a*PxT# z@4Lyj&9~TBTK%o+j}iZWXZ1Xd|93yp|2I`NaCZG{8*8|5d<-%uVe-%viy`)BX>ydU5sc);7`UEwV&`$E~>8UbKt=?kS#l^!p>skF0n zisu*D|KH;o^Q`mKl>E5lINty5B{NFAteCUr4srl2w*N-vzq9uH?d$C>au5B>`bt3) zQyR?SfmX1maV;_0iPid_>}fc+B|O89Svh>r#tl*(%Z*k%sz)`<{>koO4yUum36>q9 zBlvo3#PkBgkwUx=Oyh76$3W$5sws#2+2mrN6Jp)D;qC(Kmx@ab@#dm>?-Lsr8oCT+2Z3OzbL8bv5T86M`#ZNRC}&d^~eSA(+XLQ1XZvC|PH^r{tQ%z{xR; z!JwG5FPN(id4suyv{gg2s5Ksc9}KsJBFV9W&8%2UiOH;ZEWK~VdAUZPRpa`?S>*Ez zPPt%FZUM%0<6(DgLI%dWN0vHvR;`$b;S1Iq#(xlRgSO>4@hFqZ@hME7q zaF4jmk-Ckc#~<=gcXU*g%+O4BD|K8kMjsGA&N-Sdj}jqb<{uDaZrd zOZF1SRwI@d-e)f%zVLCC!QMS5hmRN`!<|`e44#L5;y-s~C^d30L~>T^o7VHwa`=!G z9I=`C;%B4r+DOIKl*22mp@C+)&(-h5-g4Y0YpdguMHtH**2@zj+-=9>9E)lERD`2iESw9I^Y>3Ur1s5;PO{4FMR;|tjy)tdKhj3ZN zmaeN&KO%An44FV5jT zHD5WIa)#}0MHvkIg$Y2shwxf)j8G-(U1JV6DspS-(957YwI@d1!M+rOb*75uh$R2OTumj&h_yg;@#slC%ww? zR@dSju2c<0)<)JR2@?#D$23nOJFMd#qhiO()V;DUhac61#FFE+jc;lW$EgY4PVQGF zzF9f^rm(ft=`O28BBc$-I3{LhlSudtb z@~sk-WMuq<1mv_^?6=hC=)uCKBrig~#?~IJ7lSa=U`e&FB#%ajgw*rya!-X~InVz%7&qE$8ePYA`n}`Lhk}q>W zl)afRUDip5ZNn;h+z3`Y)SS2=yO(CgDavh~*251?d>CXM2Az4~=B^Z@?UUULv-nEE zB(zHytZfpdrE64jkifJgP(A);M7&7!duC;CU?CRw{B8Io!>&n9kvc3}{^IQQvKcrG zN^dn75w=9@lG`G^xSSMzgLBrjWO16BVE&EC@4yJFVBEI%zpyx7l*Pb$zQ%1-R-o7P zCPDQbQq$SJA+fJh3vE6)!4ftknvgTg<3KH7f+{c?1AD(xGg+PV2}vb`&@ML zifP&F#)Umul6sNP!#X{4qP|=w{zHVnQ0c~r&(o?wS0&EW z$&E|1_*RY2FEm{SIb@{F@9YAYJ!Mck6JG`WqarlbwuJh<>ge46A++J0^pDPafJvw5%HKg zS)8$)%tFp|gV*$?oH-Qqs(JshHM6q&bul`e&ayh=xCX$R#E&nW9H^J4=;ubBEFB`fL7EN61GKQCF^);~ zl!h@eC2_}^pc;v-Jk5_f>CwZxkgx$6iXCa&v$>-E=(f{{bKeKAS$Eoi-YWiwgYo_>L@PE$#R{x;? 
za{nCPpFsdV<9mZI?py7vtA3&SbJcIG4pp~Q`-lMWRMq`N0N7QvvT8cE>u0f4k5%rh zY_9lQ#n&o6RdG*6x?+7rUHS9npDKSV*5j9zpXdF9_i^tBz1Mqpc$b&`tL%GaXUpD< zO?R-Y3hV8sO5a-ATY3p7z@Mu^e^1G7Hl)SqnRq~1wKRDmh<^$#?(`#DI z1@<59r^p9z#BR6ic!B@nrzN`wIkvbn7rF##R^;>Eo%lQj+^w4EU8oADcLcL^&3v6P z&ia9O3m+3dyUs>}JFzGl;|YAZYHNupb=k7)6%!b`4$OfF{u!)!z=1HqO-Leu2PzqZ zNv|RD|BUP|o>27tB#R!KH9ktj$1u~Hv$&eoURGSck)a3?yP}fWj9ICOn8144>ZT$(QPumZa9(kZHf#@N`iRICy8%5XJ>afbBLSkDq^!msgVHs@t_w_ zVNn)0GXXtZ>5xo9sIRV=aLz9ko|^hBE@uU@mK`>;$2V$AZ02kV7>KW5$0lQ7VBpqJ zsX>%T7rVk$5CPRme&D+$h24WBDzgvvmkD9elUfjD;lzw(XCUsrQB7z6+c$|fVUtT675>H`cPx`QtIf| z?Y1^X2#Tn_HLiZd>mw`{ddQi5%~_nw#6?d3lP1O-4{O;L5YC;%RDwc8h*R$a_xSPh zEG}m=*0>Fju8#$Ag?5FZEs?|^>s2CT$NI(FUP6<5O6FwoLR;)keK0I8k^q~XxB;WN zk(JInb4q1h7WXqJnKE}bR3eL0P^SN>`s~Gg)gTY0rE-0LyuS^|2Ih!TiO*_tb|uYn z-4C^2h%-A9-ypf@Rk*?^W` zvP1>J7n(VG%D#DFb_I`d`Hw!PcOclYyR}Cm;owgW$8(Zho$Q*H1)n7qYoX0s#}0<% zA4ra`Aqy5erI3%uAJ29jlIwiU*%n0@bfj58s_u;k;6C>^Hv3ZRXy?pqGu*ML zR%@m8L?r4#VW?q6fXEiT&W-zZe|Y?ZdYwOJoo&i4^;s7d;T6eaC64k)A`&KDRKlPs zlic(tx@Kj;Y75wow3GN15-D9AHJM}c7G}X_o!GZBV~xbplYdN+)Pe9AbI~seKM1@2 z!ptn_tSYh8)T%us)Ap46dC<@8KvNb8TEbQ`Ui!5p80sSoG%EzRZ&~o5s-8it@Bq?GB>->nWS=en95evaKarB0I!Fk0Rr$MT{4w{ zVrTfnz1?rhF^I9_|MWu0gLQ z3&#anfLE4sVHepdsTqU51zEI}X{;Wn@3;maDdDDcA^uPzmXwf>C)O;_0tqlB$_YUgJ`gd{jE z@5$`7w-CbQ3gUu`4l18(b^V+P1+y2x=axBL$t>T(N*IpBP@Pz>pj7Ni0gjpmHC6eO z)>AXH0H@-wtxL1KT%q+Nz zq}i30!Hu`zX+0Y6ubooqv7Wcv8nTm|Whd=saQPb8CZI8c5u(bx=nTOu;OBTSHob+^ zS)Yja;th|G$t)-8ORc{N3FRfbZ)uh-ZN94{GQWv_EY9Fb$s&#J#WEqX&V-ysqpi-HThCb>>tV zAlwd{e=Iy28k685@@pq2M$TVM-39eo&>9)S!U6~}tB5!d>60K1#0_a*lm$zjaO$*$ zJ-(LJV`8NH^#~VPe7pdbf&5aVRqAgreFpEsGR0VLfk7#Ri8X(J)6aW>MA%mvml- z8N4|q853f_R}x*G)sPsBOSXm*yS`-4!Yt@1Ge`LnQ5rU<`atWzF8lgim`^{~O|s4` z%39JsnWXYww6BLJhLsQ$jY$TxBXbsf@V*rbGB?ZKCza_4DuY=#X@z!$21Df620!C< z?`v6*0eMw{R;IU9!ilgR`nVs}&KMp{|~F;&mlqv<{;e3X;JQ+orM z8yr?pXsSeEfXx!ma7209>3tVwKwX35K_1FowFP8I;j^r(t?yVh z->G?^<_>JUFT?`i1^;LLxB3rYt=){2{TJUC@v6Vxca5(J8-TA>f22BDy|U^bRZo%u zFj}>xYHH<6m0z#CzcO37sd7POWyLQlPLKm&PsP%T61>Te;y>Px$M{U|KfGVY0^oY@ zPH%%o0DM*1?y}m_pK$j7#?k|&8%n2oe&)F!Z|kUMgU4U;-I51NZZC$7;dh*Yij0XK!}};>CRl_JXByeWphN0V_!M8YIX*c7-im>und-4iF_4D zWW{+}G62Mms6-vciwY8*9bQ$aOA=9bLm&f52%j`{o~0Q;V(`B4iCqY#L=4IBcw8m< z#X*%XuFn7vFD{&&$V1Q@5G%>#*^3Ytth-=YW>~gCx1ID#;p}hx!@nN_&dY!k7w~WC zM?u>qnKB?dN<~F&9Y?iiubZBU=`xVTEm2>cYU>C&77N!kWPpT=$@0oK0}#r;bp*Rg z1{*G@Ewvt5kr5QQcoyye%e#P0(RSQlx>t#XV?pg1n0K+ZWu@ z7?NETJfmw~rbpM1Gy9F*5j#Zuc+h(%*8{T?k?`osh71_+WR@gzL>P5PY!KD4ml#PB z#!JisPVShN0RoozV!}=lK{`0)>}S`SYCI(uWq^MhoiIPn^}I5@qbt#grzwGn=o9@* zU~^(A8r!L^Wf{QXg4!z29U4xNoczC%J~U(JvDPJ-YiNa@(K#*c4WrYz>bC}6W8cD} zL=*Nr&^kMFwQiB_`r2EXM+dqd2d^Bq zaKNcK$s0m&O(2Ba2)t=?8U-M3adV8gQLNz>mp+m!8p|TzzR~4>Jv&#@*H0 zIqj0OVU7^Sr}b70E}{ha8fl@&M@for!RvJ4S#kKk>^Uux!Oy13^!%x1s}Meg`nB<<&@plL3C6kO)VJwwU`{NeL{Wr6jESM0-<^qX24jT*v?L;6{;3)%j{s@03fei zkO6Nk;LtM7s7f2w!{Au~?I}c1dPE+d!)<4p|?1 zA0l4!F2#_8MFhpk*UZXfRPMW^C9{q;6s_yz)?B+DdoE#p1zRiOZkirZ00AHsx zIFs0Yp2IMu&7?)LPG31K1N=H6a9exmP<&8%$f>T`8Bo^3>79%U$FXAwlft)&Jz7_F zNd`o9zRQ?adqj(d z$v*SxntLzGTnv{}qeSHlk{JO_1|kUd&5US%k=j%ra(g z5k;DKTQ`YqcS{DecJ9CTmD&n-MI#byRyM{YQ=UO3FFn(UWZa;(5{#aP&=eiGzIbq| zzgbc?F35n{PGBYgK4gI#`^D)~$E7Me1BhDBQB@bbh?pLjL-_#5+tz%cq z-7JgH$nt%iZ&n6mcFGKuQiXGEiVvo?uxDXa+JbG?<&!gjvXfm<68UXYyg%qMvom0? 
zpb=a~FB{LMc;69eTCl_{YA(HI^DV0~^9uaKc}2BPbvm;X{8GbRkE=XHV7f{5UpMIWPQ8#=caPcCjhQ*TRD*C`d5GC0sh~$fr13=r z+ehj`;m)S+pgA->)1VVVUsGP61vw%c5ggjLCeKkt95#zp!?^v{mM|x4B`^>G*eBbgN^S#=4rLVz?`RTpjY{kB_K!EA*T%NgQM>m5TntEaIk7kKU>!+E1*zWA8g8iMmvU_+{f5hg2axDNx*a2Vgxk5 zG8Zsg)x2CMm~7dDl2KBHd)fWY{YwcluXX0?H5q{EWvxy#>5=hDZvQ36|1i$f3*V@K z@w(vj42n{%!j;a0C9zCU)8w7n(VSuS7d2Vig^zdtw+h_dE0$#thyjtEF{MRO2XXHB zuRvr$+jd`=so^0-)wgiP#D_QwycL+n$vb^&@3f46oKXnu;ammd?Tg2jWaymQBBG(+ zO>{(I4zjfkdaG!_$sLO_z}VulplvQM-$=f;gqnjAznu9r64*eQj7JF>ZI)($vK^~_ zor4>R+OC2=OofQMV)5+yov?qg6 zRt+Uu#X_WreEG#l{DD=C84&GRMcZ$=ag_JRTOuq_4O>u2J>2vWxEILw1mg;+DWUJd zUL-5#W@N1J5%tHoZ`IrkOst3)nT^cBjUeIVN{bH#YyES5C12UNy*>lTE!dqfbm@ID zP}M_`VeFYl1R%};($*^8<;*MLfxyxjSz=7Xq=2u-lO+vI`{TlweW3%4IaTl8!z44fP{^kNG&Jm9Y zjpQDjlL72@07h=h(1d!}Q2>NdDvp$3R0$-#e{uR|?kb)F@?-1`hvOKUu$)9??Mn$L z6-jzMVrPvE)RYG4Ch6IDZL%g0peG>A_0yB<;)8>=9>1aP*@JV_fZfg;Id`V690T@g z!n=CCZ{UxJXptz_%sIg!1%ECfoA=c8 zb%GrbpF!(6Kwu~D)1hCa%2UpQN4XDknYP*Yh*U+#ayRK7Pt)ETq z5qtU4bR0$l{Gr#Em$#|=jRf9j@CyKEml*w^2iJRNc;=*IOg*3q8AxPaq^qIO&`_#C z&zqAPpW|fIJY^+F1BW5||WdZM{L1eaU{; zC20`g3A!|T85`TKM4YPfshyOpgQARrboN5z8$r1w?d6R0LAatTprcudPB(&d5GU{7 zqKc+;ggGhryY?NeL>-B56Ekz5hLpT9H^Lr>pyijO1qv>1A8qkoL{Of|4%#0^?IJ-q zIv#Do_POw9pE7_Q&1pd40*uV8aIfv+4#TNr7m2I`d`3A)X>L*SZp0R)4=^!{?A*O% zBdu2pE%EG9?H`7I<%MZr<0cms(yr9DNl(JG;Lz+G2Mz>w%gJ4$PhrR>tz2^&{CKYG zn{@6~t{JH$>XMtt83_5>tWO2gz{d?uvJ{-j7RQ8{H0be|Yt`WX<`vkF`0~rrJ=9+< zYo4h<_irtvICAmkt)Wq*74Sj_9LUq#*oU>)JD zn7izol?G3CJji4^8p#>X1Ynx+^%?u-`t-FlL1!X^%nk9maqzt$GB;E8+6&X5$A#pt zS_4H$ZZa6|M%5rq2*%~G4Tq;=9wuBcyf_Vltcg+Ft`NJ5Xnas?KM>ZDEyczM`(##` zb)qSKHMLzkaOW9)V&1bc8XprguTfNJdYK7-`l^{}uw;=5W$Czeg;gUll78>n#x(G8 z5l!KRLXsn39n6s7!h(i_4%z2V?Q2S3NliUoQvTA0j~0qVD?qZ;jFQIK1(6?EF*Usx zwU2Jnk!7R^tsZQsD7|2DdJkU|b~_@xA!o@yOw6O;dMdZ*fk|0i$&ne&X>j4W%tWW{ zhW!Yob2q(+bP-~N37bBKzZL`7;6q1#%(}Hc4K%C+B&!g9qL*~yJ{6o>S0j4#lJqV* zq?o>g8syRpCAMN$!agtVV(~-d1J&G8M_1LR!G3|}oLOgCS{;3?#LP(p^@`b_bgax3 zxe=CcR~!x{YklMZv#Uy4=BIbiYi`0MnX^=)f47Inq<{A?y`mfp)YUCbZ|7!3yg=&7 z7Cg~3(>aZmA^tPIyScCI3#*@hRe_(mxTf6gz)%uIKx zSjuqmHu8=F7_8U4ZxvEhNYGyO1L2ooZoK*0q0LSZ@$=SV2)1WmDOhadv zmy;V2nV(7=ozR~yIw{tKBsur^Ff~ns08ew*2s@c`fy;`}Phw=C^m&*v4@^yi{W=&j zEsnG0j(H~pY8fT4bmZQ diff --git a/data/candidates.db b/data/candidates.db deleted file mode 100644 index e69de29b..00000000 From bba8298e5559987a323fcf283be1acbbacff2c41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Mon, 15 Jul 2024 23:51:40 +0200 Subject: [PATCH 15/34] looks good --- benchmarks/sql/README.md | 37 ++++-- benchmarks/sql/__init__.py | 0 benchmarks/sql/bench.py | 106 ++++++++-------- benchmarks/sql/bench/evaluator.py | 53 ++++---- benchmarks/sql/bench/metrics/__init__.py | 13 +- benchmarks/sql/bench/metrics/base.py | 67 ++++++++++ benchmarks/sql/bench/metrics/iql.py | 21 ++++ benchmarks/sql/bench/metrics/sql.py | 21 ++++ benchmarks/sql/bench/pipelines/__init__.py | 12 +- benchmarks/sql/bench/pipelines/base.py | 2 +- .../bench/pipelines/{e2e.py => collection.py} | 53 ++++---- benchmarks/sql/bench/pipelines/iql.py | 96 -------------- benchmarks/sql/bench/pipelines/sql.py | 86 ------------- benchmarks/sql/bench/pipelines/view.py | 119 ++++++++++++++++++ benchmarks/sql/bench/utils.py | 22 ++++ benchmarks/sql/bench/views/__init__.py | 7 +- .../sql/config/component/collection.yaml | 1 + benchmarks/sql/config/component/iql-view.yaml | 1 + benchmarks/sql/config/component/sql-view.yaml | 1 + benchmarks/sql/config/config.yaml | 2 +- benchmarks/sql/config/data/superhero.yaml | 6 +- benchmarks/sql/config/llm/gpt.yaml | 2 +- 
benchmarks/sql/config/task/e2e.yaml | 1 - benchmarks/sql/config/task/iql.yaml | 1 - benchmarks/sql/config/task/sql.yaml | 1 - 25 files changed, 401 insertions(+), 330 deletions(-) delete mode 100644 benchmarks/sql/__init__.py create mode 100644 benchmarks/sql/bench/metrics/base.py rename benchmarks/sql/bench/pipelines/{e2e.py => collection.py} (64%) delete mode 100644 benchmarks/sql/bench/pipelines/iql.py delete mode 100644 benchmarks/sql/bench/pipelines/sql.py create mode 100644 benchmarks/sql/bench/pipelines/view.py create mode 100644 benchmarks/sql/config/component/collection.yaml create mode 100644 benchmarks/sql/config/component/iql-view.yaml create mode 100644 benchmarks/sql/config/component/sql-view.yaml delete mode 100644 benchmarks/sql/config/task/e2e.yaml delete mode 100644 benchmarks/sql/config/task/iql.yaml delete mode 100644 benchmarks/sql/config/task/sql.yaml diff --git a/benchmarks/sql/README.md b/benchmarks/sql/README.md index f3f39fb3..a4f2ac2d 100644 --- a/benchmarks/sql/README.md +++ b/benchmarks/sql/README.md @@ -1,34 +1,51 @@ # SQL benchmarks -This folder contains benchmarks for querying SQL databases with db-ally. This suite evaluates the following tasks: +This folder contains benchmarks for querying SQL databases with db-ally. This suite evaluates the following components: -- `E2E` - measures correctness of rows returned from the database by db-ally. -- `IQL` - measures correctness of IQL queries generated by structured views. -- `SQL` - measures correctness of SQL queries generated by freeform views. +- `COLLECTION` - measures correctness of SQL queries generated by the collection in a multi-view setup. +- `IQL-VIEW` - measures correctness of SQL queries generated by structured views. +- `SQL-VIEW` - measures correctness of SQL queries generated by freeform views. All benchmarks are run on a dev split of the [BIRD](https://bird-bench.github.io/) dataset. For now, only one configuration is available to run the suite against the `superhero` database. We plan to extend it to all databases in the set to cover all cases. -Any new PRs adding support for new databases from BIRD or SPIDER are welcome. +New PRs adding support for new databases from BIRD or SPIDER are welcome. 
## Run benchmarks +### Usage + Run the whole suite on the `superhero` database: ```bash -python bench.py task=iql,sql,e2e data=superhero +python bench.py --multirun component=iql-view,sql-view,collection data=superhero ``` You can also run each evaluation separately or in subgroups: ```bash -python bench.py task=e2e -python bench.py task=iql,sql +python bench.py component=iql-view +python bench.py --multirun component=iql-view,sql-view ``` Compare IQL generation performance on multiple LLMs: ```bash -python bench.py --multirun task=iql llm=gpt,claude +python bench.py --multirun component=iql-view llm=gpt,claude +``` + +### Log to Neptune + +Before running the suite with Neptune, configure the following environment variables: + +```bash +export NEPTUNE_API_TOKEN="API_TOKEN" +export NEPTUNE_PROJECT="WORKSPACE_NAME/PROJECT_NAME" +``` + +Export evaluation results to Neptune: + +```bash +python bench.py component=iql-view neptune=True ``` ## Run tests @@ -57,4 +74,4 @@ Evaluation dataset required fields: - `db_id` - database identifier -Additionaly, you need to create approprite structure and freeform view for downstream tasks \ No newline at end of file +Additionaly, you need to create approprite structure and freeform view for downstream components \ No newline at end of file diff --git a/benchmarks/sql/__init__.py b/benchmarks/sql/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/benchmarks/sql/bench.py b/benchmarks/sql/bench.py index b0db9c0c..44795be5 100644 --- a/benchmarks/sql/bench.py +++ b/benchmarks/sql/bench.py @@ -1,29 +1,27 @@ import asyncio import logging from enum import Enum -from typing import Callable, Dict +from pathlib import Path +from typing import Dict import hydra import neptune from bench.evaluator import Evaluator -from bench.metrics import ( - exact_match_iql, - exact_match_sql, - execution_accuracy, - invalid_iql, - invalid_sql, - unsupported_iql, - valid_efficiency_score, - valid_iql, - valid_sql, +from bench.metrics import ExactMatchIQL, ExactMatchSQL, MetricSet +from bench.pipelines import ( + CollectionEvaluationPipeline, + EvaluationPipeline, + IQLViewEvaluationPipeline, + SQLViewEvaluationPipeline, ) -from bench.pipelines import EndToEndEvaluationPipeline, EvaluationPipeline, IQLEvaluationPipeline, SQLEvaluationPipeline +from bench.utils import save from datasets import load_dataset from neptune.utils import stringify_unsupported from omegaconf import DictConfig logging.getLogger("LiteLLM").setLevel(logging.ERROR) logging.getLogger("httpx").setLevel(logging.ERROR) +log = logging.getLogger(__name__) class EvaluationType(Enum): @@ -31,47 +29,29 @@ class EvaluationType(Enum): Enum representing the type of evaluation. 
""" - E2E = "E2E" - SQL = "SQL" - IQL = "IQL" + COLLECTION = "COLLECTION" + IQL_VIEW = "IQL-VIEW" + SQL_VIEW = "SQL-VIEW" EVALUATION_PIPELINES: Dict[str, EvaluationPipeline] = { - EvaluationType.SQL.value: SQLEvaluationPipeline, - EvaluationType.IQL.value: IQLEvaluationPipeline, - EvaluationType.E2E.value: EndToEndEvaluationPipeline, + EvaluationType.COLLECTION.value: CollectionEvaluationPipeline, + EvaluationType.IQL_VIEW.value: IQLViewEvaluationPipeline, + EvaluationType.SQL_VIEW.value: SQLViewEvaluationPipeline, } -EVALUATION_METRICS: Dict[str, Callable] = { - EvaluationType.IQL.value: { - "em_iql": exact_match_iql, - "valid_iql": valid_iql, - "invalid_iql": invalid_iql, - "unsupported_iql": unsupported_iql, - "em_sql": exact_match_sql, - "valid_sql": valid_sql, - "invalid_sql": invalid_sql, - "ex": execution_accuracy, - "ves": valid_efficiency_score, - }, - EvaluationType.SQL.value: { - "em_sql": exact_match_sql, - "valid_sql": valid_sql, - "invalid_sql": invalid_sql, - "ex": execution_accuracy, - "ves": valid_efficiency_score, - }, - EvaluationType.E2E.value: { - "em_iql": exact_match_iql, - "valid_iql": valid_iql, - "invalid_iql": invalid_iql, - "unsupported_iql": unsupported_iql, - "em_sql": exact_match_iql, - "valid_sql": valid_sql, - "invalid_sql": invalid_sql, - "ex": execution_accuracy, - "ves": valid_efficiency_score, - }, +EVALUATION_METRICS: Dict[str, MetricSet] = { + EvaluationType.COLLECTION.value: MetricSet( + ExactMatchIQL, + ExactMatchSQL, + ), + EvaluationType.IQL_VIEW.value: MetricSet( + ExactMatchIQL, + ExactMatchSQL, + ), + EvaluationType.SQL_VIEW.value: MetricSet( + ExactMatchSQL, + ), } @@ -82,33 +62,49 @@ async def bench(config: DictConfig) -> None: Args: config: Hydra configuration. """ + log.info("Starting evaluation for component: %s.", config.component.type) + dataset = load_dataset(config.data.path, split=config.data.split) dataset = dataset.filter(lambda x: x["db_id"] in config.data.db_ids and x["difficulty"] in config.data.difficulties) - dataset = dataset.select(range(2)) + dataset = dataset.select(range(3)) - pipeline = EVALUATION_PIPELINES[config.task.type](config) - metrics = EVALUATION_METRICS[config.task.type] + pipeline = EVALUATION_PIPELINES[config.component.type](config) + metrics = EVALUATION_METRICS[config.component.type](config) - evaluator = Evaluator(task=config.task.type) + evaluator = Evaluator(config.component.type) results = await evaluator.compute( pipe=pipeline, data=dataset, metrics=metrics, ) + log.info("Evaluation finished. 
Saving results...") + + output_dir = Path(hydra.core.hydra_config.HydraConfig.get().runtime.output_dir) + metrics_file = output_dir / "metrics.json" + results_file = output_dir / "results.json" + + save(metrics_file, metrics=results["metrics"], time_perf=results["time_perf"]) + save(results_file, results=results["results"]) + + log.info("Evaluation results saved under directory: %s", output_dir) + if config.neptune: run = neptune.init_run() run["sys/tags"].add( [ - EvaluationType.SQL.value, - config.view_name, + config.component.type, + config.data.id, config.llm.model_name, - *config.db_ids, ] ) run["config"] = stringify_unsupported(config) + run["evaluation/results.json"].upload(results_file.as_posix()) + run["evaluation/metrics.json"].upload(metrics_file.as_posix()) run["evaluation/metrics"] = stringify_unsupported(results["metrics"]) + log.info("Evaluation results logged to neptune at %s", run.get_url()) + @hydra.main(config_path="config", config_name="config", version_base="1.3.2") def main(config: DictConfig) -> None: diff --git a/benchmarks/sql/bench/evaluator.py b/benchmarks/sql/bench/evaluator.py index 318cd8da..a5216bcb 100644 --- a/benchmarks/sql/bench/evaluator.py +++ b/benchmarks/sql/bench/evaluator.py @@ -1,11 +1,11 @@ import time +from dataclasses import asdict from typing import Any, Callable, Dict, List, Tuple from datasets import Dataset -from sqlalchemy import create_engine +from .metrics.base import MetricSet from .pipelines.base import EvaluationPipeline, EvaluationResult -from .utils import avarage_execution_time, execute_query class Evaluator: @@ -26,7 +26,7 @@ async def compute( self, pipe: Callable, data: Dataset, - metrics: Dict[str, Callable], + metrics: MetricSet, ) -> Dict[str, Any]: """ Compute the evaluation results for the given pipeline and data. @@ -39,18 +39,21 @@ async def compute( Returns: The evaluation results. """ - results, perf_results = await self.call_pipeline(pipe, data) - results = self.results_processor(results) - metrics = self.compute_metrics(metrics, results["results"]) + results, perf_results = await self._call_pipeline(pipe, data) + results = self._results_processor(results) + computed_metrics = self._compute_metrics(metrics, results["results"]) + results["results"] = [asdict(result) for result in results["results"]] result = {} - result.update(metrics) result.update(perf_results) + result.update(computed_metrics) result.update(results) return result - async def call_pipeline( - self, pipe: EvaluationPipeline, data: Dataset + async def _call_pipeline( + self, + pipe: EvaluationPipeline, + data: Dataset, ) -> Tuple[List[EvaluationResult], Dict[str, Any]]: """ Call the pipeline with the given data. @@ -67,7 +70,7 @@ async def call_pipeline( end_time = time.perf_counter() return pipe_output, self._compute_time_perf(start_time, end_time, len(pipe_output)) - def results_processor(self, results: List[EvaluationResult]) -> Dict[str, Any]: + def _results_processor(self, results: List[EvaluationResult]) -> Dict[str, Any]: """ Process the results. @@ -77,22 +80,9 @@ def results_processor(self, results: List[EvaluationResult]) -> Dict[str, Any]: Returns: The processed results. 
""" - for result in results: - if result.db_url is not None: - engine = create_engine(result.db_url) - if result.reference.sql is not None: - result.reference.results, _ = execute_query(result.reference.sql, engine) - result.reference.execution_time = avarage_execution_time(result.reference.sql, engine, 10) - - if result.prediction.sql is not None: - result.prediction.results, _ = execute_query(result.prediction.sql, engine) - result.prediction.execution_time = avarage_execution_time(result.prediction.sql, engine, 10) - - return { - "results": results, - } + return {"results": results} - def compute_metrics(self, metrics: Dict[str, Callable], results: List[EvaluationResult]) -> Dict[str, Any]: + def _compute_metrics(self, metrics: MetricSet, results: List[EvaluationResult]) -> Dict[str, Any]: """ Compute a metric using the given inputs. @@ -103,10 +93,9 @@ def compute_metrics(self, metrics: Dict[str, Callable], results: List[Evaluation Returns: The computed metric. """ - return {"metrics": {metric_name: metric(results) for metric_name, metric in metrics.items()}} + return {"metrics": metrics.compute(results)} - @staticmethod - def _compute_time_perf(start_time: float, end_time: float, num_samples: int) -> Dict[str, Any]: + def _compute_time_perf(self, start_time: float, end_time: float, num_samples: int) -> Dict[str, Any]: """ Compute the performance metrics. @@ -123,7 +112,9 @@ def _compute_time_perf(start_time: float, end_time: float, num_samples: int) -> latency_sample = 1.0 / throughput return { - "total_time_in_seconds": latency, - "samples_per_second": throughput, - "latency_in_seconds": latency_sample, + "time_perf": { + "total_time_in_seconds": latency, + "samples_per_second": throughput, + "latency_in_seconds": latency_sample, + }, } diff --git a/benchmarks/sql/bench/metrics/__init__.py b/benchmarks/sql/bench/metrics/__init__.py index 2e107a73..c43dcb6e 100644 --- a/benchmarks/sql/bench/metrics/__init__.py +++ b/benchmarks/sql/bench/metrics/__init__.py @@ -1,11 +1,12 @@ -from .iql import exact_match as exact_match_iql -from .iql import invalid_iql, unsupported_iql, valid_iql -from .sql import exact_match as exact_match_sql -from .sql import execution_accuracy, invalid_sql, valid_efficiency_score, valid_sql +from .base import Metric, MetricSet +from .iql import ExactMatchIQL, invalid_iql, unsupported_iql, valid_iql +from .sql import ExactMatchSQL, execution_accuracy, invalid_sql, valid_efficiency_score, valid_sql __all__ = [ - "exact_match_iql", - "exact_match_sql", + "Metric", + "MetricSet", + "ExactMatchIQL", + "ExactMatchSQL", "valid_iql", "valid_sql", "invalid_iql", diff --git a/benchmarks/sql/bench/metrics/base.py b/benchmarks/sql/bench/metrics/base.py new file mode 100644 index 00000000..b0d50f4d --- /dev/null +++ b/benchmarks/sql/bench/metrics/base.py @@ -0,0 +1,67 @@ +from abc import ABC, abstractmethod +from typing import Dict, List, Optional, Type, Union + +from typing_extensions import Self + +from ..pipelines.base import EvaluationResult + + +class Metric(ABC): + """ + Base class for metrics. + """ + + name: str = "Metric" + + def __init__(self, config: Optional[Dict] = None) -> None: + """ + Initializes the metric. + + Args: + config: The metric configuration. + """ + self.config = config or {} + + @abstractmethod + def compute(self, results: List[EvaluationResult]) -> Union[int, float]: + """ + Compute the metric. + + Args: + results: The evaluation results. + + Returns: + The computed metric. + """ + + +class MetricSet: + """ + Represents a set of metrics. 
+ """ + + def __init__(self, *metrics: List[Type[Metric]]) -> None: + """ + Initializes the metric set. + + Args: + metrics: The metrics. + """ + self._metrics = metrics + self.metrics: List[Metric] = [] + + def __call__(self, config: Dict) -> Self: + self.metrics = [metric(config) for metric in self._metrics] + return self + + def compute(self, results: List[EvaluationResult]) -> List[Union[int, float]]: + """ + Compute the metrics. + + Args: + results: The evaluation results. + + Returns: + The computed metrics. + """ + return {metric.name: metric.compute(results) for metric in self.metrics} diff --git a/benchmarks/sql/bench/metrics/iql.py b/benchmarks/sql/bench/metrics/iql.py index ab45c1cf..2293c200 100644 --- a/benchmarks/sql/bench/metrics/iql.py +++ b/benchmarks/sql/bench/metrics/iql.py @@ -3,6 +3,27 @@ from dbally.iql._exceptions import IQLError, IQLUnsupportedSyntaxError from ..pipelines.base import EvaluationResult +from .base import Metric + + +class ExactMatchIQL(Metric): + """ + Computes the ratio of predicated queries that are identical to the ground truth ones. + """ + + name: str = "EM_IQL" + + def compute(self, results: List[EvaluationResult]) -> float: + """ + Computes the ratio of predicated queries that are identical to the ground truth ones. + + Args: + results: List of evaluation results. + + Returns: + Ratio of predicated queries that are identical to the ground truth ones. + """ + return sum(result.prediction.iql == result.reference.iql for result in results) / len(results) def exact_match(results: List[EvaluationResult]) -> float: diff --git a/benchmarks/sql/bench/metrics/sql.py b/benchmarks/sql/bench/metrics/sql.py index 23bc27d0..343843ff 100644 --- a/benchmarks/sql/bench/metrics/sql.py +++ b/benchmarks/sql/bench/metrics/sql.py @@ -6,6 +6,27 @@ from dbally.iql._exceptions import IQLError, IQLUnsupportedSyntaxError from ..pipelines.base import EvaluationResult +from .base import Metric + + +class ExactMatchSQL(Metric): + """ + Computes the ratio of predicated queries that are identical to the ground truth ones. + """ + + name: str = "EM_SQL" + + def compute(self, results: List[EvaluationResult]) -> float: + """ + Computes the ratio of predicated queries that are identical to the ground truth ones. + + Args: + results: List of evaluation results. + + Returns: + Ratio of predicated queries that are identical to the ground truth ones. 
+ """ + return sum(result.prediction.sql == result.reference.sql for result in results) / len(results) def exact_match(results: List[EvaluationResult]) -> float: diff --git a/benchmarks/sql/bench/pipelines/__init__.py b/benchmarks/sql/bench/pipelines/__init__.py index 76feca79..1a97c186 100644 --- a/benchmarks/sql/bench/pipelines/__init__.py +++ b/benchmarks/sql/bench/pipelines/__init__.py @@ -1,12 +1,12 @@ from .base import EvaluationPipeline, EvaluationResult -from .e2e import EndToEndEvaluationPipeline -from .iql import IQLEvaluationPipeline -from .sql import SQLEvaluationPipeline +from .collection import CollectionEvaluationPipeline +from .view import IQLViewEvaluationPipeline, SQLViewEvaluationPipeline, ViewEvaluationPipeline __all__ = [ "EvaluationPipeline", - "EndToEndEvaluationPipeline", - "SQLEvaluationPipeline", - "IQLEvaluationPipeline", + "CollectionEvaluationPipeline", + "ViewEvaluationPipeline", + "IQLViewEvaluationPipeline", + "SQLViewEvaluationPipeline", "EvaluationResult", ] diff --git a/benchmarks/sql/bench/pipelines/base.py b/benchmarks/sql/bench/pipelines/base.py index 7fa85cb3..92320793 100644 --- a/benchmarks/sql/bench/pipelines/base.py +++ b/benchmarks/sql/bench/pipelines/base.py @@ -15,8 +15,8 @@ class ExecutionResult: Represents the result of a single query execution. """ - sql: Optional[str] = None iql: Optional[str] = None + sql: Optional[str] = None results: List[Dict[str, Any]] = field(default_factory=list) exception: Optional[Exception] = None execution_time: Optional[float] = None diff --git a/benchmarks/sql/bench/pipelines/e2e.py b/benchmarks/sql/bench/pipelines/collection.py similarity index 64% rename from benchmarks/sql/bench/pipelines/e2e.py rename to benchmarks/sql/bench/pipelines/collection.py index 84cb035a..11dce296 100644 --- a/benchmarks/sql/bench/pipelines/e2e.py +++ b/benchmarks/sql/bench/pipelines/collection.py @@ -9,13 +9,14 @@ from dbally.collection.exceptions import NoViewFoundError from dbally.iql._exceptions import IQLError from dbally.iql_generator.prompt import UnsupportedQueryError +from dbally.views.freeform.text2sql.exceptions import Text2SQLError from dbally.views.sqlalchemy_base import SqlAlchemyBaseView -from ..views import FREEFORM_VIEW_REGISTRY, STRUCTURED_VIEW_REGISTRY +from ..views import FREEFORM_VIEWS_REGISTRY, STRUCTURED_VIEWS_REGISTRY from .base import EvaluationPipeline, EvaluationResult, ExecutionResult -class EndToEndEvaluationPipeline(EvaluationPipeline): +class CollectionEvaluationPipeline(EvaluationPipeline): """ Pipeline for evaluating IQL predictions. """ @@ -46,22 +47,20 @@ def get_collection(self, config: Dict) -> Collection: Raises: ValueError: If the view name is not supported. """ - if not config.structured and not config.freeform: - raise ValueError("No structured and freeform views found in the configuration.") + if not config: + raise ValueError("No structured or freeform views found in the configuration.") collection = dbally.create_collection("bench", self.llm) + collection.n_retries = 0 - for view_name, db_url in config.structured.items(): - if view_cls := STRUCTURED_VIEW_REGISTRY.get(view_name): + for view_name, db_url in config.items(): + if view_cls := STRUCTURED_VIEWS_REGISTRY.get(view_name) or FREEFORM_VIEWS_REGISTRY.get(view_name): collection.add(view_cls, lambda: view_cls(create_engine(db_url))) # pylint: disable=cell-var-from-loop else: - raise ValueError(f"View {view_name} not supported. 
Available views: {STRUCTURED_VIEW_REGISTRY}.") - - for view_name, db_url in config.freeform.items(): - if view_cls := FREEFORM_VIEW_REGISTRY.get(view_name): - collection.add(view_cls, lambda: view_cls(create_engine(db_url))) # pylint: disable=cell-var-from-loop - else: - raise ValueError(f"View {view_name} not supported. Available views: {FREEFORM_VIEW_REGISTRY}.") + raise ValueError( + f"View {view_name} not supported. " + f"Available views: {list(STRUCTURED_VIEWS_REGISTRY) + list(FREEFORM_VIEWS_REGISTRY)}." + ) return collection @@ -75,9 +74,10 @@ async def __call__(self, dataset: Dataset) -> List[EvaluationResult]: Returns: The list of IQL predictions. """ + db_url = None results = [] - for data in tqdm(dataset, desc="E2E evaluation"): + for data in tqdm(dataset, desc="Evaluation"): try: result = await self.collection.ask( question=data["question"], @@ -86,11 +86,12 @@ async def __call__(self, dataset: Dataset) -> List[EvaluationResult]: ) except NoViewFoundError as exc: prediction = ExecutionResult(exception=exc) - db_url = None - except (IQLError, SyntaxError, UnsupportedQueryError) as exc: - query = "UNSUPPORTED_QUERY" if isinstance(exc, UnsupportedQueryError) else exc.source - prediction = ExecutionResult(iql=query, exception=exc) - db_url = None + except IQLError as exc: + prediction = ExecutionResult(iql=exc.source, exception=exc) + except UnsupportedQueryError as exc: + prediction = ExecutionResult(exception=exc) + except Text2SQLError as exc: + prediction = ExecutionResult(iql=exc.source, exception=exc) else: prediction = ExecutionResult( iql=result.context.get("iql", None), @@ -102,17 +103,17 @@ async def __call__(self, dataset: Dataset) -> List[EvaluationResult]: if isinstance(used_view, SqlAlchemyBaseView) else used_view._engine.url ) + reference = ExecutionResult( iql=data["iql"], sql=data["sql"], ) - results.append( - EvaluationResult( - question=data["question"], - reference=reference, - prediction=prediction, - db_url=db_url, - ), + result = EvaluationResult( + question=data["question"], + reference=reference, + prediction=prediction, + db_url=db_url, ) + results.append(result) return results diff --git a/benchmarks/sql/bench/pipelines/iql.py b/benchmarks/sql/bench/pipelines/iql.py deleted file mode 100644 index e75f5065..00000000 --- a/benchmarks/sql/bench/pipelines/iql.py +++ /dev/null @@ -1,96 +0,0 @@ -from typing import Dict, List - -from datasets import Dataset -from sqlalchemy import create_engine -from tqdm import tqdm - -from dbally.iql._exceptions import IQLError -from dbally.iql_generator.prompt import UnsupportedQueryError -from dbally.views.sqlalchemy_base import SqlAlchemyBaseView - -from ..views import STRUCTURED_VIEW_REGISTRY -from .base import EvaluationPipeline, EvaluationResult, ExecutionResult - - -class IQLEvaluationPipeline(EvaluationPipeline): - """ - Pipeline for evaluating IQL predictions. - """ - - def __init__(self, config: Dict) -> None: - """ - Constructs the pipeline for evaluating IQL predictions. - - Args: - config: The configuration for the pipeline. - - Raises: - ValueError: If the view name is not supported. - """ - self.llm = self.get_llm(config.llm) - self.view = self.get_view(config.data.views) - - def get_view(self, config: Dict) -> SqlAlchemyBaseView: - """ - Returns the view object based on the view name. - - Args: - config: The view configuration. - - Returns: - The view object. - - Raises: - ValueError: If the view name is not supported. 
- """ - if not config.structured: - raise ValueError("No structured views found in the configuration.") - - view_name, db_url = list(config.structured.items())[0] - if view_cls := STRUCTURED_VIEW_REGISTRY.get(view_name): - return view_cls(create_engine(db_url)) - - raise ValueError(f"View {view_name} not supported. Available views: {STRUCTURED_VIEW_REGISTRY}.") - - async def __call__(self, dataset: Dataset) -> List[EvaluationResult]: - """ - Runs the pipeline for evaluating IQL predictions. - - Args: - dataset: The dataset containing the questions and ground truth IQL queries. - - Returns: - The list of IQL predictions. - """ - results = [] - - for data in tqdm(dataset, desc="IQL evaluation"): - try: - result = await self.view.ask( - query=data["question"], - llm=self.llm, - n_retries=0, - dry_run=True, - ) - except (IQLError, UnsupportedQueryError) as exc: - query = "UNSUPPORTED_QUERY" if isinstance(exc, UnsupportedQueryError) else exc.source - prediction = ExecutionResult(iql=query, exception=exc) - else: - prediction = ExecutionResult( - iql=result.context.get("iql", None), - sql=result.context.get("sql", None), - ) - reference = ExecutionResult( - iql=data["iql"], - sql=data["sql"], - ) - results.append( - EvaluationResult( - db_url=self.view._sqlalchemy_engine.url, - question=data["question"], - reference=reference, - prediction=prediction, - ), - ) - - return results diff --git a/benchmarks/sql/bench/pipelines/sql.py b/benchmarks/sql/bench/pipelines/sql.py deleted file mode 100644 index f1b82b6d..00000000 --- a/benchmarks/sql/bench/pipelines/sql.py +++ /dev/null @@ -1,86 +0,0 @@ -from typing import Dict, List - -from datasets import Dataset -from sqlalchemy import create_engine -from tqdm import tqdm - -from dbally.views.freeform.text2sql.view import BaseText2SQLView - -from ..views import FREEFORM_VIEW_REGISTRY -from .base import EvaluationPipeline, EvaluationResult, ExecutionResult - - -class SQLEvaluationPipeline(EvaluationPipeline): - """ - Pipeline for evaluating SQL predictions. - """ - - def __init__(self, config: Dict) -> None: - """ - Constructs the pipeline for evaluating SQL predictions. - - Args: - config: The configuration for the pipeline. - - Raises: - ValueError: If the view name is not supported. - """ - self.view = self.get_view(config.data.views) - self.llm = self.get_llm(config.llm) - - def get_view(self, config: Dict) -> BaseText2SQLView: - """ - Returns the view object based on the view name. - - Args: - config: The view configuration. - - Returns: - The view object. - - Raises: - ValueError: If the view name is not supported - """ - if not config.freeform: - raise ValueError("No freeform views found in the configuration.") - - view_name, db_url = list(config.freeform.items())[0] - if view_cls := FREEFORM_VIEW_REGISTRY.get(view_name): - return view_cls(create_engine(db_url)) - - raise ValueError(f"View {view_name} not supported. Available views: {FREEFORM_VIEW_REGISTRY}.") - - async def __call__(self, dataset: Dataset) -> List[EvaluationResult]: - """ - Runs the pipeline for evaluating IQL predictions. - - Args: - dataset: The dataset containing the questions and ground truth IQL queries. - - Returns: - The list of IQL predictions. 
- """ - results = [] - - for data in tqdm(dataset, desc="SQL evaluation"): - result = await self.view.ask( - query=data["question"], - llm=self.llm, - n_retries=0, - dry_run=True, - ) - prediction = ExecutionResult(sql=result.context["sql"]) - reference = ExecutionResult( - iql=result.context.get("iql", None), - sql=result.context.get("sql", None), - ) - results.append( - EvaluationResult( - db_url=self.view._engine.url, - question=data["question"], - reference=reference, - prediction=prediction, - ), - ) - - return results diff --git a/benchmarks/sql/bench/pipelines/view.py b/benchmarks/sql/bench/pipelines/view.py new file mode 100644 index 00000000..4f529a44 --- /dev/null +++ b/benchmarks/sql/bench/pipelines/view.py @@ -0,0 +1,119 @@ +from abc import ABC +from typing import Dict, List + +from datasets import Dataset +from sqlalchemy import create_engine +from tqdm import tqdm + +from dbally.iql._exceptions import IQLError +from dbally.iql_generator.prompt import UnsupportedQueryError +from dbally.views.base import BaseView +from dbally.views.freeform.text2sql.exceptions import Text2SQLError + +from ..views import FREEFORM_VIEWS_REGISTRY, STRUCTURED_VIEWS_REGISTRY +from .base import EvaluationPipeline, EvaluationResult, ExecutionResult + + +class ViewEvaluationPipeline(EvaluationPipeline, ABC): + """ + Pipeline for evaluating views. + """ + + VIEWS_REGISTRY: Dict[str, BaseView] = {} + + def __init__(self, config: Dict) -> None: + """ + Constructs the pipeline for evaluating IQL predictions. + + Args: + config: The configuration for the pipeline. + + Raises: + ValueError: If the view name is not supported. + """ + self.llm = self.get_llm(config.llm) + self.view, self.db_url = self.get_view(config.data.views) + + def get_view(self, config: Dict) -> BaseView: + """ + Returns the view object based on the configuration. + + Args: + config: The view configuration. + + Returns: + The view object, and the database URL. + + Raises: + ValueError: If the view name is not supported. + """ + view_name, db_url = next( + ((view, db_url) for view, db_url in config.items() if view in self.VIEWS_REGISTRY), + (None, None), + ) + if not view_name: + raise ValueError(f"No views found in the configuration. Supported views: {list(self.VIEWS_REGISTRY)}.") + view_cls = self.VIEWS_REGISTRY[view_name] + return view_cls(create_engine(db_url)), db_url + + async def __call__(self, dataset: Dataset) -> List[EvaluationResult]: + """ + Runs the pipeline for evaluating IQL predictions. + + Args: + dataset: The dataset containing the questions and ground truth IQL queries. + + Returns: + The list of IQL predictions. + """ + results = [] + + for data in tqdm(dataset, desc="Evaluation"): + try: + result = await self.view.ask( + query=data["question"], + llm=self.llm, + n_retries=0, + dry_run=True, + ) + except IQLError as exc: + prediction = ExecutionResult(iql=exc.source, exception=exc) + except UnsupportedQueryError as exc: + prediction = ExecutionResult(exception=exc) + except Text2SQLError as exc: + prediction = ExecutionResult(exception=exc) + else: + prediction = ExecutionResult( + iql=result.context.get("iql", None), + sql=result.context.get("sql", None), + ) + + reference = ExecutionResult( + iql=data["iql"], + sql=data["sql"], + ) + result = EvaluationResult( + question=data["question"], + reference=reference, + prediction=prediction, + db_url=self.db_url, + ) + results.append(result) + + return results + + +class IQLViewEvaluationPipeline(ViewEvaluationPipeline): + """ + Pipeline for evaluating structured views. 
+ """ + + VIEWS_REGISTRY = STRUCTURED_VIEWS_REGISTRY + + +class SQLViewEvaluationPipeline(ViewEvaluationPipeline): + """ + Pipeline for evaluating freeform views. + """ + + VIEWS_REGISTRY = FREEFORM_VIEWS_REGISTRY diff --git a/benchmarks/sql/bench/utils.py b/benchmarks/sql/bench/utils.py index ef85e5a1..f00daaba 100644 --- a/benchmarks/sql/bench/utils.py +++ b/benchmarks/sql/bench/utils.py @@ -1,4 +1,8 @@ +import json +import sys import time +from datetime import datetime +from pathlib import Path from typing import Any, Dict, List, Tuple from sqlalchemy import Engine, text @@ -35,3 +39,21 @@ def avarage_execution_time(query: str, engine: Engine, n: int) -> float: The average execution time. """ return sum(execute_query(query, engine)[1] for _ in range(n)) / n + + +def save(file_path: Path, **data: Any) -> None: + """ + Save the data to a file. Add the current timestamp and Python version to the data. + + Args: + file_path: The path to the file. + data: The data to be saved. + """ + current_time = datetime.now() + + data["_timestamp"] = current_time.isoformat() + data["_python_version"] = sys.version + data["_interpreter_path"] = sys.executable + + with open(file_path, "w", encoding="utf-8") as file: + json.dump(data, file, indent=4) diff --git a/benchmarks/sql/bench/views/__init__.py b/benchmarks/sql/bench/views/__init__.py index 9a8488a0..40bb10c2 100644 --- a/benchmarks/sql/bench/views/__init__.py +++ b/benchmarks/sql/bench/views/__init__.py @@ -1,15 +1,14 @@ from typing import Dict, Type -from dbally.views.freeform.text2sql.view import BaseText2SQLView -from dbally.views.sqlalchemy_base import SqlAlchemyBaseView +from dbally.views.base import BaseView from .freeform.superhero import SuperheroFreeformView from .structured.superhero import SuperheroView -STRUCTURED_VIEW_REGISTRY: Dict[str, Type[SqlAlchemyBaseView]] = { +STRUCTURED_VIEWS_REGISTRY: Dict[str, Type[BaseView]] = { SuperheroView.__name__: SuperheroView, } -FREEFORM_VIEW_REGISTRY: Dict[str, Type[BaseText2SQLView]] = { +FREEFORM_VIEWS_REGISTRY: Dict[str, Type[BaseView]] = { SuperheroFreeformView.__name__: SuperheroFreeformView, } diff --git a/benchmarks/sql/config/component/collection.yaml b/benchmarks/sql/config/component/collection.yaml new file mode 100644 index 00000000..f1fad783 --- /dev/null +++ b/benchmarks/sql/config/component/collection.yaml @@ -0,0 +1 @@ +type: "COLLECTION" diff --git a/benchmarks/sql/config/component/iql-view.yaml b/benchmarks/sql/config/component/iql-view.yaml new file mode 100644 index 00000000..1885e690 --- /dev/null +++ b/benchmarks/sql/config/component/iql-view.yaml @@ -0,0 +1 @@ +type: "IQL-VIEW" diff --git a/benchmarks/sql/config/component/sql-view.yaml b/benchmarks/sql/config/component/sql-view.yaml new file mode 100644 index 00000000..d2dda1c6 --- /dev/null +++ b/benchmarks/sql/config/component/sql-view.yaml @@ -0,0 +1 @@ +type: "SQL-VIEW" diff --git a/benchmarks/sql/config/config.yaml b/benchmarks/sql/config/config.yaml index dc6784c3..f5088da7 100644 --- a/benchmarks/sql/config/config.yaml +++ b/benchmarks/sql/config/config.yaml @@ -1,5 +1,5 @@ defaults: - - task: iql + - component: iql-view - data: superhero - llm: gpt - _self_ diff --git a/benchmarks/sql/config/data/superhero.yaml b/benchmarks/sql/config/data/superhero.yaml index 7273d98d..5d81fe75 100644 --- a/benchmarks/sql/config/data/superhero.yaml +++ b/benchmarks/sql/config/data/superhero.yaml @@ -1,11 +1,9 @@ +id: "superhero" path: "micpst/bird-dev-iql" split: "dev" db_ids: ["superhero"] difficulties: ["simple"] -views: - 
structured: { +views: { "SuperheroView": "sqlite:///data/superhero.db", - } - freeform: { "SuperheroFreeformView": "sqlite:///data/superhero.db" } diff --git a/benchmarks/sql/config/llm/gpt.yaml b/benchmarks/sql/config/llm/gpt.yaml index 7c8fe609..eff838ef 100644 --- a/benchmarks/sql/config/llm/gpt.yaml +++ b/benchmarks/sql/config/llm/gpt.yaml @@ -1 +1 @@ -model_name: "gpt-4-turbo" +model_name: "gpt-3.5-turbo" diff --git a/benchmarks/sql/config/task/e2e.yaml b/benchmarks/sql/config/task/e2e.yaml deleted file mode 100644 index f8d43329..00000000 --- a/benchmarks/sql/config/task/e2e.yaml +++ /dev/null @@ -1 +0,0 @@ -type: "E2E" diff --git a/benchmarks/sql/config/task/iql.yaml b/benchmarks/sql/config/task/iql.yaml deleted file mode 100644 index 3d385f11..00000000 --- a/benchmarks/sql/config/task/iql.yaml +++ /dev/null @@ -1 +0,0 @@ -type: "IQL" diff --git a/benchmarks/sql/config/task/sql.yaml b/benchmarks/sql/config/task/sql.yaml deleted file mode 100644 index dfa246b1..00000000 --- a/benchmarks/sql/config/task/sql.yaml +++ /dev/null @@ -1 +0,0 @@ -type: "SQL" From 5beac0efe914e8ef3cc049f40234742f4d7c5aa2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Tue, 16 Jul 2024 10:09:44 +0200 Subject: [PATCH 16/34] update benchmarks --- benchmarks/sql/README.md | 15 ++- benchmarks/sql/bench.py | 40 ++++++- benchmarks/sql/bench/metrics/__init__.py | 14 +-- benchmarks/sql/bench/metrics/iql.py | 100 ++++++++++------- benchmarks/sql/bench/metrics/sql.py | 105 ------------------ benchmarks/sql/bench/pipelines/collection.py | 2 +- .../sql/bench/views/structured/superhero.py | 4 - 7 files changed, 114 insertions(+), 166 deletions(-) diff --git a/benchmarks/sql/README.md b/benchmarks/sql/README.md index a4f2ac2d..44ed3173 100644 --- a/benchmarks/sql/README.md +++ b/benchmarks/sql/README.md @@ -14,6 +14,8 @@ New PRs adding support for new databases from BIRD or SPIDER are welcome. ### Usage +Before starting, download the `superhero.sqlite` database file from [BIRD](https://bird-bench.github.io/) and change its extension to `*.db`, place it in the `data/` folder. + Run the whole suite on the `superhero` database: ```bash @@ -58,12 +60,16 @@ python -m pytest This suite computes following metrics: -- `exact_match` - ratio of predicated queries that are identical to the ground truth ones. -- `exact_match` - estimates the pass@k metric for code synthesis. +- `EM_IQL` - ratio of predicated IQL queries that are identical to the ground truth ones. +- `VAL_IQL` - ratio of valid IQL queries. +- `UNSUPP_IQL` - ratio of unsupported IQL queries. +- `HAL_IQL` - ratio of hallucinated IQL queries. +- `EM_SQL` - ratio of predicated SQL queries that are identical to the ground truth ones. +- ... ## Add new dataset -In order to run this suite against you own dataset, upload your dataset to [Hugging Face](https://huggingface.co) and make sure the data is in the format expected by the evaluation pipeline. +In order to run this suite against your own dataset, upload it to [Hugging Face](https://huggingface.co) and make sure the data is in the format expected by the evaluation pipeline. 
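For illustration only, a single record in such a dataset could look like the following sketch, reusing one of the superhero view's filters; the values are invented and only the field names, which the evaluation pipeline reads, are meaningful (the required fields are listed below):

```python
# Hypothetical dataset record (values made up); field names match what the
# evaluation pipeline reads: question, iql, sql, difficulty, db_id.
example_record = {
    "question": "Show all superheroes whose full name is Charles Chandler",
    "iql": 'filter_by_full_name("Charles Chandler")',
    "sql": "SELECT * FROM superhero WHERE full_name = 'Charles Chandler'",
    "difficulty": "simple",
    "db_id": "superhero",
}
```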
Evaluation dataset required fields: @@ -73,5 +79,4 @@ Evaluation dataset required fields: - `difficulty` - SQL code difficulty label - `db_id` - database identifier - -Additionaly, you need to create approprite structure and freeform view for downstream components \ No newline at end of file +In addition, add a database file in the `data/` folder and create a structure and freeform view in the `bench.views` module for evaluation. diff --git a/benchmarks/sql/bench.py b/benchmarks/sql/bench.py index 44795be5..e2558be7 100644 --- a/benchmarks/sql/bench.py +++ b/benchmarks/sql/bench.py @@ -7,7 +7,7 @@ import hydra import neptune from bench.evaluator import Evaluator -from bench.metrics import ExactMatchIQL, ExactMatchSQL, MetricSet +from bench.metrics import ExactMatchIQL, ExactMatchSQL, HallucinatedIQL, MetricSet, UnsupportedIQL, ValidIQL from bench.pipelines import ( CollectionEvaluationPipeline, EvaluationPipeline, @@ -47,12 +47,46 @@ class EvaluationType(Enum): ), EvaluationType.IQL_VIEW.value: MetricSet( ExactMatchIQL, - ExactMatchSQL, + ValidIQL, + UnsupportedIQL, + HallucinatedIQL, ), EvaluationType.SQL_VIEW.value: MetricSet( ExactMatchSQL, ), } +# EVALUATION_METRICS: Dict[str, Callable] = { +# EvaluationType.IQL.value: { +# ExactMatchIQL.name: ExactMatchIQL, +# "em_iql": exact_match_iql, +# "valid_iql": valid_iql, +# "invalid_iql": invalid_iql, +# "unsupported_iql": unsupported_iql, +# "em_sql": exact_match_sql, +# "valid_sql": valid_sql, +# "invalid_sql": invalid_sql, +# "ex": execution_accuracy, +# "ves": valid_efficiency_score, +# }, +# EvaluationType.SQL.value: { +# "em_sql": exact_match_sql, +# "valid_sql": valid_sql, +# "invalid_sql": invalid_sql, +# "ex": execution_accuracy, +# "ves": valid_efficiency_score, +# }, +# EvaluationType.E2E.value: { +# "em_iql": exact_match_iql, +# "valid_iql": valid_iql, +# "invalid_iql": invalid_iql, +# "unsupported_iql": unsupported_iql, +# "em_sql": exact_match_iql, +# "valid_sql": valid_sql, +# "invalid_sql": invalid_sql, +# "ex": execution_accuracy, +# "ves": valid_efficiency_score, +# }, +# } async def bench(config: DictConfig) -> None: @@ -66,7 +100,7 @@ async def bench(config: DictConfig) -> None: dataset = load_dataset(config.data.path, split=config.data.split) dataset = dataset.filter(lambda x: x["db_id"] in config.data.db_ids and x["difficulty"] in config.data.difficulties) - dataset = dataset.select(range(3)) + dataset = dataset.select(range(10, 20)) pipeline = EVALUATION_PIPELINES[config.component.type](config) metrics = EVALUATION_METRICS[config.component.type](config) diff --git a/benchmarks/sql/bench/metrics/__init__.py b/benchmarks/sql/bench/metrics/__init__.py index c43dcb6e..32f4d418 100644 --- a/benchmarks/sql/bench/metrics/__init__.py +++ b/benchmarks/sql/bench/metrics/__init__.py @@ -1,17 +1,13 @@ from .base import Metric, MetricSet -from .iql import ExactMatchIQL, invalid_iql, unsupported_iql, valid_iql -from .sql import ExactMatchSQL, execution_accuracy, invalid_sql, valid_efficiency_score, valid_sql +from .iql import ExactMatchIQL, HallucinatedIQL, UnsupportedIQL, ValidIQL +from .sql import ExactMatchSQL __all__ = [ "Metric", "MetricSet", "ExactMatchIQL", "ExactMatchSQL", - "valid_iql", - "valid_sql", - "invalid_iql", - "invalid_sql", - "unsupported_iql", - "execution_accuracy", - "valid_efficiency_score", + "UnsupportedIQL", + "HallucinatedIQL", + "ValidIQL", ] diff --git a/benchmarks/sql/bench/metrics/iql.py b/benchmarks/sql/bench/metrics/iql.py index 2293c200..c2b56fb9 100644 --- a/benchmarks/sql/bench/metrics/iql.py +++ 
b/benchmarks/sql/bench/metrics/iql.py @@ -1,6 +1,7 @@ from typing import List -from dbally.iql._exceptions import IQLError, IQLUnsupportedSyntaxError +from dbally.iql._exceptions import IQLError, IQLFunctionNotExists +from dbally.iql_generator.prompt import UnsupportedQueryError from ..pipelines.base import EvaluationResult from .base import Metric @@ -8,7 +9,7 @@ class ExactMatchIQL(Metric): """ - Computes the ratio of predicated queries that are identical to the ground truth ones. + Ratio of predicated queries that are identical to the ground truth ones. """ name: str = "EM_IQL" @@ -26,57 +27,78 @@ def compute(self, results: List[EvaluationResult]) -> float: return sum(result.prediction.iql == result.reference.iql for result in results) / len(results) -def exact_match(results: List[EvaluationResult]) -> float: +class ValidIQL(Metric): + """ + Ratio of valid IQL queries. """ - Computes the ratio of predicated queries that are identical to the ground truth ones. - Args: - results: List of evaluation results. + name: str = "VAL_IQL" - Returns: - Ratio of predicated queries that are identical to the ground truth ones. - """ - return sum(result.prediction.iql == result.reference.iql for result in results) / len(results) + def compute(self, results: List[EvaluationResult]) -> float: + """ + Calculates the ratio of valid IQL queries for a given results. + Args: + results: List of evaluation results. -def valid_iql(results: List[EvaluationResult]) -> float: - """ - Calculates the ratio of valid IQL queries for a given results. + Returns: + Valid IQL queries ratio. + """ + supported_queries = [result for result in results if result.prediction.iql is not None] + if not supported_queries: + return 0.0 + return sum(not isinstance(result.prediction.exception, IQLError) for result in supported_queries) / len( + supported_queries + ) - Args: - results: List of evaluation results. - Returns: - Valid IQL queries ratio. +class UnsupportedIQL(Metric): + """ + Ratio of unsupported IQL queries. """ - return sum( - not isinstance(result.prediction.exception, (IQLError, IQLUnsupportedSyntaxError, SyntaxError)) - for result in results - ) / len(results) + name: str = "UNSUPP_IQL" -def invalid_iql(results: List[EvaluationResult]) -> float: - """ - Calculates the ratio of invalid IQL queries for a given results. + def compute(self, results: List[EvaluationResult]) -> float: + """ + Calculates the ratio of unsupported queries for a given results. - Args: - results: List of evaluation results. + Args: + results: List of evaluation results. - Returns: - Invalid IQL queries ratio. + Returns: + Unsupported queries ratio. + """ + iql_queries = [ + result + for result in results + if result.prediction.iql is not None or isinstance(result.prediction.exception, UnsupportedQueryError) + ] + if not iql_queries: + return 0.0 + return sum(isinstance(result.prediction.exception, UnsupportedQueryError) for result in iql_queries) / len( + iql_queries + ) + + +class HallucinatedIQL(Metric): + """ + Ratio of hallucinated IQL queries. """ - return sum(isinstance(result.prediction.exception, (IQLError, SyntaxError)) for result in results) / len(results) - + name: str = "HAL_IQL" -def unsupported_iql(results: List[EvaluationResult]) -> float: - """ - Calculates the ratio of unsupported queries for a given results. + def compute(self, results: List[EvaluationResult]) -> float: + """ + Calculates the ratio of hallucinated queries for a given results. - Args: - results: List of evaluation results. 
+ Args: + results: List of evaluation results. - Returns: - Unsupported queries ratio. - """ - return sum(isinstance(result.prediction.exception, IQLUnsupportedSyntaxError) for result in results) / len(results) + Returns: + Hallucinated queries ratio. + """ + supported_queries = [result for result in results if result.prediction.iql is not None] + if not supported_queries: + return 0.0 + return sum(isinstance(result, IQLFunctionNotExists) for result in supported_queries) / len(supported_queries) diff --git a/benchmarks/sql/bench/metrics/sql.py b/benchmarks/sql/bench/metrics/sql.py index 343843ff..43b82b0a 100644 --- a/benchmarks/sql/bench/metrics/sql.py +++ b/benchmarks/sql/bench/metrics/sql.py @@ -1,10 +1,5 @@ from typing import List -import pandas as pd -from sqlalchemy.exc import SQLAlchemyError - -from dbally.iql._exceptions import IQLError, IQLUnsupportedSyntaxError - from ..pipelines.base import EvaluationResult from .base import Metric @@ -27,103 +22,3 @@ def compute(self, results: List[EvaluationResult]) -> float: Ratio of predicated queries that are identical to the ground truth ones. """ return sum(result.prediction.sql == result.reference.sql for result in results) / len(results) - - -def exact_match(results: List[EvaluationResult]) -> float: - """ - Computes the ratio of predicated queries that are identical to the ground truth ones. - - Args: - results: List of evaluation results. - - Returns: - Ratio of predicated queries that are identical to the ground truth ones. - """ - return sum(result.prediction.sql == result.reference.sql for result in results) / len(results) - - -def valid_sql(results: List[EvaluationResult]) -> float: - """ - Calculates the ratio of valid SQL queries for a given results. - - Args: - results: List of evaluation results. - - Returns: - Valid IQL ratio. - """ - return sum( - not isinstance(result.prediction.exception, (IQLError, IQLUnsupportedSyntaxError, SyntaxError, SQLAlchemyError)) - for result in results - ) / len(results) - - -def invalid_sql(results: List[EvaluationResult]) -> float: - """ - Calculates the ratio of valid SQL queries for a given results. - - Args: - results: List of evaluation results. - - Returns: - Invalid IQL ratio. - """ - - return sum( - isinstance(result.prediction.exception, (IQLError, IQLUnsupportedSyntaxError, SyntaxError, SQLAlchemyError)) - for result in results - ) / len(results) - - -def _execution_accuracy(result: EvaluationResult) -> bool: - reference = pd.DataFrame(result.reference.results) - prediction = pd.DataFrame(result.prediction.results) - - # If filtering works correctly, the number of rows will be the same - # TODO: Sometimes a different number of rows is okay, e.g. if df has aggregated values that are expanded in gt - if reference.shape[0] != prediction.shape[0]: - return False - # Returned view may have the same columns, or more columns than the ground truth - if not reference.columns.isin(prediction.columns).all(): - return False - # Check if dataframe equality, disregarding indexing and order - # commented out way is also ok but slower. 
Leaving it here just in case - # return df_gt.merge(df[df_gt.columns], how='outer', on=df_gt.columns.tolist(), - # indicator='indicator').indicator.drop_duplicates().values.tolist() == ['both'] - prediction = prediction[reference.columns].sort_values(by=reference.columns.tolist()).reset_index(drop=True) - reference = reference.sort_values(by=reference.columns.tolist()).reset_index(drop=True) - return prediction.equals(reference) - - -def execution_accuracy(results: List[EvaluationResult]) -> float: - """ - Calculates execution accuracy score i.e. the proportion of examples in the evaluation set for - which the executed results of both the predicted and ground-truth SQLs are identical. - - Args: - results: List of evaluation results. - - Returns: - Execution accuracy score. - """ - return sum(_execution_accuracy(result) for result in results) / len(results) - - -def _valid_efficiency_score(result: EvaluationResult) -> float: - if _execution_accuracy(result) is False: - return 0 - return (result.reference.execution_time_ns / result.prediction.execution_time_ns) ** 0.5 - - -def valid_efficiency_score(results: List[EvaluationResult]) -> float: - """ - Calculates valid efficiency score that measures the efficiency of valid SQLs generated - by models. More details about this metric can be found here: https://arxiv.org/pdf/2305.03111.pdf. - - Args: - results: List of evaluation results. - - Returns: - Valid efficiency score. - """ - return sum(_valid_efficiency_score(result) for result in results) / len(results) diff --git a/benchmarks/sql/bench/pipelines/collection.py b/benchmarks/sql/bench/pipelines/collection.py index 11dce296..088f9c06 100644 --- a/benchmarks/sql/bench/pipelines/collection.py +++ b/benchmarks/sql/bench/pipelines/collection.py @@ -91,7 +91,7 @@ async def __call__(self, dataset: Dataset) -> List[EvaluationResult]: except UnsupportedQueryError as exc: prediction = ExecutionResult(exception=exc) except Text2SQLError as exc: - prediction = ExecutionResult(iql=exc.source, exception=exc) + prediction = ExecutionResult(exception=exc) else: prediction = ExecutionResult( iql=result.context.get("iql", None), diff --git a/benchmarks/sql/bench/views/structured/superhero.py b/benchmarks/sql/bench/views/structured/superhero.py index d7a20026..ee45a1fc 100644 --- a/benchmarks/sql/bench/views/structured/superhero.py +++ b/benchmarks/sql/bench/views/structured/superhero.py @@ -42,10 +42,6 @@ class SuperheroDBSchema: class SuperheroFilterMixin: - @decorators.view_filter() - def filter_by_name(self, name: str) -> sqlalchemy.ColumnElement: - return SuperheroModel.classes.superhero.name == name - @decorators.view_filter() def filter_by_full_name(self, full_name: str) -> sqlalchemy.ColumnElement: return SuperheroModel.classes.superhero.full_name == full_name From 460aa0f6528117bc01c234f3c571fb7dae405ad7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Tue, 16 Jul 2024 10:16:26 +0200 Subject: [PATCH 17/34] fix + cleanup --- benchmarks/sql/bench.py | 32 ------------------- .../sql/bench/views/structured/superhero.py | 3 -- 2 files changed, 35 deletions(-) diff --git a/benchmarks/sql/bench.py b/benchmarks/sql/bench.py index e2558be7..8ec7977c 100644 --- a/benchmarks/sql/bench.py +++ b/benchmarks/sql/bench.py @@ -55,38 +55,6 @@ class EvaluationType(Enum): ExactMatchSQL, ), } -# EVALUATION_METRICS: Dict[str, Callable] = { -# EvaluationType.IQL.value: { -# ExactMatchIQL.name: ExactMatchIQL, -# "em_iql": exact_match_iql, -# "valid_iql": valid_iql, -# "invalid_iql": invalid_iql, 
-# "unsupported_iql": unsupported_iql, -# "em_sql": exact_match_sql, -# "valid_sql": valid_sql, -# "invalid_sql": invalid_sql, -# "ex": execution_accuracy, -# "ves": valid_efficiency_score, -# }, -# EvaluationType.SQL.value: { -# "em_sql": exact_match_sql, -# "valid_sql": valid_sql, -# "invalid_sql": invalid_sql, -# "ex": execution_accuracy, -# "ves": valid_efficiency_score, -# }, -# EvaluationType.E2E.value: { -# "em_iql": exact_match_iql, -# "valid_iql": valid_iql, -# "invalid_iql": invalid_iql, -# "unsupported_iql": unsupported_iql, -# "em_sql": exact_match_iql, -# "valid_sql": valid_sql, -# "invalid_sql": invalid_sql, -# "ex": execution_accuracy, -# "ves": valid_efficiency_score, -# }, -# } async def bench(config: DictConfig) -> None: diff --git a/benchmarks/sql/bench/views/structured/superhero.py b/benchmarks/sql/bench/views/structured/superhero.py index ee45a1fc..8f47566b 100644 --- a/benchmarks/sql/bench/views/structured/superhero.py +++ b/benchmarks/sql/bench/views/structured/superhero.py @@ -134,9 +134,6 @@ class SuperheroView(SqlAlchemyBaseView, SuperheroFilterMixin): Main view, meant for finding superheroes meeting specific criteria """ - def __init__(self, sqlalchemy_engine: sqlalchemy.engine.Engine) -> None: - super().__init__(sqlalchemy_engine) - def get_select(self) -> sqlalchemy.Select: """ Creates the initial SqlAlchemy select object, which will be used to build the query. From 70d3e74d63833f813ad43a598d2570c18d5d5801 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Tue, 16 Jul 2024 10:17:50 +0200 Subject: [PATCH 18/34] make EventTracker optional --- src/dbally/views/base.py | 2 +- src/dbally/views/freeform/text2sql/view.py | 9 ++++++--- src/dbally/views/structured.py | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/dbally/views/base.py b/src/dbally/views/base.py index d5103884..66cbe5b4 100644 --- a/src/dbally/views/base.py +++ b/src/dbally/views/base.py @@ -22,7 +22,7 @@ async def ask( self, query: str, llm: LLM, - event_tracker: EventTracker, + event_tracker: Optional[EventTracker] = None, n_retries: int = 3, dry_run: bool = False, llm_options: Optional[LLMOptions] = None, diff --git a/src/dbally/views/freeform/text2sql/view.py b/src/dbally/views/freeform/text2sql/view.py index 7f24f00e..1dfa8f62 100644 --- a/src/dbally/views/freeform/text2sql/view.py +++ b/src/dbally/views/freeform/text2sql/view.py @@ -99,7 +99,7 @@ async def ask( self, query: str, llm: LLM, - event_tracker: EventTracker, + event_tracker: Optional[EventTracker] = None, n_retries: int = 3, dry_run: bool = False, llm_options: Optional[LLMOptions] = None, @@ -173,7 +173,7 @@ async def _generate_sql( self, conversation: PromptTemplate, llm: LLM, - event_tracker: EventTracker, + event_tracker: Optional[EventTracker] = None, llm_options: Optional[LLMOptions] = None, ) -> Tuple[str, List[SQLParameterOption], PromptTemplate]: response = await llm.generate_text( @@ -194,7 +194,10 @@ async def _generate_sql( return sql, param_objs, conversation async def _execute_sql( - self, sql: str, parameters: List[SQLParameterOption], event_tracker: EventTracker + self, + sql: str, + parameters: List[SQLParameterOption], + event_tracker: Optional[EventTracker] = None, ) -> Iterable: param_values = {} diff --git a/src/dbally/views/structured.py b/src/dbally/views/structured.py index d7b8d99b..5ba37d6a 100644 --- a/src/dbally/views/structured.py +++ b/src/dbally/views/structured.py @@ -36,7 +36,7 @@ async def ask( self, query: str, llm: LLM, - event_tracker: EventTracker, + 
event_tracker: Optional[EventTracker] = None, n_retries: int = 3, dry_run: bool = False, llm_options: Optional[LLMOptions] = None, From 351fb0d62c03d2174fa74e4581dd3d96486431f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Tue, 16 Jul 2024 11:57:03 +0200 Subject: [PATCH 19/34] fix eval results saving --- benchmarks/sql/bench/evaluator.py | 6 ++---- benchmarks/sql/bench/pipelines/base.py | 29 ++++++++++++++++++++++++-- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/benchmarks/sql/bench/evaluator.py b/benchmarks/sql/bench/evaluator.py index a5216bcb..740e58eb 100644 --- a/benchmarks/sql/bench/evaluator.py +++ b/benchmarks/sql/bench/evaluator.py @@ -1,5 +1,4 @@ import time -from dataclasses import asdict from typing import Any, Callable, Dict, List, Tuple from datasets import Dataset @@ -40,9 +39,8 @@ async def compute( The evaluation results. """ results, perf_results = await self._call_pipeline(pipe, data) + computed_metrics = self._compute_metrics(metrics, results) results = self._results_processor(results) - computed_metrics = self._compute_metrics(metrics, results["results"]) - results["results"] = [asdict(result) for result in results["results"]] result = {} result.update(perf_results) @@ -80,7 +78,7 @@ def _results_processor(self, results: List[EvaluationResult]) -> Dict[str, Any]: Returns: The processed results. """ - return {"results": results} + return {"results": [result.dict() for result in results]} def _compute_metrics(self, metrics: MetricSet, results: List[EvaluationResult]) -> Dict[str, Any]: """ diff --git a/benchmarks/sql/bench/pipelines/base.py b/benchmarks/sql/bench/pipelines/base.py index 92320793..4b91ac40 100644 --- a/benchmarks/sql/bench/pipelines/base.py +++ b/benchmarks/sql/bench/pipelines/base.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from dataclasses import asdict, dataclass, field +from dataclasses import dataclass, field from typing import Any, Dict, List, Optional from datasets import Dataset @@ -21,6 +21,19 @@ class ExecutionResult: exception: Optional[Exception] = None execution_time: Optional[float] = None + def dict(self) -> Dict[str, Any]: + """ + Returns the dictionary representation of the object. + + Returns: + The dictionary representation. + """ + return { + "iql": self.iql, + "sql": self.sql, + "execution_time": self.execution_time, + } + @dataclass class EvaluationResult: @@ -33,7 +46,19 @@ class EvaluationResult: prediction: ExecutionResult db_url: Optional[str] = None - dict = asdict + def dict(self) -> Dict[str, Any]: + """ + Returns the dictionary representation of the object. + + Returns: + The dictionary representation. 
+ """ + return { + "question": self.question, + "reference": self.reference.dict(), + "prediction": self.prediction.dict(), + "db_url": self.db_url, + } class EvaluationPipeline(ABC): From 6430576e447b2c97089e7cf25e2a5bc43afcf9e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Tue, 16 Jul 2024 12:21:43 +0200 Subject: [PATCH 20/34] add more metrics --- benchmarks/sql/bench.py | 23 +++++++- benchmarks/sql/bench/metrics/__init__.py | 5 +- benchmarks/sql/bench/metrics/iql.py | 8 +-- benchmarks/sql/bench/metrics/sql.py | 71 +++++++++++++++++++++++- 4 files changed, 99 insertions(+), 8 deletions(-) diff --git a/benchmarks/sql/bench.py b/benchmarks/sql/bench.py index 8ec7977c..c82508f9 100644 --- a/benchmarks/sql/bench.py +++ b/benchmarks/sql/bench.py @@ -7,7 +7,17 @@ import hydra import neptune from bench.evaluator import Evaluator -from bench.metrics import ExactMatchIQL, ExactMatchSQL, HallucinatedIQL, MetricSet, UnsupportedIQL, ValidIQL +from bench.metrics import ( + ExactMatchIQL, + ExactMatchSQL, + ExecutionAccuracy, + HallucinatedIQL, + MetricSet, + UnsupportedIQL, + ValidEfficiencyScore, + ValidIQL, + ValidSQL, +) from bench.pipelines import ( CollectionEvaluationPipeline, EvaluationPipeline, @@ -44,15 +54,26 @@ class EvaluationType(Enum): EvaluationType.COLLECTION.value: MetricSet( ExactMatchIQL, ExactMatchSQL, + ValidIQL, + ValidSQL, + UnsupportedIQL, + HallucinatedIQL, + ExecutionAccuracy, + ValidEfficiencyScore, ), EvaluationType.IQL_VIEW.value: MetricSet( ExactMatchIQL, ValidIQL, UnsupportedIQL, HallucinatedIQL, + ExecutionAccuracy, + ValidEfficiencyScore, ), EvaluationType.SQL_VIEW.value: MetricSet( ExactMatchSQL, + ValidSQL, + ExecutionAccuracy, + ValidEfficiencyScore, ), } diff --git a/benchmarks/sql/bench/metrics/__init__.py b/benchmarks/sql/bench/metrics/__init__.py index 32f4d418..714ee14f 100644 --- a/benchmarks/sql/bench/metrics/__init__.py +++ b/benchmarks/sql/bench/metrics/__init__.py @@ -1,6 +1,6 @@ from .base import Metric, MetricSet from .iql import ExactMatchIQL, HallucinatedIQL, UnsupportedIQL, ValidIQL -from .sql import ExactMatchSQL +from .sql import ExactMatchSQL, ExecutionAccuracy, ValidEfficiencyScore, ValidSQL __all__ = [ "Metric", @@ -10,4 +10,7 @@ "UnsupportedIQL", "HallucinatedIQL", "ValidIQL", + "ValidSQL", + "ExecutionAccuracy", + "ValidEfficiencyScore", ] diff --git a/benchmarks/sql/bench/metrics/iql.py b/benchmarks/sql/bench/metrics/iql.py index c2b56fb9..42ce81b2 100644 --- a/benchmarks/sql/bench/metrics/iql.py +++ b/benchmarks/sql/bench/metrics/iql.py @@ -16,7 +16,7 @@ class ExactMatchIQL(Metric): def compute(self, results: List[EvaluationResult]) -> float: """ - Computes the ratio of predicated queries that are identical to the ground truth ones. + Computes the exact match ratio. Args: results: List of evaluation results. @@ -36,7 +36,7 @@ class ValidIQL(Metric): def compute(self, results: List[EvaluationResult]) -> float: """ - Calculates the ratio of valid IQL queries for a given results. + Calculates the valid IQL ratio. Args: results: List of evaluation results. @@ -61,7 +61,7 @@ class UnsupportedIQL(Metric): def compute(self, results: List[EvaluationResult]) -> float: """ - Calculates the ratio of unsupported queries for a given results. + Calculates the unsupported IQL ratio. Args: results: List of evaluation results. @@ -90,7 +90,7 @@ class HallucinatedIQL(Metric): def compute(self, results: List[EvaluationResult]) -> float: """ - Calculates the ratio of hallucinated queries for a given results. 
+ Calculates the hallucinated IQL ratio. Args: results: List of evaluation results. diff --git a/benchmarks/sql/bench/metrics/sql.py b/benchmarks/sql/bench/metrics/sql.py index 43b82b0a..ce20ec9a 100644 --- a/benchmarks/sql/bench/metrics/sql.py +++ b/benchmarks/sql/bench/metrics/sql.py @@ -1,19 +1,21 @@ from typing import List +from dbally.views.freeform.text2sql.exceptions import Text2SQLError + from ..pipelines.base import EvaluationResult from .base import Metric class ExactMatchSQL(Metric): """ - Computes the ratio of predicated queries that are identical to the ground truth ones. + Ratio of predicated queries that are identical to the ground truth ones. """ name: str = "EM_SQL" def compute(self, results: List[EvaluationResult]) -> float: """ - Computes the ratio of predicated queries that are identical to the ground truth ones. + Computes the exact match ratio. Args: results: List of evaluation results. @@ -22,3 +24,68 @@ def compute(self, results: List[EvaluationResult]) -> float: Ratio of predicated queries that are identical to the ground truth ones. """ return sum(result.prediction.sql == result.reference.sql for result in results) / len(results) + + +class ValidSQL(Metric): + """ + Ratio of valid SQL queries for a given results. + """ + + name: str = "VAL_SQL" + + def compute(self, results: List[EvaluationResult]) -> float: + """ + Calculates the valid SQL ratio. + + Args: + results: List of evaluation results. + + Returns: + Valid IQL ratio. + """ + supported_queries = [result for result in results if result.prediction.sql is not None] + return sum(not isinstance(result.prediction.exception, Text2SQLError) for result in supported_queries) / len( + supported_queries + ) + + +class ExecutionAccuracy(Metric): + """ + Execution accuracy score i.e. the proportion of examples in the evaluation set for + which the executed results of both the predicted and ground-truth SQLs are identical. + """ + + name: str = "EX" + + def compute(self, results: List[EvaluationResult]) -> float: + """ + Calculates the execution accuracy score. + + Args: + results: List of evaluation results. + + Returns: + Execution accuracy score. + """ + return 0.0 + + +class ValidEfficiencyScore(Metric): + """ + Valid efficiency score measures the efficiency of valid SQLs generated + by models. More details about this metric can be found here: https://arxiv.org/pdf/2305.03111.pdf. + """ + + name: str = "VES" + + def compute(self, results: List[EvaluationResult]) -> float: + """ + Calculates the valid efficiency score. + + Args: + results: List of evaluation results. + + Returns: + Valid efficiency score. 
+ """ + return 0.0 From c91851f6494f69864df6818f9c2a7133324c5a21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Wed, 17 Jul 2024 08:44:36 +0200 Subject: [PATCH 21/34] almost done --- benchmarks/sql/README.md | 26 ++- benchmarks/sql/bench.py | 76 ++------ benchmarks/sql/bench/evaluator.py | 2 +- benchmarks/sql/bench/metrics/__init__.py | 3 +- benchmarks/sql/bench/metrics/base.py | 21 ++- benchmarks/sql/bench/metrics/iql.py | 70 ++++--- benchmarks/sql/bench/metrics/sql.py | 142 ++++++++++++--- benchmarks/sql/bench/pipeline.py | 172 ++++++++++++++++++ benchmarks/sql/bench/pipelines/__init__.py | 12 -- benchmarks/sql/bench/pipelines/base.py | 93 ---------- benchmarks/sql/bench/pipelines/collection.py | 119 ------------ benchmarks/sql/bench/pipelines/view.py | 119 ------------ benchmarks/sql/bench/utils.py | 38 +--- benchmarks/sql/bench/views/__init__.py | 5 +- .../sql/config/component/collection.yaml | 1 - benchmarks/sql/config/component/iql-view.yaml | 1 - benchmarks/sql/config/component/sql-view.yaml | 1 - benchmarks/sql/config/config.yaml | 2 +- benchmarks/sql/config/data/superhero.yaml | 7 +- benchmarks/sql/config/setup/iql.yaml | 3 + benchmarks/sql/config/setup/mixed.yaml | 3 + benchmarks/sql/config/setup/sql.yaml | 3 + .../setup/views/superhero/freeform.yaml | 3 + .../config/setup/views/superhero/mixed.yaml | 4 + .../setup/views/superhero/structured.yaml | 3 + benchmarks/sql/tests/test_evaluator.py | 93 ++++++++++ benchmarks/sql/tests/test_metrics.py | 42 +++++ 27 files changed, 535 insertions(+), 529 deletions(-) create mode 100644 benchmarks/sql/bench/pipeline.py delete mode 100644 benchmarks/sql/bench/pipelines/__init__.py delete mode 100644 benchmarks/sql/bench/pipelines/base.py delete mode 100644 benchmarks/sql/bench/pipelines/collection.py delete mode 100644 benchmarks/sql/bench/pipelines/view.py delete mode 100644 benchmarks/sql/config/component/collection.yaml delete mode 100644 benchmarks/sql/config/component/iql-view.yaml delete mode 100644 benchmarks/sql/config/component/sql-view.yaml create mode 100644 benchmarks/sql/config/setup/iql.yaml create mode 100644 benchmarks/sql/config/setup/mixed.yaml create mode 100644 benchmarks/sql/config/setup/sql.yaml create mode 100644 benchmarks/sql/config/setup/views/superhero/freeform.yaml create mode 100644 benchmarks/sql/config/setup/views/superhero/mixed.yaml create mode 100644 benchmarks/sql/config/setup/views/superhero/structured.yaml create mode 100644 benchmarks/sql/tests/test_evaluator.py create mode 100644 benchmarks/sql/tests/test_metrics.py diff --git a/benchmarks/sql/README.md b/benchmarks/sql/README.md index 44ed3173..5008c94c 100644 --- a/benchmarks/sql/README.md +++ b/benchmarks/sql/README.md @@ -1,10 +1,10 @@ # SQL benchmarks -This folder contains benchmarks for querying SQL databases with db-ally. This suite evaluates the following components: +This folder contains benchmarks for querying SQL databases with db-ally. This suite evaluates the following view setups: -- `COLLECTION` - measures correctness of SQL queries generated by the collection in a multi-view setup. -- `IQL-VIEW` - measures correctness of SQL queries generated by structured views. -- `SQL-VIEW` - measures correctness of SQL queries generated by freeform views. +- `structured` - measures correctness of SQL queries generated by the collection with structured views only. +- `freeform` - measures correctness of SQL queries generated by the collection with freeform views only. 
+- `mixed` - measures correctness of SQL queries generated by the collection with both structured and freeform views. All benchmarks are run on a dev split of the [BIRD](https://bird-bench.github.io/) dataset. For now, only one configuration is available to run the suite against the `superhero` database. We plan to extend it to all databases in the set to cover all cases. @@ -14,25 +14,31 @@ New PRs adding support for new databases from BIRD or SPIDER are welcome. ### Usage -Before starting, download the `superhero.sqlite` database file from [BIRD](https://bird-bench.github.io/) and change its extension to `*.db`, place it in the `data/` folder. +Before starting, download the `superhero.sqlite` database file from [BIRD](https://bird-bench.github.io/), change its extension to `*.db` and place it in the `data/` folder. Run the whole suite on the `superhero` database: ```bash -python bench.py --multirun component=iql-view,sql-view,collection data=superhero +python bench.py --multirun setup=iql,sql,mixed data=superhero ``` You can also run each evaluation separately or in subgroups: ```bash -python bench.py component=iql-view -python bench.py --multirun component=iql-view,sql-view +python bench.py setup=iql +python bench.py --multirun setup=iql,sql +``` + +Change views for the setup: + +```bash +python bench.py setup=iql setup/views=new-db/structured ``` Compare IQL generation performance on multiple LLMs: ```bash -python bench.py --multirun component=iql-view llm=gpt,claude +python bench.py --multirun setup=iql llm=gpt,claude ``` ### Log to Neptune @@ -47,7 +53,7 @@ export NEPTUNE_PROJECT="WORKSPACE_NAME/PROJECT_NAME" Export evaluation results to Neptune: ```bash -python bench.py component=iql-view neptune=True +python bench.py setup=iql neptune=True ``` ## Run tests diff --git a/benchmarks/sql/bench.py b/benchmarks/sql/bench.py index c82508f9..fe6979b2 100644 --- a/benchmarks/sql/bench.py +++ b/benchmarks/sql/bench.py @@ -1,8 +1,6 @@ import asyncio import logging -from enum import Enum from pathlib import Path -from typing import Dict import hydra import neptune @@ -14,16 +12,10 @@ HallucinatedIQL, MetricSet, UnsupportedIQL, - ValidEfficiencyScore, ValidIQL, ValidSQL, ) -from bench.pipelines import ( - CollectionEvaluationPipeline, - EvaluationPipeline, - IQLViewEvaluationPipeline, - SQLViewEvaluationPipeline, -) +from bench.pipeline import EvaluationPipeline from bench.utils import save from datasets import load_dataset from neptune.utils import stringify_unsupported @@ -34,24 +26,21 @@ log = logging.getLogger(__name__) -class EvaluationType(Enum): - """ - Enum representing the type of evaluation. +async def bench(config: DictConfig) -> None: """ + Function running evaluation for all datasets and evaluation tasks defined in hydra config. - COLLECTION = "COLLECTION" - IQL_VIEW = "IQL-VIEW" - SQL_VIEW = "SQL-VIEW" - + Args: + config: Hydra configuration. 
+ """ + log.info("Starting evaluation for views: %s.", config.setup.views) -EVALUATION_PIPELINES: Dict[str, EvaluationPipeline] = { - EvaluationType.COLLECTION.value: CollectionEvaluationPipeline, - EvaluationType.IQL_VIEW.value: IQLViewEvaluationPipeline, - EvaluationType.SQL_VIEW.value: SQLViewEvaluationPipeline, -} + dataset = load_dataset(config.data.path, split=config.data.split) + dataset = dataset.filter(lambda x: x["db_id"] == config.data.db_id and x["difficulty"] in config.data.difficulties) + dataset = dataset.select(range(10, 25)) -EVALUATION_METRICS: Dict[str, MetricSet] = { - EvaluationType.COLLECTION.value: MetricSet( + pipeline = EvaluationPipeline(config) + metrics = MetricSet( ExactMatchIQL, ExactMatchSQL, ValidIQL, @@ -59,42 +48,9 @@ class EvaluationType(Enum): UnsupportedIQL, HallucinatedIQL, ExecutionAccuracy, - ValidEfficiencyScore, - ), - EvaluationType.IQL_VIEW.value: MetricSet( - ExactMatchIQL, - ValidIQL, - UnsupportedIQL, - HallucinatedIQL, - ExecutionAccuracy, - ValidEfficiencyScore, - ), - EvaluationType.SQL_VIEW.value: MetricSet( - ExactMatchSQL, - ValidSQL, - ExecutionAccuracy, - ValidEfficiencyScore, - ), -} - - -async def bench(config: DictConfig) -> None: - """ - Function running evaluation for all datasets and evaluation tasks defined in hydra config. - - Args: - config: Hydra configuration. - """ - log.info("Starting evaluation for component: %s.", config.component.type) - - dataset = load_dataset(config.data.path, split=config.data.split) - dataset = dataset.filter(lambda x: x["db_id"] in config.data.db_ids and x["difficulty"] in config.data.difficulties) - dataset = dataset.select(range(10, 20)) - - pipeline = EVALUATION_PIPELINES[config.component.type](config) - metrics = EVALUATION_METRICS[config.component.type](config) + )(config) - evaluator = Evaluator(config.component.type) + evaluator = Evaluator(config.setup.name) results = await evaluator.compute( pipe=pipeline, data=dataset, @@ -116,7 +72,7 @@ async def bench(config: DictConfig) -> None: run = neptune.init_run() run["sys/tags"].add( [ - config.component.type, + *config.views, config.data.id, config.llm.model_name, ] @@ -129,7 +85,7 @@ async def bench(config: DictConfig) -> None: log.info("Evaluation results logged to neptune at %s", run.get_url()) -@hydra.main(config_path="config", config_name="config", version_base="1.3.2") +@hydra.main(config_path="config", config_name="config") def main(config: DictConfig) -> None: """ Function running evaluation for all datasets and evaluation tasks defined in hydra config. 
diff --git a/benchmarks/sql/bench/evaluator.py b/benchmarks/sql/bench/evaluator.py index 740e58eb..fc3a2bf6 100644 --- a/benchmarks/sql/bench/evaluator.py +++ b/benchmarks/sql/bench/evaluator.py @@ -4,7 +4,7 @@ from datasets import Dataset from .metrics.base import MetricSet -from .pipelines.base import EvaluationPipeline, EvaluationResult +from .pipeline import EvaluationPipeline, EvaluationResult class Evaluator: diff --git a/benchmarks/sql/bench/metrics/__init__.py b/benchmarks/sql/bench/metrics/__init__.py index 714ee14f..a6425b25 100644 --- a/benchmarks/sql/bench/metrics/__init__.py +++ b/benchmarks/sql/bench/metrics/__init__.py @@ -1,6 +1,6 @@ from .base import Metric, MetricSet from .iql import ExactMatchIQL, HallucinatedIQL, UnsupportedIQL, ValidIQL -from .sql import ExactMatchSQL, ExecutionAccuracy, ValidEfficiencyScore, ValidSQL +from .sql import ExactMatchSQL, ExecutionAccuracy, ValidSQL __all__ = [ "Metric", @@ -12,5 +12,4 @@ "ValidIQL", "ValidSQL", "ExecutionAccuracy", - "ValidEfficiencyScore", ] diff --git a/benchmarks/sql/bench/metrics/base.py b/benchmarks/sql/bench/metrics/base.py index b0d50f4d..2d4b095d 100644 --- a/benchmarks/sql/bench/metrics/base.py +++ b/benchmarks/sql/bench/metrics/base.py @@ -1,9 +1,9 @@ from abc import ABC, abstractmethod -from typing import Dict, List, Optional, Type, Union +from typing import Any, Dict, List, Optional, Type from typing_extensions import Self -from ..pipelines.base import EvaluationResult +from ..pipeline import EvaluationResult class Metric(ABC): @@ -11,8 +11,6 @@ class Metric(ABC): Base class for metrics. """ - name: str = "Metric" - def __init__(self, config: Optional[Dict] = None) -> None: """ Initializes the metric. @@ -23,7 +21,7 @@ def __init__(self, config: Optional[Dict] = None) -> None: self.config = config or {} @abstractmethod - def compute(self, results: List[EvaluationResult]) -> Union[int, float]: + def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: """ Compute the metric. @@ -51,10 +49,19 @@ def __init__(self, *metrics: List[Type[Metric]]) -> None: self.metrics: List[Metric] = [] def __call__(self, config: Dict) -> Self: + """ + Initializes the metrics. + + Args: + config: The configuration for the metrics. + + Returns: + The initialized metric set. + """ self.metrics = [metric(config) for metric in self._metrics] return self - def compute(self, results: List[EvaluationResult]) -> List[Union[int, float]]: + def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: """ Compute the metrics. @@ -64,4 +71,4 @@ def compute(self, results: List[EvaluationResult]) -> List[Union[int, float]]: Returns: The computed metrics. """ - return {metric.name: metric.compute(results) for metric in self.metrics} + return {name: value for metric in self.metrics for name, value in metric.compute(results).items()} diff --git a/benchmarks/sql/bench/metrics/iql.py b/benchmarks/sql/bench/metrics/iql.py index 42ce81b2..799b7fee 100644 --- a/benchmarks/sql/bench/metrics/iql.py +++ b/benchmarks/sql/bench/metrics/iql.py @@ -1,9 +1,10 @@ -from typing import List +from typing import Any, Dict, List +from dbally.collection.exceptions import NoViewFoundError from dbally.iql._exceptions import IQLError, IQLFunctionNotExists from dbally.iql_generator.prompt import UnsupportedQueryError -from ..pipelines.base import EvaluationResult +from ..pipeline import EvaluationResult from .base import Metric @@ -12,9 +13,7 @@ class ExactMatchIQL(Metric): Ratio of predicated queries that are identical to the ground truth ones. 
""" - name: str = "EM_IQL" - - def compute(self, results: List[EvaluationResult]) -> float: + def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: """ Computes the exact match ratio. @@ -24,7 +23,18 @@ def compute(self, results: List[EvaluationResult]) -> float: Returns: Ratio of predicated queries that are identical to the ground truth ones. """ - return sum(result.prediction.iql == result.reference.iql for result in results) / len(results) + iql_results = [ + result + for result in results + if result.prediction.iql is not None or isinstance(result.prediction.exception, NoViewFoundError) + ] + return { + "EM_IQL": ( + sum(result.prediction.iql == result.reference.iql for result in iql_results) / len(iql_results) + if iql_results + else 0.0 + ) + } class ValidIQL(Metric): @@ -32,9 +42,7 @@ class ValidIQL(Metric): Ratio of valid IQL queries. """ - name: str = "VAL_IQL" - - def compute(self, results: List[EvaluationResult]) -> float: + def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: """ Calculates the valid IQL ratio. @@ -45,11 +53,14 @@ def compute(self, results: List[EvaluationResult]) -> float: Valid IQL queries ratio. """ supported_queries = [result for result in results if result.prediction.iql is not None] - if not supported_queries: - return 0.0 - return sum(not isinstance(result.prediction.exception, IQLError) for result in supported_queries) / len( - supported_queries - ) + return { + "VAL_IQL": ( + sum(not isinstance(result.prediction.exception, IQLError) for result in supported_queries) + / len(supported_queries) + if supported_queries + else 0.0 + ) + } class UnsupportedIQL(Metric): @@ -57,9 +68,7 @@ class UnsupportedIQL(Metric): Ratio of unsupported IQL queries. """ - name: str = "UNSUPP_IQL" - - def compute(self, results: List[EvaluationResult]) -> float: + def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: """ Calculates the unsupported IQL ratio. @@ -74,11 +83,14 @@ def compute(self, results: List[EvaluationResult]) -> float: for result in results if result.prediction.iql is not None or isinstance(result.prediction.exception, UnsupportedQueryError) ] - if not iql_queries: - return 0.0 - return sum(isinstance(result.prediction.exception, UnsupportedQueryError) for result in iql_queries) / len( - iql_queries - ) + return { + "UNSUPP_IQL": ( + sum(isinstance(result.prediction.exception, UnsupportedQueryError) for result in iql_queries) + / len(iql_queries) + if iql_queries + else 0.0 + ) + } class HallucinatedIQL(Metric): @@ -86,9 +98,7 @@ class HallucinatedIQL(Metric): Ratio of hallucinated IQL queries. """ - name: str = "HAL_IQL" - - def compute(self, results: List[EvaluationResult]) -> float: + def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: """ Calculates the hallucinated IQL ratio. @@ -99,6 +109,10 @@ def compute(self, results: List[EvaluationResult]) -> float: Hallucinated queries ratio. 
""" supported_queries = [result for result in results if result.prediction.iql is not None] - if not supported_queries: - return 0.0 - return sum(isinstance(result, IQLFunctionNotExists) for result in supported_queries) / len(supported_queries) + return { + "HAL_IQL": ( + sum(isinstance(result, IQLFunctionNotExists) for result in supported_queries) / len(supported_queries) + if supported_queries + else 0.0 + ) + } diff --git a/benchmarks/sql/bench/metrics/sql.py b/benchmarks/sql/bench/metrics/sql.py index ce20ec9a..42058b8b 100644 --- a/benchmarks/sql/bench/metrics/sql.py +++ b/benchmarks/sql/bench/metrics/sql.py @@ -1,8 +1,13 @@ -from typing import List +import time +from typing import Any, Dict, List + +import pandas as pd +from sqlalchemy import create_engine, text +from sqlalchemy.exc import SQLAlchemyError from dbally.views.freeform.text2sql.exceptions import Text2SQLError -from ..pipelines.base import EvaluationResult +from ..pipeline import EvaluationResult from .base import Metric @@ -11,9 +16,7 @@ class ExactMatchSQL(Metric): Ratio of predicated queries that are identical to the ground truth ones. """ - name: str = "EM_SQL" - - def compute(self, results: List[EvaluationResult]) -> float: + def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: """ Computes the exact match ratio. @@ -23,7 +26,13 @@ def compute(self, results: List[EvaluationResult]) -> float: Returns: Ratio of predicated queries that are identical to the ground truth ones. """ - return sum(result.prediction.sql == result.reference.sql for result in results) / len(results) + return { + "EM_SQL": ( + sum(result.prediction.sql == result.reference.sql for result in results) / len(results) + if results + else 0.0 + ) + } class ValidSQL(Metric): @@ -31,9 +40,7 @@ class ValidSQL(Metric): Ratio of valid SQL queries for a given results. """ - name: str = "VAL_SQL" - - def compute(self, results: List[EvaluationResult]) -> float: + def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: """ Calculates the valid SQL ratio. @@ -44,48 +51,127 @@ def compute(self, results: List[EvaluationResult]) -> float: Valid IQL ratio. """ supported_queries = [result for result in results if result.prediction.sql is not None] - return sum(not isinstance(result.prediction.exception, Text2SQLError) for result in supported_queries) / len( - supported_queries - ) + return { + "VAL_SQL": ( + sum(not isinstance(result.prediction.exception, Text2SQLError) for result in supported_queries) + / len(supported_queries) + if supported_queries + else 0.0 + ) + } -class ExecutionAccuracy(Metric): +class _DBMixin: """ - Execution accuracy score i.e. the proportion of examples in the evaluation set for - which the executed results of both the predicted and ground-truth SQLs are identical. + Mixin class for database operations. """ - name: str = "EX" + def __init__(self, config: Dict, *args: Any, **kwargs: Any) -> None: + super().__init__(config, *args, **kwargs) + self.db = create_engine(config.data.db_url) - def compute(self, results: List[EvaluationResult]) -> float: + def _execute_query(self, query: str) -> List[Dict[str, Any]]: """ - Calculates the execution accuracy score. + Execute the given query on the database. Args: - results: List of evaluation results. + query: The query to be executed. Returns: - Execution accuracy score. + The query results. 
""" - return 0.0 + with self.db.connect() as connection: + rows = connection.execute(text(query)).fetchall() + return [dict(row._mapping) for row in rows] # pylint: disable=protected-access + def _avarage_execution_time(self, query: str, n: int = 100) -> float: + """ + Execute the given query on the database n times and return the average execution time. + + Args: + query: The query to be executed. + n: The number of times to execute the query. + + Returns: + The average execution time. + """ + total_time = 0 + for _ in range(n): + start_time = time.perf_counter() + self._execute_query(query) + total_time += time.perf_counter() - start_time + return total_time / n -class ValidEfficiencyScore(Metric): + +class ExecutionAccuracy(_DBMixin, Metric): """ + Execution accuracy score i.e. the proportion of examples in the evaluation set for + which the executed results of both the predicted and ground-truth SQLs are identical. + Valid efficiency score measures the efficiency of valid SQLs generated by models. More details about this metric can be found here: https://arxiv.org/pdf/2305.03111.pdf. """ - name: str = "VES" - - def compute(self, results: List[EvaluationResult]) -> float: + def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: """ - Calculates the valid efficiency score. + Calculates the execution accuracy score and valid efficiency score. Args: results: List of evaluation results. Returns: - Valid efficiency score. + Execution accuracy score and valid efficiency score. + """ + accurate_results = [result for result in results if self._execution_accuracy(result)] + return { + "EX": len(accurate_results) / len(results) if results else 0.0, + "VES": sum( + ( + self._avarage_execution_time(result.reference.sql) + / self._avarage_execution_time(result.prediction.sql) + ) + ** 0.5 + for result in accurate_results + ) + / len(results) + if results + else 0.0, + } + + def _execution_accuracy(self, result: EvaluationResult) -> bool: + """ + Checks if the execution results of both the predicted and ground-truth SQLs are identical. + + Args: + result: Evaluation result. + + Returns: + True if the execution results are identical, False otherwise. """ - return 0.0 + if result.prediction.sql is None: + return False + try: + result.reference.results = self._execute_query(result.reference.sql) + result.prediction.results = self._execute_query(result.prediction.sql) + except SQLAlchemyError: + return False + + reference = pd.DataFrame(result.reference.results) + prediction = pd.DataFrame(result.prediction.results) + + # If filtering works correctly, the number of rows will be the same + # TODO: Sometimes a different number of rows is okay, e.g. if df has aggregated values that are expanded in gt + if reference.shape[0] != prediction.shape[0]: + return False + + # Returned view may have the same columns, or more columns than the ground truth + if not reference.columns.isin(prediction.columns).all(): + return False + + # Check if dataframe equality, disregarding indexing and order + # commented out way is also ok but slower. 
Leaving it here just in case + # return df_gt.merge(df[df_gt.columns], how='outer', on=df_gt.columns.tolist(), + # indicator='indicator').indicator.drop_duplicates().values.tolist() == ['both'] + prediction = prediction[reference.columns].sort_values(by=reference.columns.tolist()).reset_index(drop=True) + reference = reference.sort_values(by=reference.columns.tolist()).reset_index(drop=True) + return prediction.equals(reference) diff --git a/benchmarks/sql/bench/pipeline.py b/benchmarks/sql/bench/pipeline.py new file mode 100644 index 00000000..e25ddca8 --- /dev/null +++ b/benchmarks/sql/bench/pipeline.py @@ -0,0 +1,172 @@ +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional + +from datasets import Dataset +from sqlalchemy import create_engine +from tqdm import tqdm + +import dbally +from dbally.collection.collection import Collection +from dbally.collection.exceptions import NoViewFoundError +from dbally.iql._exceptions import IQLError +from dbally.iql_generator.prompt import UnsupportedQueryError +from dbally.llms.base import LLM +from dbally.llms.litellm import LiteLLM +from dbally.llms.local import LocalLLM + +from .views import VIEWS_REGISTRY + + +@dataclass +class ExecutionResult: + """ + Represents the result of a single query execution. + """ + + iql: Optional[str] = None + sql: Optional[str] = None + results: List[Dict[str, Any]] = field(default_factory=list) + exception: Optional[Exception] = None + execution_time: Optional[float] = None + + def dict(self) -> Dict[str, Any]: + """ + Returns the dictionary representation of the object. + + Returns: + The dictionary representation. + """ + return { + "iql": self.iql, + "sql": self.sql, + } + + +@dataclass +class EvaluationResult: + """ + Represents the result of a single evaluation. + """ + + question: str + reference: ExecutionResult + prediction: ExecutionResult + db_url: Optional[str] = None + + def dict(self) -> Dict[str, Any]: + """ + Returns the dictionary representation of the object. + + Returns: + The dictionary representation. + """ + return { + "question": self.question, + "reference": self.reference.dict(), + "prediction": self.prediction.dict(), + "db_url": self.db_url, + } + + +class EvaluationPipeline: + """ + Collection evaluation pipeline. + """ + + def __init__(self, config: Dict) -> None: + """ + Constructs the pipeline for evaluating IQL predictions. + + Args: + config: The configuration for the pipeline. + + Raises: + ValueError: If no valid views are found in the configuration. + """ + self.db = create_engine(config.data.db_url) + self.llm = self.get_llm(config.llm) + self.collection = self.get_collection(config.setup) + + def get_llm(self, config: Dict) -> LLM: + """ + Returns the LLM based on the configuration. + + Args: + config: The LLM configuration. + + Returns: + The LLM object. + """ + if config.model_name.startswith("local/"): + return LocalLLM(config.model_name.split("/", 1)[1]) + return LiteLLM(config.model_name) + + def get_collection(self, config: Dict) -> Collection: + """ + Returns the view object based on the view name. + + Args: + config: The view configuration. + + Returns: + The view object. + + Raises: + ValueError: If the view name is not supported. 
+ """ + collection = dbally.create_collection(config.name, self.llm) + collection.n_retries = 0 + + for view_name in config.views: + view_cls = VIEWS_REGISTRY[view_name] + collection.add(view_cls, lambda: view_cls(self.db)) # pylint: disable=cell-var-from-loop + + return collection + + async def __call__(self, dataset: Dataset) -> List[EvaluationResult]: + """ + Runs the evaluation pipeline. + + Args: + dataset: The evaluation dataset. + + Returns: + The list of evaluation results. + """ + results = [] + + for data in tqdm(dataset, desc="Evaluation"): + try: + result = await self.collection.ask( + question=data["question"], + dry_run=True, + return_natural_response=False, + ) + except NoViewFoundError as exc: + prediction = ExecutionResult(exception=exc) + except IQLError as exc: + prediction = ExecutionResult(iql=exc.source, exception=exc) + except UnsupportedQueryError as exc: + prediction = ExecutionResult(iql="UNSUPPORTED_QUERY", exception=exc) + # TODO: Remove this exception handling once the Text2SQL view is fixed + except Exception as exc: # pylint: disable=broad-except + prediction = ExecutionResult(exception=exc) + else: + prediction = ExecutionResult( + iql=result.context.get("iql", None), + sql=result.context.get("sql", None), + ) + + reference = ExecutionResult( + iql=data["iql"], + sql=data["sql"], + ) + result = EvaluationResult( + question=data["question"], + reference=reference, + prediction=prediction, + db_url=str(self.db.url), + ) + results.append(result) + + return results diff --git a/benchmarks/sql/bench/pipelines/__init__.py b/benchmarks/sql/bench/pipelines/__init__.py deleted file mode 100644 index 1a97c186..00000000 --- a/benchmarks/sql/bench/pipelines/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -from .base import EvaluationPipeline, EvaluationResult -from .collection import CollectionEvaluationPipeline -from .view import IQLViewEvaluationPipeline, SQLViewEvaluationPipeline, ViewEvaluationPipeline - -__all__ = [ - "EvaluationPipeline", - "CollectionEvaluationPipeline", - "ViewEvaluationPipeline", - "IQLViewEvaluationPipeline", - "SQLViewEvaluationPipeline", - "EvaluationResult", -] diff --git a/benchmarks/sql/bench/pipelines/base.py b/benchmarks/sql/bench/pipelines/base.py deleted file mode 100644 index 4b91ac40..00000000 --- a/benchmarks/sql/bench/pipelines/base.py +++ /dev/null @@ -1,93 +0,0 @@ -from abc import ABC, abstractmethod -from dataclasses import dataclass, field -from typing import Any, Dict, List, Optional - -from datasets import Dataset - -from dbally.llms.base import LLM -from dbally.llms.litellm import LiteLLM -from dbally.llms.local import LocalLLM - - -@dataclass -class ExecutionResult: - """ - Represents the result of a single query execution. - """ - - iql: Optional[str] = None - sql: Optional[str] = None - results: List[Dict[str, Any]] = field(default_factory=list) - exception: Optional[Exception] = None - execution_time: Optional[float] = None - - def dict(self) -> Dict[str, Any]: - """ - Returns the dictionary representation of the object. - - Returns: - The dictionary representation. - """ - return { - "iql": self.iql, - "sql": self.sql, - "execution_time": self.execution_time, - } - - -@dataclass -class EvaluationResult: - """ - Represents the result of a single evaluation. - """ - - question: str - reference: ExecutionResult - prediction: ExecutionResult - db_url: Optional[str] = None - - def dict(self) -> Dict[str, Any]: - """ - Returns the dictionary representation of the object. - - Returns: - The dictionary representation. 
- """ - return { - "question": self.question, - "reference": self.reference.dict(), - "prediction": self.prediction.dict(), - "db_url": self.db_url, - } - - -class EvaluationPipeline(ABC): - """ - Evaluation pipeline base class. - """ - - def get_llm(self, config: Dict) -> LLM: - """ - Returns the LLM based on the configuration. - - Args: - config: The LLM configuration. - - Returns: - The LLM object. - """ - if config.model_name.startswith("local/"): - return LocalLLM(config.model_name.split("/", 1)[1]) - return LiteLLM(config.model_name) - - @abstractmethod - async def __call__(self, dataset: Dataset) -> List[EvaluationResult]: - """ - Runs the evaluation pipeline. - - Args: - dataset: The evaluation dataset. - - Returns: - The list of evaluation results. - """ diff --git a/benchmarks/sql/bench/pipelines/collection.py b/benchmarks/sql/bench/pipelines/collection.py deleted file mode 100644 index 088f9c06..00000000 --- a/benchmarks/sql/bench/pipelines/collection.py +++ /dev/null @@ -1,119 +0,0 @@ -from typing import Dict, List - -from datasets import Dataset -from sqlalchemy import create_engine -from tqdm import tqdm - -import dbally -from dbally.collection.collection import Collection -from dbally.collection.exceptions import NoViewFoundError -from dbally.iql._exceptions import IQLError -from dbally.iql_generator.prompt import UnsupportedQueryError -from dbally.views.freeform.text2sql.exceptions import Text2SQLError -from dbally.views.sqlalchemy_base import SqlAlchemyBaseView - -from ..views import FREEFORM_VIEWS_REGISTRY, STRUCTURED_VIEWS_REGISTRY -from .base import EvaluationPipeline, EvaluationResult, ExecutionResult - - -class CollectionEvaluationPipeline(EvaluationPipeline): - """ - Pipeline for evaluating IQL predictions. - """ - - def __init__(self, config: Dict) -> None: - """ - Constructs the pipeline for evaluating IQL predictions. - - Args: - config: The configuration for the pipeline. - - Raises: - ValueError: If the view name is not supported. - """ - self.llm = self.get_llm(config.llm) - self.collection = self.get_collection(config.data.views) - - def get_collection(self, config: Dict) -> Collection: - """ - Returns the view object based on the view name. - - Args: - config: The view configuration. - - Returns: - The view object. - - Raises: - ValueError: If the view name is not supported. - """ - if not config: - raise ValueError("No structured or freeform views found in the configuration.") - - collection = dbally.create_collection("bench", self.llm) - collection.n_retries = 0 - - for view_name, db_url in config.items(): - if view_cls := STRUCTURED_VIEWS_REGISTRY.get(view_name) or FREEFORM_VIEWS_REGISTRY.get(view_name): - collection.add(view_cls, lambda: view_cls(create_engine(db_url))) # pylint: disable=cell-var-from-loop - else: - raise ValueError( - f"View {view_name} not supported. " - f"Available views: {list(STRUCTURED_VIEWS_REGISTRY) + list(FREEFORM_VIEWS_REGISTRY)}." - ) - - return collection - - async def __call__(self, dataset: Dataset) -> List[EvaluationResult]: - """ - Runs the pipeline for evaluating IQL predictions. - - Args: - dataset: The dataset containing the questions and ground truth IQL queries. - - Returns: - The list of IQL predictions. 
- """ - db_url = None - results = [] - - for data in tqdm(dataset, desc="Evaluation"): - try: - result = await self.collection.ask( - question=data["question"], - dry_run=True, - return_natural_response=False, - ) - except NoViewFoundError as exc: - prediction = ExecutionResult(exception=exc) - except IQLError as exc: - prediction = ExecutionResult(iql=exc.source, exception=exc) - except UnsupportedQueryError as exc: - prediction = ExecutionResult(exception=exc) - except Text2SQLError as exc: - prediction = ExecutionResult(exception=exc) - else: - prediction = ExecutionResult( - iql=result.context.get("iql", None), - sql=result.context.get("sql", None), - ) - used_view = self.collection.get(result.view_name) - db_url = ( - used_view._sqlalchemy_engine.url - if isinstance(used_view, SqlAlchemyBaseView) - else used_view._engine.url - ) - - reference = ExecutionResult( - iql=data["iql"], - sql=data["sql"], - ) - result = EvaluationResult( - question=data["question"], - reference=reference, - prediction=prediction, - db_url=db_url, - ) - results.append(result) - - return results diff --git a/benchmarks/sql/bench/pipelines/view.py b/benchmarks/sql/bench/pipelines/view.py deleted file mode 100644 index 4f529a44..00000000 --- a/benchmarks/sql/bench/pipelines/view.py +++ /dev/null @@ -1,119 +0,0 @@ -from abc import ABC -from typing import Dict, List - -from datasets import Dataset -from sqlalchemy import create_engine -from tqdm import tqdm - -from dbally.iql._exceptions import IQLError -from dbally.iql_generator.prompt import UnsupportedQueryError -from dbally.views.base import BaseView -from dbally.views.freeform.text2sql.exceptions import Text2SQLError - -from ..views import FREEFORM_VIEWS_REGISTRY, STRUCTURED_VIEWS_REGISTRY -from .base import EvaluationPipeline, EvaluationResult, ExecutionResult - - -class ViewEvaluationPipeline(EvaluationPipeline, ABC): - """ - Pipeline for evaluating views. - """ - - VIEWS_REGISTRY: Dict[str, BaseView] = {} - - def __init__(self, config: Dict) -> None: - """ - Constructs the pipeline for evaluating IQL predictions. - - Args: - config: The configuration for the pipeline. - - Raises: - ValueError: If the view name is not supported. - """ - self.llm = self.get_llm(config.llm) - self.view, self.db_url = self.get_view(config.data.views) - - def get_view(self, config: Dict) -> BaseView: - """ - Returns the view object based on the configuration. - - Args: - config: The view configuration. - - Returns: - The view object, and the database URL. - - Raises: - ValueError: If the view name is not supported. - """ - view_name, db_url = next( - ((view, db_url) for view, db_url in config.items() if view in self.VIEWS_REGISTRY), - (None, None), - ) - if not view_name: - raise ValueError(f"No views found in the configuration. Supported views: {list(self.VIEWS_REGISTRY)}.") - view_cls = self.VIEWS_REGISTRY[view_name] - return view_cls(create_engine(db_url)), db_url - - async def __call__(self, dataset: Dataset) -> List[EvaluationResult]: - """ - Runs the pipeline for evaluating IQL predictions. - - Args: - dataset: The dataset containing the questions and ground truth IQL queries. - - Returns: - The list of IQL predictions. 
- """ - results = [] - - for data in tqdm(dataset, desc="Evaluation"): - try: - result = await self.view.ask( - query=data["question"], - llm=self.llm, - n_retries=0, - dry_run=True, - ) - except IQLError as exc: - prediction = ExecutionResult(iql=exc.source, exception=exc) - except UnsupportedQueryError as exc: - prediction = ExecutionResult(exception=exc) - except Text2SQLError as exc: - prediction = ExecutionResult(exception=exc) - else: - prediction = ExecutionResult( - iql=result.context.get("iql", None), - sql=result.context.get("sql", None), - ) - - reference = ExecutionResult( - iql=data["iql"], - sql=data["sql"], - ) - result = EvaluationResult( - question=data["question"], - reference=reference, - prediction=prediction, - db_url=self.db_url, - ) - results.append(result) - - return results - - -class IQLViewEvaluationPipeline(ViewEvaluationPipeline): - """ - Pipeline for evaluating structured views. - """ - - VIEWS_REGISTRY = STRUCTURED_VIEWS_REGISTRY - - -class SQLViewEvaluationPipeline(ViewEvaluationPipeline): - """ - Pipeline for evaluating freeform views. - """ - - VIEWS_REGISTRY = FREEFORM_VIEWS_REGISTRY diff --git a/benchmarks/sql/bench/utils.py b/benchmarks/sql/bench/utils.py index f00daaba..7bca9dba 100644 --- a/benchmarks/sql/bench/utils.py +++ b/benchmarks/sql/bench/utils.py @@ -1,44 +1,8 @@ import json import sys -import time from datetime import datetime from pathlib import Path -from typing import Any, Dict, List, Tuple - -from sqlalchemy import Engine, text - - -def execute_query(query: str, engine: Engine) -> Tuple[List[Dict[str, Any]], float]: - """ - Execute the given query on the database. - - Args: - query: The query to be executed. - engine: The database engine. - - Returns: - The query results. - """ - with engine.connect() as connection: - start_time = time.perf_counter() - rows = connection.execute(text(query)).fetchall() - execution_time = time.perf_counter() - start_time - return [dict(row._mapping) for row in rows], execution_time # pylint: disable=protected-access - - -def avarage_execution_time(query: str, engine: Engine, n: int) -> float: - """ - Execute the given query on the database n times and return the average execution time. - - Args: - query: The query to be executed. - engine: The database engine. - n: The number of times to execute the query. - - Returns: - The average execution time. 
- """ - return sum(execute_query(query, engine)[1] for _ in range(n)) / n +from typing import Any def save(file_path: Path, **data: Any) -> None: diff --git a/benchmarks/sql/bench/views/__init__.py b/benchmarks/sql/bench/views/__init__.py index 40bb10c2..bf03b6be 100644 --- a/benchmarks/sql/bench/views/__init__.py +++ b/benchmarks/sql/bench/views/__init__.py @@ -5,10 +5,7 @@ from .freeform.superhero import SuperheroFreeformView from .structured.superhero import SuperheroView -STRUCTURED_VIEWS_REGISTRY: Dict[str, Type[BaseView]] = { +VIEWS_REGISTRY: Dict[str, Type[BaseView]] = { SuperheroView.__name__: SuperheroView, -} - -FREEFORM_VIEWS_REGISTRY: Dict[str, Type[BaseView]] = { SuperheroFreeformView.__name__: SuperheroFreeformView, } diff --git a/benchmarks/sql/config/component/collection.yaml b/benchmarks/sql/config/component/collection.yaml deleted file mode 100644 index f1fad783..00000000 --- a/benchmarks/sql/config/component/collection.yaml +++ /dev/null @@ -1 +0,0 @@ -type: "COLLECTION" diff --git a/benchmarks/sql/config/component/iql-view.yaml b/benchmarks/sql/config/component/iql-view.yaml deleted file mode 100644 index 1885e690..00000000 --- a/benchmarks/sql/config/component/iql-view.yaml +++ /dev/null @@ -1 +0,0 @@ -type: "IQL-VIEW" diff --git a/benchmarks/sql/config/component/sql-view.yaml b/benchmarks/sql/config/component/sql-view.yaml deleted file mode 100644 index d2dda1c6..00000000 --- a/benchmarks/sql/config/component/sql-view.yaml +++ /dev/null @@ -1 +0,0 @@ -type: "SQL-VIEW" diff --git a/benchmarks/sql/config/config.yaml b/benchmarks/sql/config/config.yaml index f5088da7..c339f323 100644 --- a/benchmarks/sql/config/config.yaml +++ b/benchmarks/sql/config/config.yaml @@ -1,6 +1,6 @@ defaults: - - component: iql-view - data: superhero + - setup: iql - llm: gpt - _self_ diff --git a/benchmarks/sql/config/data/superhero.yaml b/benchmarks/sql/config/data/superhero.yaml index 5d81fe75..e2c60e16 100644 --- a/benchmarks/sql/config/data/superhero.yaml +++ b/benchmarks/sql/config/data/superhero.yaml @@ -1,9 +1,6 @@ id: "superhero" path: "micpst/bird-dev-iql" split: "dev" -db_ids: ["superhero"] +db_id: "superhero" difficulties: ["simple"] -views: { - "SuperheroView": "sqlite:///data/superhero.db", - "SuperheroFreeformView": "sqlite:///data/superhero.db" - } +db_url: "sqlite:///data/superhero.db" diff --git a/benchmarks/sql/config/setup/iql.yaml b/benchmarks/sql/config/setup/iql.yaml new file mode 100644 index 00000000..b0f0516b --- /dev/null +++ b/benchmarks/sql/config/setup/iql.yaml @@ -0,0 +1,3 @@ +name: IQL +defaults: + - views: superhero/structured diff --git a/benchmarks/sql/config/setup/mixed.yaml b/benchmarks/sql/config/setup/mixed.yaml new file mode 100644 index 00000000..eefa1d33 --- /dev/null +++ b/benchmarks/sql/config/setup/mixed.yaml @@ -0,0 +1,3 @@ +name: MIXED +defaults: + - views: superhero/mixed diff --git a/benchmarks/sql/config/setup/sql.yaml b/benchmarks/sql/config/setup/sql.yaml new file mode 100644 index 00000000..82240ddf --- /dev/null +++ b/benchmarks/sql/config/setup/sql.yaml @@ -0,0 +1,3 @@ +name: SQL +defaults: + - views: superhero/freeform diff --git a/benchmarks/sql/config/setup/views/superhero/freeform.yaml b/benchmarks/sql/config/setup/views/superhero/freeform.yaml new file mode 100644 index 00000000..424442d2 --- /dev/null +++ b/benchmarks/sql/config/setup/views/superhero/freeform.yaml @@ -0,0 +1,3 @@ +[ + "SuperheroFreeformView", +] diff --git a/benchmarks/sql/config/setup/views/superhero/mixed.yaml 
b/benchmarks/sql/config/setup/views/superhero/mixed.yaml new file mode 100644 index 00000000..dd744cc9 --- /dev/null +++ b/benchmarks/sql/config/setup/views/superhero/mixed.yaml @@ -0,0 +1,4 @@ +[ + "SuperheroView", + "SuperheroFreeformView", +] diff --git a/benchmarks/sql/config/setup/views/superhero/structured.yaml b/benchmarks/sql/config/setup/views/superhero/structured.yaml new file mode 100644 index 00000000..6223d633 --- /dev/null +++ b/benchmarks/sql/config/setup/views/superhero/structured.yaml @@ -0,0 +1,3 @@ +[ + "SuperheroView", +] diff --git a/benchmarks/sql/tests/test_evaluator.py b/benchmarks/sql/tests/test_evaluator.py new file mode 100644 index 00000000..dd84f4af --- /dev/null +++ b/benchmarks/sql/tests/test_evaluator.py @@ -0,0 +1,93 @@ +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from benchmarks.sql.bench.evaluator import Evaluator + + +class MockPipeline: + async def __call__(self, data): + return ["mock_result"], {"mock_perf": "mock_value"} + + +class MockMetricSet: + def compute(self, results): + return {"mock_metric": "mock_value"} + + +class MockDataset: + pass + + +class MockEvaluationResult: + def dict(self): + return {"mock_result_key": "mock_result_value"} + + +@pytest.mark.asyncio +async def test_compute(): + evaluator = Evaluator(task="test_task") + pipe = MockPipeline() + data = MockDataset() + metrics = MockMetricSet() + + # Mocking the internal methods which are not the target of this test + evaluator._call_pipeline = AsyncMock(return_value=(["mock_result"], {"mock_perf": "mock_value"})) + evaluator._compute_metrics = MagicMock(return_value={"mock_metric": "mock_value"}) + evaluator._results_processor = MagicMock(return_value={"processed_results": "mock_processed_results"}) + + expected_result = { + "mock_perf": "mock_value", + "mock_metric": "mock_value", + "processed_results": "mock_processed_results", + } + + result = await evaluator.compute(pipe, data, metrics) + assert result == expected_result + + +@pytest.mark.asyncio +async def test_call_pipeline(): + evaluator = Evaluator(task="test_task") + pipe = MockPipeline() + data = MockDataset() + + results, perf_results = await evaluator._call_pipeline(pipe, data) + + assert len(results) == 2 + assert "mock_perf" in perf_results + + +def test_results_processor(): + evaluator = Evaluator(task="test_task") + results = [MockEvaluationResult()] + + processed_results = evaluator._results_processor(results) + + assert "results" in processed_results + assert processed_results["results"][0]["mock_result_key"] == "mock_result_value" + + +def test_compute_metrics(): + evaluator = Evaluator(task="test_task") + metrics = MockMetricSet() + results = [MockEvaluationResult()] + + computed_metrics = evaluator._compute_metrics(metrics, results) + + assert "metrics" in computed_metrics + assert computed_metrics["metrics"]["mock_metric"] == "mock_value" + + +def test_compute_time_perf() -> None: + evaluator = Evaluator(task="test_task") + start_time = 0 + end_time = 10 + num_samples = 100 + + perf_metrics = evaluator._compute_time_perf(start_time, end_time, num_samples) + + assert "time_perf" in perf_metrics + assert perf_metrics["time_perf"]["total_time_in_seconds"] == 10 + assert perf_metrics["time_perf"]["samples_per_second"] == 10 + assert perf_metrics["time_perf"]["latency_in_seconds"] == 0.1 diff --git a/benchmarks/sql/tests/test_metrics.py b/benchmarks/sql/tests/test_metrics.py new file mode 100644 index 00000000..df437915 --- /dev/null +++ b/benchmarks/sql/tests/test_metrics.py @@ -0,0 +1,42 @@ 
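The `test_compute_time_perf` case above pins down both the arithmetic and the shape of the performance report (10 seconds for 100 samples gives 10 samples/s and 0.1 s latency). One implementation consistent with those assertions would look roughly like the sketch below; the actual `Evaluator._compute_time_perf` is not shown in this patch, so the function body here is an assumption.

```python
# Hedged sketch: one _compute_time_perf shape that satisfies the assertions in the test above.
from typing import Any, Dict


def compute_time_perf(start_time: float, end_time: float, num_samples: int) -> Dict[str, Any]:
    total_time = end_time - start_time
    return {
        "time_perf": {
            "total_time_in_seconds": total_time,
            "samples_per_second": num_samples / total_time if total_time else 0.0,
            "latency_in_seconds": total_time / num_samples if num_samples else 0.0,
        },
    }
```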
+from typing import List + +import pytest + +from benchmarks.sql.bench.metrics.iql import ExactMatchIQL, ValidIQL +from benchmarks.sql.bench.pipeline import EvaluationResult, ExecutionResult + + +@pytest.fixture +def evaluation_results() -> List[EvaluationResult]: + return [ + EvaluationResult( + question="question1", + reference=ExecutionResult(iql="filter_by_column1(10)"), + prediction=ExecutionResult(iql="filter_by_column1(10)"), + ), + EvaluationResult( + question="question2", + reference=ExecutionResult(iql="filter_by_column2(20)"), + prediction=ExecutionResult(iql="filter_by_column2(30)"), + ), + EvaluationResult( + question="question3", + reference=ExecutionResult(iql="filter_by_column3('Test')"), + prediction=ExecutionResult(iql="filter_by_column3(30)"), + ), + EvaluationResult( + question="question4", + reference=ExecutionResult(iql="filter_by_column4(40)"), + prediction=ExecutionResult(iql="filter_by_column4(40)"), + ), + ] + + +def test_exact_match_iql(evaluation_results: List[EvaluationResult]) -> None: + metric = ExactMatchIQL() + assert metric.compute(evaluation_results) == 0.5 + + +def test_valid_iql(evaluation_results): + metric = ValidIQL() + assert metric.compute(evaluation_results) == 1 From 11545a1598358a4064a17654dab44bee053ac9d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Fri, 19 Jul 2024 02:03:39 +0200 Subject: [PATCH 22/34] update views --- benchmarks/sql/bench/views/__init__.py | 5 +- .../sql/bench/views/structured/superhero.py | 629 +++++++++++++----- 2 files changed, 472 insertions(+), 162 deletions(-) diff --git a/benchmarks/sql/bench/views/__init__.py b/benchmarks/sql/bench/views/__init__.py index bf03b6be..732779e2 100644 --- a/benchmarks/sql/bench/views/__init__.py +++ b/benchmarks/sql/bench/views/__init__.py @@ -3,9 +3,12 @@ from dbally.views.base import BaseView from .freeform.superhero import SuperheroFreeformView -from .structured.superhero import SuperheroView +from .structured.superhero import HeroAttributeView, HeroPowerView, PublisherView, SuperheroView VIEWS_REGISTRY: Dict[str, Type[BaseView]] = { + PublisherView.__name__: PublisherView, + HeroAttributeView.__name__: HeroAttributeView, + HeroPowerView.__name__: HeroPowerView, SuperheroView.__name__: SuperheroView, SuperheroFreeformView.__name__: SuperheroFreeformView, } diff --git a/benchmarks/sql/bench/views/structured/superhero.py b/benchmarks/sql/bench/views/structured/superhero.py index 8f47566b..22ad6b7c 100644 --- a/benchmarks/sql/bench/views/structured/superhero.py +++ b/benchmarks/sql/bench/views/structured/superhero.py @@ -1,192 +1,499 @@ # pylint: disable=missing-docstring, missing-return-doc, missing-param-doc -import sqlalchemy -from sqlalchemy import create_engine -from sqlalchemy.dialects.postgresql import ARRAY -from sqlalchemy.ext.automap import automap_base +from typing import Literal + +from sqlalchemy import ColumnElement, Engine, Select, func, select +from sqlalchemy.ext.declarative import DeferredReflection, declarative_base from sqlalchemy.orm import aliased -from dbally import SqlAlchemyBaseView, decorators - -engine = create_engine("sqlite:///data/superhero.db") -SuperheroModel = automap_base() -SuperheroModel.prepare(autoload_with=engine, reflect=True) - -eye_color_alias = aliased(SuperheroModel.classes.colour) -hair_color_alias = aliased(SuperheroModel.classes.colour) -skin_color_alias = aliased(SuperheroModel.classes.colour) - -hero_power = SuperheroModel.metadata.tables["hero_power"] -hero_attr = 
SuperheroModel.metadata.tables["hero_attribute"] - - -class SuperheroDBSchema: - id = SuperheroModel.classes.superhero.id - name = SuperheroModel.classes.superhero.superhero_name - full_name = SuperheroModel.classes.superhero.full_name - gender = SuperheroModel.classes.gender.gender - race = SuperheroModel.classes.race.race - publisher_name = SuperheroModel.classes.publisher.publisher_name - alignment = SuperheroModel.classes.alignment.alignment - weight_kg = SuperheroModel.classes.superhero.weight_kg - height_cm = SuperheroModel.classes.superhero.height_cm - eye_color = eye_color_alias.colour.label("eye_color") - hair_color = hair_color_alias.colour.label("hair_color") - skin_color = skin_color_alias.colour.label("skin_color") - powers = sqlalchemy.func.array_agg( - sqlalchemy.func.distinct(SuperheroModel.classes.superpower.power_name), type_=ARRAY(sqlalchemy.String) - ).label("powers") - attributes = sqlalchemy.func.jsonb_object_agg( - SuperheroModel.classes.attribute.attribute_name, hero_attr.c.attribute_value - ).label("attributes") +from dbally.views.decorators import view_filter +from dbally.views.sqlalchemy_base import SqlAlchemyBaseView + +Base = declarative_base(cls=DeferredReflection) + + +class Alignment(Base): + __tablename__ = "alignment" + + +class Attribute(Base): + __tablename__ = "attribute" + + +class Colour(Base): + __tablename__ = "colour" + + +class Gender(Base): + __tablename__ = "gender" + + +class HeroAttribute(Base): + __tablename__ = "hero_attribute" + __mapper_args__ = {"primary_key": ["hero_id", "attribute_id"]} + + +class HeroPower(Base): + __tablename__ = "hero_power" + __mapper_args__ = {"primary_key": ["hero_id", "power_id"]} + + +class Publisher(Base): + __tablename__ = "publisher" + + +class Race(Base): + __tablename__ = "race" + + +class Superhero(Base): + __tablename__ = "superhero" + + +class Superpower(Base): + __tablename__ = "superpower" + + +class DBInitMixin: + def __init__(self, sqlalchemy_engine: Engine) -> None: + """ + Initializes the view. + + Args: + sqlalchemy_engine: The database engine. + """ + DeferredReflection.prepare(sqlalchemy_engine) + + super().__init__(sqlalchemy_engine) class SuperheroFilterMixin: - @decorators.view_filter() - def filter_by_full_name(self, full_name: str) -> sqlalchemy.ColumnElement: - return SuperheroModel.classes.superhero.full_name == full_name - - @decorators.view_filter() - def filter_by_superhero_name(self, name: str) -> sqlalchemy.ColumnElement: - return SuperheroModel.classes.superhero.superhero_name == name - - @decorators.view_filter() - def filter_by_superhero_id(self, superhero_id: int) -> sqlalchemy.ColumnElement: - return SuperheroModel.classes.superhero.id == superhero_id - - @decorators.view_filter() - def filter_by_eye_color(self, color: str) -> sqlalchemy.ColumnElement: - return SuperheroModel.classes.superhero.eye_colour_id.in_( - sqlalchemy.select(SuperheroModel.classes.colour.id).where(SuperheroModel.classes.colour.colour == color) - ) + """ + Mixin for filtering the view by the superhero attributes. + """ - @decorators.view_filter() - def filter_by_hair_color(self, color: str) -> sqlalchemy.ColumnElement: - return SuperheroModel.classes.superhero.hair_colour_id.in_( - sqlalchemy.select(SuperheroModel.classes.colour.id).where(SuperheroModel.classes.colour.colour == color) - ) + @view_filter() + def filter_by_superhero_id(self, superhero_id: int) -> ColumnElement: + """ + Filters the view by the superhero id. 
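The switch above from `automap_base` to a `DeferredReflection` declarative base is worth a small illustration: each mapped class declares only `__tablename__`, and its columns (including primary keys) are filled in from the live database when `DeferredReflection.prepare(engine)` runs inside `DBInitMixin`. The following is a minimal standalone sketch of that pattern, with the database path assumed from the benchmark config; it is illustrative and not part of the patch.

```python
# Minimal sketch of the deferred-reflection pattern used above.
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import DeferredReflection, declarative_base

Base = declarative_base(cls=DeferredReflection)


class Publisher(Base):
    __tablename__ = "publisher"  # columns are reflected from the database, not declared here


engine = create_engine("sqlite:///data/superhero.db")  # path assumed from the benchmark config
DeferredReflection.prepare(engine)  # after this call, Publisher.id, Publisher.publisher_name, etc. exist
print(Publisher.__table__.columns.keys())
```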
- @decorators.view_filter() - def filter_by_skin_color(self, color: str) -> sqlalchemy.ColumnElement: - return SuperheroModel.classes.superhero.skin_colour_id.in_( - sqlalchemy.select(SuperheroModel.classes.colour.id).where(SuperheroModel.classes.colour.colour == color) - ) + Args: + superhero_id: The id of the superhero. - @decorators.view_filter() - def filter_by_race(self, race: str) -> sqlalchemy.ColumnElement: - return SuperheroModel.classes.superhero.race_id.in_( - sqlalchemy.select(SuperheroModel.classes.race.id).where(SuperheroModel.classes.race.race == race) - ) + Returns: + The filter condition. + """ + return Superhero.id == superhero_id - @decorators.view_filter() - def filter_by_publisher(self, publisher: str) -> sqlalchemy.ColumnElement: - return SuperheroModel.classes.superhero.publisher_id.in_( - sqlalchemy.select(SuperheroModel.classes.publisher.id).where( - SuperheroModel.classes.publisher.publisher_name == publisher - ) - ) + @view_filter() + def filter_by_superhero_name(self, superhero_name: str) -> ColumnElement: + """ + Filters the view by the superhero nick or handle. - @decorators.view_filter() - def filter_by_alignment(self, alignment: str) -> sqlalchemy.ColumnElement: - return SuperheroModel.classes.superhero.alignment_id.in_( - sqlalchemy.select(SuperheroModel.classes.alignment.id).where( - SuperheroModel.classes.alignment.alignment == alignment - ) - ) + Args: + superhero_name: The abstract nick or handle of the superhero. - @decorators.view_filter() - def filter_by_gender(self, gender: str) -> sqlalchemy.ColumnElement: - return SuperheroModel.classes.superhero.gender_id.in_( - sqlalchemy.select(SuperheroModel.classes.gender.id).where(SuperheroModel.classes.gender.gender == gender) - ) + Returns: + The filter condition. + """ + return Superhero.superhero_name == superhero_name + + @view_filter() + def filter_by_missing_superhero_full_name(self) -> ColumnElement: + """ + Filters the view by the missing full name of the superhero. + + Returns: + The filter condition. + """ + return Superhero.full_name is None + + @view_filter() + def filter_by_superhero_full_name(self, superhero_full_name: str) -> ColumnElement: + """ + Filters the view by the full name of the superhero. + + Args: + superhero_full_name: The human name of the superhero. + + Returns: + The filter condition. + """ + return Superhero.full_name == superhero_full_name + + @view_filter() + def filter_by_height_cm(self, height_cm: float) -> ColumnElement: + """ + Filters the view by the height of the superhero. + + Args: + height_cm: The height of the superhero. + + Returns: + The filter condition. + """ + return Superhero.height_cm == height_cm + + @view_filter() + def filter_by_missing_weight(self) -> ColumnElement: + """ + Filters the view by the missing weight of the superhero. + + Returns: + The filter condition. + """ + return Superhero.weight_kg == 0 or Superhero.weight_kg is None + + @view_filter() + def filter_by_weight_kg(self, weight_kg: float) -> ColumnElement: + """ + Filters the view by the weight of the superhero. + + Args: + weight_kg: The weight of the superhero. + + Returns: + The filter condition. + """ + return Superhero.weight_kg == weight_kg + + @view_filter() + def filter_by_weight_kg_bigger_than(self, weight_kg: float) -> ColumnElement: + """ + Filters the view by the weight of the superhero. + + Args: + weight_kg: The weight of the superhero. + + Returns: + The filter condition. 
+ """ + return Superhero.weight_kg > weight_kg + + @view_filter() + def filter_by_weight_kg_lower_than(self, weight_kg: float) -> ColumnElement: + """ + Filters the view by the weight of the superhero. + + Args: + weight_kg: The weight of the superhero. + + Returns: + The filter condition. + """ + return Superhero.weight_kg < weight_kg + + +class SuperheroColourFilterMixin: + """ + Mixin for filtering the view by the superhero colour attributes. + """ + + def __init__(self) -> None: + super().__init__() + self.eye_colour = aliased(Colour) + self.hair_colour = aliased(Colour) + self.skin_colour = aliased(Colour) + + @view_filter() + def filter_by_eye_colour(self, eye_colour: str) -> ColumnElement: + """ + Filters the view by the superhero eye colour. + + Args: + eye_colour: The eye colour of the superhero. + + Returns: + The filter condition. + """ + return self.eye_colour.colour == eye_colour + + @view_filter() + def filter_by_hair_colour(self, hair_colour: str) -> ColumnElement: + """ + Filters the view by the superhero hair colour. + + Args: + hair_colour: The hair colour of the superhero. + + Returns: + The filter condition. + """ + return self.hair_colour.colour == hair_colour + + @view_filter() + def filter_by_skin_colour(self, skin_colour: str) -> ColumnElement: + """ + Filters the view by the superhero skin colour. + + Args: + skin_colour: The skin colour of the superhero. + + Returns: + The filter condition. + """ + return self.skin_colour.colour == skin_colour - @decorators.view_filter() - def filter_by_power(self, power: str) -> sqlalchemy.ColumnElement: - return SuperheroModel.classes.superpower.power_name == power - @decorators.view_filter() - def filter_by_missing_weight(self) -> sqlalchemy.ColumnElement: - return SuperheroModel.classes.superhero.weight_kg == 0 or SuperheroModel.classes.superhero.weight_kg is None +class PublisherFilterMixin: + """ + Mixin for filtering the view by the publisher attributes. + """ + + @view_filter() + def filter_by_publisher_name(self, publisher_name: str) -> ColumnElement: + """ + Filters the view by the publisher name. + + Args: + publisher_name: The name of the publisher. + + Returns: + The filter condition. + """ + return Publisher.publisher_name == publisher_name + + +class AlignmentFilterMixin: + """ + Mixin for filtering the view by the alignment attributes. + """ + + @view_filter() + def filter_by_alignment(self, alignment: Literal["Good", "Bad", "Neutral", "N/A"]) -> ColumnElement: + """ + Filters the view by the superhero alignment. + + Args: + alignment: The alignment of the superhero. + + Returns: + The filter condition. + """ + return Alignment.alignment == alignment + + +class SuperpowerFilterMixin: + """ + Mixin for filtering the view by the superpower attributes. + """ + + @view_filter() + def filter_by_power_name(self, power_name: str) -> ColumnElement: + """ + Filters the view by the superpower name. + + Args: + power_name: The name of the superpower. + + Returns: + The filter condition. + """ + return Superpower.power_name == power_name + + +class RaceFilterMixin: + """ + Mixin for filtering the view by the race. + """ + + @view_filter() + def filter_by_race(self, race: str) -> ColumnElement: + """ + Filters the view by the object race. + + Args: + race: The race of the object. + + Returns: + The filter condition. + """ + return Race.race == race + + +class GenderFilterMixin: + """ + Mixin for filtering the view by the gender. 
+ """ + + @view_filter() + def filter_by_gender(self, gender: Literal["Male", "Female", "N/A"]) -> ColumnElement: + """ + Filters the view by the object gender. + + Args: + gender: The gender of the object. + + Returns: + The filter condition. + """ + return Gender.gender == gender + + +class HeroAttributeFilterMixin: + """ + Mixin for filtering the view by the hero attribute. + """ + + @view_filter() + def filter_by_attribute_value(self, attribute_value: int) -> ColumnElement: + """ + Filters the view by the hero attribute value. + + Args: + attribute_value: The value of the hero attribute. + + Returns: + The filter condition. + """ + return HeroAttribute.attribute_value == attribute_value + + @view_filter() + def filter_by_attribute_value_between(self, begin_attribute_value: int, end_attribute_value: int) -> ColumnElement: + """ + Filters the view by the hero attribute value. + + Args: + begin_attribute_value: The begin value of the hero attribute. + end_attribute_value: The end value of the hero attribute. + + Returns: + The filter condition. + """ + return HeroAttribute.attribute_value.between(begin_attribute_value, end_attribute_value) + + @view_filter() + def filter_by_the_lowest_attribute_value(self) -> ColumnElement: + """ + Filters the view by the lowest hero attribute value. + + Returns: + The filter condition. + """ + return HeroAttribute.attribute_value == select(func.min(HeroAttribute.attribute_value)) - @decorators.view_filter() - def filter_by_weight(self, weight: float) -> sqlalchemy.ColumnElement: - return SuperheroModel.classes.superhero.weight_kg == weight - @decorators.view_filter() - def heavier_than(self, weight: float) -> sqlalchemy.ColumnElement: - return SuperheroModel.classes.superhero.weight_kg > weight +class AttributeFilterMixin: + """ + Mixin for filtering the view by the attribute. + """ - @decorators.view_filter() - def lighter_than(self, weight: float) -> sqlalchemy.ColumnElement: - return SuperheroModel.classes.superhero.weight_kg < weight + @view_filter() + def filter_by_attribute_name( + self, attribute_name: Literal["Intelligence", "Strength", "Speed", "Durability", "Power", "Combat"] + ) -> ColumnElement: + """ + Filters the view by the attribute name. - @decorators.view_filter() - def filter_by_height(self, height: float) -> sqlalchemy.ColumnElement: - return SuperheroModel.classes.superhero.height_cm == height + Args: + attribute_name: The name of the attribute. - @decorators.view_filter() - def taller_than(self, height: float) -> sqlalchemy.ColumnElement: - return SuperheroModel.classes.superhero.height_cm > height + Returns: + The filter condition. + """ + return Attribute.attribute_name == attribute_name -class SuperheroView(SqlAlchemyBaseView, SuperheroFilterMixin): +class SuperheroView( # pylint: disable=too-many-ancestors + DBInitMixin, + SqlAlchemyBaseView, + SuperheroFilterMixin, + SuperheroColourFilterMixin, + PublisherFilterMixin, + AlignmentFilterMixin, + GenderFilterMixin, + RaceFilterMixin, + HeroAttributeFilterMixin, +): """ - Main view, meant for finding superheroes meeting specific criteria + View containing superhero data for querying superheroes. """ - def get_select(self) -> sqlalchemy.Select: + def get_select(self) -> Select: """ - Creates the initial SqlAlchemy select object, which will be used to build the query. + Initializes the select object for the view. + + Returns: + The select object. 
""" return ( - sqlalchemy.select( - SuperheroDBSchema.id, - SuperheroDBSchema.name, - SuperheroDBSchema.full_name, - SuperheroDBSchema.gender, - SuperheroDBSchema.race, - SuperheroDBSchema.publisher_name, - SuperheroDBSchema.alignment, - SuperheroDBSchema.weight_kg, - SuperheroDBSchema.height_cm, - SuperheroDBSchema.eye_color, - SuperheroDBSchema.hair_color, - SuperheroDBSchema.skin_color, + select( + Superhero.id, + Superhero.superhero_name, + Superhero.full_name, + Superhero.height_cm, + Superhero.weight_kg, + Publisher.publisher_name, + Gender.gender, + Race.race, + Alignment.alignment, + self.eye_colour.colour.label("eye_colour"), + self.hair_colour.colour.label("hair_colour"), + self.skin_colour.colour.label("skin_colour"), ) - .join( - SuperheroModel.classes.gender, - SuperheroModel.classes.superhero.gender_id == SuperheroModel.classes.gender.id, - ) - .join( - SuperheroModel.classes.race, SuperheroModel.classes.superhero.race_id == SuperheroModel.classes.race.id - ) - .join( - SuperheroModel.classes.publisher, - SuperheroModel.classes.superhero.publisher_id == SuperheroModel.classes.publisher.id, - ) - .join( - SuperheroModel.classes.alignment, - SuperheroModel.classes.superhero.alignment_id == SuperheroModel.classes.alignment.id, - ) - .join(eye_color_alias, SuperheroModel.classes.superhero.eye_colour_id == eye_color_alias.id) - .join(hair_color_alias, SuperheroModel.classes.superhero.hair_colour_id == hair_color_alias.id) - .join(skin_color_alias, SuperheroModel.classes.superhero.skin_colour_id == skin_color_alias.id) - .join(hero_power, hero_power.c.hero_id == SuperheroModel.classes.superhero.id) - .join(SuperheroModel.classes.superpower, SuperheroModel.classes.superpower.id == hero_power.c.power_id) - .join(hero_attr, hero_attr.c.hero_id == SuperheroModel.classes.superhero.id) - .join(SuperheroModel.classes.attribute, SuperheroModel.classes.attribute.id == hero_attr.c.attribute_id) - .group_by( - SuperheroDBSchema.id, - SuperheroDBSchema.name, - SuperheroDBSchema.full_name, - SuperheroDBSchema.gender, - SuperheroDBSchema.race, - SuperheroDBSchema.publisher_name, - SuperheroDBSchema.alignment, - SuperheroDBSchema.weight_kg, - SuperheroDBSchema.height_cm, - SuperheroDBSchema.eye_color, - SuperheroDBSchema.hair_color, - SuperheroDBSchema.skin_color, + .join(Publisher, Publisher.id == Superhero.publisher_id) + .join(Race, Race.id == Superhero.race_id) + .join(Gender, Gender.id == Superhero.gender_id) + .join(Alignment, Alignment.id == Superhero.alignment_id) + .join(self.eye_colour, self.eye_colour.id == Superhero.eye_colour_id) + .join(self.hair_colour, self.hair_colour.id == Superhero.hair_colour_id) + .join(self.skin_colour, self.skin_colour.id == Superhero.skin_colour_id) + .join(HeroAttribute, HeroAttribute.hero_id == Superhero.id) + ) + + +class HeroAttributeView(DBInitMixin, SqlAlchemyBaseView, HeroAttributeFilterMixin, AttributeFilterMixin): + """ + View containing hero attribute data for querying superhero attributes. + """ + + def get_select(self) -> Select: + """ + Initializes the select object for the view. + + Returns: + The select object. + """ + return select( + HeroAttribute.hero_id, + Attribute.attribute_name, + HeroAttribute.attribute_value, + ).join(Attribute, Attribute.id == HeroAttribute.attribute_id) + + +class HeroPowerView(DBInitMixin, SqlAlchemyBaseView, SuperheroFilterMixin, SuperpowerFilterMixin): + """ + View containing hero superpowers data for querying hero superpowers. 
+ """ + + def get_select(self) -> Select: + """ + Initializes the select object for the view. + + Returns: + The select object. + """ + return ( + select( + HeroPower.hero_id, + Superhero.superhero_name, + Superpower.power_name, ) + .join(Superhero, Superhero.id == HeroPower.hero_id) + .join(Superpower, Superpower.id == HeroPower.power_id) ) + + +class PublisherView(DBInitMixin, SqlAlchemyBaseView, PublisherFilterMixin): + """ + View containing publisher data for querying publishers. + """ + + def get_select(self) -> Select: + """ + Initializes the select object for the view. + + Returns: + The select object. + """ + return select(Publisher.id, Publisher.publisher_name) From c4005fba266da4ca7578d4656cda0e7ef7ef5edb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Fri, 19 Jul 2024 11:41:09 +0200 Subject: [PATCH 23/34] update eval --- benchmarks/sql/bench.py | 3 ++- benchmarks/sql/bench/metrics/__init__.py | 3 ++- benchmarks/sql/bench/metrics/iql.py | 23 +++++++++++++++++++ benchmarks/sql/bench/metrics/sql.py | 1 + benchmarks/sql/bench/pipeline.py | 1 - .../sql/bench/views/structured/superhero.py | 2 +- benchmarks/sql/config/data/superhero.yaml | 2 +- benchmarks/sql/config/setup/mixed.yaml | 3 --- .../config/setup/views/superhero/mixed.yaml | 4 ---- .../setup/views/superhero/structured.yaml | 3 +++ 10 files changed, 33 insertions(+), 12 deletions(-) delete mode 100644 benchmarks/sql/config/setup/mixed.yaml delete mode 100644 benchmarks/sql/config/setup/views/superhero/mixed.yaml diff --git a/benchmarks/sql/bench.py b/benchmarks/sql/bench.py index fe6979b2..13f89c92 100644 --- a/benchmarks/sql/bench.py +++ b/benchmarks/sql/bench.py @@ -11,6 +11,7 @@ ExecutionAccuracy, HallucinatedIQL, MetricSet, + NoViewFound, UnsupportedIQL, ValidIQL, ValidSQL, @@ -37,12 +38,12 @@ async def bench(config: DictConfig) -> None: dataset = load_dataset(config.data.path, split=config.data.split) dataset = dataset.filter(lambda x: x["db_id"] == config.data.db_id and x["difficulty"] in config.data.difficulties) - dataset = dataset.select(range(10, 25)) pipeline = EvaluationPipeline(config) metrics = MetricSet( ExactMatchIQL, ExactMatchSQL, + NoViewFound, ValidIQL, ValidSQL, UnsupportedIQL, diff --git a/benchmarks/sql/bench/metrics/__init__.py b/benchmarks/sql/bench/metrics/__init__.py index a6425b25..c8277eba 100644 --- a/benchmarks/sql/bench/metrics/__init__.py +++ b/benchmarks/sql/bench/metrics/__init__.py @@ -1,5 +1,5 @@ from .base import Metric, MetricSet -from .iql import ExactMatchIQL, HallucinatedIQL, UnsupportedIQL, ValidIQL +from .iql import ExactMatchIQL, HallucinatedIQL, NoViewFound, UnsupportedIQL, ValidIQL from .sql import ExactMatchSQL, ExecutionAccuracy, ValidSQL __all__ = [ @@ -11,5 +11,6 @@ "HallucinatedIQL", "ValidIQL", "ValidSQL", + "NoViewFound", "ExecutionAccuracy", ] diff --git a/benchmarks/sql/bench/metrics/iql.py b/benchmarks/sql/bench/metrics/iql.py index 799b7fee..0131f56d 100644 --- a/benchmarks/sql/bench/metrics/iql.py +++ b/benchmarks/sql/bench/metrics/iql.py @@ -116,3 +116,26 @@ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: else 0.0 ) } + + +class NoViewFound(Metric): + """ + Ratio of queries with no view found. + """ + + def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: + """ + Calculates the ratio of queries with no view found. + + Args: + results: List of evaluation results. + + Returns: + Ratio of queries with no view found. 
+ """ + return { + "NO_VIEW": sum(isinstance(result.prediction.exception, NoViewFoundError) for result in results) + / len(results) + if results + else 0.0 + } diff --git a/benchmarks/sql/bench/metrics/sql.py b/benchmarks/sql/bench/metrics/sql.py index 42058b8b..bf71c0e6 100644 --- a/benchmarks/sql/bench/metrics/sql.py +++ b/benchmarks/sql/bench/metrics/sql.py @@ -150,6 +150,7 @@ def _execution_accuracy(self, result: EvaluationResult) -> bool: """ if result.prediction.sql is None: return False + try: result.reference.results = self._execute_query(result.reference.sql) result.prediction.results = self._execute_query(result.prediction.sql) diff --git a/benchmarks/sql/bench/pipeline.py b/benchmarks/sql/bench/pipeline.py index e25ddca8..e8686d3b 100644 --- a/benchmarks/sql/bench/pipeline.py +++ b/benchmarks/sql/bench/pipeline.py @@ -64,7 +64,6 @@ def dict(self) -> Dict[str, Any]: "question": self.question, "reference": self.reference.dict(), "prediction": self.prediction.dict(), - "db_url": self.db_url, } diff --git a/benchmarks/sql/bench/views/structured/superhero.py b/benchmarks/sql/bench/views/structured/superhero.py index 22ad6b7c..37c0c929 100644 --- a/benchmarks/sql/bench/views/structured/superhero.py +++ b/benchmarks/sql/bench/views/structured/superhero.py @@ -370,7 +370,7 @@ def filter_by_the_lowest_attribute_value(self) -> ColumnElement: Returns: The filter condition. """ - return HeroAttribute.attribute_value == select(func.min(HeroAttribute.attribute_value)) + return HeroAttribute.attribute_value == select(func.min(HeroAttribute.attribute_value)).scalar_subquery() class AttributeFilterMixin: diff --git a/benchmarks/sql/config/data/superhero.yaml b/benchmarks/sql/config/data/superhero.yaml index e2c60e16..f6b22233 100644 --- a/benchmarks/sql/config/data/superhero.yaml +++ b/benchmarks/sql/config/data/superhero.yaml @@ -3,4 +3,4 @@ path: "micpst/bird-dev-iql" split: "dev" db_id: "superhero" difficulties: ["simple"] -db_url: "sqlite:///data/superhero.db" +db_url: "sqlite:///../../../data/superhero.db" diff --git a/benchmarks/sql/config/setup/mixed.yaml b/benchmarks/sql/config/setup/mixed.yaml deleted file mode 100644 index eefa1d33..00000000 --- a/benchmarks/sql/config/setup/mixed.yaml +++ /dev/null @@ -1,3 +0,0 @@ -name: MIXED -defaults: - - views: superhero/mixed diff --git a/benchmarks/sql/config/setup/views/superhero/mixed.yaml b/benchmarks/sql/config/setup/views/superhero/mixed.yaml deleted file mode 100644 index dd744cc9..00000000 --- a/benchmarks/sql/config/setup/views/superhero/mixed.yaml +++ /dev/null @@ -1,4 +0,0 @@ -[ - "SuperheroView", - "SuperheroFreeformView", -] diff --git a/benchmarks/sql/config/setup/views/superhero/structured.yaml b/benchmarks/sql/config/setup/views/superhero/structured.yaml index 6223d633..7003ce96 100644 --- a/benchmarks/sql/config/setup/views/superhero/structured.yaml +++ b/benchmarks/sql/config/setup/views/superhero/structured.yaml @@ -1,3 +1,6 @@ [ + "HeroAttributeView", + "HeroPowerView", + "PublisherView", "SuperheroView", ] From b8226c76882080a3e1ed7b65a70988f72943e34a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Mon, 22 Jul 2024 10:11:55 +0200 Subject: [PATCH 24/34] update pipeline for aggregations --- benchmarks/sql/README.md | 35 +-- benchmarks/sql/bench.py | 62 ++-- benchmarks/sql/bench/metrics/__init__.py | 11 +- benchmarks/sql/bench/metrics/iql.py | 87 ++---- benchmarks/sql/bench/metrics/sql.py | 29 +- benchmarks/sql/bench/pipeline.py | 223 +++++++++++--- .../sql/bench/views/structured/superhero.py | 279 
++++++++++++++++-- benchmarks/sql/config/config.yaml | 3 +- benchmarks/sql/config/data/superhero.yaml | 5 +- .../structured.yaml => iql-view.yaml} | 6 +- benchmarks/sql/config/setup/iql.yaml | 3 - .../llm/claude-3.5-sonnet.yaml} | 0 .../gpt.yaml => setup/llm/gpt-3.5-turbo.yaml} | 0 benchmarks/sql/config/setup/sql-view.yaml | 5 + benchmarks/sql/config/setup/sql.yaml | 3 - .../setup/views/superhero/freeform.yaml | 3 - 16 files changed, 546 insertions(+), 208 deletions(-) rename benchmarks/sql/config/setup/{views/superhero/structured.yaml => iql-view.yaml} (58%) delete mode 100644 benchmarks/sql/config/setup/iql.yaml rename benchmarks/sql/config/{llm/claude.yaml => setup/llm/claude-3.5-sonnet.yaml} (100%) rename benchmarks/sql/config/{llm/gpt.yaml => setup/llm/gpt-3.5-turbo.yaml} (100%) create mode 100644 benchmarks/sql/config/setup/sql-view.yaml delete mode 100644 benchmarks/sql/config/setup/sql.yaml delete mode 100644 benchmarks/sql/config/setup/views/superhero/freeform.yaml diff --git a/benchmarks/sql/README.md b/benchmarks/sql/README.md index 5008c94c..96a4712c 100644 --- a/benchmarks/sql/README.md +++ b/benchmarks/sql/README.md @@ -1,10 +1,10 @@ # SQL benchmarks -This folder contains benchmarks for querying SQL databases with db-ally. This suite evaluates the following view setups: +This folder contains benchmarks for querying SQL databases with db-ally. This suite evaluates the following tasks: + +- `IQL_VIEW` - measures correctness of SQL queries generated by the structured views. +- `SQL_VIEW` - measures correctness of SQL queries generated by the freeform views. -- `structured` - measures correctness of SQL queries generated by the collection with structured views only. -- `freeform` - measures correctness of SQL queries generated by the collection with freeform views only. -- `mixed` - measures correctness of SQL queries generated by the collection with both structured and freeform views. All benchmarks are run on a dev split of the [BIRD](https://bird-bench.github.io/) dataset. For now, only one configuration is available to run the suite against the `superhero` database. We plan to extend it to all databases in the set to cover all cases. @@ -19,26 +19,20 @@ Before starting, download the `superhero.sqlite` database file from [BIRD](https Run the whole suite on the `superhero` database: ```bash -python bench.py --multirun setup=iql,sql,mixed data=superhero +python bench.py --multirun setup=iql-view,sql-view data=superhero ``` You can also run each evaluation separately or in subgroups: ```bash -python bench.py setup=iql -python bench.py --multirun setup=iql,sql -``` - -Change views for the setup: - -```bash -python bench.py setup=iql setup/views=new-db/structured +python bench.py setup=iql-view +python bench.py --multirun setup=iql-view,sql-view ``` Compare IQL generation performance on multiple LLMs: ```bash -python bench.py --multirun setup=iql llm=gpt,claude +python bench.py --multirun setup=iql-view setup/llm=gpt-3.5-turbo,claude-3.5-sonnet ``` ### Log to Neptune @@ -53,13 +47,7 @@ export NEPTUNE_PROJECT="WORKSPACE_NAME/PROJECT_NAME" Export evaluation results to Neptune: ```bash -python bench.py setup=iql neptune=True -``` - -## Run tests - -```bash -python -m pytest +python bench.py setup=iql-view neptune=True ``` ## Metrics @@ -69,7 +57,6 @@ This suite computes following metrics: - `EM_IQL` - ratio of predicated IQL queries that are identical to the ground truth ones. - `VAL_IQL` - ratio of valid IQL queries. - `UNSUPP_IQL` - ratio of unsupported IQL queries. 
-- `HAL_IQL` - ratio of hallucinated IQL queries. - `EM_SQL` - ratio of predicated SQL queries that are identical to the ground truth ones. - ... @@ -81,7 +68,9 @@ Evaluation dataset required fields: - `question` - natural langugage SQL prompt - `sql` - SQL corresponding to the SQL prompt -- `iql` - IQL corresponding to the SQL prompt +- `view` - view name corresponding to the SQL prompt +- `iql_filters` - IQL filters corresponding to the SQL prompt +- `iql_aggregation` - IQL agrregation corresponding to the SQL prompt - `difficulty` - SQL code difficulty label - `db_id` - database identifier diff --git a/benchmarks/sql/bench.py b/benchmarks/sql/bench.py index 13f89c92..4b969203 100644 --- a/benchmarks/sql/bench.py +++ b/benchmarks/sql/bench.py @@ -1,22 +1,13 @@ import asyncio import logging +from enum import Enum from pathlib import Path import hydra import neptune from bench.evaluator import Evaluator -from bench.metrics import ( - ExactMatchIQL, - ExactMatchSQL, - ExecutionAccuracy, - HallucinatedIQL, - MetricSet, - NoViewFound, - UnsupportedIQL, - ValidIQL, - ValidSQL, -) -from bench.pipeline import EvaluationPipeline +from bench.metrics import ExactMatchIQL, ExactMatchSQL, ExecutionAccuracy, MetricSet, UnsupportedIQL, ValidIQL +from bench.pipeline import IQLViewEvaluationPipeline, SQLViewEvaluationPipeline from bench.utils import save from datasets import load_dataset from neptune.utils import stringify_unsupported @@ -27,6 +18,34 @@ log = logging.getLogger(__name__) +class EvaluationType(Enum): + """ + Enum representing the evaluation type. + """ + + IQL = "IQL_VIEW" + SQL = "SQL_VIEW" + + +EVALUATION_PIPELINES = { + EvaluationType.IQL.value: IQLViewEvaluationPipeline, + EvaluationType.SQL.value: SQLViewEvaluationPipeline, +} + +EVALUATION_METRICS = { + EvaluationType.IQL.value: MetricSet( + ExactMatchIQL, + ValidIQL, + UnsupportedIQL, + ExecutionAccuracy, + ), + EvaluationType.SQL.value: MetricSet( + ExactMatchSQL, + ExecutionAccuracy, + ), +} + + async def bench(config: DictConfig) -> None: """ Function running evaluation for all datasets and evaluation tasks defined in hydra config. @@ -34,22 +53,14 @@ async def bench(config: DictConfig) -> None: Args: config: Hydra configuration. 
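To make the dataset contract documented in the README above concrete, a single record with the required fields might look like the following. All values here are invented for illustration; the actual split lives at `micpst/bird-dev-iql`, and the IQL calls shown are hypothetical.

```python
# Hypothetical example record; field values are illustrative only.
example = {
    "question": "How many superheroes are published by Marvel Comics?",
    "sql": "SELECT COUNT(*) FROM superhero s JOIN publisher p ON p.id = s.publisher_id "
           "WHERE p.publisher_name = 'Marvel Comics'",
    "view": "SuperheroView",
    "iql_filters": "filter_by_publisher_name('Marvel Comics')",
    "iql_aggregation": "count_superheroes()",
    "difficulty": "simple",
    "db_id": "superhero",
}
```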
""" - log.info("Starting evaluation for views: %s.", config.setup.views) + log.info("Starting evaluation: %s", config.setup.name) dataset = load_dataset(config.data.path, split=config.data.split) dataset = dataset.filter(lambda x: x["db_id"] == config.data.db_id and x["difficulty"] in config.data.difficulties) + dataset = dataset.select(range(30)) - pipeline = EvaluationPipeline(config) - metrics = MetricSet( - ExactMatchIQL, - ExactMatchSQL, - NoViewFound, - ValidIQL, - ValidSQL, - UnsupportedIQL, - HallucinatedIQL, - ExecutionAccuracy, - )(config) + pipeline = EVALUATION_PIPELINES[config.setup.name](config) + metrics = EVALUATION_METRICS[config.setup.name](config) evaluator = Evaluator(config.setup.name) results = await evaluator.compute( @@ -74,7 +85,8 @@ async def bench(config: DictConfig) -> None: run["sys/tags"].add( [ *config.views, - config.data.id, + config.data.db_id, + *config.data.difficulties, config.llm.model_name, ] ) diff --git a/benchmarks/sql/bench/metrics/__init__.py b/benchmarks/sql/bench/metrics/__init__.py index c8277eba..df3c8d4b 100644 --- a/benchmarks/sql/bench/metrics/__init__.py +++ b/benchmarks/sql/bench/metrics/__init__.py @@ -1,16 +1,13 @@ from .base import Metric, MetricSet -from .iql import ExactMatchIQL, HallucinatedIQL, NoViewFound, UnsupportedIQL, ValidIQL -from .sql import ExactMatchSQL, ExecutionAccuracy, ValidSQL +from .iql import ExactMatchIQL, UnsupportedIQL, ValidIQL +from .sql import ExactMatchSQL, ExecutionAccuracy __all__ = [ "Metric", "MetricSet", - "ExactMatchIQL", "ExactMatchSQL", - "UnsupportedIQL", - "HallucinatedIQL", + "ExactMatchIQL", "ValidIQL", - "ValidSQL", - "NoViewFound", + "UnsupportedIQL", "ExecutionAccuracy", ] diff --git a/benchmarks/sql/bench/metrics/iql.py b/benchmarks/sql/bench/metrics/iql.py index 0131f56d..1667628b 100644 --- a/benchmarks/sql/bench/metrics/iql.py +++ b/benchmarks/sql/bench/metrics/iql.py @@ -1,7 +1,6 @@ from typing import Any, Dict, List -from dbally.collection.exceptions import NoViewFoundError -from dbally.iql._exceptions import IQLError, IQLFunctionNotExists +from dbally.iql._exceptions import IQLError from dbally.iql_generator.prompt import UnsupportedQueryError from ..pipeline import EvaluationResult @@ -23,42 +22,14 @@ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: Returns: Ratio of predicated queries that are identical to the ground truth ones. """ - iql_results = [ - result - for result in results - if result.prediction.iql is not None or isinstance(result.prediction.exception, NoViewFoundError) + results = [ + result for result in results if result.prediction.iql is not None and result.reference.iql is not None ] return { "EM_IQL": ( - sum(result.prediction.iql == result.reference.iql for result in iql_results) / len(iql_results) - if iql_results - else 0.0 - ) - } - - -class ValidIQL(Metric): - """ - Ratio of valid IQL queries. - """ - - def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: - """ - Calculates the valid IQL ratio. - - Args: - results: List of evaluation results. - - Returns: - Valid IQL queries ratio. 
- """ - supported_queries = [result for result in results if result.prediction.iql is not None] - return { - "VAL_IQL": ( - sum(not isinstance(result.prediction.exception, IQLError) for result in supported_queries) - / len(supported_queries) - if supported_queries - else 0.0 + sum(result.prediction.iql == result.reference.iql for result in results) / len(results) + if results + else None ) } @@ -78,64 +49,68 @@ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: Returns: Unsupported queries ratio. """ - iql_queries = [ + results = [ result for result in results - if result.prediction.iql is not None or isinstance(result.prediction.exception, UnsupportedQueryError) + # TODO: Update filtering to filter out text-to-sql results + if result.prediction.iql is not None + and result.reference.iql is not None + or isinstance(result.prediction.exception, UnsupportedQueryError) ] return { "UNSUPP_IQL": ( - sum(isinstance(result.prediction.exception, UnsupportedQueryError) for result in iql_queries) - / len(iql_queries) - if iql_queries + sum(isinstance(result.prediction.exception, UnsupportedQueryError) for result in results) / len(results) + if results else 0.0 ) } -class HallucinatedIQL(Metric): +class ValidIQL(Metric): """ - Ratio of hallucinated IQL queries. + Ratio of valid IQL queries. """ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: """ - Calculates the hallucinated IQL ratio. + Calculates the valid IQL ratio. Args: results: List of evaluation results. Returns: - Hallucinated queries ratio. + Valid IQL queries ratio. """ - supported_queries = [result for result in results if result.prediction.iql is not None] + results = [result for result in results if result.prediction.iql is not None] return { - "HAL_IQL": ( - sum(isinstance(result, IQLFunctionNotExists) for result in supported_queries) / len(supported_queries) - if supported_queries + "VAL_IQL": ( + sum(not isinstance(result.prediction.exception, IQLError) for result in results) / len(results) + if results else 0.0 ) } -class NoViewFound(Metric): +class InvalidIQL(Metric): """ - Ratio of queries with no view found. + Ratio of invalid IQL queries. """ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: """ - Calculates the ratio of queries with no view found. + Calculates the invalid IQL ratio. Args: results: List of evaluation results. Returns: - Ratio of queries with no view found. + Invalid IQL queries ratio. """ + results = [result for result in results if result.prediction.iql is not None] return { - "NO_VIEW": sum(isinstance(result.prediction.exception, NoViewFoundError) for result in results) - / len(results) - if results - else 0.0 + "INV_IQL": ( + sum(isinstance(result.prediction.exception, IQLError) for result in results) / len(results) + if results + else 0.0 + ) } diff --git a/benchmarks/sql/bench/metrics/sql.py b/benchmarks/sql/bench/metrics/sql.py index bf71c0e6..88cedf09 100644 --- a/benchmarks/sql/bench/metrics/sql.py +++ b/benchmarks/sql/bench/metrics/sql.py @@ -5,8 +5,6 @@ from sqlalchemy import create_engine, text from sqlalchemy.exc import SQLAlchemyError -from dbally.views.freeform.text2sql.exceptions import Text2SQLError - from ..pipeline import EvaluationResult from .base import Metric @@ -35,32 +33,6 @@ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: } -class ValidSQL(Metric): - """ - Ratio of valid SQL queries for a given results. 
- """ - - def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: - """ - Calculates the valid SQL ratio. - - Args: - results: List of evaluation results. - - Returns: - Valid IQL ratio. - """ - supported_queries = [result for result in results if result.prediction.sql is not None] - return { - "VAL_SQL": ( - sum(not isinstance(result.prediction.exception, Text2SQLError) for result in supported_queries) - / len(supported_queries) - if supported_queries - else 0.0 - ) - } - - class _DBMixin: """ Mixin class for database operations. @@ -123,6 +95,7 @@ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: Execution accuracy score and valid efficiency score. """ accurate_results = [result for result in results if self._execution_accuracy(result)] + return { "EX": len(accurate_results) / len(results) if results else 0.0, "VES": sum( diff --git a/benchmarks/sql/bench/pipeline.py b/benchmarks/sql/bench/pipeline.py index e8686d3b..f7272fa0 100644 --- a/benchmarks/sql/bench/pipeline.py +++ b/benchmarks/sql/bench/pipeline.py @@ -1,30 +1,65 @@ +from abc import ABC, abstractmethod from dataclasses import dataclass, field -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Type from datasets import Dataset from sqlalchemy import create_engine from tqdm import tqdm -import dbally -from dbally.collection.collection import Collection -from dbally.collection.exceptions import NoViewFoundError from dbally.iql._exceptions import IQLError from dbally.iql_generator.prompt import UnsupportedQueryError from dbally.llms.base import LLM from dbally.llms.litellm import LiteLLM from dbally.llms.local import LocalLLM +from dbally.views.freeform.text2sql.view import BaseText2SQLView +from dbally.views.sqlalchemy_base import SqlAlchemyBaseView from .views import VIEWS_REGISTRY +@dataclass +class IQLResult: + """ + Represents the IQL result. + """ + + filters: Optional[str] = None + aggregation: Optional[str] = None + + def __eq__(self, other: "IQLResult") -> bool: + """ + Compares two IQL results. + + Args: + other: The other IQL result to compare. + + Returns: + True if the two IQL results are equal, False otherwise. + """ + return self.filters == other.filters and self.aggregation == other.aggregation + + def dict(self) -> Dict[str, Any]: + """ + Returns the dictionary representation of the object. + + Returns: + The dictionary representation. + """ + return { + "filters": self.filters, + "aggregation": self.aggregation, + } + + @dataclass class ExecutionResult: """ Represents the result of a single query execution. """ - iql: Optional[str] = None + view: Optional[str] = None sql: Optional[str] = None + iql: Optional[IQLResult] = None results: List[Dict[str, Any]] = field(default_factory=list) exception: Optional[Exception] = None execution_time: Optional[float] = None @@ -37,8 +72,10 @@ def dict(self) -> Dict[str, Any]: The dictionary representation. """ return { - "iql": self.iql, + "view": self.view, + "iql": self.iql.dict() if self.iql else None, "sql": self.sql, + "len_results": len(self.results), } @@ -51,7 +88,6 @@ class EvaluationResult: question: str reference: ExecutionResult prediction: ExecutionResult - db_url: Optional[str] = None def dict(self) -> Dict[str, Any]: """ @@ -67,7 +103,7 @@ def dict(self) -> Dict[str, Any]: } -class EvaluationPipeline: +class EvaluationPipeline(ABC): """ Collection evaluation pipeline. """ @@ -78,13 +114,36 @@ def __init__(self, config: Dict) -> None: Args: config: The configuration for the pipeline. 
- - Raises: - ValueError: If no valid views are found in the configuration. """ self.db = create_engine(config.data.db_url) - self.llm = self.get_llm(config.llm) - self.collection = self.get_collection(config.setup) + + @abstractmethod + async def __call__(self, dataset: Dataset) -> List[EvaluationResult]: + """ + Runs the evaluation pipeline. + + Args: + dataset: The evaluation dataset. + + Returns: + The list of evaluation results. + """ + + +class ViewEvaluationPipeline(EvaluationPipeline, ABC): + """ + Collection evaluation pipeline. + """ + + def __init__(self, config: Dict) -> None: + """ + Constructs the pipeline for evaluating IQL predictions. + + Args: + config: The configuration for the pipeline. + """ + super().__init__(config) + self.llm = self.get_llm(config.setup.llm) def get_llm(self, config: Dict) -> LLM: """ @@ -100,7 +159,23 @@ def get_llm(self, config: Dict) -> LLM: return LocalLLM(config.model_name.split("/", 1)[1]) return LiteLLM(config.model_name) - def get_collection(self, config: Dict) -> Collection: + +class IQLViewEvaluationPipeline(ViewEvaluationPipeline): + """ + Collection evaluation pipeline. + """ + + def __init__(self, config: Dict) -> None: + """ + Constructs the pipeline for evaluating IQL predictions. + + Args: + config: The configuration for the pipeline. + """ + super().__init__(config) + self.views = self.get_views(config.setup) + + def get_views(self, config: Dict) -> Dict[str, Type[SqlAlchemyBaseView]]: """ Returns the view object based on the view name. @@ -109,18 +184,93 @@ def get_collection(self, config: Dict) -> Collection: Returns: The view object. + """ + return {view: VIEWS_REGISTRY[view] for view in config.views} - Raises: - ValueError: If the view name is not supported. + async def __call__(self, dataset: Dataset) -> List[EvaluationResult]: """ - collection = dbally.create_collection(config.name, self.llm) - collection.n_retries = 0 + Runs the evaluation pipeline. - for view_name in config.views: - view_cls = VIEWS_REGISTRY[view_name] - collection.add(view_cls, lambda: view_cls(self.db)) # pylint: disable=cell-var-from-loop + Args: + dataset: The evaluation dataset. - return collection + Returns: + The list of evaluation results. + """ + results = [] + + for data in tqdm(dataset, desc="Evaluation"): + view = self.views[data["view"]](self.db) + try: + result = await view.ask( + query=data["question"], + llm=self.llm, + dry_run=True, + n_retries=0, + ) + # TODO: Refactor exception handling for IQLError for filters and aggregation + except IQLError as exc: + prediction = ExecutionResult( + view=data["view"], + iql=IQLResult(filters=exc.source), + exception=exc, + ) + except (UnsupportedQueryError, Exception) as exc: # pylint: disable=broad-except + prediction = ExecutionResult( + view=data["view"], + exception=exc, + ) + else: + prediction = ExecutionResult( + view=data["view"], + iql=IQLResult(filters=result.context["iql"]), + sql=result.context["sql"], + ) + + reference = ExecutionResult( + view=data["view"], + iql=IQLResult( + filters=data["iql_filters"], + aggregation=data["iql_aggregation"], + ), + sql=data["sql"], + ) + result = EvaluationResult( + question=data["question"], + reference=reference, + prediction=prediction, + ) + results.append(result) + + return results + + +class SQLViewEvaluationPipeline(ViewEvaluationPipeline): + """ + Collection evaluation pipeline. + """ + + def __init__(self, config: Dict) -> None: + """ + Constructs the pipeline for evaluating IQL predictions. + + Args: + config: The configuration for the pipeline. 
+ """ + super().__init__(config) + self.view = self.get_view(config.setup) + + def get_view(self, config: Dict) -> Type[BaseText2SQLView]: + """ + Returns the view object based on the view name. + + Args: + config: The view configuration. + + Returns: + The view object. + """ + return VIEWS_REGISTRY[config.view] async def __call__(self, dataset: Dataset) -> List[EvaluationResult]: """ @@ -135,36 +285,35 @@ async def __call__(self, dataset: Dataset) -> List[EvaluationResult]: results = [] for data in tqdm(dataset, desc="Evaluation"): + view = self.view(self.db) + try: - result = await self.collection.ask( - question=data["question"], + result = await view.ask( + query=data["question"], + llm=self.llm, dry_run=True, - return_natural_response=False, + n_retries=0, ) - except NoViewFoundError as exc: - prediction = ExecutionResult(exception=exc) - except IQLError as exc: - prediction = ExecutionResult(iql=exc.source, exception=exc) - except UnsupportedQueryError as exc: - prediction = ExecutionResult(iql="UNSUPPORTED_QUERY", exception=exc) - # TODO: Remove this exception handling once the Text2SQL view is fixed + # TODO: Remove this broad exception handling once the Text2SQL view is fixed except Exception as exc: # pylint: disable=broad-except - prediction = ExecutionResult(exception=exc) + prediction = ExecutionResult( + view=self.view.__name__, + exception=exc, + ) else: prediction = ExecutionResult( - iql=result.context.get("iql", None), - sql=result.context.get("sql", None), + view=self.view.__name__, + sql=result.context["sql"], ) reference = ExecutionResult( - iql=data["iql"], + view=data["view"], sql=data["sql"], ) result = EvaluationResult( question=data["question"], reference=reference, prediction=prediction, - db_url=str(self.db.url), ) results.append(result) diff --git a/benchmarks/sql/bench/views/structured/superhero.py b/benchmarks/sql/bench/views/structured/superhero.py index 37c0c929..65b1c756 100644 --- a/benchmarks/sql/bench/views/structured/superhero.py +++ b/benchmarks/sql/bench/views/structured/superhero.py @@ -67,7 +67,7 @@ def __init__(self, sqlalchemy_engine: Engine) -> None: super().__init__(sqlalchemy_engine) -class SuperheroFilterMixin: +class SuperheroFilterMixin: # pylint: disable=too-many-public-methods """ Mixin for filtering the view by the superhero attributes. """ @@ -121,6 +121,19 @@ def filter_by_superhero_full_name(self, superhero_full_name: str) -> ColumnEleme """ return Superhero.full_name == superhero_full_name + @view_filter() + def filter_by_superhero_first_name(self, superhero_first_name: str) -> ColumnElement: + """ + Filters the view by the simmilar full name of the superhero. + + Args: + superhero_first_name: The first name of the superhero. + + Returns: + The filter condition. + """ + return Superhero.full_name.like(f"{superhero_first_name}%") + @view_filter() def filter_by_height_cm(self, height_cm: float) -> ColumnElement: """ @@ -134,6 +147,55 @@ def filter_by_height_cm(self, height_cm: float) -> ColumnElement: """ return Superhero.height_cm == height_cm + @view_filter() + def filter_by_height_cm_greater_than(self, height_cm: float) -> ColumnElement: + """ + Filters the view by the height of the superhero. + + Args: + height_cm: The height of the superhero. + + Returns: + The filter condition. + """ + return Superhero.height_cm > height_cm + + @view_filter() + def filter_by_the_tallest(self) -> ColumnElement: + """ + Filter the view by the tallest superhero. + + Returns: + The filter condition. 
+ """ + return Superhero.height_cm == select(func.max(Superhero.height_cm)).scalar_subquery() + + @view_filter() + def filter_by_height_greater_than_percentage_of_average(self, average_percentage: int) -> ColumnElement: + """ + Filters the view by the height greater than the percentage of average of superheroes. + + Args: + average_percentage: The percentage of the average height. + + Returns: + The filter condition. + """ + return Superhero.height_cm * 100 > select(func.avg(Superhero.height_cm)).scalar_subquery() * average_percentage + + @view_filter() + def filter_by_height_cm_less_than(self, height_cm: float) -> ColumnElement: + """ + Filters the view by the height of the superhero. + + Args: + height_cm: The height of the superhero. + + Returns: + The filter condition. + """ + return Superhero.height_cm < height_cm + @view_filter() def filter_by_missing_weight(self) -> ColumnElement: """ @@ -145,7 +207,24 @@ def filter_by_missing_weight(self) -> ColumnElement: return Superhero.weight_kg == 0 or Superhero.weight_kg is None @view_filter() - def filter_by_weight_kg(self, weight_kg: float) -> ColumnElement: + def filter_by_the_heaviest(self) -> ColumnElement: + return + + @view_filter() + def filter_by_weight_greater_than_percentage_of_average(self, average_percentage: int) -> ColumnElement: + """ + Filters the view by the weight greater than the percentage of average of superheroes. + + Args: + average_percentage: The percentage of the average weight. + + Returns: + The filter condition. + """ + return Superhero.weight_kg * 100 > select(func.avg(Superhero.weight_kg)).scalar_subquery() * average_percentage + + @view_filter() + def filter_by_weight_kg(self, weight_kg: int) -> ColumnElement: """ Filters the view by the weight of the superhero. @@ -158,7 +237,7 @@ def filter_by_weight_kg(self, weight_kg: float) -> ColumnElement: return Superhero.weight_kg == weight_kg @view_filter() - def filter_by_weight_kg_bigger_than(self, weight_kg: float) -> ColumnElement: + def filter_by_weight_kg_greater_than(self, weight_kg: int) -> ColumnElement: """ Filters the view by the weight of the superhero. @@ -171,7 +250,7 @@ def filter_by_weight_kg_bigger_than(self, weight_kg: float) -> ColumnElement: return Superhero.weight_kg > weight_kg @view_filter() - def filter_by_weight_kg_lower_than(self, weight_kg: float) -> ColumnElement: + def filter_by_weight_kg_less_than(self, weight_kg: int) -> ColumnElement: """ Filters the view by the weight of the superhero. @@ -183,6 +262,139 @@ def filter_by_weight_kg_lower_than(self, weight_kg: float) -> ColumnElement: """ return Superhero.weight_kg < weight_kg + @view_filter() + def filter_by_number_powers(self, number_powers: int) -> ColumnElement: + """ + Filters the view by the number of superpowers. + + Args: + number_powers: The number of hero superpowers. + + Returns: + The filter condition. + """ + return Superhero.id.in_( + select(HeroPower.hero_id) + .group_by(HeroPower.hero_id) + .having(func.count(HeroPower.power_id) == number_powers) + ) + + @view_filter() + def filter_by_number_super_powers_greater_than(self, number_powers: int) -> ColumnElement: + """ + Filters the view by the number of superpowers. + + Args: + number_powers: The number of hero superpowers. + + Returns: + The filter condition. 
+ """ + return Superhero.id.in_( + select(HeroPower.hero_id).group_by(HeroPower.hero_id).having(func.count(HeroPower.power_id) > number_powers) + ) + + @view_filter() + def filter_by_number_powers_less_than(self, number_powers: int) -> ColumnElement: + """ + Filters the view by the number of superpowers. + + Args: + number_powers: The number of hero superpowers. + + Returns: + The filter condition. + """ + return Superhero.id.in_( + select(HeroPower.hero_id).group_by(HeroPower.hero_id).having(func.count(HeroPower.power_id) < number_powers) + ) + + @view_filter() + def filter_by_missing_publisher(self) -> ColumnElement: + """ + Filters the view by the missing publisher of the superhero. + + Returns: + The filter condition. + """ + return Superhero.publisher_id is None + + @view_filter() + def filter_by_super_power(self, super_power: str) -> ColumnElement: + """ + Filters the view by the hero superpower. + + Args: + super_power: The superpower of the superhero. + + Returns: + The filter condition. + """ + return ( + select(1) + .select_from(HeroPower) + .join(Superpower, Superpower.id == HeroPower.power_id) + .where(Superpower.power_name == super_power) + .where(Superhero.id == HeroPower.hero_id) + .exists() + ) + + @view_filter() + def filter_by_the_most_super_powers(self) -> ColumnElement: + pass + + @view_filter() + def filter_by_attribute_name(self, attribute_name: str) -> ColumnElement: + """ + Filters the view by the hero attribute name. + + Args: + attribute_name: The name of the hero attribute. + + Returns: + The filter condition. + """ + return ( + select(1) + .select_from(HeroAttribute) + .join(Attribute, Attribute.id == HeroAttribute.attribute_id) + .where(Attribute.attribute_name == attribute_name) + .where(Superhero.id == HeroAttribute.hero_id) + .exists() + ) + + @view_filter() + def filter_by_the_lowest_attribute_value(self) -> ColumnElement: + return True + + @view_filter() + def filter_by_the_highest_attribute_value(self) -> ColumnElement: + return True + + @view_filter() + def filter_by_attribute_value_between(self) -> ColumnElement: + return True + + @view_filter() + def filter_by_attribute_value(self) -> ColumnElement: + return True + + @view_filter() + def filter_by_the_fastest(self) -> ColumnElement: + return True + + @view_filter() + def filter_by_same_hair_and_eye_colour(self) -> ColumnElement: + return True + + @view_filter() + def filter_by_same_hair_and_skin_colour(self) -> ColumnElement: + return True + + @view_filter() + def filter_by_the_dumbest(self) -> ColumnElement: + return True + class SuperheroColourFilterMixin: """ @@ -348,6 +560,19 @@ def filter_by_attribute_value(self, attribute_value: int) -> ColumnElement: """ return HeroAttribute.attribute_value == attribute_value + @view_filter() + def filter_by_attribute_value_less_than(self, attribute_value: int) -> ColumnElement: + """ + Filters the view by the hero attribute value. + + Args: + attribute_value: The value of the hero attribute. + + Returns: + The filter condition. + """ + return HeroAttribute.attribute_value < attribute_value + @view_filter() def filter_by_attribute_value_between(self, begin_attribute_value: int, end_attribute_value: int) -> ColumnElement: """ @@ -372,6 +597,16 @@ def filter_by_the_lowest_attribute_value(self) -> ColumnElement: """ return HeroAttribute.attribute_value == select(func.min(HeroAttribute.attribute_value)).scalar_subquery() + @view_filter() + def filter_by_the_highest_attribute_value(self) -> ColumnElement: + """ + Filters the view by the highest hero attribute value. 
+ + Returns: + The filter condition. + """ + return HeroAttribute.attribute_value == select(func.max(HeroAttribute.attribute_value)).scalar_subquery() + class AttributeFilterMixin: """ @@ -403,10 +638,9 @@ class SuperheroView( # pylint: disable=too-many-ancestors AlignmentFilterMixin, GenderFilterMixin, RaceFilterMixin, - HeroAttributeFilterMixin, ): """ - View containing superhero data for querying superheroes. + View for querying only superheros data. """ def get_select(self) -> Select: @@ -438,13 +672,19 @@ def get_select(self) -> Select: .join(self.eye_colour, self.eye_colour.id == Superhero.eye_colour_id) .join(self.hair_colour, self.hair_colour.id == Superhero.hair_colour_id) .join(self.skin_colour, self.skin_colour.id == Superhero.skin_colour_id) - .join(HeroAttribute, HeroAttribute.hero_id == Superhero.id) ) -class HeroAttributeView(DBInitMixin, SqlAlchemyBaseView, HeroAttributeFilterMixin, AttributeFilterMixin): +class HeroAttributeView( + DBInitMixin, + SqlAlchemyBaseView, + HeroAttributeFilterMixin, + AttributeFilterMixin, + SuperheroFilterMixin, + AlignmentFilterMixin, +): """ - View containing hero attribute data for querying superhero attributes. + View for querying only hero attributes data. """ def get_select(self) -> Select: @@ -454,16 +694,21 @@ def get_select(self) -> Select: Returns: The select object. """ - return select( - HeroAttribute.hero_id, - Attribute.attribute_name, - HeroAttribute.attribute_value, - ).join(Attribute, Attribute.id == HeroAttribute.attribute_id) + return ( + select( + Attribute.attribute_name, + HeroAttribute.attribute_value, + ) + .join(Attribute, Attribute.id == HeroAttribute.attribute_id) + .join(Superhero, Superhero.id == HeroAttribute.hero_id) + .join(Alignment, Alignment.id == Superhero.alignment_id) + .join(Publisher, Publisher.id == Superhero.publisher_id) + ) class HeroPowerView(DBInitMixin, SqlAlchemyBaseView, SuperheroFilterMixin, SuperpowerFilterMixin): """ - View containing hero superpowers data for querying hero superpowers. + View for querying only hero powers data. """ def get_select(self) -> Select: @@ -476,7 +721,7 @@ def get_select(self) -> Select: return ( select( HeroPower.hero_id, - Superhero.superhero_name, + HeroPower.power_id, Superpower.power_name, ) .join(Superhero, Superhero.id == HeroPower.hero_id) @@ -486,7 +731,7 @@ def get_select(self) -> Select: class PublisherView(DBInitMixin, SqlAlchemyBaseView, PublisherFilterMixin): """ - View containing publisher data for querying publishers. + View for querying only publisher data. 
""" def get_select(self) -> Select: diff --git a/benchmarks/sql/config/config.yaml b/benchmarks/sql/config/config.yaml index c339f323..eb2e926f 100644 --- a/benchmarks/sql/config/config.yaml +++ b/benchmarks/sql/config/config.yaml @@ -1,7 +1,6 @@ defaults: - data: superhero - - setup: iql - - llm: gpt + - setup: iql-view - _self_ neptune: False diff --git a/benchmarks/sql/config/data/superhero.yaml b/benchmarks/sql/config/data/superhero.yaml index f6b22233..4664f969 100644 --- a/benchmarks/sql/config/data/superhero.yaml +++ b/benchmarks/sql/config/data/superhero.yaml @@ -1,6 +1,5 @@ -id: "superhero" -path: "micpst/bird-dev-iql" +path: "micpst/bird-iql" split: "dev" db_id: "superhero" -difficulties: ["simple"] +difficulties: ["simple", "moderate", "challenging"] db_url: "sqlite:///../../../data/superhero.db" diff --git a/benchmarks/sql/config/setup/views/superhero/structured.yaml b/benchmarks/sql/config/setup/iql-view.yaml similarity index 58% rename from benchmarks/sql/config/setup/views/superhero/structured.yaml rename to benchmarks/sql/config/setup/iql-view.yaml index 7003ce96..9b6bcdde 100644 --- a/benchmarks/sql/config/setup/views/superhero/structured.yaml +++ b/benchmarks/sql/config/setup/iql-view.yaml @@ -1,6 +1,10 @@ -[ +name: IQL_VIEW +views: [ "HeroAttributeView", "HeroPowerView", "PublisherView", "SuperheroView", ] + +defaults: + - llm: gpt-3.5-turbo diff --git a/benchmarks/sql/config/setup/iql.yaml b/benchmarks/sql/config/setup/iql.yaml deleted file mode 100644 index b0f0516b..00000000 --- a/benchmarks/sql/config/setup/iql.yaml +++ /dev/null @@ -1,3 +0,0 @@ -name: IQL -defaults: - - views: superhero/structured diff --git a/benchmarks/sql/config/llm/claude.yaml b/benchmarks/sql/config/setup/llm/claude-3.5-sonnet.yaml similarity index 100% rename from benchmarks/sql/config/llm/claude.yaml rename to benchmarks/sql/config/setup/llm/claude-3.5-sonnet.yaml diff --git a/benchmarks/sql/config/llm/gpt.yaml b/benchmarks/sql/config/setup/llm/gpt-3.5-turbo.yaml similarity index 100% rename from benchmarks/sql/config/llm/gpt.yaml rename to benchmarks/sql/config/setup/llm/gpt-3.5-turbo.yaml diff --git a/benchmarks/sql/config/setup/sql-view.yaml b/benchmarks/sql/config/setup/sql-view.yaml new file mode 100644 index 00000000..f501b0d8 --- /dev/null +++ b/benchmarks/sql/config/setup/sql-view.yaml @@ -0,0 +1,5 @@ +name: SQL_VIEW +view: SuperheroFreeformView + +defaults: + - llm: gpt-3.5-turbo diff --git a/benchmarks/sql/config/setup/sql.yaml b/benchmarks/sql/config/setup/sql.yaml deleted file mode 100644 index 82240ddf..00000000 --- a/benchmarks/sql/config/setup/sql.yaml +++ /dev/null @@ -1,3 +0,0 @@ -name: SQL -defaults: - - views: superhero/freeform diff --git a/benchmarks/sql/config/setup/views/superhero/freeform.yaml b/benchmarks/sql/config/setup/views/superhero/freeform.yaml deleted file mode 100644 index 424442d2..00000000 --- a/benchmarks/sql/config/setup/views/superhero/freeform.yaml +++ /dev/null @@ -1,3 +0,0 @@ -[ - "SuperheroFreeformView", -] From 053eb6127db31b96f46a91a1a572a4fa4b12b4a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Mon, 22 Jul 2024 10:15:11 +0200 Subject: [PATCH 25/34] fix pylint --- benchmarks/sql/bench/views/structured/superhero.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/sql/bench/views/structured/superhero.py b/benchmarks/sql/bench/views/structured/superhero.py index 65b1c756..f8e34341 100644 --- a/benchmarks/sql/bench/views/structured/superhero.py +++ 
b/benchmarks/sql/bench/views/structured/superhero.py @@ -675,7 +675,7 @@ def get_select(self) -> Select: ) -class HeroAttributeView( +class HeroAttributeView( # pylint: disable=too-many-ancestors DBInitMixin, SqlAlchemyBaseView, HeroAttributeFilterMixin, From ed2553195a44ffc441f5e3fa4c98d07683173fd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Tue, 23 Jul 2024 15:44:52 +0200 Subject: [PATCH 26/34] final pipeline --- benchmarks/sql/README.md | 37 +- benchmarks/sql/bench.py | 58 ++- benchmarks/sql/bench/evaluator.py | 21 +- benchmarks/sql/bench/loaders.py | 96 +++++ benchmarks/sql/bench/metrics/__init__.py | 6 +- benchmarks/sql/bench/metrics/base.py | 2 +- benchmarks/sql/bench/metrics/iql.py | 83 ++-- benchmarks/sql/bench/metrics/selector.py | 28 ++ benchmarks/sql/bench/metrics/sql.py | 2 +- benchmarks/sql/bench/pipeline.py | 320 --------------- benchmarks/sql/bench/pipelines/__init__.py | 13 + benchmarks/sql/bench/pipelines/base.py | 136 +++++++ benchmarks/sql/bench/pipelines/collection.py | 100 +++++ benchmarks/sql/bench/pipelines/view.py | 177 +++++++++ .../sql/bench/views/structured/superhero.py | 363 ++++++++++++------ benchmarks/sql/config/data/superhero.yaml | 2 +- benchmarks/sql/config/setup/collection.yaml | 12 + 17 files changed, 939 insertions(+), 517 deletions(-) create mode 100644 benchmarks/sql/bench/loaders.py create mode 100644 benchmarks/sql/bench/metrics/selector.py delete mode 100644 benchmarks/sql/bench/pipeline.py create mode 100644 benchmarks/sql/bench/pipelines/__init__.py create mode 100644 benchmarks/sql/bench/pipelines/base.py create mode 100644 benchmarks/sql/bench/pipelines/collection.py create mode 100644 benchmarks/sql/bench/pipelines/view.py create mode 100644 benchmarks/sql/config/setup/collection.yaml diff --git a/benchmarks/sql/README.md b/benchmarks/sql/README.md index 96a4712c..1f3b5cfa 100644 --- a/benchmarks/sql/README.md +++ b/benchmarks/sql/README.md @@ -1,11 +1,11 @@ # SQL benchmarks -This folder contains benchmarks for querying SQL databases with db-ally. This suite evaluates the following tasks: +This folder contains benchmarks for querying SQL databases with db-ally. This suite evaluates the following components: +- `COLLECTION` - measures correctness of SQL queries generated by the collection. - `IQL_VIEW` - measures correctness of SQL queries generated by the structured views. - `SQL_VIEW` - measures correctness of SQL queries generated by the freeform views. - All benchmarks are run on a dev split of the [BIRD](https://bird-bench.github.io/) dataset. For now, only one configuration is available to run the suite against the `superhero` database. We plan to extend it to all databases in the set to cover all cases. New PRs adding support for new databases from BIRD or SPIDER are welcome. 
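Note: the three component names above map one-to-one to evaluation pipelines registered in `bench.py` later in this patch. A minimal sketch of that dispatch (simplified; the real module also selects dataloaders and metric sets the same way):

```python
# Simplified sketch of the dispatch used in bench.py, shown in full later in
# this patch; the Hydra `setup.name` value picks the pipeline for a component.
from bench.pipelines import (
    CollectionEvaluationPipeline,
    IQLViewEvaluationPipeline,
    SQLViewEvaluationPipeline,
)

EVALUATION_PIPELINES = {
    "IQL_VIEW": IQLViewEvaluationPipeline,
    "SQL_VIEW": SQLViewEvaluationPipeline,
    "COLLECTION": CollectionEvaluationPipeline,
}

# `config` is the Hydra DictConfig passed to the bench entrypoint.
pipeline = EVALUATION_PIPELINES[config.setup.name](config)
```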
@@ -19,7 +19,7 @@ Before starting, download the `superhero.sqlite` database file from [BIRD](https Run the whole suite on the `superhero` database: ```bash -python bench.py --multirun setup=iql-view,sql-view data=superhero +python bench.py --multirun setup=iql-view,sql-view,collection data=superhero ``` You can also run each evaluation separately or in subgroups: @@ -33,6 +33,11 @@ Compare IQL generation performance on multiple LLMs: ```bash python bench.py --multirun setup=iql-view setup/llm=gpt-3.5-turbo,claude-3.5-sonnet +python bench.py --multirun setup=sql-view setup/llm=gpt-3.5-turbo,claude-3.5-sonnet +``` + +```bash +python bench.py --multirun setup=collection setup/llm@setup.generator_llm=gpt-3.5-turbo,claude-3.5-sonnet ``` ### Log to Neptune @@ -49,29 +54,3 @@ Export evaluation results to Neptune: ```bash python bench.py setup=iql-view neptune=True ``` - -## Metrics - -This suite computes following metrics: - -- `EM_IQL` - ratio of predicated IQL queries that are identical to the ground truth ones. -- `VAL_IQL` - ratio of valid IQL queries. -- `UNSUPP_IQL` - ratio of unsupported IQL queries. -- `EM_SQL` - ratio of predicated SQL queries that are identical to the ground truth ones. -- ... - -## Add new dataset - -In order to run this suite against your own dataset, upload it to [Hugging Face](https://huggingface.co) and make sure the data is in the format expected by the evaluation pipeline. - -Evaluation dataset required fields: - -- `question` - natural langugage SQL prompt -- `sql` - SQL corresponding to the SQL prompt -- `view` - view name corresponding to the SQL prompt -- `iql_filters` - IQL filters corresponding to the SQL prompt -- `iql_aggregation` - IQL agrregation corresponding to the SQL prompt -- `difficulty` - SQL code difficulty label -- `db_id` - database identifier - -In addition, add a database file in the `data/` folder and create a structure and freeform view in the `bench.views` module for evaluation. 
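For illustration, a single evaluation record carrying the fields consumed by the loaders and pipelines introduced in this patch might look like the following. This is a hypothetical sample with invented values, not an entry from the actual dataset:

```python
# Hypothetical BIRD-style evaluation record (values invented for illustration).
# The loaders filter on db_id, difficulty, and view; the pipelines read the
# question, iql_filters, iql_aggregation, and sql fields as references.
record = {
    "db_id": "superhero",
    "difficulty": "simple",
    "question": "How many superheroes are taller than 200 cm?",
    "view": "SuperheroView",
    "iql_filters": "filter_by_height_cm_greater_than(200)",
    "iql_aggregation": None,
    "sql": "SELECT COUNT(*) FROM superhero WHERE height_cm > 200;",
}
```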
diff --git a/benchmarks/sql/bench.py b/benchmarks/sql/bench.py index 4b969203..d2668e88 100644 --- a/benchmarks/sql/bench.py +++ b/benchmarks/sql/bench.py @@ -6,10 +6,20 @@ import hydra import neptune from bench.evaluator import Evaluator -from bench.metrics import ExactMatchIQL, ExactMatchSQL, ExecutionAccuracy, MetricSet, UnsupportedIQL, ValidIQL -from bench.pipeline import IQLViewEvaluationPipeline, SQLViewEvaluationPipeline +from bench.loaders import CollectionDataLoader, IQLViewDataLoader, SQLViewDataLoader +from bench.metrics import ( + ExactMatchAggregationIQL, + ExactMatchFiltersIQL, + ExactMatchIQL, + ExactMatchSQL, + ExecutionAccuracy, + MetricSet, + UnsupportedIQL, + ValidIQL, + ViewSelectionAccuracy, +) +from bench.pipelines import CollectionEvaluationPipeline, IQLViewEvaluationPipeline, SQLViewEvaluationPipeline from bench.utils import save -from datasets import load_dataset from neptune.utils import stringify_unsupported from omegaconf import DictConfig @@ -25,17 +35,28 @@ class EvaluationType(Enum): IQL = "IQL_VIEW" SQL = "SQL_VIEW" + E2E = "COLLECTION" +EVALUATION_DATALOADERS = { + EvaluationType.IQL.value: IQLViewDataLoader, + EvaluationType.SQL.value: SQLViewDataLoader, + EvaluationType.E2E.value: CollectionDataLoader, +} + EVALUATION_PIPELINES = { EvaluationType.IQL.value: IQLViewEvaluationPipeline, EvaluationType.SQL.value: SQLViewEvaluationPipeline, + EvaluationType.E2E.value: CollectionEvaluationPipeline, } EVALUATION_METRICS = { EvaluationType.IQL.value: MetricSet( ExactMatchIQL, + ExactMatchFiltersIQL, + ExactMatchAggregationIQL, ValidIQL, + ViewSelectionAccuracy, UnsupportedIQL, ExecutionAccuracy, ), @@ -43,6 +64,16 @@ class EvaluationType(Enum): ExactMatchSQL, ExecutionAccuracy, ), + EvaluationType.E2E.value: MetricSet( + ExactMatchIQL, + ExactMatchFiltersIQL, + ExactMatchAggregationIQL, + ValidIQL, + UnsupportedIQL, + ViewSelectionAccuracy, + ExactMatchSQL, + ExecutionAccuracy, + ), } @@ -55,17 +86,14 @@ async def bench(config: DictConfig) -> None: """ log.info("Starting evaluation: %s", config.setup.name) - dataset = load_dataset(config.data.path, split=config.data.split) - dataset = dataset.filter(lambda x: x["db_id"] == config.data.db_id and x["difficulty"] in config.data.difficulties) - dataset = dataset.select(range(30)) - + dataloader = EVALUATION_DATALOADERS[config.setup.name](config) pipeline = EVALUATION_PIPELINES[config.setup.name](config) metrics = EVALUATION_METRICS[config.setup.name](config) evaluator = Evaluator(config.setup.name) results = await evaluator.compute( - pipe=pipeline, - data=dataset, + pipeline=pipeline, + dataloader=dataloader, metrics=metrics, ) @@ -84,21 +112,19 @@ async def bench(config: DictConfig) -> None: run = neptune.init_run() run["sys/tags"].add( [ - *config.views, + config.setup.name, config.data.db_id, *config.data.difficulties, - config.llm.model_name, ] ) run["config"] = stringify_unsupported(config) - run["evaluation/results.json"].upload(results_file.as_posix()) - run["evaluation/metrics.json"].upload(metrics_file.as_posix()) run["evaluation/metrics"] = stringify_unsupported(results["metrics"]) - - log.info("Evaluation results logged to neptune at %s", run.get_url()) + run["evaluation/time_perf"] = stringify_unsupported(results["time_perf"]) + run["evaluation/metrics.json"].upload(metrics_file.as_posix()) + run["evaluation/results.json"].upload(results_file.as_posix()) -@hydra.main(config_path="config", config_name="config") +@hydra.main(config_path="config", config_name="config", version_base="3.2") def main(config: 
DictConfig) -> None: """ Function running evaluation for all datasets and evaluation tasks defined in hydra config. diff --git a/benchmarks/sql/bench/evaluator.py b/benchmarks/sql/bench/evaluator.py index fc3a2bf6..2a5a201d 100644 --- a/benchmarks/sql/bench/evaluator.py +++ b/benchmarks/sql/bench/evaluator.py @@ -2,9 +2,11 @@ from typing import Any, Callable, Dict, List, Tuple from datasets import Dataset +from tqdm.asyncio import tqdm +from .loaders import DataLoader from .metrics.base import MetricSet -from .pipeline import EvaluationPipeline, EvaluationResult +from .pipelines import EvaluationPipeline, EvaluationResult class Evaluator: @@ -23,22 +25,23 @@ def __init__(self, task: str) -> None: async def compute( self, - pipe: Callable, - data: Dataset, + pipeline: Callable, + dataloader: DataLoader, metrics: MetricSet, ) -> Dict[str, Any]: """ Compute the evaluation results for the given pipeline and data. Args: - pipe: The pipeline to be evaluated. - data: The evaluation data. + pipeline: The pipeline to be evaluated. + dataloader: The dataloader to load the data. metrics: The metrics to be computed. Returns: The evaluation results. """ - results, perf_results = await self._call_pipeline(pipe, data) + dataset = await dataloader.load() + results, perf_results = await self._call_pipeline(pipeline, dataset) computed_metrics = self._compute_metrics(metrics, results) results = self._results_processor(results) @@ -51,7 +54,7 @@ async def compute( async def _call_pipeline( self, pipe: EvaluationPipeline, - data: Dataset, + dataset: Dataset, ) -> Tuple[List[EvaluationResult], Dict[str, Any]]: """ Call the pipeline with the given data. @@ -64,9 +67,9 @@ async def _call_pipeline( The evaluation results and performance metrics. """ start_time = time.perf_counter() - pipe_output = await pipe(data) + pipe_outputs = await tqdm.gather(*[pipe(data) for data in dataset], desc="Evaluation") end_time = time.perf_counter() - return pipe_output, self._compute_time_perf(start_time, end_time, len(pipe_output)) + return pipe_outputs, self._compute_time_perf(start_time, end_time, len(pipe_outputs)) def _results_processor(self, results: List[EvaluationResult]) -> Dict[str, Any]: """ diff --git a/benchmarks/sql/bench/loaders.py b/benchmarks/sql/bench/loaders.py new file mode 100644 index 00000000..8ec23be3 --- /dev/null +++ b/benchmarks/sql/bench/loaders.py @@ -0,0 +1,96 @@ +from abc import ABC, abstractmethod +from typing import Dict, Iterable + +from datasets import Dataset, load_dataset + + +class DataLoader(ABC): + """ + Data loader. + """ + + def __init__(self, config: Dict) -> None: + self.config = config + + @abstractmethod + async def load(self) -> Iterable: + """ + Load the data. + + Returns: + The loaded data. + """ + + +class HuggingFaceDataLoader(DataLoader): + """ + Hugging Face data loader. + """ + + async def load(self) -> Dataset: + """ + Load the data from Hugging Face. + + Returns: + The loaded data. + """ + return load_dataset( + path=self.config.data.path, + split=self.config.data.split, + ) + + +class IQLViewDataLoader(HuggingFaceDataLoader): + """ + Data loader for IQL view evaluation. + """ + + async def load(self) -> Dataset: + """ + Load the data from Hugging Face and filter out samples without views. + + Returns: + The loaded data. 
+ """ + dataset = await super().load() + return dataset.filter( + lambda x: x["db_id"] == self.config.data.db_id + and x["difficulty"] in self.config.data.difficulties + and x["view"] is not None + ) + + +class SQLViewDataLoader(HuggingFaceDataLoader): + """ + Data loader for SQL view evaluation. + """ + + async def load(self) -> Dataset: + """ + Load the data from Hugging Face. + + Returns: + The loaded data. + """ + dataset = await super().load() + return dataset.filter( + lambda x: x["db_id"] == self.config.data.db_id and x["difficulty"] in self.config.data.difficulties + ) + + +class CollectionDataLoader(HuggingFaceDataLoader): + """ + Data loader for collection evaluation. + """ + + async def load(self) -> Dataset: + """ + Load the data from Hugging Face. + + Returns: + The loaded data. + """ + dataset = await super().load() + return dataset.filter( + lambda x: x["db_id"] == self.config.data.db_id and x["difficulty"] in self.config.data.difficulties + ) diff --git a/benchmarks/sql/bench/metrics/__init__.py b/benchmarks/sql/bench/metrics/__init__.py index df3c8d4b..86d72f78 100644 --- a/benchmarks/sql/bench/metrics/__init__.py +++ b/benchmarks/sql/bench/metrics/__init__.py @@ -1,5 +1,6 @@ from .base import Metric, MetricSet -from .iql import ExactMatchIQL, UnsupportedIQL, ValidIQL +from .iql import ExactMatchAggregationIQL, ExactMatchFiltersIQL, ExactMatchIQL, UnsupportedIQL, ValidIQL +from .selector import ViewSelectionAccuracy from .sql import ExactMatchSQL, ExecutionAccuracy __all__ = [ @@ -7,7 +8,10 @@ "MetricSet", "ExactMatchSQL", "ExactMatchIQL", + "ExactMatchFiltersIQL", + "ExactMatchAggregationIQL", "ValidIQL", + "ViewSelectionAccuracy", "UnsupportedIQL", "ExecutionAccuracy", ] diff --git a/benchmarks/sql/bench/metrics/base.py b/benchmarks/sql/bench/metrics/base.py index 2d4b095d..d0e78072 100644 --- a/benchmarks/sql/bench/metrics/base.py +++ b/benchmarks/sql/bench/metrics/base.py @@ -3,7 +3,7 @@ from typing_extensions import Self -from ..pipeline import EvaluationResult +from ..pipelines import EvaluationResult class Metric(ABC): diff --git a/benchmarks/sql/bench/metrics/iql.py b/benchmarks/sql/bench/metrics/iql.py index 1667628b..c9339f86 100644 --- a/benchmarks/sql/bench/metrics/iql.py +++ b/benchmarks/sql/bench/metrics/iql.py @@ -3,7 +3,7 @@ from dbally.iql._exceptions import IQLError from dbally.iql_generator.prompt import UnsupportedQueryError -from ..pipeline import EvaluationResult +from ..pipelines import EvaluationResult from .base import Metric @@ -22,9 +22,7 @@ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: Returns: Ratio of predicated queries that are identical to the ground truth ones. """ - results = [ - result for result in results if result.prediction.iql is not None and result.reference.iql is not None - ] + results = [result for result in results if result.prediction.iql is not None] return { "EM_IQL": ( sum(result.prediction.iql == result.reference.iql for result in results) / len(results) @@ -34,82 +32,105 @@ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: } -class UnsupportedIQL(Metric): +class ExactMatchFiltersIQL(Metric): """ - Ratio of unsupported IQL queries. + Ration of predicated IQL filters that are identical to the ground truth ones. """ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: """ - Calculates the unsupported IQL ratio. + Computes the exact match ratio. Args: results: List of evaluation results. Returns: - Unsupported queries ratio. 
+ Ratio of predicated queries that are identical to the ground truth ones. """ - results = [ - result - for result in results - # TODO: Update filtering to filter out text-to-sql results - if result.prediction.iql is not None - and result.reference.iql is not None - or isinstance(result.prediction.exception, UnsupportedQueryError) - ] + results = [result for result in results if result.prediction.iql is not None] return { - "UNSUPP_IQL": ( - sum(isinstance(result.prediction.exception, UnsupportedQueryError) for result in results) / len(results) + "EM_FLT_IQL": ( + sum(result.prediction.iql.filters == result.reference.iql.filters for result in results) / len(results) if results - else 0.0 + else None ) } -class ValidIQL(Metric): +class ExactMatchAggregationIQL(Metric): """ - Ratio of valid IQL queries. + Ratio of predicated aggregation that are identical to the ground truth ones. """ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: """ - Calculates the valid IQL ratio. + Computes the exact match ratio. Args: results: List of evaluation results. Returns: - Valid IQL queries ratio. + Ratio of predicated queries that are identical to the ground truth ones. """ results = [result for result in results if result.prediction.iql is not None] return { - "VAL_IQL": ( - sum(not isinstance(result.prediction.exception, IQLError) for result in results) / len(results) + "EM_AGG_IQL": ( + sum(result.prediction.iql.aggregation == result.reference.iql.aggregation for result in results) + / len(results) + if results + else None + ) + } + + +class UnsupportedIQL(Metric): + """ + Ratio of unsupported IQL queries. + """ + + def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: + """ + Calculates the unsupported IQL ratio. + + Args: + results: List of evaluation results. + + Returns: + Unsupported queries ratio. + """ + results = [ + result + for result in results + if result.prediction.iql is not None or isinstance(result.prediction.exception, UnsupportedQueryError) + ] + return { + "UNSUPP_IQL": ( + sum(isinstance(result.prediction.exception, UnsupportedQueryError) for result in results) / len(results) if results else 0.0 ) } -class InvalidIQL(Metric): +class ValidIQL(Metric): """ - Ratio of invalid IQL queries. + Ratio of valid IQL queries. """ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: """ - Calculates the invalid IQL ratio. + Calculates the valid IQL ratio. Args: results: List of evaluation results. Returns: - Invalid IQL queries ratio. + Valid IQL queries ratio. """ results = [result for result in results if result.prediction.iql is not None] return { - "INV_IQL": ( - sum(isinstance(result.prediction.exception, IQLError) for result in results) / len(results) + "VAL_IQL": ( + sum(not isinstance(result.prediction.exception, IQLError) for result in results) / len(results) if results else 0.0 ) diff --git a/benchmarks/sql/bench/metrics/selector.py b/benchmarks/sql/bench/metrics/selector.py new file mode 100644 index 00000000..66c8ab3b --- /dev/null +++ b/benchmarks/sql/bench/metrics/selector.py @@ -0,0 +1,28 @@ +from typing import Any, Dict, List + +from ..pipelines import EvaluationResult +from .base import Metric + + +class ViewSelectionAccuracy(Metric): + """ + Ratio of predicated queries that are identical to the ground truth ones. + """ + + def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: + """ + Computes the exact match ratio. + + Args: + results: List of evaluation results. 
+ + Returns: + Ratio of predicated queries that are identical to the ground truth ones. + """ + return { + "ACC_VIEW": ( + sum(result.prediction.view == result.reference.view for result in results) / len(results) + if results + else None + ) + } diff --git a/benchmarks/sql/bench/metrics/sql.py b/benchmarks/sql/bench/metrics/sql.py index 88cedf09..c8594455 100644 --- a/benchmarks/sql/bench/metrics/sql.py +++ b/benchmarks/sql/bench/metrics/sql.py @@ -5,7 +5,7 @@ from sqlalchemy import create_engine, text from sqlalchemy.exc import SQLAlchemyError -from ..pipeline import EvaluationResult +from ..pipelines import EvaluationResult from .base import Metric diff --git a/benchmarks/sql/bench/pipeline.py b/benchmarks/sql/bench/pipeline.py deleted file mode 100644 index f7272fa0..00000000 --- a/benchmarks/sql/bench/pipeline.py +++ /dev/null @@ -1,320 +0,0 @@ -from abc import ABC, abstractmethod -from dataclasses import dataclass, field -from typing import Any, Dict, List, Optional, Type - -from datasets import Dataset -from sqlalchemy import create_engine -from tqdm import tqdm - -from dbally.iql._exceptions import IQLError -from dbally.iql_generator.prompt import UnsupportedQueryError -from dbally.llms.base import LLM -from dbally.llms.litellm import LiteLLM -from dbally.llms.local import LocalLLM -from dbally.views.freeform.text2sql.view import BaseText2SQLView -from dbally.views.sqlalchemy_base import SqlAlchemyBaseView - -from .views import VIEWS_REGISTRY - - -@dataclass -class IQLResult: - """ - Represents the IQL result. - """ - - filters: Optional[str] = None - aggregation: Optional[str] = None - - def __eq__(self, other: "IQLResult") -> bool: - """ - Compares two IQL results. - - Args: - other: The other IQL result to compare. - - Returns: - True if the two IQL results are equal, False otherwise. - """ - return self.filters == other.filters and self.aggregation == other.aggregation - - def dict(self) -> Dict[str, Any]: - """ - Returns the dictionary representation of the object. - - Returns: - The dictionary representation. - """ - return { - "filters": self.filters, - "aggregation": self.aggregation, - } - - -@dataclass -class ExecutionResult: - """ - Represents the result of a single query execution. - """ - - view: Optional[str] = None - sql: Optional[str] = None - iql: Optional[IQLResult] = None - results: List[Dict[str, Any]] = field(default_factory=list) - exception: Optional[Exception] = None - execution_time: Optional[float] = None - - def dict(self) -> Dict[str, Any]: - """ - Returns the dictionary representation of the object. - - Returns: - The dictionary representation. - """ - return { - "view": self.view, - "iql": self.iql.dict() if self.iql else None, - "sql": self.sql, - "len_results": len(self.results), - } - - -@dataclass -class EvaluationResult: - """ - Represents the result of a single evaluation. - """ - - question: str - reference: ExecutionResult - prediction: ExecutionResult - - def dict(self) -> Dict[str, Any]: - """ - Returns the dictionary representation of the object. - - Returns: - The dictionary representation. - """ - return { - "question": self.question, - "reference": self.reference.dict(), - "prediction": self.prediction.dict(), - } - - -class EvaluationPipeline(ABC): - """ - Collection evaluation pipeline. - """ - - def __init__(self, config: Dict) -> None: - """ - Constructs the pipeline for evaluating IQL predictions. - - Args: - config: The configuration for the pipeline. 
- """ - self.db = create_engine(config.data.db_url) - - @abstractmethod - async def __call__(self, dataset: Dataset) -> List[EvaluationResult]: - """ - Runs the evaluation pipeline. - - Args: - dataset: The evaluation dataset. - - Returns: - The list of evaluation results. - """ - - -class ViewEvaluationPipeline(EvaluationPipeline, ABC): - """ - Collection evaluation pipeline. - """ - - def __init__(self, config: Dict) -> None: - """ - Constructs the pipeline for evaluating IQL predictions. - - Args: - config: The configuration for the pipeline. - """ - super().__init__(config) - self.llm = self.get_llm(config.setup.llm) - - def get_llm(self, config: Dict) -> LLM: - """ - Returns the LLM based on the configuration. - - Args: - config: The LLM configuration. - - Returns: - The LLM object. - """ - if config.model_name.startswith("local/"): - return LocalLLM(config.model_name.split("/", 1)[1]) - return LiteLLM(config.model_name) - - -class IQLViewEvaluationPipeline(ViewEvaluationPipeline): - """ - Collection evaluation pipeline. - """ - - def __init__(self, config: Dict) -> None: - """ - Constructs the pipeline for evaluating IQL predictions. - - Args: - config: The configuration for the pipeline. - """ - super().__init__(config) - self.views = self.get_views(config.setup) - - def get_views(self, config: Dict) -> Dict[str, Type[SqlAlchemyBaseView]]: - """ - Returns the view object based on the view name. - - Args: - config: The view configuration. - - Returns: - The view object. - """ - return {view: VIEWS_REGISTRY[view] for view in config.views} - - async def __call__(self, dataset: Dataset) -> List[EvaluationResult]: - """ - Runs the evaluation pipeline. - - Args: - dataset: The evaluation dataset. - - Returns: - The list of evaluation results. - """ - results = [] - - for data in tqdm(dataset, desc="Evaluation"): - view = self.views[data["view"]](self.db) - try: - result = await view.ask( - query=data["question"], - llm=self.llm, - dry_run=True, - n_retries=0, - ) - # TODO: Refactor exception handling for IQLError for filters and aggregation - except IQLError as exc: - prediction = ExecutionResult( - view=data["view"], - iql=IQLResult(filters=exc.source), - exception=exc, - ) - except (UnsupportedQueryError, Exception) as exc: # pylint: disable=broad-except - prediction = ExecutionResult( - view=data["view"], - exception=exc, - ) - else: - prediction = ExecutionResult( - view=data["view"], - iql=IQLResult(filters=result.context["iql"]), - sql=result.context["sql"], - ) - - reference = ExecutionResult( - view=data["view"], - iql=IQLResult( - filters=data["iql_filters"], - aggregation=data["iql_aggregation"], - ), - sql=data["sql"], - ) - result = EvaluationResult( - question=data["question"], - reference=reference, - prediction=prediction, - ) - results.append(result) - - return results - - -class SQLViewEvaluationPipeline(ViewEvaluationPipeline): - """ - Collection evaluation pipeline. - """ - - def __init__(self, config: Dict) -> None: - """ - Constructs the pipeline for evaluating IQL predictions. - - Args: - config: The configuration for the pipeline. - """ - super().__init__(config) - self.view = self.get_view(config.setup) - - def get_view(self, config: Dict) -> Type[BaseText2SQLView]: - """ - Returns the view object based on the view name. - - Args: - config: The view configuration. - - Returns: - The view object. - """ - return VIEWS_REGISTRY[config.view] - - async def __call__(self, dataset: Dataset) -> List[EvaluationResult]: - """ - Runs the evaluation pipeline. 
- - Args: - dataset: The evaluation dataset. - - Returns: - The list of evaluation results. - """ - results = [] - - for data in tqdm(dataset, desc="Evaluation"): - view = self.view(self.db) - - try: - result = await view.ask( - query=data["question"], - llm=self.llm, - dry_run=True, - n_retries=0, - ) - # TODO: Remove this broad exception handling once the Text2SQL view is fixed - except Exception as exc: # pylint: disable=broad-except - prediction = ExecutionResult( - view=self.view.__name__, - exception=exc, - ) - else: - prediction = ExecutionResult( - view=self.view.__name__, - sql=result.context["sql"], - ) - - reference = ExecutionResult( - view=data["view"], - sql=data["sql"], - ) - result = EvaluationResult( - question=data["question"], - reference=reference, - prediction=prediction, - ) - results.append(result) - - return results diff --git a/benchmarks/sql/bench/pipelines/__init__.py b/benchmarks/sql/bench/pipelines/__init__.py new file mode 100644 index 00000000..5985a951 --- /dev/null +++ b/benchmarks/sql/bench/pipelines/__init__.py @@ -0,0 +1,13 @@ +from .base import EvaluationPipeline, EvaluationResult, ExecutionResult, IQLResult +from .collection import CollectionEvaluationPipeline +from .view import IQLViewEvaluationPipeline, SQLViewEvaluationPipeline + +__all__ = [ + "CollectionEvaluationPipeline", + "EvaluationPipeline", + "EvaluationResult", + "ExecutionResult", + "IQLResult", + "IQLViewEvaluationPipeline", + "SQLViewEvaluationPipeline", +] diff --git a/benchmarks/sql/bench/pipelines/base.py b/benchmarks/sql/bench/pipelines/base.py new file mode 100644 index 00000000..58ba5186 --- /dev/null +++ b/benchmarks/sql/bench/pipelines/base.py @@ -0,0 +1,136 @@ +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional + +from sqlalchemy import create_engine + +from dbally.llms.base import LLM +from dbally.llms.litellm import LiteLLM +from dbally.llms.local import LocalLLM + + +@dataclass +class IQLResult: + """ + Represents the IQL result. + """ + + filters: Optional[str] = None + aggregation: Optional[str] = None + + def __eq__(self, other: "IQLResult") -> bool: + """ + Compares two IQL results. + + Args: + other: The other IQL result to compare. + + Returns: + True if the two IQL results are equal, False otherwise. + """ + return self.filters == other.filters and self.aggregation == other.aggregation + + def dict(self) -> Dict[str, Any]: + """ + Returns the dictionary representation of the object. + + Returns: + The dictionary representation. + """ + return { + "filters": self.filters, + "aggregation": self.aggregation, + } + + +@dataclass +class ExecutionResult: + """ + Represents the result of a single query execution. + """ + + view: Optional[str] = None + sql: Optional[str] = None + iql: Optional[IQLResult] = None + results: List[Dict[str, Any]] = field(default_factory=list) + exception: Optional[Exception] = None + execution_time: Optional[float] = None + + def dict(self) -> Dict[str, Any]: + """ + Returns the dictionary representation of the object. + + Returns: + The dictionary representation. + """ + return { + "view": self.view, + "iql": self.iql.dict() if self.iql else None, + "sql": self.sql, + "len_results": len(self.results), + } + + +@dataclass +class EvaluationResult: + """ + Represents the result of a single evaluation. 
+ """ + + question: str + reference: ExecutionResult + prediction: ExecutionResult + + def dict(self) -> Dict[str, Any]: + """ + Returns the dictionary representation of the object. + + Returns: + The dictionary representation. + """ + return { + "question": self.question, + "reference": self.reference.dict(), + "prediction": self.prediction.dict(), + } + + +class EvaluationPipeline(ABC): + """ + Collection evaluation pipeline. + """ + + def __init__(self, config: Dict) -> None: + """ + Constructs the pipeline for evaluating IQL predictions. + + Args: + config: The configuration for the pipeline. + """ + self.db = create_engine(config.data.db_url) + + def get_llm(self, config: Dict) -> LLM: + """ + Returns the LLM based on the configuration. + + Args: + config: The LLM configuration. + + Returns: + The LLM object. + """ + if config.model_name.startswith("local/"): + return LocalLLM(config.model_name.split("/", 1)[1]) + return LiteLLM(config.model_name) + + @abstractmethod + async def __call__(self, data: Dict[str, Any]) -> EvaluationResult: + """ + Runs the evaluation pipeline. + + Args: + data: The evaluation data. + + Returns: + The evaluation result. + """ diff --git a/benchmarks/sql/bench/pipelines/collection.py b/benchmarks/sql/bench/pipelines/collection.py new file mode 100644 index 00000000..3245e82d --- /dev/null +++ b/benchmarks/sql/bench/pipelines/collection.py @@ -0,0 +1,100 @@ +from typing import Any, Dict + +import dbally +from dbally.collection.collection import Collection +from dbally.iql._exceptions import IQLError +from dbally.iql_generator.prompt import UnsupportedQueryError +from dbally.view_selection.llm_view_selector import LLMViewSelector + +from ..views import VIEWS_REGISTRY +from .base import EvaluationPipeline, EvaluationResult, ExecutionResult, IQLResult + + +class CollectionEvaluationPipeline(EvaluationPipeline): + """ + Collection evaluation pipeline. + """ + + def __init__(self, config: Dict) -> None: + """ + Constructs the pipeline for evaluating collection predictions. + + Args: + config: The configuration for the pipeline. + """ + super().__init__(config) + self.collection = self.get_collection(config.setup) + + def get_collection(self, config: Dict) -> Collection: + """ + Sets up the collection based on the configuration. + + Args: + config: The collection configuration. + + Returns: + The collection. + """ + generator_llm = self.get_llm(config.generator_llm) + selector_llm = self.get_llm(config.selector_llm) + view_selector = LLMViewSelector(selector_llm) + + collection = dbally.create_collection( + name=config.name, + llm=generator_llm, + view_selector=view_selector, + ) + collection.n_retries = 0 + + for view_name in config.views: + view_cls = VIEWS_REGISTRY[view_name] + collection.add(view_cls, lambda: view_cls(self.db)) # pylint: disable=cell-var-from-loop + + return collection + + async def __call__(self, data: Dict[str, Any]) -> EvaluationResult: + """ + Runs the collection evaluation pipeline. + + Args: + data: The evaluation data. + + Returns: + The evaluation result. 
+ """ + try: + result = await self.collection.ask( + question=data["question"], + dry_run=True, + return_natural_response=False, + ) + # TODO: Refactor exception handling for IQLError for filters and aggregation + except IQLError as exc: + prediction = ExecutionResult( + iql=IQLResult(filters=exc.source), + exception=exc, + ) + # TODO: Remove this broad exception handling once the Text2SQL view is fixed + except (UnsupportedQueryError, Exception) as exc: # pylint: disable=broad-except + prediction = ExecutionResult(exception=exc) + else: + iql = IQLResult(filters=result.context["iql"]) if "iql" in result.context else None + prediction = ExecutionResult( + view=result.view_name, + iql=iql, + sql=result.context.get("sql"), + ) + + reference = ExecutionResult( + view=data["view"], + iql=IQLResult( + filters=data["iql_filters"], + aggregation=data["iql_aggregation"], + ), + sql=data["sql"], + ) + return EvaluationResult( + question=data["question"], + reference=reference, + prediction=prediction, + ) diff --git a/benchmarks/sql/bench/pipelines/view.py b/benchmarks/sql/bench/pipelines/view.py new file mode 100644 index 00000000..f0108b42 --- /dev/null +++ b/benchmarks/sql/bench/pipelines/view.py @@ -0,0 +1,177 @@ +from abc import ABC +from typing import Any, Dict, Type + +from dbally.iql._exceptions import IQLError +from dbally.iql_generator.prompt import UnsupportedQueryError +from dbally.views.freeform.text2sql.view import BaseText2SQLView +from dbally.views.sqlalchemy_base import SqlAlchemyBaseView + +from ..views import VIEWS_REGISTRY +from .base import EvaluationPipeline, EvaluationResult, ExecutionResult, IQLResult + + +class ViewEvaluationPipeline(EvaluationPipeline, ABC): + """ + Collection evaluation pipeline. + """ + + def __init__(self, config: Dict) -> None: + """ + Constructs the pipeline for evaluating IQL predictions. + + Args: + config: The configuration for the pipeline. + """ + super().__init__(config) + self.llm = self.get_llm(config.setup.llm) + + +class IQLViewEvaluationPipeline(ViewEvaluationPipeline): + """ + Collection evaluation pipeline. + """ + + def __init__(self, config: Dict) -> None: + """ + Constructs the pipeline for evaluating IQL predictions. + + Args: + config: The configuration for the pipeline. + """ + super().__init__(config) + self.views = self.get_views(config.setup) + + def get_views(self, config: Dict) -> Dict[str, Type[SqlAlchemyBaseView]]: + """ + Returns the view object based on the view name. + + Args: + config: The view configuration. + + Returns: + The view object. + """ + return {view: VIEWS_REGISTRY[view] for view in config.views} + + async def __call__(self, data: Dict[str, Any]) -> EvaluationResult: + """ + Runs the evaluation pipeline. + + Args: + data: The evaluation data. + + Returns: + The evaluation result. 
+ """ + view = self.views[data["view"]](self.db) + try: + result = await view.ask( + query=data["question"], + llm=self.llm, + dry_run=True, + n_retries=0, + ) + # TODO: Refactor exception handling for IQLError for filters and aggregation + except IQLError as exc: + prediction = ExecutionResult( + view=data["view"], + iql=IQLResult(filters=exc.source), + exception=exc, + ) + except (UnsupportedQueryError, Exception) as exc: # pylint: disable=broad-except + prediction = ExecutionResult( + view=data["view"], + exception=exc, + ) + else: + prediction = ExecutionResult( + view=data["view"], + iql=IQLResult(filters=result.context["iql"]), + sql=result.context["sql"], + ) + + reference = ExecutionResult( + view=data["view"], + iql=IQLResult( + filters=data["iql_filters"], + aggregation=data["iql_aggregation"], + ), + sql=data["sql"], + ) + return EvaluationResult( + question=data["question"], + reference=reference, + prediction=prediction, + ) + + +class SQLViewEvaluationPipeline(ViewEvaluationPipeline): + """ + Collection evaluation pipeline. + """ + + def __init__(self, config: Dict) -> None: + """ + Constructs the pipeline for evaluating IQL predictions. + + Args: + config: The configuration for the pipeline. + """ + super().__init__(config) + self.view = self.get_view(config.setup) + + def get_view(self, config: Dict) -> Type[BaseText2SQLView]: + """ + Returns the view object based on the view name. + + Args: + config: The view configuration. + + Returns: + The view object. + """ + return VIEWS_REGISTRY[config.view] + + async def __call__(self, data: Dict[str, Any]) -> EvaluationResult: + """ + Runs the evaluation pipeline. + + Args: + data: The evaluation data. + + Returns: + The evaluation result. + """ + view = self.view(self.db) + try: + result = await view.ask( + query=data["question"], + llm=self.llm, + dry_run=True, + n_retries=0, + ) + # TODO: Remove this broad exception handling once the Text2SQL view is fixed + except Exception as exc: # pylint: disable=broad-except + prediction = ExecutionResult( + view=self.view.__name__, + exception=exc, + ) + else: + prediction = ExecutionResult( + view=self.view.__name__, + sql=result.context["sql"], + ) + + reference = ExecutionResult( + view=data["view"], + iql=IQLResult( + filters=data["iql_filters"], + aggregation=data["iql_aggregation"], + ), + sql=data["sql"], + ) + return EvaluationResult( + question=data["question"], + reference=reference, + prediction=prediction, + ) diff --git a/benchmarks/sql/bench/views/structured/superhero.py b/benchmarks/sql/bench/views/structured/superhero.py index f8e34341..1e09da8a 100644 --- a/benchmarks/sql/bench/views/structured/superhero.py +++ b/benchmarks/sql/bench/views/structured/superhero.py @@ -1,4 +1,5 @@ -# pylint: disable=missing-docstring, missing-return-doc, missing-param-doc +# pylint: disable=missing-docstring, missing-return-doc, missing-param-doc, singleton-comparison, consider-using-in +# flake8: noqa from typing import Literal @@ -63,7 +64,6 @@ def __init__(self, sqlalchemy_engine: Engine) -> None: sqlalchemy_engine: The database engine. """ DeferredReflection.prepare(sqlalchemy_engine) - super().__init__(sqlalchemy_engine) @@ -106,7 +106,7 @@ def filter_by_missing_superhero_full_name(self) -> ColumnElement: Returns: The filter condition. 
""" - return Superhero.full_name is None + return Superhero.full_name == None @view_filter() def filter_by_superhero_full_name(self, superhero_full_name: str) -> ColumnElement: @@ -148,7 +148,7 @@ def filter_by_height_cm(self, height_cm: float) -> ColumnElement: return Superhero.height_cm == height_cm @view_filter() - def filter_by_height_cm_greater_than(self, height_cm: float) -> ColumnElement: + def filter_by_height_cm_less_than(self, height_cm: float) -> ColumnElement: """ Filters the view by the height of the superhero. @@ -158,17 +158,20 @@ def filter_by_height_cm_greater_than(self, height_cm: float) -> ColumnElement: Returns: The filter condition. """ - return Superhero.height_cm > height_cm + return Superhero.height_cm < height_cm @view_filter() - def filter_by_the_tallest(self) -> ColumnElement: + def filter_by_height_cm_greater_than(self, height_cm: float) -> ColumnElement: """ - Filter the view by the tallest superhero. + Filters the view by the height of the superhero. + + Args: + height_cm: The height of the superhero. Returns: The filter condition. """ - return Superhero.height_cm == select(func.max(Superhero.height_cm)).scalar_subquery() + return Superhero.height_cm > height_cm @view_filter() def filter_by_height_greater_than_percentage_of_average(self, average_percentage: int) -> ColumnElement: @@ -184,17 +187,14 @@ def filter_by_height_greater_than_percentage_of_average(self, average_percentage return Superhero.height_cm * 100 > select(func.avg(Superhero.height_cm)).scalar_subquery() * average_percentage @view_filter() - def filter_by_height_cm_less_than(self, height_cm: float) -> ColumnElement: + def filter_by_the_tallest(self) -> ColumnElement: """ - Filters the view by the height of the superhero. - - Args: - height_cm: The height of the superhero. + Filter the view by the tallest superhero. Returns: The filter condition. """ - return Superhero.height_cm < height_cm + return Superhero.height_cm == select(func.max(Superhero.height_cm)).scalar_subquery() @view_filter() def filter_by_missing_weight(self) -> ColumnElement: @@ -204,27 +204,23 @@ def filter_by_missing_weight(self) -> ColumnElement: Returns: The filter condition. """ - return Superhero.weight_kg == 0 or Superhero.weight_kg is None - - @view_filter() - def filter_by_the_heaviest(self) -> ColumnElement: - return + return Superhero.weight_kg == 0 or Superhero.weight_kg == None @view_filter() - def filter_by_weight_greater_than_percentage_of_average(self, average_percentage: int) -> ColumnElement: + def filter_by_weight_kg(self, weight_kg: int) -> ColumnElement: """ - Filters the view by the weight greater than the percentage of average of superheroes. + Filters the view by the weight of the superhero. Args: - average_percentage: The percentage of the average weight. + weight_kg: The weight of the superhero. Returns: The filter condition. """ - return Superhero.weight_kg * 100 > select(func.avg(Superhero.weight_kg)).scalar_subquery() * average_percentage + return Superhero.weight_kg == weight_kg @view_filter() - def filter_by_weight_kg(self, weight_kg: int) -> ColumnElement: + def filter_by_weight_kg_greater_than(self, weight_kg: int) -> ColumnElement: """ Filters the view by the weight of the superhero. @@ -234,10 +230,10 @@ def filter_by_weight_kg(self, weight_kg: int) -> ColumnElement: Returns: The filter condition. 
""" - return Superhero.weight_kg == weight_kg + return Superhero.weight_kg > weight_kg @view_filter() - def filter_by_weight_kg_greater_than(self, weight_kg: int) -> ColumnElement: + def filter_by_weight_kg_less_than(self, weight_kg: int) -> ColumnElement: """ Filters the view by the weight of the superhero. @@ -247,20 +243,46 @@ def filter_by_weight_kg_greater_than(self, weight_kg: int) -> ColumnElement: Returns: The filter condition. """ - return Superhero.weight_kg > weight_kg + return Superhero.weight_kg < weight_kg @view_filter() - def filter_by_weight_kg_less_than(self, weight_kg: int) -> ColumnElement: + def filter_by_weight_greater_than_percentage_of_average(self, average_percentage: int) -> ColumnElement: """ - Filters the view by the weight of the superhero. + Filters the view by the weight greater than the percentage of average of superheroes. Args: - weight_kg: The weight of the superhero. + average_percentage: The percentage of the average weight. Returns: The filter condition. """ - return Superhero.weight_kg < weight_kg + return Superhero.weight_kg * 100 > select(func.avg(Superhero.weight_kg)).scalar_subquery() * average_percentage + + @view_filter() + def filter_by_the_heaviest(self) -> ColumnElement: + """ + Filters the view by the heaviest superhero. + + Returns: + The filter condition. + """ + return Superhero.weight_kg == select(func.max(Superhero.weight_kg)).scalar_subquery() + + @view_filter() + def filter_by_missing_publisher(self) -> ColumnElement: + """ + Filters the view by the missing publisher of the superhero. + + Returns: + The filter condition. + """ + return Superhero.publisher_id == None + + +class SuperheroHeroPowerFilterMixin: + """ + Mixin for filtering the view by the superhero superpowers. + """ @view_filter() def filter_by_number_powers(self, number_powers: int) -> ColumnElement: @@ -310,38 +332,42 @@ def filter_by_number_powers_less_than(self, number_powers: int) -> ColumnElement ) @view_filter() - def filter_by_missing_publisher(self) -> ColumnElement: + def filter_by_power_name(self, power_name: str) -> ColumnElement: """ - Filters the view by the missing publisher of the superhero. + Filters the view by the superpower name. + + Args: + power_name: The name of the superpower. Returns: The filter condition. """ - return Superhero.publisher_id is None + return Superhero.id.in_( + select(HeroPower.hero_id) + .join(Superpower, Superpower.id == HeroPower.power_id) + .where(Superpower.power_name == power_name) + ) @view_filter() - def filter_by_super_power(self, super_power: str) -> ColumnElement: + def filter_by_the_most_super_powers(self) -> ColumnElement: """ - Filters the view by the hero superpower. - - Args: - super_power: The superpower of the superhero. + Filters the view by the most superpowers. Returns: The filter condition. """ - return ( - select(1) - .select_from(HeroPower) - .join(Superpower, Superpower.id == HeroPower.power_id) - .where(Superpower.power_name == super_power) - .where(Superhero.id == HeroPower.hero_id) - .exists() + return Superhero.id.in_( + select(HeroPower.hero_id) + .group_by(HeroPower.hero_id) + .order_by(func.count(HeroPower.power_id).desc()) + .limit(1) ) - @view_filter() - def filter_by_the_most_super_powers(self) -> ColumnElement: - pass + +class SuperheroHeroAttributeFilterMixin: + """ + Mixin for filtering the view by the superhero attributes. 
+ """ @view_filter() def filter_by_attribute_name(self, attribute_name: str) -> ColumnElement: @@ -354,46 +380,123 @@ def filter_by_attribute_name(self, attribute_name: str) -> ColumnElement: Returns: The filter condition. """ - return ( - select(1) - .select_from(HeroAttribute) + return Superpower.id.in_( + select(HeroAttribute.hero_id) .join(Attribute, Attribute.id == HeroAttribute.attribute_id) .where(Attribute.attribute_name == attribute_name) - .where(Superhero.id == HeroAttribute.hero_id) - .exists() + ) + + @view_filter() + def filter_by_attribute_value(self, attribute_value: int) -> ColumnElement: + """ + Filters the view by the hero attribute value. + + Args: + attribute_value: The value of the hero attribute. + + Returns: + The filter condition. + """ + return Superhero.id.in_( + select(HeroAttribute.hero_id) + .group_by(HeroAttribute.hero_id) + .having(HeroAttribute.attribute_value == attribute_value) ) @view_filter() def filter_by_the_lowest_attribute_value(self) -> ColumnElement: - return True + """ + Filters the view by the lowest hero attribute value. + + Returns: + The filter condition. + """ + return Superhero.id.in_( + select(HeroAttribute.hero_id) + .group_by(HeroAttribute.hero_id) + .having(HeroAttribute.attribute_value == select(func.min(HeroAttribute.attribute_value)).scalar_subquery()) + ) @view_filter() def filter_by_the_highest_attribute_value(self) -> ColumnElement: - return True + """ + Filters the view by the highest hero attribute value. - @view_filter() - def filter_by_attribute_value_between(self) -> ColumnElement: - return True + Returns: + The filter condition. + """ + return Superhero.id.in_( + select(HeroAttribute.hero_id) + .group_by(HeroAttribute.hero_id) + .having(HeroAttribute.attribute_value == select(func.max(HeroAttribute.attribute_value)).scalar_subquery()) + ) @view_filter() - def filter_by_attribute_value(self) -> ColumnElement: - return True + def filter_by_attribute_value_less_than(self, attribute_value: int) -> ColumnElement: + """ + Filters the view by the hero attribute value. - @view_filter() - def filter_by_the_fastest(self) -> ColumnElement: - return True + Args: + attribute_value: The value of the hero attribute. + + Returns: + The filter condition. + """ + return Superhero.id.in_( + select(HeroAttribute.hero_id) + .group_by(HeroAttribute.hero_id) + .having(func.min(HeroAttribute.attribute_value) < attribute_value) + ) @view_filter() - def filter_by_same_hair_and_eye_colour(self) -> ColumnElement: - return True + def filter_by_attribute_value_between(self, begin_attribute_value: int, end_attribute_value: int) -> ColumnElement: + """ + Filters the view by the hero attribute value. + + Args: + begin_attribute_value: The begin value of the hero attribute. + end_attribute_value: The end value of the hero attribute. + + Returns: + The filter condition. + """ + return Superhero.id.in_( + select(HeroAttribute.hero_id) + .group_by(HeroAttribute.hero_id) + .having(HeroAttribute.attribute_value.between(begin_attribute_value, end_attribute_value)) + ) @view_filter() - def filter_by_same_hair_and_skin_colour(self) -> ColumnElement: - return True + def filter_by_the_fastest(self) -> ColumnElement: + """ + Filters the view by the fastest superhero. + + Returns: + The filter condition. 
+ """ + return Superhero.id.in_( + select(HeroAttribute.hero_id) + .join(Attribute, Attribute.id == HeroAttribute.attribute_id) + .where(Attribute.attribute_name == "Speed") + .group_by(HeroAttribute.hero_id) + .having(HeroAttribute.attribute_value == select(func.max(HeroAttribute.attribute_value)).scalar_subquery()) + ) @view_filter() def filter_by_the_dumbest(self) -> ColumnElement: - return True + """ + Filters the view by the dumbest superhero. + + Returns: + The filter condition. + """ + return Superhero.id.in_( + select(HeroAttribute.hero_id) + .join(Attribute, Attribute.id == HeroAttribute.attribute_id) + .where(Attribute.attribute_name == "Intelligence") + .group_by(HeroAttribute.hero_id) + .having(HeroAttribute.attribute_value == select(func.min(HeroAttribute.attribute_value)).scalar_subquery()) + ) class SuperheroColourFilterMixin: @@ -446,6 +549,26 @@ def filter_by_skin_colour(self, skin_colour: str) -> ColumnElement: """ return self.skin_colour.colour == skin_colour + @view_filter() + def filter_by_same_hair_and_eye_colour(self) -> ColumnElement: + """ + Filters the view by the superhero with the same hair and eye colour. + + Returns: + The filter condition. + """ + return self.eye_colour.colour == self.hair_colour.colour + + @view_filter() + def filter_by_same_hair_and_skin_colour(self) -> ColumnElement: + """ + Filters the view by the superhero with the same hair and skin colour. + + Returns: + The filter condition. + """ + return self.hair_colour.colour == self.skin_colour.colour + class PublisherFilterMixin: """ @@ -466,6 +589,43 @@ def filter_by_publisher_name(self, publisher_name: str) -> ColumnElement: return Publisher.publisher_name == publisher_name +class PublisherSuperheroMixin: + """ + Mixin for filtering the publisher view by superheros. + """ + + @view_filter() + def filter_by_superhero_name(self, superhero_name: str) -> ColumnElement: + """ + Filters the view by the superhero name. + + Args: + superhero_name: The name of the superhero. + + Returns: + The filter condition. + """ + return Publisher.id.in_(select(Superhero.publisher_id).where(Superhero.superhero_name == superhero_name)) + + @view_filter() + def filter_by_the_slowest_superhero(self) -> ColumnElement: + """ + Filters the view by the slowest superhero. + + Returns: + The filter condition. + """ + return Publisher.id.in_( + select(Superhero.publisher_id) + .join(HeroAttribute, HeroAttribute.hero_id == Superhero.id) + .join(Attribute, Attribute.id == HeroAttribute.attribute_id) + .where( + Attribute.attribute_name == "Speed", + HeroAttribute.attribute_value == select(func.min(HeroAttribute.attribute_value)).scalar_subquery(), + ) + ) + + class AlignmentFilterMixin: """ Mixin for filtering the view by the alignment attributes. @@ -548,64 +708,46 @@ class HeroAttributeFilterMixin: """ @view_filter() - def filter_by_attribute_value(self, attribute_value: int) -> ColumnElement: - """ - Filters the view by the hero attribute value. - - Args: - attribute_value: The value of the hero attribute. - - Returns: - The filter condition. - """ - return HeroAttribute.attribute_value == attribute_value - - @view_filter() - def filter_by_attribute_value_less_than(self, attribute_value: int) -> ColumnElement: + def filter_by_the_lowest_attribute_value(self) -> ColumnElement: """ - Filters the view by the hero attribute value. - - Args: - attribute_value: The value of the hero attribute. + Filters the view by the lowest hero attribute value. Returns: The filter condition. 
""" - return HeroAttribute.attribute_value < attribute_value + return HeroAttribute.attribute_value == select(func.min(HeroAttribute.attribute_value)).scalar_subquery() @view_filter() - def filter_by_attribute_value_between(self, begin_attribute_value: int, end_attribute_value: int) -> ColumnElement: + def filter_by_the_highest_attribute_value(self) -> ColumnElement: """ - Filters the view by the hero attribute value. - - Args: - begin_attribute_value: The begin value of the hero attribute. - end_attribute_value: The end value of the hero attribute. + Filters the view by the highest hero attribute value. Returns: The filter condition. """ - return HeroAttribute.attribute_value.between(begin_attribute_value, end_attribute_value) + return HeroAttribute.attribute_value == select(func.max(HeroAttribute.attribute_value)).scalar_subquery() - @view_filter() - def filter_by_the_lowest_attribute_value(self) -> ColumnElement: - """ - Filters the view by the lowest hero attribute value. - Returns: - The filter condition. - """ - return HeroAttribute.attribute_value == select(func.min(HeroAttribute.attribute_value)).scalar_subquery() +class HeroPowerFilterMixin: + """ + Mixin for filtering the view by the hero power. + """ @view_filter() - def filter_by_the_highest_attribute_value(self) -> ColumnElement: + def filter_by_the_most_popular_power(self) -> ColumnElement: """ - Filters the view by the highest hero attribute value. + Filters the view by the most popular hero power. Returns: The filter condition. """ - return HeroAttribute.attribute_value == select(func.max(HeroAttribute.attribute_value)).scalar_subquery() + return HeroPower.power_id == ( + select(HeroPower.power_id) + .group_by(HeroPower.power_id) + .order_by(func.count(HeroPower.power_id).desc()) + .limit(1) + .scalar_subquery() + ) class AttributeFilterMixin: @@ -634,6 +776,8 @@ class SuperheroView( # pylint: disable=too-many-ancestors SqlAlchemyBaseView, SuperheroFilterMixin, SuperheroColourFilterMixin, + SuperheroHeroPowerFilterMixin, + SuperheroHeroAttributeFilterMixin, PublisherFilterMixin, AlignmentFilterMixin, GenderFilterMixin, @@ -706,7 +850,7 @@ def get_select(self) -> Select: ) -class HeroPowerView(DBInitMixin, SqlAlchemyBaseView, SuperheroFilterMixin, SuperpowerFilterMixin): +class HeroPowerView(DBInitMixin, SqlAlchemyBaseView, HeroPowerFilterMixin, SuperheroFilterMixin, SuperpowerFilterMixin): """ View for querying only hero powers data. """ @@ -721,15 +865,18 @@ def get_select(self) -> Select: return ( select( HeroPower.hero_id, + Alignment.alignment, HeroPower.power_id, Superpower.power_name, ) .join(Superhero, Superhero.id == HeroPower.hero_id) + .join(Alignment, Alignment.id == Superhero.alignment_id) .join(Superpower, Superpower.id == HeroPower.power_id) + .group_by(HeroPower.power_id) ) -class PublisherView(DBInitMixin, SqlAlchemyBaseView, PublisherFilterMixin): +class PublisherView(DBInitMixin, SqlAlchemyBaseView, PublisherFilterMixin, PublisherSuperheroMixin): """ View for querying only publisher data. 
""" diff --git a/benchmarks/sql/config/data/superhero.yaml b/benchmarks/sql/config/data/superhero.yaml index 4664f969..bb556c46 100644 --- a/benchmarks/sql/config/data/superhero.yaml +++ b/benchmarks/sql/config/data/superhero.yaml @@ -2,4 +2,4 @@ path: "micpst/bird-iql" split: "dev" db_id: "superhero" difficulties: ["simple", "moderate", "challenging"] -db_url: "sqlite:///../../../data/superhero.db" +db_url: "sqlite:///data/superhero.db" diff --git a/benchmarks/sql/config/setup/collection.yaml b/benchmarks/sql/config/setup/collection.yaml new file mode 100644 index 00000000..3a7073b0 --- /dev/null +++ b/benchmarks/sql/config/setup/collection.yaml @@ -0,0 +1,12 @@ +name: COLLECTION +views: [ + "HeroAttributeView", + "HeroPowerView", + "PublisherView", + "SuperheroView", +] +fallback: "SuperheroFreeformView" + +defaults: + - llm@selector_llm: gpt-3.5-turbo + - llm@generator_llm: gpt-3.5-turbo From cd63a3829a6bc1698abb5aa4eaa87e66877e6554 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Tue, 23 Jul 2024 15:48:53 +0200 Subject: [PATCH 27/34] fix ci --- benchmarks/sql/bench/views/structured/superhero.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/benchmarks/sql/bench/views/structured/superhero.py b/benchmarks/sql/bench/views/structured/superhero.py index 1e09da8a..1cbecb8e 100644 --- a/benchmarks/sql/bench/views/structured/superhero.py +++ b/benchmarks/sql/bench/views/structured/superhero.py @@ -1,4 +1,4 @@ -# pylint: disable=missing-docstring, missing-return-doc, missing-param-doc, singleton-comparison, consider-using-in +# pylint: disable=missing-docstring, missing-return-doc, missing-param-doc, singleton-comparison, consider-using-in, too-many-ancestors, too-many-public-methods # flake8: noqa from typing import Literal @@ -67,7 +67,7 @@ def __init__(self, sqlalchemy_engine: Engine) -> None: super().__init__(sqlalchemy_engine) -class SuperheroFilterMixin: # pylint: disable=too-many-public-methods +class SuperheroFilterMixin: """ Mixin for filtering the view by the superhero attributes. 
""" @@ -771,7 +771,7 @@ def filter_by_attribute_name( return Attribute.attribute_name == attribute_name -class SuperheroView( # pylint: disable=too-many-ancestors +class SuperheroView( DBInitMixin, SqlAlchemyBaseView, SuperheroFilterMixin, @@ -819,7 +819,7 @@ def get_select(self) -> Select: ) -class HeroAttributeView( # pylint: disable=too-many-ancestors +class HeroAttributeView( DBInitMixin, SqlAlchemyBaseView, HeroAttributeFilterMixin, From 1c8fb18c017d1aeb8dc6f8cb18c2a98114759bd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Wed, 24 Jul 2024 09:57:53 +0200 Subject: [PATCH 28/34] add tests + update README --- benchmarks/sql/README.md | 21 +++-- benchmarks/sql/bench/pipelines/collection.py | 11 +++ .../sql/bench/views/structured/superhero.py | 12 ++- .../sql/config/setup/llm/claude-3-haiku.yaml | 1 + .../sql/config/setup/llm/claude-3-opus.yaml | 1 + .../config/setup/llm/claude-3.5-sonnet.yaml | 2 +- .../sql/config/setup/llm/gpt-3.5-turbo.yaml | 2 +- .../sql/config/setup/llm/gpt-4-turbo.yaml | 1 + benchmarks/sql/config/setup/llm/gpt-4o.yaml | 1 + benchmarks/sql/tests/test_evaluator.py | 82 +++++++++---------- 10 files changed, 76 insertions(+), 58 deletions(-) create mode 100644 benchmarks/sql/config/setup/llm/claude-3-haiku.yaml create mode 100644 benchmarks/sql/config/setup/llm/claude-3-opus.yaml create mode 100644 benchmarks/sql/config/setup/llm/gpt-4-turbo.yaml create mode 100644 benchmarks/sql/config/setup/llm/gpt-4o.yaml diff --git a/benchmarks/sql/README.md b/benchmarks/sql/README.md index 1f3b5cfa..bb49dc80 100644 --- a/benchmarks/sql/README.md +++ b/benchmarks/sql/README.md @@ -6,9 +6,7 @@ This folder contains benchmarks for querying SQL databases with db-ally. This su - `IQL_VIEW` - measures correctness of SQL queries generated by the structured views. - `SQL_VIEW` - measures correctness of SQL queries generated by the freeform views. -All benchmarks are run on a dev split of the [BIRD](https://bird-bench.github.io/) dataset. For now, only one configuration is available to run the suite against the `superhero` database. We plan to extend it to all databases in the set to cover all cases. - -New PRs adding support for new databases from BIRD or SPIDER are welcome. +All benchmarks are run on a dev split of the [BIRD](https://bird-bench.github.io/) dataset. For now, one configuration is available to run the suite against the `superhero` database. ## Run benchmarks @@ -16,7 +14,7 @@ New PRs adding support for new databases from BIRD or SPIDER are welcome. Before starting, download the `superhero.sqlite` database file from [BIRD](https://bird-bench.github.io/), change its extension to `*.db` and place it in the `data/` folder. 
-Run the whole suite on the `superhero` database: +Run the whole suite on the `superhero` database with `gpt-3.5-turbo`: ```bash python bench.py --multirun setup=iql-view,sql-view,collection data=superhero @@ -29,15 +27,20 @@ python bench.py setup=iql-view python bench.py --multirun setup=iql-view,sql-view ``` -Compare IQL generation performance on multiple LLMs: +Compare IQL/SQL generation performance on multiple LLMs: ```bash python bench.py --multirun setup=iql-view setup/llm=gpt-3.5-turbo,claude-3.5-sonnet python bench.py --multirun setup=sql-view setup/llm=gpt-3.5-turbo,claude-3.5-sonnet ``` +For the `collection` steup, you need to specify models for both the view selection and the IQL generation step: + ```bash -python bench.py --multirun setup=collection setup/llm@setup.generator_llm=gpt-3.5-turbo,claude-3.5-sonnet +python bench.py --multirun \ + setup=collection \ + setup/llm@setup.selector_llm=gpt-3.5-turbo,claude-3.5-sonnet \ + setup/llm@setup.generator_llm=gpt-3.5-turbo,claude-3.5-sonnet ``` ### Log to Neptune @@ -54,3 +57,9 @@ Export evaluation results to Neptune: ```bash python bench.py setup=iql-view neptune=True ``` + +## Run tests + +```bash +python -m pytest +``` diff --git a/benchmarks/sql/bench/pipelines/collection.py b/benchmarks/sql/bench/pipelines/collection.py index 3245e82d..1efe9b9c 100644 --- a/benchmarks/sql/bench/pipelines/collection.py +++ b/benchmarks/sql/bench/pipelines/collection.py @@ -50,6 +50,17 @@ def get_collection(self, config: Dict) -> Collection: view_cls = VIEWS_REGISTRY[view_name] collection.add(view_cls, lambda: view_cls(self.db)) # pylint: disable=cell-var-from-loop + if config.fallback: + fallback = dbally.create_collection( + name=config.fallback, + llm=generator_llm, + view_selector=view_selector, + ) + fallback.n_retries = 0 + fallback_cls = VIEWS_REGISTRY[config.fallback] + fallback.add(fallback_cls, lambda: fallback_cls(self.db)) + collection.set_fallback(fallback) + return collection async def __call__(self, data: Dict[str, Any]) -> EvaluationResult: diff --git a/benchmarks/sql/bench/views/structured/superhero.py b/benchmarks/sql/bench/views/structured/superhero.py index 1cbecb8e..76f9e290 100644 --- a/benchmarks/sql/bench/views/structured/superhero.py +++ b/benchmarks/sql/bench/views/structured/superhero.py @@ -784,7 +784,8 @@ class SuperheroView( RaceFilterMixin, ): """ - View for querying only superheros data. + View for querying only superheros data. Contains the superhero id, superhero name, full name, height, weight, + publisher name, gender, race, alignment, eye colour, hair colour, skin colour. """ def get_select(self) -> Select: @@ -828,7 +829,7 @@ class HeroAttributeView( AlignmentFilterMixin, ): """ - View for querying only hero attributes data. + View for querying only hero attributes data. Contains the attribute name and attribute value. """ def get_select(self) -> Select: @@ -852,7 +853,7 @@ def get_select(self) -> Select: class HeroPowerView(DBInitMixin, SqlAlchemyBaseView, HeroPowerFilterMixin, SuperheroFilterMixin, SuperpowerFilterMixin): """ - View for querying only hero powers data. + View for querying only hero super powers data. Contains the power id and power name. 
""" def get_select(self) -> Select: @@ -864,13 +865,10 @@ def get_select(self) -> Select: """ return ( select( - HeroPower.hero_id, - Alignment.alignment, HeroPower.power_id, Superpower.power_name, ) .join(Superhero, Superhero.id == HeroPower.hero_id) - .join(Alignment, Alignment.id == Superhero.alignment_id) .join(Superpower, Superpower.id == HeroPower.power_id) .group_by(HeroPower.power_id) ) @@ -878,7 +876,7 @@ def get_select(self) -> Select: class PublisherView(DBInitMixin, SqlAlchemyBaseView, PublisherFilterMixin, PublisherSuperheroMixin): """ - View for querying only publisher data. + View for querying only publisher data. Contains the publisher id and publisher name. """ def get_select(self) -> Select: diff --git a/benchmarks/sql/config/setup/llm/claude-3-haiku.yaml b/benchmarks/sql/config/setup/llm/claude-3-haiku.yaml new file mode 100644 index 00000000..5bff1f8f --- /dev/null +++ b/benchmarks/sql/config/setup/llm/claude-3-haiku.yaml @@ -0,0 +1 @@ +model_name: claude-3-haiku-20240307 diff --git a/benchmarks/sql/config/setup/llm/claude-3-opus.yaml b/benchmarks/sql/config/setup/llm/claude-3-opus.yaml new file mode 100644 index 00000000..e979cb7c --- /dev/null +++ b/benchmarks/sql/config/setup/llm/claude-3-opus.yaml @@ -0,0 +1 @@ +model_name: claude-3-opus-20240229 diff --git a/benchmarks/sql/config/setup/llm/claude-3.5-sonnet.yaml b/benchmarks/sql/config/setup/llm/claude-3.5-sonnet.yaml index 2eee59c7..3aa9a2ee 100644 --- a/benchmarks/sql/config/setup/llm/claude-3.5-sonnet.yaml +++ b/benchmarks/sql/config/setup/llm/claude-3.5-sonnet.yaml @@ -1 +1 @@ -model_name: "claude-3-5-sonnet-20240620" +model_name: claude-3-5-sonnet-20240620 diff --git a/benchmarks/sql/config/setup/llm/gpt-3.5-turbo.yaml b/benchmarks/sql/config/setup/llm/gpt-3.5-turbo.yaml index eff838ef..d0025309 100644 --- a/benchmarks/sql/config/setup/llm/gpt-3.5-turbo.yaml +++ b/benchmarks/sql/config/setup/llm/gpt-3.5-turbo.yaml @@ -1 +1 @@ -model_name: "gpt-3.5-turbo" +model_name: gpt-3.5-turbo diff --git a/benchmarks/sql/config/setup/llm/gpt-4-turbo.yaml b/benchmarks/sql/config/setup/llm/gpt-4-turbo.yaml new file mode 100644 index 00000000..71ca1151 --- /dev/null +++ b/benchmarks/sql/config/setup/llm/gpt-4-turbo.yaml @@ -0,0 +1 @@ +model_name: gpt-4-turbo diff --git a/benchmarks/sql/config/setup/llm/gpt-4o.yaml b/benchmarks/sql/config/setup/llm/gpt-4o.yaml new file mode 100644 index 00000000..64a0df26 --- /dev/null +++ b/benchmarks/sql/config/setup/llm/gpt-4o.yaml @@ -0,0 +1 @@ +model_name: gpt-4o diff --git a/benchmarks/sql/tests/test_evaluator.py b/benchmarks/sql/tests/test_evaluator.py index dd84f4af..26adf003 100644 --- a/benchmarks/sql/tests/test_evaluator.py +++ b/benchmarks/sql/tests/test_evaluator.py @@ -1,93 +1,89 @@ -from unittest.mock import AsyncMock, MagicMock +from typing import Dict, List import pytest from benchmarks.sql.bench.evaluator import Evaluator -class MockPipeline: - async def __call__(self, data): - return ["mock_result"], {"mock_perf": "mock_value"} +class MockDataLoader: + async def load(self) -> List[str]: + return ["data1", "data2"] class MockMetricSet: - def compute(self, results): - return {"mock_metric": "mock_value"} + def compute(self, results) -> Dict[str, float]: + return {"accuracy": 0.95} -class MockDataset: - pass +class MockEvaluationResult: + def dict(self) -> Dict[str, str]: + return {"result": "processed_data"} -class MockEvaluationResult: - def dict(self): - return {"mock_result_key": "mock_result_value"} +class MockEvaluationPipeline: + async def __call__(self, data) -> 
MockEvaluationResult: + return MockEvaluationResult() @pytest.mark.asyncio -async def test_compute(): +async def test_compute() -> None: evaluator = Evaluator(task="test_task") - pipe = MockPipeline() - data = MockDataset() + dataloader = MockDataLoader() metrics = MockMetricSet() + pipeline = MockEvaluationPipeline() - # Mocking the internal methods which are not the target of this test - evaluator._call_pipeline = AsyncMock(return_value=(["mock_result"], {"mock_perf": "mock_value"})) - evaluator._compute_metrics = MagicMock(return_value={"mock_metric": "mock_value"}) - evaluator._results_processor = MagicMock(return_value={"processed_results": "mock_processed_results"}) - - expected_result = { - "mock_perf": "mock_value", - "mock_metric": "mock_value", - "processed_results": "mock_processed_results", - } + result = await evaluator.compute(pipeline, dataloader, metrics) - result = await evaluator.compute(pipe, data, metrics) - assert result == expected_result + assert "metrics" in result + assert "results" in result + assert result["metrics"]["accuracy"] == 0.95 + assert len(result["results"]) == 2 # Assuming two data points were processed @pytest.mark.asyncio -async def test_call_pipeline(): +async def test_call_pipeline() -> None: evaluator = Evaluator(task="test_task") - pipe = MockPipeline() - data = MockDataset() + pipeline = MockEvaluationPipeline() + dataset = [1, 2] - results, perf_results = await evaluator._call_pipeline(pipe, data) + results, perf_results = await evaluator._call_pipeline(pipeline, dataset) - assert len(results) == 2 - assert "mock_perf" in perf_results + assert len(results) == len(dataset) # Ensure all data was processed + assert "total_time_in_seconds" in perf_results["time_perf"] -def test_results_processor(): +@pytest.mark.asyncio +def test_results_processor() -> None: evaluator = Evaluator(task="test_task") - results = [MockEvaluationResult()] + results = [MockEvaluationResult(), MockEvaluationResult()] processed_results = evaluator._results_processor(results) assert "results" in processed_results - assert processed_results["results"][0]["mock_result_key"] == "mock_result_value" + assert len(processed_results["results"]) == len(results) -def test_compute_metrics(): +@pytest.mark.asyncio +def test_compute_metrics() -> None: evaluator = Evaluator(task="test_task") metrics = MockMetricSet() - results = [MockEvaluationResult()] + results = [MockEvaluationResult(), MockEvaluationResult()] computed_metrics = evaluator._compute_metrics(metrics, results) assert "metrics" in computed_metrics - assert computed_metrics["metrics"]["mock_metric"] == "mock_value" + assert computed_metrics["metrics"]["accuracy"] == 0.95 +@pytest.mark.asyncio def test_compute_time_perf() -> None: evaluator = Evaluator(task="test_task") start_time = 0 - end_time = 10 + end_time = 2 num_samples = 100 perf_metrics = evaluator._compute_time_perf(start_time, end_time, num_samples) - assert "time_perf" in perf_metrics - assert perf_metrics["time_perf"]["total_time_in_seconds"] == 10 - assert perf_metrics["time_perf"]["samples_per_second"] == 10 - assert perf_metrics["time_perf"]["latency_in_seconds"] == 0.1 + assert perf_metrics["time_perf"]["total_time_in_seconds"] == 2 + assert perf_metrics["time_perf"]["samples_per_second"] == 50 + assert perf_metrics["time_perf"]["latency_in_seconds"] == 0.02 From 1f0214a8271304324817afd85805f296f60ba4b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Wed, 24 Jul 2024 10:56:02 +0200 Subject: [PATCH 29/34] add tests for ex --- 
benchmarks/sql/tests/test_metrics.py | 49 +++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 5 deletions(-) diff --git a/benchmarks/sql/tests/test_metrics.py b/benchmarks/sql/tests/test_metrics.py index df437915..f26233e4 100644 --- a/benchmarks/sql/tests/test_metrics.py +++ b/benchmarks/sql/tests/test_metrics.py @@ -1,9 +1,21 @@ +from dataclasses import dataclass from typing import List +from unittest.mock import MagicMock import pytest -from benchmarks.sql.bench.metrics.iql import ExactMatchIQL, ValidIQL -from benchmarks.sql.bench.pipeline import EvaluationResult, ExecutionResult +from benchmarks.sql.bench.metrics import ExactMatchIQL, ExecutionAccuracy, ValidIQL +from benchmarks.sql.bench.pipelines import EvaluationResult, ExecutionResult + + +@dataclass +class MockDataConfig: + db_url: str = "sqlite:///:memory:" + + +@dataclass +class MockConfig: + data: MockDataConfig = MockDataConfig() @pytest.fixture @@ -34,9 +46,36 @@ def evaluation_results() -> List[EvaluationResult]: def test_exact_match_iql(evaluation_results: List[EvaluationResult]) -> None: metric = ExactMatchIQL() - assert metric.compute(evaluation_results) == 0.5 + scores = metric.compute(evaluation_results) + assert scores["EM_IQL"] == 0.5 -def test_valid_iql(evaluation_results): +def test_valid_iql(evaluation_results) -> None: metric = ValidIQL() - assert metric.compute(evaluation_results) == 1 + scores = metric.compute(evaluation_results) + assert scores["VAL_IQL"] == 1.0 + + +@pytest.mark.parametrize( + "acc, avg_times, expected_ex, expected_ves", + [ + ([True, False, True, True], [1.2, 1.2, 12.2, 12.2, 13.2, 13.2, 232.1, 232.1], 0.75, 0.75), + ([True, True, True, True], [1.2, 1.2, 12.2, 12.2, 13.2, 13.2, 232.1, 232.1], 1.0, 1.0), + ([False, False, False, False], [1.2, 1.2, 12.2, 12.2, 13.2, 13.2, 232.1, 232.1], 0.0, 0.0), + ([True, False, True, True], [1.2, 3.2, 12.2, 15.2, 13.2, 17.2, 232.1, 287.1], 0.75, 0.5960767767585372), + ([True, False, True, True], [3.2, 1.2, 15.2, 12.2, 17.2, 13.2, 287.1, 232.1], 0.75, 0.9726740826467557), + ], +) +def test_execution_accuracy( + evaluation_results: List[EvaluationResult], + acc: List[bool], + avg_times: List[float], + expected_ex: float, + expected_ves: float, +) -> None: + metric = ExecutionAccuracy(MockConfig()) + metric._execution_accuracy = MagicMock(side_effect=acc) + metric._avarage_execution_time = MagicMock(side_effect=avg_times) + scores = metric.compute(evaluation_results) + assert scores["EX"] == expected_ex + assert scores["VES"] == expected_ves From ef0b0be51fa4344913dee60bce48beec91099a26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Tue, 30 Jul 2024 03:40:53 +0200 Subject: [PATCH 30/34] refactor --- benchmarks/sql/README.md | 10 +- benchmarks/sql/bench.py | 52 ++- benchmarks/sql/bench/evaluator.py | 3 +- benchmarks/sql/bench/loaders.py | 8 +- benchmarks/sql/bench/metrics/__init__.py | 32 +- benchmarks/sql/bench/metrics/iql.py | 216 +++++++-- benchmarks/sql/bench/metrics/selector.py | 60 ++- benchmarks/sql/bench/metrics/sql.py | 36 +- benchmarks/sql/bench/pipelines/base.py | 88 +--- benchmarks/sql/bench/pipelines/collection.py | 87 ++-- benchmarks/sql/bench/pipelines/view.py | 164 ++++--- benchmarks/sql/bench/views/__init__.py | 4 +- .../sql/bench/views/structured/superhero.py | 438 +----------------- benchmarks/sql/config/data/superhero.yaml | 3 +- benchmarks/sql/config/setup/collection.yaml | 9 +- benchmarks/sql/config/setup/iql-view.yaml | 8 +- benchmarks/sql/config/setup/sql-view.yaml | 3 +- 
.../setup/views/freeform/superhero.yaml | 1 + .../setup/views/structured/superhero.yaml | 4 + benchmarks/sql/tests/test_evaluator.py | 8 +- benchmarks/sql/tests/test_metrics.py | 245 +++++++++- setup.cfg | 7 - 22 files changed, 757 insertions(+), 729 deletions(-) create mode 100644 benchmarks/sql/config/setup/views/freeform/superhero.yaml create mode 100644 benchmarks/sql/config/setup/views/structured/superhero.yaml diff --git a/benchmarks/sql/README.md b/benchmarks/sql/README.md index bb49dc80..0b7da2d9 100644 --- a/benchmarks/sql/README.md +++ b/benchmarks/sql/README.md @@ -17,7 +17,13 @@ Before starting, download the `superhero.sqlite` database file from [BIRD](https Run the whole suite on the `superhero` database with `gpt-3.5-turbo`: ```bash -python bench.py --multirun setup=iql-view,sql-view,collection data=superhero +python bench.py --multirun setup=iql-view,sql-view,collection +``` + +Run on multiple databases: + +```bash +python bench.py setup=sql-view setup/views/freeform@setup.views='[superhero,...]' data=bird ``` You can also run each evaluation separately or in subgroups: @@ -34,7 +40,7 @@ python bench.py --multirun setup=iql-view setup/llm=gpt-3.5-turbo,claude-3.5-son python bench.py --multirun setup=sql-view setup/llm=gpt-3.5-turbo,claude-3.5-sonnet ``` -For the `collection` steup, you need to specify models for both the view selection and the IQL generation step: +For the `collection` setup, you need to specify models for both the view selection and the IQL generation step: ```bash python bench.py --multirun \ diff --git a/benchmarks/sql/bench.py b/benchmarks/sql/bench.py index d2668e88..a86c6ee4 100644 --- a/benchmarks/sql/bench.py +++ b/benchmarks/sql/bench.py @@ -8,15 +8,20 @@ from bench.evaluator import Evaluator from bench.loaders import CollectionDataLoader, IQLViewDataLoader, SQLViewDataLoader from bench.metrics import ( - ExactMatchAggregationIQL, - ExactMatchFiltersIQL, - ExactMatchIQL, - ExactMatchSQL, ExecutionAccuracy, + FilteringAccuracy, + FilteringPrecision, + FilteringRecall, + IQLFiltersAccuracy, + IQLFiltersCorrectness, + IQLFiltersParseability, + IQLFiltersPrecision, + IQLFiltersRecall, MetricSet, - UnsupportedIQL, - ValidIQL, + SQLExactMatch, ViewSelectionAccuracy, + ViewSelectionPrecision, + ViewSelectionRecall, ) from bench.pipelines import CollectionEvaluationPipeline, IQLViewEvaluationPipeline, SQLViewEvaluationPipeline from bench.utils import save @@ -52,26 +57,33 @@ class EvaluationType(Enum): EVALUATION_METRICS = { EvaluationType.IQL.value: MetricSet( - ExactMatchIQL, - ExactMatchFiltersIQL, - ExactMatchAggregationIQL, - ValidIQL, - ViewSelectionAccuracy, - UnsupportedIQL, + FilteringAccuracy, + FilteringPrecision, + FilteringRecall, + IQLFiltersAccuracy, + IQLFiltersPrecision, + IQLFiltersRecall, + IQLFiltersParseability, + IQLFiltersCorrectness, ExecutionAccuracy, ), EvaluationType.SQL.value: MetricSet( - ExactMatchSQL, + SQLExactMatch, ExecutionAccuracy, ), EvaluationType.E2E.value: MetricSet( - ExactMatchIQL, - ExactMatchFiltersIQL, - ExactMatchAggregationIQL, - ValidIQL, - UnsupportedIQL, + FilteringAccuracy, + FilteringPrecision, + FilteringRecall, + IQLFiltersAccuracy, + IQLFiltersPrecision, + IQLFiltersRecall, + IQLFiltersParseability, + IQLFiltersCorrectness, ViewSelectionAccuracy, - ExactMatchSQL, + ViewSelectionPrecision, + ViewSelectionRecall, + SQLExactMatch, ExecutionAccuracy, ), } @@ -113,7 +125,7 @@ async def bench(config: DictConfig) -> None: run["sys/tags"].add( [ config.setup.name, - config.data.db_id, + *config.data.db_ids, 
*config.data.difficulties, ] ) diff --git a/benchmarks/sql/bench/evaluator.py b/benchmarks/sql/bench/evaluator.py index 2a5a201d..5903732f 100644 --- a/benchmarks/sql/bench/evaluator.py +++ b/benchmarks/sql/bench/evaluator.py @@ -1,4 +1,5 @@ import time +from dataclasses import asdict from typing import Any, Callable, Dict, List, Tuple from datasets import Dataset @@ -81,7 +82,7 @@ def _results_processor(self, results: List[EvaluationResult]) -> Dict[str, Any]: Returns: The processed results. """ - return {"results": [result.dict() for result in results]} + return {"results": [asdict(result) for result in results]} def _compute_metrics(self, metrics: MetricSet, results: List[EvaluationResult]) -> Dict[str, Any]: """ diff --git a/benchmarks/sql/bench/loaders.py b/benchmarks/sql/bench/loaders.py index 8ec23be3..8e5d8387 100644 --- a/benchmarks/sql/bench/loaders.py +++ b/benchmarks/sql/bench/loaders.py @@ -54,9 +54,9 @@ async def load(self) -> Dataset: """ dataset = await super().load() return dataset.filter( - lambda x: x["db_id"] == self.config.data.db_id + lambda x: x["db_id"] in self.config.data.db_ids and x["difficulty"] in self.config.data.difficulties - and x["view"] is not None + and x["view_name"] is not None ) @@ -74,7 +74,7 @@ async def load(self) -> Dataset: """ dataset = await super().load() return dataset.filter( - lambda x: x["db_id"] == self.config.data.db_id and x["difficulty"] in self.config.data.difficulties + lambda x: x["db_id"] in self.config.data.db_ids and x["difficulty"] in self.config.data.difficulties ) @@ -92,5 +92,5 @@ async def load(self) -> Dataset: """ dataset = await super().load() return dataset.filter( - lambda x: x["db_id"] == self.config.data.db_id and x["difficulty"] in self.config.data.difficulties + lambda x: x["db_id"] in self.config.data.db_ids and x["difficulty"] in self.config.data.difficulties ) diff --git a/benchmarks/sql/bench/metrics/__init__.py b/benchmarks/sql/bench/metrics/__init__.py index 86d72f78..f0edc124 100644 --- a/benchmarks/sql/bench/metrics/__init__.py +++ b/benchmarks/sql/bench/metrics/__init__.py @@ -1,17 +1,31 @@ from .base import Metric, MetricSet -from .iql import ExactMatchAggregationIQL, ExactMatchFiltersIQL, ExactMatchIQL, UnsupportedIQL, ValidIQL -from .selector import ViewSelectionAccuracy -from .sql import ExactMatchSQL, ExecutionAccuracy +from .iql import ( + FilteringAccuracy, + FilteringPrecision, + FilteringRecall, + IQLFiltersAccuracy, + IQLFiltersCorrectness, + IQLFiltersParseability, + IQLFiltersPrecision, + IQLFiltersRecall, +) +from .selector import ViewSelectionAccuracy, ViewSelectionPrecision, ViewSelectionRecall +from .sql import ExecutionAccuracy, SQLExactMatch __all__ = [ "Metric", "MetricSet", - "ExactMatchSQL", - "ExactMatchIQL", - "ExactMatchFiltersIQL", - "ExactMatchAggregationIQL", - "ValidIQL", + "FilteringAccuracy", + "FilteringPrecision", + "FilteringRecall", + "IQLFiltersAccuracy", + "IQLFiltersPrecision", + "IQLFiltersRecall", + "IQLFiltersParseability", + "IQLFiltersCorrectness", + "SQLExactMatch", "ViewSelectionAccuracy", - "UnsupportedIQL", + "ViewSelectionPrecision", + "ViewSelectionRecall", "ExecutionAccuracy", ] diff --git a/benchmarks/sql/bench/metrics/iql.py b/benchmarks/sql/bench/metrics/iql.py index c9339f86..d7068ea2 100644 --- a/benchmarks/sql/bench/metrics/iql.py +++ b/benchmarks/sql/bench/metrics/iql.py @@ -1,15 +1,112 @@ from typing import Any, Dict, List -from dbally.iql._exceptions import IQLError -from dbally.iql_generator.prompt import UnsupportedQueryError - from ..pipelines 
import EvaluationResult from .base import Metric -class ExactMatchIQL(Metric): +class FilteringAccuracy(Metric): + """ + Filtering accuracy indicating proportion of questions that were correctly identified as having filters. + """ + + def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: + """ + Computes the filtering accuracy. + + Args: + results: List of evaluation results. + + Returns: + Filtering accuracy. + """ + results = [result for result in results if result.reference.iql and result.prediction.iql] + return { + "DM/FLT/ACC": ( + sum( + isinstance(result.prediction.iql.filters.source, type(result.reference.iql.filters.source)) + and result.prediction.iql.filters.unsupported == result.reference.iql.filters.unsupported + for result in results + ) + / len(results) + if results + else None + ) + } + + +class FilteringPrecision(Metric): + """ + Filtering precision indicating proportion of questions that were identified as having filters correctly. + """ + + def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: + """ + Computes the filtering precision. + + Args: + results: List of evaluation results. + + Returns: + Filtering precision. + """ + results = [ + result + for result in results + if (result.reference.iql and result.prediction.iql) + and (result.prediction.iql.filters.source or result.prediction.iql.filters.unsupported) + ] + return { + "DM/FLT/PRECISION": ( + sum( + isinstance(result.prediction.iql.filters.source, type(result.reference.iql.filters.source)) + and result.prediction.iql.filters.unsupported == result.reference.iql.filters.unsupported + for result in results + ) + / len(results) + if results + else None + ) + } + + +class FilteringRecall(Metric): """ - Ratio of predicated queries that are identical to the ground truth ones. + Filtering recall indicating proportion of questions that were correctly identified as having filters. + """ + + def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: + """ + Computes the filtering recall. + + Args: + results: List of evaluation results. + + Returns: + Filtering recall. + """ + results = [ + result + for result in results + if (result.reference.iql and result.prediction.iql) + and (result.reference.iql.filters.source or result.reference.iql.filters.unsupported) + ] + return { + "DM/FLT/RECALL": ( + sum( + isinstance(result.prediction.iql.filters.source, type(result.reference.iql.filters.source)) + and result.prediction.iql.filters.unsupported == result.reference.iql.filters.unsupported + for result in results + ) + / len(results) + if results + else None + ) + } + + +class IQLFiltersAccuracy(Metric): + """ + Ratio of predicated IQL filters that are identical to the ground truth ones. """ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: @@ -22,19 +119,33 @@ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: Returns: Ratio of predicated queries that are identical to the ground truth ones. 
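The filtering decision metrics above all share one per-sample check: a prediction agrees with the reference when both have, or both lack, a filters source and their `unsupported` flags match; `isinstance(x, type(y))` is a compact way to say "both `str` or both `None`". A toy sketch with stand-in objects rather than the real dataclasses:

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class FakeIQL:
    source: Optional[str] = None
    unsupported: bool = False


def filtering_match(pred: FakeIQL, ref: FakeIQL) -> bool:
    # Same per-sample condition counted by FilteringAccuracy/Precision/Recall.
    return isinstance(pred.source, type(ref.source)) and pred.unsupported == ref.unsupported


print(filtering_match(FakeIQL("filter_by_weight_kg(50)"), FakeIQL("filter_by_the_tallest()")))  # True
print(filtering_match(FakeIQL(None), FakeIQL("filter_by_the_tallest()")))  # False
```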
""" - results = [result for result in results if result.prediction.iql is not None] + results = [ + result + for result in results + if (result.reference.iql and result.prediction.iql) + and ( + result.reference.iql.filters.source + or result.reference.iql.filters.unsupported + and result.prediction.iql.filters.source + or result.prediction.iql.filters.unsupported + ) + ] return { - "EM_IQL": ( - sum(result.prediction.iql == result.reference.iql for result in results) / len(results) + "IQL/FLT/ACC": ( + sum( + isinstance(result.prediction.iql.filters.source, type(result.reference.iql.filters.source)) + for result in results + ) + / len(results) if results else None ) } -class ExactMatchFiltersIQL(Metric): +class IQLFiltersPrecision(Metric): """ - Ration of predicated IQL filters that are identical to the ground truth ones. + Ratio of predicated IQL filters that are identical to the ground truth ones. """ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: @@ -47,19 +158,32 @@ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: Returns: Ratio of predicated queries that are identical to the ground truth ones. """ - results = [result for result in results if result.prediction.iql is not None] + results = [ + result + for result in results + if (result.reference.iql and result.prediction.iql) + and ( + result.reference.iql.filters.source + or result.reference.iql.filters.unsupported + and result.prediction.iql.filters.source + ) + ] return { - "EM_FLT_IQL": ( - sum(result.prediction.iql.filters == result.reference.iql.filters for result in results) / len(results) + "IQL/FLT/PRECISION": ( + sum( + isinstance(result.prediction.iql.filters.source, type(result.reference.iql.filters.source)) + for result in results + ) + / len(results) if results else None ) } -class ExactMatchAggregationIQL(Metric): +class IQLFiltersRecall(Metric): """ - Ratio of predicated aggregation that are identical to the ground truth ones. + Ratio of predicated IQL filters that are identical to the ground truth ones. """ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: @@ -72,10 +196,22 @@ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: Returns: Ratio of predicated queries that are identical to the ground truth ones. """ - results = [result for result in results if result.prediction.iql is not None] + results = [ + result + for result in results + if (result.reference.iql and result.prediction.iql) + and ( + result.reference.iql.filters.source + and result.prediction.iql.filters.source + or result.prediction.iql.filters.unsupported + ) + ] return { - "EM_AGG_IQL": ( - sum(result.prediction.iql.aggregation == result.reference.iql.aggregation for result in results) + "IQL/FLT/RECALL": ( + sum( + isinstance(result.prediction.iql.filters.source, type(result.reference.iql.filters.source)) + for result in results + ) / len(results) if results else None @@ -83,55 +219,65 @@ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: } -class UnsupportedIQL(Metric): +class IQLFiltersParseability(Metric): """ - Ratio of unsupported IQL queries. + Ratio of predicated IQL filters that are identical to the ground truth ones. """ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: """ - Calculates the unsupported IQL ratio. + Computes the exact match ratio. Args: results: List of evaluation results. Returns: - Unsupported queries ratio. + Ratio of predicated queries that are identical to the ground truth ones. 
""" results = [ result for result in results - if result.prediction.iql is not None or isinstance(result.prediction.exception, UnsupportedQueryError) + if (result.reference.iql and result.prediction.iql) + and (result.reference.iql.filters and result.prediction.iql.filters) + and (result.reference.iql.filters.source and result.prediction.iql.filters.source) ] return { - "UNSUPP_IQL": ( - sum(isinstance(result.prediction.exception, UnsupportedQueryError) for result in results) / len(results) - if results - else 0.0 + "IQL/FLT/PARSEABILITY": ( + sum(result.prediction.iql.filters.valid for result in results) / len(results) if results else None ) } -class ValidIQL(Metric): +class IQLFiltersCorrectness(Metric): """ - Ratio of valid IQL queries. + Ratio of predicated IQL filters that are identical to the ground truth ones. """ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: """ - Calculates the valid IQL ratio. + Computes the exact match ratio. Args: results: List of evaluation results. Returns: - Valid IQL queries ratio. + Ratio of predicated queries that are identical to the ground truth ones. """ - results = [result for result in results if result.prediction.iql is not None] + results = [ + result + for result in results + if (result.reference.iql and result.prediction.iql) + and ( + result.reference.iql.filters.source + and result.prediction.iql.filters.source + and result.prediction.iql.filters.valid + ) + ] return { - "VAL_IQL": ( - sum(not isinstance(result.prediction.exception, IQLError) for result in results) / len(results) + "IQL/FLT/CORRECTNESS": ( + sum(result.prediction.iql.filters.source == result.reference.iql.filters.source for result in results) + / len(results) if results - else 0.0 + else None ) } diff --git a/benchmarks/sql/bench/metrics/selector.py b/benchmarks/sql/bench/metrics/selector.py index 66c8ab3b..42b20ef8 100644 --- a/benchmarks/sql/bench/metrics/selector.py +++ b/benchmarks/sql/bench/metrics/selector.py @@ -20,8 +20,64 @@ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: Ratio of predicated queries that are identical to the ground truth ones. """ return { - "ACC_VIEW": ( - sum(result.prediction.view == result.reference.view for result in results) / len(results) + "VIEW/ACC": ( + sum(result.reference.view_name == result.prediction.view_name for result in results) / len(results) + if results + else None + ) + } + + +class ViewSelectionPrecision(Metric): + """ + Ratio of predicated queries that are identical to the ground truth ones. + """ + + def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: + """ + Computes the exact match ratio. + + Args: + results: List of evaluation results. + + Returns: + Ratio of predicated queries that are identical to the ground truth ones. + """ + results = [result for result in results if result.prediction.view_name] + return { + "VIEW/PRECISION": ( + sum(result.prediction.view_name == result.reference.view_name for result in results) / len(results) + if results + else None + ) + } + + +class ViewSelectionRecall(Metric): + """ + Ratio of predicated queries that are identical to the ground truth ones. + """ + + def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: + """ + Computes the exact match ratio. + + Args: + results: List of evaluation results. + + Returns: + Ratio of predicated queries that are identical to the ground truth ones. 
+ """ + results = [ + result + for result in results + if result.prediction.view_name is None + and result.reference.view_name + or result.prediction.view_name == result.reference.view_name + ] + return { + "VIEW/RECALL": ( + sum(result.prediction.view_name == result.reference.view_name for result in results) / len(results) if results else None ) diff --git a/benchmarks/sql/bench/metrics/sql.py b/benchmarks/sql/bench/metrics/sql.py index c8594455..0b5899e7 100644 --- a/benchmarks/sql/bench/metrics/sql.py +++ b/benchmarks/sql/bench/metrics/sql.py @@ -9,9 +9,10 @@ from .base import Metric -class ExactMatchSQL(Metric): +class SQLExactMatch(Metric): """ - Ratio of predicated queries that are identical to the ground truth ones. + Exact match ratio i.e. the proportion of examples in the evaluation set for which + the predicted SQL is identical to the ground truth SQL. """ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: @@ -22,10 +23,10 @@ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: results: List of evaluation results. Returns: - Ratio of predicated queries that are identical to the ground truth ones. + The exact match ratio. """ return { - "EM_SQL": ( + "SQL/EM": ( sum(result.prediction.sql == result.reference.sql for result in results) / len(results) if results else 0.0 @@ -40,9 +41,9 @@ class _DBMixin: def __init__(self, config: Dict, *args: Any, **kwargs: Any) -> None: super().__init__(config, *args, **kwargs) - self.db = create_engine(config.data.db_url) + self.dbs = {db: create_engine(f"sqlite:///data/{db}.db") for db in config.data.db_ids} - def _execute_query(self, query: str) -> List[Dict[str, Any]]: + def _execute_query(self, query: str, db_id: str) -> List[Dict[str, Any]]: """ Execute the given query on the database. @@ -52,11 +53,11 @@ def _execute_query(self, query: str) -> List[Dict[str, Any]]: Returns: The query results. """ - with self.db.connect() as connection: + with self.dbs[db_id].connect() as connection: rows = connection.execute(text(query)).fetchall() return [dict(row._mapping) for row in rows] # pylint: disable=protected-access - def _avarage_execution_time(self, query: str, n: int = 100) -> float: + def _avarage_execution_time(self, query: str, db_id: str, n: int = 100) -> float: """ Execute the given query on the database n times and return the average execution time. @@ -70,7 +71,7 @@ def _avarage_execution_time(self, query: str, n: int = 100) -> float: total_time = 0 for _ in range(n): start_time = time.perf_counter() - self._execute_query(query) + self._execute_query(query, db_id) total_time += time.perf_counter() - start_time return total_time / n @@ -95,20 +96,19 @@ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: Execution accuracy score and valid efficiency score. 
""" accurate_results = [result for result in results if self._execution_accuracy(result)] - return { - "EX": len(accurate_results) / len(results) if results else 0.0, + "EX": len(accurate_results) / len(results) if results else None, "VES": sum( ( - self._avarage_execution_time(result.reference.sql) - / self._avarage_execution_time(result.prediction.sql) + self._avarage_execution_time(result.reference.sql, result.db_id) + / self._avarage_execution_time(result.prediction.sql, result.db_id) ) ** 0.5 for result in accurate_results ) / len(results) if results - else 0.0, + else None, } def _execution_accuracy(self, result: EvaluationResult) -> bool: @@ -125,13 +125,13 @@ def _execution_accuracy(self, result: EvaluationResult) -> bool: return False try: - result.reference.results = self._execute_query(result.reference.sql) - result.prediction.results = self._execute_query(result.prediction.sql) + ref_results = self._execute_query(result.reference.sql, result.db_id) + pred_results = self._execute_query(result.prediction.sql, result.db_id) except SQLAlchemyError: return False - reference = pd.DataFrame(result.reference.results) - prediction = pd.DataFrame(result.prediction.results) + reference = pd.DataFrame(ref_results) + prediction = pd.DataFrame(pred_results) # If filtering works correctly, the number of rows will be the same # TODO: Sometimes a different number of rows is okay, e.g. if df has aggregated values that are expanded in gt diff --git a/benchmarks/sql/bench/pipelines/base.py b/benchmarks/sql/bench/pipelines/base.py index 58ba5186..38bcb304 100644 --- a/benchmarks/sql/bench/pipelines/base.py +++ b/benchmarks/sql/bench/pipelines/base.py @@ -1,8 +1,6 @@ from abc import ABC, abstractmethod -from dataclasses import dataclass, field -from typing import Any, Dict, List, Optional - -from sqlalchemy import create_engine +from dataclasses import dataclass +from typing import Any, Dict, Optional from dbally.llms.base import LLM from dbally.llms.litellm import LiteLLM @@ -10,37 +8,25 @@ @dataclass -class IQLResult: +class IQL: """ - Represents the IQL result. + Represents the IQL. """ - filters: Optional[str] = None - aggregation: Optional[str] = None - - def __eq__(self, other: "IQLResult") -> bool: - """ - Compares two IQL results. - - Args: - other: The other IQL result to compare. + source: Optional[str] = None + unsupported: bool = False + valid: bool = True - Returns: - True if the two IQL results are equal, False otherwise. - """ - return self.filters == other.filters and self.aggregation == other.aggregation - def dict(self) -> Dict[str, Any]: - """ - Returns the dictionary representation of the object. +@dataclass +class IQLResult: + """ + Represents the result of an IQL query execution. + """ - Returns: - The dictionary representation. - """ - return { - "filters": self.filters, - "aggregation": self.aggregation, - } + filters: IQL + aggregation: IQL + context: bool = False @dataclass @@ -49,26 +35,9 @@ class ExecutionResult: Represents the result of a single query execution. """ - view: Optional[str] = None - sql: Optional[str] = None + view_name: Optional[str] = None iql: Optional[IQLResult] = None - results: List[Dict[str, Any]] = field(default_factory=list) - exception: Optional[Exception] = None - execution_time: Optional[float] = None - - def dict(self) -> Dict[str, Any]: - """ - Returns the dictionary representation of the object. - - Returns: - The dictionary representation. 
- """ - return { - "view": self.view, - "iql": self.iql.dict() if self.iql else None, - "sql": self.sql, - "len_results": len(self.results), - } + sql: Optional[str] = None @dataclass @@ -77,38 +46,17 @@ class EvaluationResult: Represents the result of a single evaluation. """ + db_id: str question: str reference: ExecutionResult prediction: ExecutionResult - def dict(self) -> Dict[str, Any]: - """ - Returns the dictionary representation of the object. - - Returns: - The dictionary representation. - """ - return { - "question": self.question, - "reference": self.reference.dict(), - "prediction": self.prediction.dict(), - } - class EvaluationPipeline(ABC): """ Collection evaluation pipeline. """ - def __init__(self, config: Dict) -> None: - """ - Constructs the pipeline for evaluating IQL predictions. - - Args: - config: The configuration for the pipeline. - """ - self.db = create_engine(config.data.db_url) - def get_llm(self, config: Dict) -> LLM: """ Returns the LLM based on the configuration. diff --git a/benchmarks/sql/bench/pipelines/collection.py b/benchmarks/sql/bench/pipelines/collection.py index 1efe9b9c..918cfbd9 100644 --- a/benchmarks/sql/bench/pipelines/collection.py +++ b/benchmarks/sql/bench/pipelines/collection.py @@ -1,13 +1,17 @@ from typing import Any, Dict +from sqlalchemy import create_engine + import dbally from dbally.collection.collection import Collection +from dbally.collection.exceptions import NoViewFoundError from dbally.iql._exceptions import IQLError from dbally.iql_generator.prompt import UnsupportedQueryError from dbally.view_selection.llm_view_selector import LLMViewSelector +from dbally.views.structured import IQLGenerationError from ..views import VIEWS_REGISTRY -from .base import EvaluationPipeline, EvaluationResult, ExecutionResult, IQLResult +from .base import IQL, EvaluationPipeline, EvaluationResult, ExecutionResult, IQLResult class CollectionEvaluationPipeline(EvaluationPipeline): @@ -22,7 +26,6 @@ def __init__(self, config: Dict) -> None: Args: config: The configuration for the pipeline. 
""" - super().__init__(config) self.collection = self.get_collection(config.setup) def get_collection(self, config: Dict) -> Collection: @@ -46,20 +49,11 @@ def get_collection(self, config: Dict) -> Collection: ) collection.n_retries = 0 - for view_name in config.views: - view_cls = VIEWS_REGISTRY[view_name] - collection.add(view_cls, lambda: view_cls(self.db)) # pylint: disable=cell-var-from-loop - - if config.fallback: - fallback = dbally.create_collection( - name=config.fallback, - llm=generator_llm, - view_selector=view_selector, - ) - fallback.n_retries = 0 - fallback_cls = VIEWS_REGISTRY[config.fallback] - fallback.add(fallback_cls, lambda: fallback_cls(self.db)) - collection.set_fallback(fallback) + for db_name, view_names in config.views.items(): + db = create_engine(f"sqlite:///data/{db_name}.db") + for view_name in view_names: + view_cls = VIEWS_REGISTRY[view_name] + collection.add(view_cls, lambda: view_cls(db)) # pylint: disable=cell-var-from-loop return collection @@ -79,32 +73,67 @@ async def __call__(self, data: Dict[str, Any]) -> EvaluationResult: dry_run=True, return_natural_response=False, ) - # TODO: Refactor exception handling for IQLError for filters and aggregation - except IQLError as exc: + except NoViewFoundError: prediction = ExecutionResult( - iql=IQLResult(filters=exc.source), - exception=exc, + view_name=None, + iql=None, + sql=None, + ) + except IQLGenerationError as exc: + prediction = ExecutionResult( + view_name=exc.view_name, + iql=IQLResult( + filters=IQL( + source=exc.filters, + unsupported=isinstance(exc.__cause__, UnsupportedQueryError), + valid=not (exc.filters and not exc.aggregation and isinstance(exc.__cause__, IQLError)), + ), + aggregation=IQL( + source=exc.aggregation, + unsupported=isinstance(exc.__cause__, UnsupportedQueryError), + valid=not (exc.aggregation and isinstance(exc.__cause__, IQLError)), + ), + ), + sql=None, ) - # TODO: Remove this broad exception handling once the Text2SQL view is fixed - except (UnsupportedQueryError, Exception) as exc: # pylint: disable=broad-except - prediction = ExecutionResult(exception=exc) else: - iql = IQLResult(filters=result.context["iql"]) if "iql" in result.context else None prediction = ExecutionResult( - view=result.view_name, - iql=iql, + view_name=result.view_name, + iql=IQLResult( + filters=IQL( + source=result.context.get("iql"), + unsupported=False, + valid=True, + ), + aggregation=IQL( + source=None, + unsupported=False, + valid=True, + ), + ), sql=result.context.get("sql"), ) reference = ExecutionResult( - view=data["view"], + view_name=data["view_name"], iql=IQLResult( - filters=data["iql_filters"], - aggregation=data["iql_aggregation"], + filters=IQL( + source=data["iql_filters"], + unsupported=data["iql_filters_unsupported"], + valid=True, + ), + aggregation=IQL( + source=data["iql_aggregation"], + unsupported=data["iql_aggregation_unsupported"], + valid=True, + ), + context=data["iql_context"], ), sql=data["sql"], ) + return EvaluationResult( + db_id=data["db_id"], question=data["question"], reference=reference, prediction=prediction, diff --git a/benchmarks/sql/bench/pipelines/view.py b/benchmarks/sql/bench/pipelines/view.py index f0108b42..37969365 100644 --- a/benchmarks/sql/bench/pipelines/view.py +++ b/benchmarks/sql/bench/pipelines/view.py @@ -1,18 +1,23 @@ -from abc import ABC +# pylint: disable=duplicate-code + +from abc import ABC, abstractmethod from typing import Any, Dict, Type +from sqlalchemy import create_engine + from dbally.iql._exceptions import IQLError from 
dbally.iql_generator.prompt import UnsupportedQueryError from dbally.views.freeform.text2sql.view import BaseText2SQLView from dbally.views.sqlalchemy_base import SqlAlchemyBaseView +from dbally.views.structured import IQLGenerationError from ..views import VIEWS_REGISTRY -from .base import EvaluationPipeline, EvaluationResult, ExecutionResult, IQLResult +from .base import IQL, EvaluationPipeline, EvaluationResult, ExecutionResult, IQLResult class ViewEvaluationPipeline(EvaluationPipeline, ABC): """ - Collection evaluation pipeline. + View evaluation pipeline. """ def __init__(self, config: Dict) -> None: @@ -22,36 +27,53 @@ def __init__(self, config: Dict) -> None: Args: config: The configuration for the pipeline. """ - super().__init__(config) self.llm = self.get_llm(config.setup.llm) + self.dbs = self.get_dbs(config.setup) + self.views = self.get_views(config.setup) + def get_dbs(self, config: Dict) -> Dict: + """ + Returns the database object based on the database name. -class IQLViewEvaluationPipeline(ViewEvaluationPipeline): - """ - Collection evaluation pipeline. - """ + Args: + config: The database configuration. - def __init__(self, config: Dict) -> None: + Returns: + The database object. """ - Constructs the pipeline for evaluating IQL predictions. + return {db: create_engine(f"sqlite:///data/{db}.db") for db in config.views} + + @abstractmethod + def get_views(self, config: Dict) -> Dict[str, Type[SqlAlchemyBaseView]]: + """ + Creates the view classes mapping based on the configuration. Args: - config: The configuration for the pipeline. + config: The views configuration. + + Returns: + The view classes mapping. """ - super().__init__(config) - self.views = self.get_views(config.setup) + + +class IQLViewEvaluationPipeline(ViewEvaluationPipeline): + """ + IQL view evaluation pipeline. + """ def get_views(self, config: Dict) -> Dict[str, Type[SqlAlchemyBaseView]]: """ - Returns the view object based on the view name. + Creates the view classes mapping based on the configuration. Args: - config: The view configuration. + config: The views configuration. Returns: - The view object. + The view classes mapping. """ - return {view: VIEWS_REGISTRY[view] for view in config.views} + return { + view_name: VIEWS_REGISTRY[view_name] for view_names in config.views.values() for view_name in view_names + } async def __call__(self, data: Dict[str, Any]) -> EvaluationResult: """ @@ -63,7 +85,8 @@ async def __call__(self, data: Dict[str, Any]) -> EvaluationResult: Returns: The evaluation result. 
""" - view = self.views[data["view"]](self.db) + view = self.views[data["view_name"]](self.dbs[data["db_id"]]) + try: result = await view.ask( query=data["question"], @@ -71,34 +94,61 @@ async def __call__(self, data: Dict[str, Any]) -> EvaluationResult: dry_run=True, n_retries=0, ) - # TODO: Refactor exception handling for IQLError for filters and aggregation - except IQLError as exc: + except IQLGenerationError as exc: prediction = ExecutionResult( - view=data["view"], - iql=IQLResult(filters=exc.source), - exception=exc, - ) - except (UnsupportedQueryError, Exception) as exc: # pylint: disable=broad-except - prediction = ExecutionResult( - view=data["view"], - exception=exc, + view_name=data["view_name"], + iql=IQLResult( + filters=IQL( + source=exc.filters, + unsupported=isinstance(exc.__cause__, UnsupportedQueryError), + valid=not (exc.filters and not exc.aggregation and isinstance(exc.__cause__, IQLError)), + ), + aggregation=IQL( + source=exc.aggregation, + unsupported=isinstance(exc.__cause__, UnsupportedQueryError), + valid=not (exc.aggregation and isinstance(exc.__cause__, IQLError)), + ), + ), + sql=None, ) else: prediction = ExecutionResult( - view=data["view"], - iql=IQLResult(filters=result.context["iql"]), + view_name=data["view_name"], + iql=IQLResult( + filters=IQL( + source=result.context["iql"], + unsupported=False, + valid=True, + ), + aggregation=IQL( + source=None, + unsupported=False, + valid=True, + ), + ), sql=result.context["sql"], ) reference = ExecutionResult( - view=data["view"], + view_name=data["view_name"], iql=IQLResult( - filters=data["iql_filters"], - aggregation=data["iql_aggregation"], + filters=IQL( + source=data["iql_filters"], + unsupported=data["iql_filters_unsupported"], + valid=True, + ), + aggregation=IQL( + source=data["iql_aggregation"], + unsupported=data["iql_aggregation_unsupported"], + valid=True, + ), + context=data["iql_context"], ), sql=data["sql"], ) + return EvaluationResult( + db_id=data["db_id"], question=data["question"], reference=reference, prediction=prediction, @@ -107,30 +157,20 @@ async def __call__(self, data: Dict[str, Any]) -> EvaluationResult: class SQLViewEvaluationPipeline(ViewEvaluationPipeline): """ - Collection evaluation pipeline. + SQL view evaluation pipeline. """ - def __init__(self, config: Dict) -> None: - """ - Constructs the pipeline for evaluating IQL predictions. - - Args: - config: The configuration for the pipeline. - """ - super().__init__(config) - self.view = self.get_view(config.setup) - - def get_view(self, config: Dict) -> Type[BaseText2SQLView]: + def get_views(self, config: Dict) -> Dict[str, Type[BaseText2SQLView]]: """ - Returns the view object based on the view name. + Creates the view classes mapping based on the configuration. Args: - config: The view configuration. + config: The views configuration. Returns: - The view object. + The view classes mapping. """ - return VIEWS_REGISTRY[config.view] + return {db_id: VIEWS_REGISTRY[view_name] for db_id, view_name in config.views.items()} async def __call__(self, data: Dict[str, Any]) -> EvaluationResult: """ @@ -142,7 +182,8 @@ async def __call__(self, data: Dict[str, Any]) -> EvaluationResult: Returns: The evaluation result. 
""" - view = self.view(self.db) + view = self.views[data["db_id"]](self.dbs[data["db_id"]]) + try: result = await view.ask( query=data["question"], @@ -151,26 +192,15 @@ async def __call__(self, data: Dict[str, Any]) -> EvaluationResult: n_retries=0, ) # TODO: Remove this broad exception handling once the Text2SQL view is fixed - except Exception as exc: # pylint: disable=broad-except - prediction = ExecutionResult( - view=self.view.__name__, - exception=exc, - ) + except Exception: # pylint: disable=broad-except + prediction = ExecutionResult() else: - prediction = ExecutionResult( - view=self.view.__name__, - sql=result.context["sql"], - ) + prediction = ExecutionResult(sql=result.context["sql"]) + + reference = ExecutionResult(sql=data["sql"]) - reference = ExecutionResult( - view=data["view"], - iql=IQLResult( - filters=data["iql_filters"], - aggregation=data["iql_aggregation"], - ), - sql=data["sql"], - ) return EvaluationResult( + db_id=data["db_id"], question=data["question"], reference=reference, prediction=prediction, diff --git a/benchmarks/sql/bench/views/__init__.py b/benchmarks/sql/bench/views/__init__.py index 732779e2..9c7230e7 100644 --- a/benchmarks/sql/bench/views/__init__.py +++ b/benchmarks/sql/bench/views/__init__.py @@ -3,12 +3,10 @@ from dbally.views.base import BaseView from .freeform.superhero import SuperheroFreeformView -from .structured.superhero import HeroAttributeView, HeroPowerView, PublisherView, SuperheroView +from .structured.superhero import PublisherView, SuperheroView VIEWS_REGISTRY: Dict[str, Type[BaseView]] = { PublisherView.__name__: PublisherView, - HeroAttributeView.__name__: HeroAttributeView, - HeroPowerView.__name__: HeroPowerView, SuperheroView.__name__: SuperheroView, SuperheroFreeformView.__name__: SuperheroFreeformView, } diff --git a/benchmarks/sql/bench/views/structured/superhero.py b/benchmarks/sql/bench/views/structured/superhero.py index 76f9e290..db57498e 100644 --- a/benchmarks/sql/bench/views/structured/superhero.py +++ b/benchmarks/sql/bench/views/structured/superhero.py @@ -4,8 +4,8 @@ from typing import Literal from sqlalchemy import ColumnElement, Engine, Select, func, select -from sqlalchemy.ext.declarative import DeferredReflection, declarative_base -from sqlalchemy.orm import aliased +from sqlalchemy.ext.declarative import DeferredReflection +from sqlalchemy.orm import aliased, declarative_base from dbally.views.decorators import view_filter from dbally.views.sqlalchemy_base import SqlAlchemyBaseView @@ -174,17 +174,18 @@ def filter_by_height_cm_greater_than(self, height_cm: float) -> ColumnElement: return Superhero.height_cm > height_cm @view_filter() - def filter_by_height_greater_than_percentage_of_average(self, average_percentage: int) -> ColumnElement: + def filter_by_height_cm_between(self, begin_height_cm: float, end_height_cm: float) -> ColumnElement: """ - Filters the view by the height greater than the percentage of average of superheroes. + Filters the view by the height of the superhero. Args: - average_percentage: The percentage of the average height. + begin_height_cm: The begin height of the superhero. + end_height_cm: The end height of the superhero. Returns: The filter condition. 
""" - return Superhero.height_cm * 100 > select(func.avg(Superhero.height_cm)).scalar_subquery() * average_percentage + return Superhero.height_cm.between(begin_height_cm, end_height_cm) @view_filter() def filter_by_the_tallest(self) -> ColumnElement: @@ -279,226 +280,6 @@ def filter_by_missing_publisher(self) -> ColumnElement: return Superhero.publisher_id == None -class SuperheroHeroPowerFilterMixin: - """ - Mixin for filtering the view by the superhero superpowers. - """ - - @view_filter() - def filter_by_number_powers(self, number_powers: int) -> ColumnElement: - """ - Filters the view by the number of superpowers. - - Args: - number_powers: The number of hero superpowers. - - Returns: - The filter condition. - """ - return Superhero.id.in_( - select(HeroPower.hero_id) - .group_by(HeroPower.hero_id) - .having(func.count(HeroPower.power_id) == number_powers) - ) - - @view_filter() - def filter_by_number_super_powers_greater_than(self, number_powers: int) -> ColumnElement: - """ - Filters the view by the number of superpowers. - - Args: - number_powers: The number of hero superpowers. - - Returns: - The filter condition. - """ - return Superhero.id.in_( - select(HeroPower.hero_id).group_by(HeroPower.hero_id).having(func.count(HeroPower.power_id) > number_powers) - ) - - @view_filter() - def filter_by_number_powers_less_than(self, number_powers: int) -> ColumnElement: - """ - Filters the view by the number of superpowers. - - Args: - number_powers: The number of hero superpowers. - - Returns: - The filter condition. - """ - return Superhero.id.in_( - select(HeroPower.hero_id).group_by(HeroPower.hero_id).having(func.count(HeroPower.power_id) < number_powers) - ) - - @view_filter() - def filter_by_power_name(self, power_name: str) -> ColumnElement: - """ - Filters the view by the superpower name. - - Args: - power_name: The name of the superpower. - - Returns: - The filter condition. - """ - return Superhero.id.in_( - select(HeroPower.hero_id) - .join(Superpower, Superpower.id == HeroPower.power_id) - .where(Superpower.power_name == power_name) - ) - - @view_filter() - def filter_by_the_most_super_powers(self) -> ColumnElement: - """ - Filters the view by the most superpowers. - - Returns: - The filter condition. - """ - return Superhero.id.in_( - select(HeroPower.hero_id) - .group_by(HeroPower.hero_id) - .order_by(func.count(HeroPower.power_id).desc()) - .limit(1) - ) - - -class SuperheroHeroAttributeFilterMixin: - """ - Mixin for filtering the view by the superhero attributes. - """ - - @view_filter() - def filter_by_attribute_name(self, attribute_name: str) -> ColumnElement: - """ - Filters the view by the hero attribute name. - - Args: - attribute_name: The name of the hero attribute. - - Returns: - The filter condition. - """ - return Superpower.id.in_( - select(HeroAttribute.hero_id) - .join(Attribute, Attribute.id == HeroAttribute.attribute_id) - .where(Attribute.attribute_name == attribute_name) - ) - - @view_filter() - def filter_by_attribute_value(self, attribute_value: int) -> ColumnElement: - """ - Filters the view by the hero attribute value. - - Args: - attribute_value: The value of the hero attribute. - - Returns: - The filter condition. - """ - return Superhero.id.in_( - select(HeroAttribute.hero_id) - .group_by(HeroAttribute.hero_id) - .having(HeroAttribute.attribute_value == attribute_value) - ) - - @view_filter() - def filter_by_the_lowest_attribute_value(self) -> ColumnElement: - """ - Filters the view by the lowest hero attribute value. 
- - Returns: - The filter condition. - """ - return Superhero.id.in_( - select(HeroAttribute.hero_id) - .group_by(HeroAttribute.hero_id) - .having(HeroAttribute.attribute_value == select(func.min(HeroAttribute.attribute_value)).scalar_subquery()) - ) - - @view_filter() - def filter_by_the_highest_attribute_value(self) -> ColumnElement: - """ - Filters the view by the highest hero attribute value. - - Returns: - The filter condition. - """ - return Superhero.id.in_( - select(HeroAttribute.hero_id) - .group_by(HeroAttribute.hero_id) - .having(HeroAttribute.attribute_value == select(func.max(HeroAttribute.attribute_value)).scalar_subquery()) - ) - - @view_filter() - def filter_by_attribute_value_less_than(self, attribute_value: int) -> ColumnElement: - """ - Filters the view by the hero attribute value. - - Args: - attribute_value: The value of the hero attribute. - - Returns: - The filter condition. - """ - return Superhero.id.in_( - select(HeroAttribute.hero_id) - .group_by(HeroAttribute.hero_id) - .having(func.min(HeroAttribute.attribute_value) < attribute_value) - ) - - @view_filter() - def filter_by_attribute_value_between(self, begin_attribute_value: int, end_attribute_value: int) -> ColumnElement: - """ - Filters the view by the hero attribute value. - - Args: - begin_attribute_value: The begin value of the hero attribute. - end_attribute_value: The end value of the hero attribute. - - Returns: - The filter condition. - """ - return Superhero.id.in_( - select(HeroAttribute.hero_id) - .group_by(HeroAttribute.hero_id) - .having(HeroAttribute.attribute_value.between(begin_attribute_value, end_attribute_value)) - ) - - @view_filter() - def filter_by_the_fastest(self) -> ColumnElement: - """ - Filters the view by the fastest superhero. - - Returns: - The filter condition. - """ - return Superhero.id.in_( - select(HeroAttribute.hero_id) - .join(Attribute, Attribute.id == HeroAttribute.attribute_id) - .where(Attribute.attribute_name == "Speed") - .group_by(HeroAttribute.hero_id) - .having(HeroAttribute.attribute_value == select(func.max(HeroAttribute.attribute_value)).scalar_subquery()) - ) - - @view_filter() - def filter_by_the_dumbest(self) -> ColumnElement: - """ - Filters the view by the dumbest superhero. - - Returns: - The filter condition. - """ - return Superhero.id.in_( - select(HeroAttribute.hero_id) - .join(Attribute, Attribute.id == HeroAttribute.attribute_id) - .where(Attribute.attribute_name == "Intelligence") - .group_by(HeroAttribute.hero_id) - .having(HeroAttribute.attribute_value == select(func.min(HeroAttribute.attribute_value)).scalar_subquery()) - ) - - class SuperheroColourFilterMixin: """ Mixin for filtering the view by the superhero colour attributes. @@ -589,43 +370,6 @@ def filter_by_publisher_name(self, publisher_name: str) -> ColumnElement: return Publisher.publisher_name == publisher_name -class PublisherSuperheroMixin: - """ - Mixin for filtering the publisher view by superheros. - """ - - @view_filter() - def filter_by_superhero_name(self, superhero_name: str) -> ColumnElement: - """ - Filters the view by the superhero name. - - Args: - superhero_name: The name of the superhero. - - Returns: - The filter condition. - """ - return Publisher.id.in_(select(Superhero.publisher_id).where(Superhero.superhero_name == superhero_name)) - - @view_filter() - def filter_by_the_slowest_superhero(self) -> ColumnElement: - """ - Filters the view by the slowest superhero. - - Returns: - The filter condition. 
- """ - return Publisher.id.in_( - select(Superhero.publisher_id) - .join(HeroAttribute, HeroAttribute.hero_id == Superhero.id) - .join(Attribute, Attribute.id == HeroAttribute.attribute_id) - .where( - Attribute.attribute_name == "Speed", - HeroAttribute.attribute_value == select(func.min(HeroAttribute.attribute_value)).scalar_subquery(), - ) - ) - - class AlignmentFilterMixin: """ Mixin for filtering the view by the alignment attributes. @@ -645,44 +389,6 @@ def filter_by_alignment(self, alignment: Literal["Good", "Bad", "Neutral", "N/A" return Alignment.alignment == alignment -class SuperpowerFilterMixin: - """ - Mixin for filtering the view by the superpower attributes. - """ - - @view_filter() - def filter_by_power_name(self, power_name: str) -> ColumnElement: - """ - Filters the view by the superpower name. - - Args: - power_name: The name of the superpower. - - Returns: - The filter condition. - """ - return Superpower.power_name == power_name - - -class RaceFilterMixin: - """ - Mixin for filtering the view by the race. - """ - - @view_filter() - def filter_by_race(self, race: str) -> ColumnElement: - """ - Filters the view by the object race. - - Args: - race: The race of the object. - - Returns: - The filter condition. - """ - return Race.race == race - - class GenderFilterMixin: """ Mixin for filtering the view by the gender. @@ -702,73 +408,23 @@ def filter_by_gender(self, gender: Literal["Male", "Female", "N/A"]) -> ColumnEl return Gender.gender == gender -class HeroAttributeFilterMixin: - """ - Mixin for filtering the view by the hero attribute. - """ - - @view_filter() - def filter_by_the_lowest_attribute_value(self) -> ColumnElement: - """ - Filters the view by the lowest hero attribute value. - - Returns: - The filter condition. - """ - return HeroAttribute.attribute_value == select(func.min(HeroAttribute.attribute_value)).scalar_subquery() - - @view_filter() - def filter_by_the_highest_attribute_value(self) -> ColumnElement: - """ - Filters the view by the highest hero attribute value. - - Returns: - The filter condition. - """ - return HeroAttribute.attribute_value == select(func.max(HeroAttribute.attribute_value)).scalar_subquery() - - -class HeroPowerFilterMixin: - """ - Mixin for filtering the view by the hero power. - """ - - @view_filter() - def filter_by_the_most_popular_power(self) -> ColumnElement: - """ - Filters the view by the most popular hero power. - - Returns: - The filter condition. - """ - return HeroPower.power_id == ( - select(HeroPower.power_id) - .group_by(HeroPower.power_id) - .order_by(func.count(HeroPower.power_id).desc()) - .limit(1) - .scalar_subquery() - ) - - -class AttributeFilterMixin: +class RaceFilterMixin: """ - Mixin for filtering the view by the attribute. + Mixin for filtering the view by the race. """ @view_filter() - def filter_by_attribute_name( - self, attribute_name: Literal["Intelligence", "Strength", "Speed", "Durability", "Power", "Combat"] - ) -> ColumnElement: + def filter_by_race(self, race: str) -> ColumnElement: """ - Filters the view by the attribute name. + Filters the view by the object race. Args: - attribute_name: The name of the attribute. + race: The race of the object. Returns: The filter condition. 
""" - return Attribute.attribute_name == attribute_name + return Race.race == race class SuperheroView( @@ -776,11 +432,9 @@ class SuperheroView( SqlAlchemyBaseView, SuperheroFilterMixin, SuperheroColourFilterMixin, - SuperheroHeroPowerFilterMixin, - SuperheroHeroAttributeFilterMixin, - PublisherFilterMixin, AlignmentFilterMixin, GenderFilterMixin, + PublisherFilterMixin, RaceFilterMixin, ): """ @@ -802,79 +456,25 @@ def get_select(self) -> Select: Superhero.full_name, Superhero.height_cm, Superhero.weight_kg, - Publisher.publisher_name, + Alignment.alignment, Gender.gender, + Publisher.publisher_name, Race.race, - Alignment.alignment, self.eye_colour.colour.label("eye_colour"), self.hair_colour.colour.label("hair_colour"), self.skin_colour.colour.label("skin_colour"), ) + .join(Alignment, Alignment.id == Superhero.alignment_id) + .join(Gender, Gender.id == Superhero.gender_id) .join(Publisher, Publisher.id == Superhero.publisher_id) .join(Race, Race.id == Superhero.race_id) - .join(Gender, Gender.id == Superhero.gender_id) - .join(Alignment, Alignment.id == Superhero.alignment_id) .join(self.eye_colour, self.eye_colour.id == Superhero.eye_colour_id) .join(self.hair_colour, self.hair_colour.id == Superhero.hair_colour_id) .join(self.skin_colour, self.skin_colour.id == Superhero.skin_colour_id) ) -class HeroAttributeView( - DBInitMixin, - SqlAlchemyBaseView, - HeroAttributeFilterMixin, - AttributeFilterMixin, - SuperheroFilterMixin, - AlignmentFilterMixin, -): - """ - View for querying only hero attributes data. Contains the attribute name and attribute value. - """ - - def get_select(self) -> Select: - """ - Initializes the select object for the view. - - Returns: - The select object. - """ - return ( - select( - Attribute.attribute_name, - HeroAttribute.attribute_value, - ) - .join(Attribute, Attribute.id == HeroAttribute.attribute_id) - .join(Superhero, Superhero.id == HeroAttribute.hero_id) - .join(Alignment, Alignment.id == Superhero.alignment_id) - .join(Publisher, Publisher.id == Superhero.publisher_id) - ) - - -class HeroPowerView(DBInitMixin, SqlAlchemyBaseView, HeroPowerFilterMixin, SuperheroFilterMixin, SuperpowerFilterMixin): - """ - View for querying only hero super powers data. Contains the power id and power name. - """ - - def get_select(self) -> Select: - """ - Initializes the select object for the view. - - Returns: - The select object. - """ - return ( - select( - HeroPower.power_id, - Superpower.power_name, - ) - .join(Superhero, Superhero.id == HeroPower.hero_id) - .join(Superpower, Superpower.id == HeroPower.power_id) - .group_by(HeroPower.power_id) - ) - - -class PublisherView(DBInitMixin, SqlAlchemyBaseView, PublisherFilterMixin, PublisherSuperheroMixin): +class PublisherView(DBInitMixin, SqlAlchemyBaseView, PublisherFilterMixin): """ View for querying only publisher data. Contains the publisher id and publisher name. 
""" diff --git a/benchmarks/sql/config/data/superhero.yaml b/benchmarks/sql/config/data/superhero.yaml index bb556c46..23412721 100644 --- a/benchmarks/sql/config/data/superhero.yaml +++ b/benchmarks/sql/config/data/superhero.yaml @@ -1,5 +1,4 @@ path: "micpst/bird-iql" split: "dev" -db_id: "superhero" +db_ids: ["superhero"] difficulties: ["simple", "moderate", "challenging"] -db_url: "sqlite:///data/superhero.db" diff --git a/benchmarks/sql/config/setup/collection.yaml b/benchmarks/sql/config/setup/collection.yaml index 3a7073b0..2eafb34a 100644 --- a/benchmarks/sql/config/setup/collection.yaml +++ b/benchmarks/sql/config/setup/collection.yaml @@ -1,12 +1,7 @@ name: COLLECTION -views: [ - "HeroAttributeView", - "HeroPowerView", - "PublisherView", - "SuperheroView", -] -fallback: "SuperheroFreeformView" defaults: - llm@selector_llm: gpt-3.5-turbo - llm@generator_llm: gpt-3.5-turbo + - views/structured@views: + - superhero diff --git a/benchmarks/sql/config/setup/iql-view.yaml b/benchmarks/sql/config/setup/iql-view.yaml index 9b6bcdde..e652bc3b 100644 --- a/benchmarks/sql/config/setup/iql-view.yaml +++ b/benchmarks/sql/config/setup/iql-view.yaml @@ -1,10 +1,6 @@ name: IQL_VIEW -views: [ - "HeroAttributeView", - "HeroPowerView", - "PublisherView", - "SuperheroView", -] defaults: - llm: gpt-3.5-turbo + - views/structured@views: + - superhero diff --git a/benchmarks/sql/config/setup/sql-view.yaml b/benchmarks/sql/config/setup/sql-view.yaml index f501b0d8..e4e1f7d9 100644 --- a/benchmarks/sql/config/setup/sql-view.yaml +++ b/benchmarks/sql/config/setup/sql-view.yaml @@ -1,5 +1,6 @@ name: SQL_VIEW -view: SuperheroFreeformView defaults: - llm: gpt-3.5-turbo + - views/freeform@views: + - superhero diff --git a/benchmarks/sql/config/setup/views/freeform/superhero.yaml b/benchmarks/sql/config/setup/views/freeform/superhero.yaml new file mode 100644 index 00000000..aa0cf958 --- /dev/null +++ b/benchmarks/sql/config/setup/views/freeform/superhero.yaml @@ -0,0 +1 @@ +superhero: SuperheroFreeformView diff --git a/benchmarks/sql/config/setup/views/structured/superhero.yaml b/benchmarks/sql/config/setup/views/structured/superhero.yaml new file mode 100644 index 00000000..6497bf6c --- /dev/null +++ b/benchmarks/sql/config/setup/views/structured/superhero.yaml @@ -0,0 +1,4 @@ +superhero: [ + PublisherView, + SuperheroView, +] diff --git a/benchmarks/sql/tests/test_evaluator.py b/benchmarks/sql/tests/test_evaluator.py index 26adf003..ea328e8b 100644 --- a/benchmarks/sql/tests/test_evaluator.py +++ b/benchmarks/sql/tests/test_evaluator.py @@ -1,3 +1,4 @@ +from dataclasses import dataclass from typing import Dict, List import pytest @@ -15,9 +16,9 @@ def compute(self, results) -> Dict[str, float]: return {"accuracy": 0.95} +@dataclass class MockEvaluationResult: - def dict(self) -> Dict[str, str]: - return {"result": "processed_data"} + result: str = "processed_data" class MockEvaluationPipeline: @@ -52,7 +53,6 @@ async def test_call_pipeline() -> None: assert "total_time_in_seconds" in perf_results["time_perf"] -@pytest.mark.asyncio def test_results_processor() -> None: evaluator = Evaluator(task="test_task") results = [MockEvaluationResult(), MockEvaluationResult()] @@ -63,7 +63,6 @@ def test_results_processor() -> None: assert len(processed_results["results"]) == len(results) -@pytest.mark.asyncio def test_compute_metrics() -> None: evaluator = Evaluator(task="test_task") metrics = MockMetricSet() @@ -75,7 +74,6 @@ def test_compute_metrics() -> None: assert computed_metrics["metrics"]["accuracy"] == 0.95 
-@pytest.mark.asyncio def test_compute_time_perf() -> None: evaluator = Evaluator(task="test_task") start_time = 0 diff --git a/benchmarks/sql/tests/test_metrics.py b/benchmarks/sql/tests/test_metrics.py index f26233e4..71396139 100644 --- a/benchmarks/sql/tests/test_metrics.py +++ b/benchmarks/sql/tests/test_metrics.py @@ -4,13 +4,24 @@ import pytest -from benchmarks.sql.bench.metrics import ExactMatchIQL, ExecutionAccuracy, ValidIQL -from benchmarks.sql.bench.pipelines import EvaluationResult, ExecutionResult +from benchmarks.sql.bench.metrics.iql import ( + FilteringAccuracy, + FilteringPrecision, + FilteringRecall, + IQLFiltersAccuracy, + IQLFiltersCorrectness, + IQLFiltersParseability, + IQLFiltersPrecision, + IQLFiltersRecall, +) +from benchmarks.sql.bench.metrics.sql import ExecutionAccuracy, SQLExactMatch +from benchmarks.sql.bench.pipelines import EvaluationResult, ExecutionResult, IQLResult +from benchmarks.sql.bench.pipelines.base import IQL @dataclass class MockDataConfig: - db_url: str = "sqlite:///:memory:" + db_ids: str = "db_id" @dataclass @@ -22,48 +33,238 @@ class MockConfig: def evaluation_results() -> List[EvaluationResult]: return [ EvaluationResult( + db_id="db_id", question="question1", - reference=ExecutionResult(iql="filter_by_column1(10)"), - prediction=ExecutionResult(iql="filter_by_column1(10)"), + reference=ExecutionResult( + iql=IQLResult( + filters=IQL( + source="filter_by_column1(10)", + unsupported=False, + valid=True, + ), + aggregation=IQL( + source=None, + unsupported=False, + valid=True, + ), + ), + sql="SELECT * FROM table WHERE column1 = 10", + ), + prediction=ExecutionResult( + sql="SELECT * FROM table WHERE column1 = 10", + ), ), EvaluationResult( + db_id="db_id", question="question2", - reference=ExecutionResult(iql="filter_by_column2(20)"), - prediction=ExecutionResult(iql="filter_by_column2(30)"), + reference=ExecutionResult( + iql=IQLResult( + filters=IQL( + source="filter_by_column2(20)", + unsupported=False, + valid=True, + ), + aggregation=IQL( + source=None, + unsupported=False, + valid=True, + ), + ), + sql="SELECT * FROM table WHERE column2 = 20", + ), + prediction=ExecutionResult( + iql=IQLResult( + filters=IQL( + source="filter_by_column2(20)", + unsupported=False, + valid=True, + ), + aggregation=IQL( + source=None, + unsupported=False, + valid=True, + ), + ), + sql="SELECT * FROM table WHERE column2 = 30", + ), ), EvaluationResult( + db_id="db_id", question="question3", - reference=ExecutionResult(iql="filter_by_column3('Test')"), - prediction=ExecutionResult(iql="filter_by_column3(30)"), + reference=ExecutionResult( + iql=IQLResult( + filters=IQL( + source="filter_by_column3('TEST')", + unsupported=False, + valid=True, + ), + aggregation=IQL( + source=None, + unsupported=False, + valid=True, + ), + ), + sql="SELECT * FROM table WHERE column3 = 'TEST'", + ), + prediction=ExecutionResult( + iql=IQLResult( + filters=IQL( + source="filter_by_column3('test')", + unsupported=False, + valid=True, + ), + aggregation=IQL( + source=None, + unsupported=False, + valid=True, + ), + ), + sql="SELECT * FROM table WHERE column3 = 'test'", + ), ), EvaluationResult( + db_id="db_id", question="question4", - reference=ExecutionResult(iql="filter_by_column4(40)"), - prediction=ExecutionResult(iql="filter_by_column4(40)"), + reference=ExecutionResult( + iql=IQLResult( + filters=IQL( + source=None, + unsupported=False, + valid=True, + ), + aggregation=IQL( + source=None, + unsupported=False, + valid=True, + ), + ), + sql="SELECT * FROM table WHERE column4 = 
40", + ), + prediction=ExecutionResult( + iql=IQLResult( + filters=IQL( + source="filter_by_column4(40)", + unsupported=False, + valid=True, + ), + aggregation=IQL( + source=None, + unsupported=False, + valid=True, + ), + ), + sql="SELECT * FROM table WHERE column3 = 'TEST'", + ), + ), + EvaluationResult( + db_id="db_id", + question="question5", + reference=ExecutionResult( + iql=IQLResult( + filters=IQL( + source="filter_by_column5(50)", + unsupported=False, + valid=True, + ), + aggregation=IQL( + source=None, + unsupported=False, + valid=True, + ), + ), + sql="SELECT * FROM table WHERE column5 = 50", + ), + prediction=ExecutionResult( + iql=IQLResult( + filters=IQL( + source=None, + unsupported=True, + valid=True, + ), + aggregation=IQL( + source=None, + unsupported=False, + valid=True, + ), + ), + sql="SELECT * FROM table WHERE column5 = 50", + ), ), ] -def test_exact_match_iql(evaluation_results: List[EvaluationResult]) -> None: - metric = ExactMatchIQL() +def test_filtering_accuracy(evaluation_results: List[EvaluationResult]) -> None: + metric = FilteringAccuracy() + scores = metric.compute(evaluation_results) + assert scores["DM/FLT/ACC"] == 0.5 + + +def test_filtering_precision(evaluation_results: List[EvaluationResult]) -> None: + metric = FilteringPrecision() + scores = metric.compute(evaluation_results) + assert scores["DM/FLT/PRECISION"] == 0.5 + + +def test_filtering_recall(evaluation_results: List[EvaluationResult]) -> None: + metric = FilteringRecall() + scores = metric.compute(evaluation_results) + assert scores["DM/FLT/RECALL"] == 0.6666666666666666 + + +def test_iql_filters_accuracy(evaluation_results: List[EvaluationResult]) -> None: + metric = IQLFiltersAccuracy() + scores = metric.compute(evaluation_results) + assert scores["IQL/FLT/ACC"] == 0.6666666666666666 + + +def test_iql_filters_precision(evaluation_results: List[EvaluationResult]) -> None: + metric = IQLFiltersPrecision() scores = metric.compute(evaluation_results) - assert scores["EM_IQL"] == 0.5 + assert scores["IQL/FLT/PRECISION"] == 0.6666666666666666 -def test_valid_iql(evaluation_results) -> None: - metric = ValidIQL() +def test_iql_filters_recall(evaluation_results: List[EvaluationResult]) -> None: + metric = IQLFiltersRecall() scores = metric.compute(evaluation_results) - assert scores["VAL_IQL"] == 1.0 + assert scores["IQL/FLT/RECALL"] == 0.6666666666666666 + + +def test_iql_filters_parseability(evaluation_results: List[EvaluationResult]) -> None: + metric = IQLFiltersParseability() + scores = metric.compute(evaluation_results) + assert scores["IQL/FLT/PARSEABILITY"] == 1 + + +def test_iql_filters_correctness(evaluation_results: List[EvaluationResult]) -> None: + metric = IQLFiltersCorrectness() + scores = metric.compute(evaluation_results) + assert scores["IQL/FLT/CORRECTNESS"] == 0.5 + + +def test_exact_match_sql(evaluation_results: List[EvaluationResult]) -> None: + metric = SQLExactMatch() + scores = metric.compute(evaluation_results) + assert scores["SQL/EM"] == 0.4 @pytest.mark.parametrize( "acc, avg_times, expected_ex, expected_ves", [ - ([True, False, True, True], [1.2, 1.2, 12.2, 12.2, 13.2, 13.2, 232.1, 232.1], 0.75, 0.75), - ([True, True, True, True], [1.2, 1.2, 12.2, 12.2, 13.2, 13.2, 232.1, 232.1], 1.0, 1.0), - ([False, False, False, False], [1.2, 1.2, 12.2, 12.2, 13.2, 13.2, 232.1, 232.1], 0.0, 0.0), - ([True, False, True, True], [1.2, 3.2, 12.2, 15.2, 13.2, 17.2, 232.1, 287.1], 0.75, 0.5960767767585372), - ([True, False, True, True], [3.2, 1.2, 15.2, 12.2, 17.2, 13.2, 287.1, 232.1], 0.75, 
0.9726740826467557), + ([True, False, True, True, True], [1.2, 1.2, 12.2, 12.2, 13.2, 13.2, 232.1, 232.1, 3, 3], 0.8, 0.8), + ([True, True, True, True, True], [1.2, 1.2, 12.2, 12.2, 13.2, 13.2, 232.1, 232.1, 3, 3], 1.0, 1.0), + ([False, False, False, False, False], [1.2, 1.2, 12.2, 12.2, 13.2, 13.2, 232.1, 232.1, 3, 3], 0.0, 0.0), + ( + [True, False, True, True, True], + [1.2, 3.2, 12.2, 15.2, 13.2, 17.2, 232.1, 287.1, 3, 3], + 0.8, + 0.6566867943411235, + ), + ( + [True, False, True, True, True], + [3.2, 1.2, 15.2, 12.2, 17.2, 13.2, 287.1, 232.1, 3, 3], + 0.8, + 1.00057728666646, + ), ], ) def test_execution_accuracy( diff --git a/setup.cfg b/setup.cfg index 77b7ef07..8ea3dff2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -56,16 +56,9 @@ examples = pydantic_settings~=2.1.0 psycopg2-binary~=2.9.9 benchmarks = - asyncpg~=0.28.0 datasets~=2.20.0 - eval-type-backport~=0.1.3 hydra-core~=1.3.2 - loguru~=0.7.0 neptune~=1.6.3 - pydantic~=2.6.1 - pydantic-core~=2.16.2 - pydantic-settings~=2.0.3 - psycopg2-binary~=2.9.9 elasticsearch = elasticsearch~=8.13.1 gradio = From c6781ebb73fb3c914ed6ed7f31e3cd2acc3b3cc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Tue, 30 Jul 2024 10:06:46 +0200 Subject: [PATCH 31/34] add iql gen exception --- src/dbally/iql_generator/iql_generator.py | 10 +++- src/dbally/llms/base.py | 3 + src/dbally/nl_responder/nl_responder.py | 3 + .../view_selection/llm_view_selector.py | 3 + src/dbally/views/structured.py | 60 ++++++++++++++++--- 5 files changed, 68 insertions(+), 11 deletions(-) diff --git a/src/dbally/iql_generator/iql_generator.py b/src/dbally/iql_generator/iql_generator.py index af578410..6aae12fd 100644 --- a/src/dbally/iql_generator/iql_generator.py +++ b/src/dbally/iql_generator/iql_generator.py @@ -5,6 +5,7 @@ from dbally.iql_generator.prompt import IQL_GENERATION_TEMPLATE, IQLGenerationPromptFormat from dbally.llms.base import LLM from dbally.llms.clients.base import LLMOptions +from dbally.llms.clients.exceptions import LLMError from dbally.prompt.elements import FewShotExample from dbally.prompt.template import PromptTemplate from dbally.views.exposed_functions import ExposedFunction @@ -52,13 +53,15 @@ async def generate_iql( event_tracker: Event store used to audit the generation process. examples: List of examples to be injected into the conversation. llm_options: Options to use for the LLM client. - n_retries: Number of retries to regenerate IQL in case of errors. + n_retries: Number of retries to regenerate IQL in case of errors in parsing or LLM connection. Returns: Generated IQL query. Raises: - IQLError: If IQL generation fails after all retries. + LLMError: If LLM text generation fails after all retries. + IQLError: If IQL parsing fails after all retries. + UnsupportedQueryError: If the question is not supported by the view. """ prompt_format = IQLGenerationPromptFormat( question=question, @@ -82,6 +85,9 @@ async def generate_iql( allowed_functions=filters, event_tracker=event_tracker, ) + except LLMError as exc: + if retry == n_retries: + raise exc except IQLError as exc: if retry == n_retries: raise exc diff --git a/src/dbally/llms/base.py b/src/dbally/llms/base.py index 7e2381e1..e6f3d3dd 100644 --- a/src/dbally/llms/base.py +++ b/src/dbally/llms/base.py @@ -69,6 +69,9 @@ async def generate_text( Returns: Text response from LLM. + + Raises: + LLMError: If LLM text generation fails. 
""" options = (self.default_options | options) if options else self.default_options event = LLMEvent(prompt=prompt.chat, type=type(prompt).__name__) diff --git a/src/dbally/nl_responder/nl_responder.py b/src/dbally/nl_responder/nl_responder.py index 7a8f98e4..38473e98 100644 --- a/src/dbally/nl_responder/nl_responder.py +++ b/src/dbally/nl_responder/nl_responder.py @@ -59,6 +59,9 @@ async def generate_response( Returns: Natural language response to the user question. + + Raises: + LLMError: If LLM text generation fails. """ prompt_format = NLResponsePromptFormat( question=question, diff --git a/src/dbally/view_selection/llm_view_selector.py b/src/dbally/view_selection/llm_view_selector.py index b4069bb1..955bb288 100644 --- a/src/dbally/view_selection/llm_view_selector.py +++ b/src/dbally/view_selection/llm_view_selector.py @@ -48,6 +48,9 @@ async def select_view( Returns: The most relevant view name. + + Raises: + LLMError: If LLM text generation fails. """ prompt_format = ViewSelectionPromptFormat(question=question, views=views) formatted_prompt = self._prompt_template.format_prompt(prompt_format) diff --git a/src/dbally/views/structured.py b/src/dbally/views/structured.py index d7b8d99b..df98f1db 100644 --- a/src/dbally/views/structured.py +++ b/src/dbally/views/structured.py @@ -4,8 +4,11 @@ from dbally.audit.event_tracker import EventTracker from dbally.collection.results import ViewExecutionResult +from dbally.exceptions import DbAllyError from dbally.iql import IQLQuery +from dbally.iql._exceptions import IQLError from dbally.iql_generator.iql_generator import IQLGenerator +from dbally.iql_generator.prompt import UnsupportedQueryError from dbally.llms.base import LLM from dbally.llms.clients.base import LLMOptions from dbally.views.exposed_functions import ExposedFunction @@ -14,6 +17,30 @@ from .base import BaseView, IndexLocation +# TODO: Move this error to generators +class IQLGenerationError(DbAllyError): + """ + Exception for when an error occurs while executing a view. + """ + + def __init__( + self, + view_name: str, + filters: Optional[str] = None, + aggregation: Optional[str] = None, + ) -> None: + """ + Args: + view_name: Name of the view that caused the error. + filters: Filters generated by the view. + aggregation: Aggregation generated by the view. + """ + super().__init__(f"Error executing view '{view_name}'.") + self.view_name = view_name + self.filters = filters + self.aggregation = aggregation + + class BaseStructuredView(BaseView): """ Base class for all structured [Views](../../concepts/views.md). All classes implementing this interface has\ @@ -57,21 +84,36 @@ async def ask( The result of the query. Raises: - IQLError: If the generated IQL query is not valid. + LLMError: If LLM text generation API fails. + IQLGenerationError: If the IQL generation fails. 
""" iql_generator = self.get_iql_generator(llm) filters = self.list_filters() examples = self.list_few_shots() - iql = await iql_generator.generate_iql( - question=query, - filters=filters, - examples=examples, - event_tracker=event_tracker, - llm_options=llm_options, - n_retries=n_retries, - ) + try: + iql = await iql_generator.generate_iql( + question=query, + filters=filters, + examples=examples, + event_tracker=event_tracker, + llm_options=llm_options, + n_retries=n_retries, + ) + except UnsupportedQueryError as exc: + raise IQLGenerationError( + view_name=self.__class__.__name__, + filters=None, + aggregation=None, + ) from exc + except IQLError as exc: + raise IQLGenerationError( + view_name=self.__class__.__name__, + filters=exc.source, + aggregation=None, + ) from exc + await self.apply_filters(iql) result = self.execute(dry_run=dry_run) From 6ae80d7cc24f7fbf624a1c4b5da9c2d8e7e0cb44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Tue, 30 Jul 2024 10:12:32 +0200 Subject: [PATCH 32/34] move to separate file --- src/dbally/views/exceptions.py | 26 ++++++++++++++++++++++++++ src/dbally/views/structured.py | 26 +------------------------- 2 files changed, 27 insertions(+), 25 deletions(-) create mode 100644 src/dbally/views/exceptions.py diff --git a/src/dbally/views/exceptions.py b/src/dbally/views/exceptions.py new file mode 100644 index 00000000..277064a4 --- /dev/null +++ b/src/dbally/views/exceptions.py @@ -0,0 +1,26 @@ +from typing import Optional + +from dbally.exceptions import DbAllyError + + +class IQLGenerationError(DbAllyError): + """ + Exception for when an error occurs while generating IQL for a view. + """ + + def __init__( + self, + view_name: str, + filters: Optional[str] = None, + aggregation: Optional[str] = None, + ) -> None: + """ + Args: + view_name: Name of the view that caused the error. + filters: Filters generated by the view. + aggregation: Aggregation generated by the view. + """ + super().__init__(f"Error while generating IQL for view {view_name}") + self.view_name = view_name + self.filters = filters + self.aggregation = aggregation diff --git a/src/dbally/views/structured.py b/src/dbally/views/structured.py index df98f1db..d7c40cd9 100644 --- a/src/dbally/views/structured.py +++ b/src/dbally/views/structured.py @@ -4,43 +4,19 @@ from dbally.audit.event_tracker import EventTracker from dbally.collection.results import ViewExecutionResult -from dbally.exceptions import DbAllyError from dbally.iql import IQLQuery from dbally.iql._exceptions import IQLError from dbally.iql_generator.iql_generator import IQLGenerator from dbally.iql_generator.prompt import UnsupportedQueryError from dbally.llms.base import LLM from dbally.llms.clients.base import LLMOptions +from dbally.views.exceptions import IQLGenerationError from dbally.views.exposed_functions import ExposedFunction from ..similarity import AbstractSimilarityIndex from .base import BaseView, IndexLocation -# TODO: Move this error to generators -class IQLGenerationError(DbAllyError): - """ - Exception for when an error occurs while executing a view. - """ - - def __init__( - self, - view_name: str, - filters: Optional[str] = None, - aggregation: Optional[str] = None, - ) -> None: - """ - Args: - view_name: Name of the view that caused the error. - filters: Filters generated by the view. - aggregation: Aggregation generated by the view. 
- """ - super().__init__(f"Error executing view '{view_name}'.") - self.view_name = view_name - self.filters = filters - self.aggregation = aggregation - - class BaseStructuredView(BaseView): """ Base class for all structured [Views](../../concepts/views.md). All classes implementing this interface has\ From d1b9857cc684375e6fdbae86a47ba6c1492787f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Tue, 30 Jul 2024 10:56:34 +0200 Subject: [PATCH 33/34] update docs --- benchmarks/sql/bench/metrics/iql.py | 39 +++++++++++++----------- benchmarks/sql/bench/metrics/selector.py | 19 ++++++------ 2 files changed, 31 insertions(+), 27 deletions(-) diff --git a/benchmarks/sql/bench/metrics/iql.py b/benchmarks/sql/bench/metrics/iql.py index d7068ea2..07cf90c9 100644 --- a/benchmarks/sql/bench/metrics/iql.py +++ b/benchmarks/sql/bench/metrics/iql.py @@ -6,7 +6,7 @@ class FilteringAccuracy(Metric): """ - Filtering accuracy indicating proportion of questions that were correctly identified as having filters. + Filtering accuracy is proportion of correct decisions (to filter or not) out of all decisions made. """ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: @@ -36,7 +36,7 @@ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: class FilteringPrecision(Metric): """ - Filtering precision indicating proportion of questions that were identified as having filters correctly. + Filtering precision is proportion of correct decisions to filter out of all decisions to filter. """ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: @@ -71,7 +71,8 @@ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: class FilteringRecall(Metric): """ - Filtering recall indicating proportion of questions that were correctly identified as having filters. + Filtering recall is proportion of correct decisions to filter out of all cases where filtering + should have been applied. """ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: @@ -106,18 +107,19 @@ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: class IQLFiltersAccuracy(Metric): """ - Ratio of predicated IQL filters that are identical to the ground truth ones. + IQL filters accuracy is proportion of correct IQL generations and unsupported query identifications out + of all attempts. """ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: """ - Computes the exact match ratio. + Computes the IQL filters accuracy. Args: results: List of evaluation results. Returns: - Ratio of predicated queries that are identical to the ground truth ones. + IQL filters accuracy. """ results = [ result @@ -145,18 +147,18 @@ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: class IQLFiltersPrecision(Metric): """ - Ratio of predicated IQL filters that are identical to the ground truth ones. + IQL filters precision is proportion of correct IQL generations out of all IQL generation attempts. """ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: """ - Computes the exact match ratio. + Computes the IQL filters precision. Args: results: List of evaluation results. Returns: - Ratio of predicated queries that are identical to the ground truth ones. + IQL filters precision. """ results = [ result @@ -183,18 +185,19 @@ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: class IQLFiltersRecall(Metric): """ - Ratio of predicated IQL filters that are identical to the ground truth ones. 
+ IQL filters recall is proportion of correct IQL generations out of all cases where an IQL + should have been generated. """ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: """ - Computes the exact match ratio. + Computes the IQL filters recall. Args: results: List of evaluation results. Returns: - Ratio of predicated queries that are identical to the ground truth ones. + IQL filters recall. """ results = [ result @@ -221,18 +224,18 @@ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: class IQLFiltersParseability(Metric): """ - Ratio of predicated IQL filters that are identical to the ground truth ones. + IQL filters parseability is proportion of syntactically correct (parseable) IQLs out of all generated IQLs. """ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: """ - Computes the exact match ratio. + Computes the IQL filters parseability. Args: results: List of evaluation results. Returns: - Ratio of predicated queries that are identical to the ground truth ones. + IQl filters parseability. """ results = [ result @@ -250,18 +253,18 @@ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: class IQLFiltersCorrectness(Metric): """ - Ratio of predicated IQL filters that are identical to the ground truth ones. + IQL filters correctness is proportion of IQLs that produce correct results out of all parseable IQLs. """ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: """ - Computes the exact match ratio. + Computes the IQL filters correctness. Args: results: List of evaluation results. Returns: - Ratio of predicated queries that are identical to the ground truth ones. + IQL filters correctness. """ results = [ result diff --git a/benchmarks/sql/bench/metrics/selector.py b/benchmarks/sql/bench/metrics/selector.py index 42b20ef8..3d72b269 100644 --- a/benchmarks/sql/bench/metrics/selector.py +++ b/benchmarks/sql/bench/metrics/selector.py @@ -6,18 +6,18 @@ class ViewSelectionAccuracy(Metric): """ - Ratio of predicated queries that are identical to the ground truth ones. + View selection accuracy is the proportion of correct view selections out of all view selection attempts. """ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: """ - Computes the exact match ratio. + Computes the view selection accuracy. Args: results: List of evaluation results. Returns: - Ratio of predicated queries that are identical to the ground truth ones. + View selection accuracy. """ return { "VIEW/ACC": ( @@ -30,18 +30,18 @@ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: class ViewSelectionPrecision(Metric): """ - Ratio of predicated queries that are identical to the ground truth ones. + View selection precision is proportion of correct view selections out of all cases where a view was selected. """ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: """ - Computes the exact match ratio. + Computes the view selection precision. Args: results: List of evaluation results. Returns: - Ratio of predicated queries that are identical to the ground truth ones. + View selection precision. """ results = [result for result in results if result.prediction.view_name] return { @@ -55,18 +55,19 @@ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: class ViewSelectionRecall(Metric): """ - Ratio of predicated queries that are identical to the ground truth ones. 
+ View selection recall is proportion of correct view selections out of all cases where a view should have + been selected. """ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: """ - Computes the exact match ratio. + Computes the view selection recall. Args: results: List of evaluation results. Returns: - Ratio of predicated queries that are identical to the ground truth ones. + View selection recall. """ results = [ result From cb70ee9751c60801d7cacc1e8a251b37438a7da7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Tue, 30 Jul 2024 17:50:29 +0200 Subject: [PATCH 34/34] add view names for eval results --- benchmarks/sql/bench/pipelines/collection.py | 2 +- benchmarks/sql/bench/pipelines/view.py | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/benchmarks/sql/bench/pipelines/collection.py b/benchmarks/sql/bench/pipelines/collection.py index 918cfbd9..dfc127cf 100644 --- a/benchmarks/sql/bench/pipelines/collection.py +++ b/benchmarks/sql/bench/pipelines/collection.py @@ -8,7 +8,7 @@ from dbally.iql._exceptions import IQLError from dbally.iql_generator.prompt import UnsupportedQueryError from dbally.view_selection.llm_view_selector import LLMViewSelector -from dbally.views.structured import IQLGenerationError +from dbally.views.exceptions import IQLGenerationError from ..views import VIEWS_REGISTRY from .base import IQL, EvaluationPipeline, EvaluationResult, ExecutionResult, IQLResult diff --git a/benchmarks/sql/bench/pipelines/view.py b/benchmarks/sql/bench/pipelines/view.py index 37969365..d4ae8515 100644 --- a/benchmarks/sql/bench/pipelines/view.py +++ b/benchmarks/sql/bench/pipelines/view.py @@ -7,9 +7,9 @@ from dbally.iql._exceptions import IQLError from dbally.iql_generator.prompt import UnsupportedQueryError +from dbally.views.exceptions import IQLGenerationError from dbally.views.freeform.text2sql.view import BaseText2SQLView from dbally.views.sqlalchemy_base import SqlAlchemyBaseView -from dbally.views.structured import IQLGenerationError from ..views import VIEWS_REGISTRY from .base import IQL, EvaluationPipeline, EvaluationResult, ExecutionResult, IQLResult @@ -193,11 +193,19 @@ async def __call__(self, data: Dict[str, Any]) -> EvaluationResult: ) # TODO: Remove this broad exception handling once the Text2SQL view is fixed except Exception: # pylint: disable=broad-except - prediction = ExecutionResult() + prediction = ExecutionResult( + view_name=view.__class__.__name__, + ) else: - prediction = ExecutionResult(sql=result.context["sql"]) + prediction = ExecutionResult( + view_name=view.__class__.__name__, + sql=result.context["sql"], + ) - reference = ExecutionResult(sql=data["sql"]) + reference = ExecutionResult( + view_name=data["view_name"], + sql=data["sql"], + ) return EvaluationResult( db_id=data["db_id"],
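The docstring rewrites in PATCH 33/34 above define the filtering metrics purely in terms of binary filter / do-not-filter decisions compared against the ground truth: accuracy over all decisions, precision over the decisions to filter, and recall over the cases where filtering was required. A minimal, self-contained sketch of those three definitions (illustrative only — the function, variable names, and numbers below are invented for this example and are not code from the patches):

```python
from typing import Dict, List, Optional, Tuple


def filtering_decision_metrics(decisions: List[Tuple[bool, bool]]) -> Dict[str, Optional[float]]:
    """Each pair is (predicted_should_filter, reference_should_filter) for one question."""
    # Accuracy: correct decisions (to filter or not) out of all decisions made.
    accuracy = sum(pred == ref for pred, ref in decisions) / len(decisions) if decisions else None
    # Precision: correct decisions to filter out of all decisions to filter.
    predicted_positive = [ref for pred, ref in decisions if pred]
    precision = sum(predicted_positive) / len(predicted_positive) if predicted_positive else None
    # Recall: correct decisions to filter out of all cases where filtering should have been applied.
    reference_positive = [pred for pred, ref in decisions if ref]
    recall = sum(reference_positive) / len(reference_positive) if reference_positive else None
    return {"accuracy": accuracy, "precision": precision, "recall": recall}


# Three questions require filters, one does not; the model decides to filter the 1st, 2nd and 4th.
print(filtering_decision_metrics([(True, True), (True, True), (False, True), (True, False)]))
# {'accuracy': 0.5, 'precision': 0.666..., 'recall': 0.666...}
```

The IQL-generation and view-selection metrics updated later in the same patch follow the same accuracy/precision/recall pattern, with "correct IQL generation" or "correct view selection" in place of the filtering decision.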
diff --git a/benchmarks/sql/dataset/bird_dataset.py b/benchmarks/sql/dataset/bird_dataset.py deleted file mode 100644 index 67a009cf..00000000 --- a/benchmarks/sql/dataset/bird_dataset.py +++ /dev/null @@ -1,63 +0,0 @@ -from __future__ import annotations - -import enum -from pathlib import Path -from typing import Iterator - -from pydantic import BaseModel, RootModel -from utils import load_data - - -class DifficultyLevel(str, enum.Enum): - """Enum representing BIRD example difficulty level.""" - - SIMPLE = "simple" - MODERATE = "moderate" - CHALLENGING = "challenging" - - -class BIRDExample(BaseModel): - """Class for storing a single instance of example.""" - - db_id: str - question: str - evidence: str - SQL: str - difficulty: DifficultyLevel - - -class BIRDDataset(RootModel): - """Class for storing BIRD benchmark examples.""" - - root: list[BIRDExample] - - def __iter__(self) -> Iterator[BIRDExample]: # type: ignore - return iter(self.root) - - def __len__(self): - return len(self.root) - - def __getitem__(self, key: int) -> BIRDExample: - return self.root[key] - - @classmethod - def from_json_file(cls, file_path: Path, difficulty_levels: list[str] | None = None) -> BIRDDataset: - """ - Constructor for loading the dataset from a json file. - - Args: - file_path: File from which the dataset should be read.
- difficulty_levels: Difficulty levels by which the dataset will be filtered. - - Returns: - Dataset object initiated from the file. - """ - - data = load_data(file_path) - dataset_obj = cls.model_validate_json(data) - - if difficulty_levels: - difficulty_levels = [DifficultyLevel(level) for level in difficulty_levels] - dataset_obj.root = [item for item in dataset_obj.root if item.difficulty in difficulty_levels] - - return dataset_obj diff --git a/benchmarks/sql/iql/method_call_visitor.py b/benchmarks/sql/iql/method_call_visitor.py deleted file mode 100644 index 0a8553bb..00000000 --- a/benchmarks/sql/iql/method_call_visitor.py +++ /dev/null @@ -1,68 +0,0 @@ -import ast -from typing import Any, List - - -class MethodCallVisitor(ast.NodeVisitor): - """Visitor to extract method calls from an AST.""" - - def __init__(self) -> None: - self._name: List[str] = [] - - @property - def name(self) -> str: - """Return the method call name.""" - return ".".join(self._name) - - @name.deleter - def name(self) -> None: - """Reset the method call name.""" - self._name = [] - - def visit_Name(self, node: Any) -> None: # pylint: disable=invalid-name - # I had to name this function this way because otherwise - # it won't work withast.NodeVisitor. - """ - Updates the method call name after visiting a Name node. - - Args: - node: The node to visit. - """ - - self._name.insert(0, node.id) - - def visit_Attribute(self, node: Any) -> None: # pylint: disable=invalid-name - # I had to name this function this way because otherwise - # it won't work withast.NodeVisitor. - """ - Updates the method call name after visiting an Attribute node. - - Args: - node: The node to visit. - """ - - try: - self._name.insert(0, node.attr) - except AttributeError: - self.generic_visit(node) - - @staticmethod - def get_method_calls(tree: ast.AST) -> List[str]: - """ - Return the method calls from the given AST. - - Args: - tree: The abstract syntax tree to extract method calls from. - - Returns: - A list of method calls. - """ - - method_calls: List[str] = [] - - for node in ast.walk(tree): - if isinstance(node, ast.Call): - visitor = MethodCallVisitor() - visitor.visit(node.func) - method_calls.append(visitor.name) - - return method_calls diff --git a/benchmarks/sql/iql/metrics.py b/benchmarks/sql/iql/metrics.py deleted file mode 100644 index 35146421..00000000 --- a/benchmarks/sql/iql/metrics.py +++ /dev/null @@ -1,145 +0,0 @@ -import ast -from typing import List, Tuple - -from iql.method_call_visitor import MethodCallVisitor -from loguru import logger -from results import TextToIQLResult - -from dbally.iql._exceptions import IQLError, IQLUnsupportedSyntaxError -from dbally.iql._query import IQLQuery -from dbally.views.structured import ExposedFunction - - -def _count_hallucinated_methods_for_single_example(iql: str, allowed_methods: List[str]) -> Tuple[int, int]: - try: - predicted_methods = MethodCallVisitor.get_method_calls(ast.parse(iql)) - - hallucinated_methods_count = 0 - - for method in predicted_methods: - if method not in allowed_methods: - hallucinated_methods_count += 1 - - return hallucinated_methods_count, len(predicted_methods) - except: # noqa: E722 pylint: disable=bare-except - return 0, 0 - - -def calculate_hallucinated_filters(results: List[TextToIQLResult], filter_list: List[ExposedFunction]) -> float: - """ - Calculates the ratio of hallucinated filters for a given results. - - Args: - results: List containing TextToIQLResult objects that represents predicted filters. - filter_list: List of allowed filters. 
- - Returns: - Hallucinated filters ratio. - """ - - hallucinated_filters_count = 0 - total_filters_count = 0 - - allowed_filters = [filter.name for filter in filter_list] - - for example in results: - hallucinated_filters, total_filters = _count_hallucinated_methods_for_single_example( - example.predicted_iql, allowed_filters - ) - hallucinated_filters_count += hallucinated_filters - total_filters_count += total_filters - - if total_filters_count == 0: - return 0 - - return hallucinated_filters_count / total_filters_count - - -async def calculate_valid_iql(results: List[TextToIQLResult], filter_list: List[ExposedFunction]) -> float: - """ - Calculates the ratio of valid IQL queries for a given results. - - Args: - results: List containing TextToIQLResult objects that represents predicted filters. - filter_list: List of allowed filters. - - Returns: - Valid IQL ratio. - """ - - valid_iql = 0 - - for example in results: - try: - await IQLQuery.parse(example.predicted_iql, filter_list) - valid_iql += 1 - except Exception as exc: # pylint: disable=broad-exception-caught - logger.warning(f"Error while parsing IQL: {example.predicted_iql}\n{exc}") - - return valid_iql / len(results) - - -def calculate_exact_match(results: List[TextToIQLResult]) -> float: - """ - For a results, it calculates the ratio of predicated queries that are identical - to the ground truth ones. - - Args: - results: List containing Text2SQLResult objects that represents ground truth query, predicted query. - - Returns: - The ratio of predicated queries that are identical to the ground truth ones. - """ - exact_query_matches = 0 - - for example in results: - if example.ground_truth_iql == example.predicted_iql: - exact_query_matches += 1 - - return exact_query_matches / len(results) - - -async def calculate_invalid_iql(results: List[TextToIQLResult], filter_list: List[ExposedFunction]) -> float: - """ - Calculates the ratio of syntax errors for a given results. - - Args: - results: List containing TextToIQLResult objects that represents predicted filters. - filter_list: List of allowed filters. - - Returns: - Syntax errors ratio. - """ - syntax_errors = 0 - - filtered_results = [result for result in results if result.predicted_iql != "UNSUPPORTED_QUERY"] - - for result in filtered_results: - try: - await IQLQuery.parse(result.predicted_iql, filter_list) - except (IQLError, IQLUnsupportedSyntaxError, SyntaxError): - syntax_errors += 1 - except Exception as exc: # pylint: disable=broad-exception-caught - # I haven't figured out yet how to handle it better :( - logger.warning(f"Error while parsing IQL: {result.predicted_iql}\n{exc}") - - return syntax_errors / len(filtered_results) - - -def calculate_unsupported_iql(results: List[TextToIQLResult]) -> float: - """ - Calculates the ratio of unsupported queries for a given results. - - Args: - results: List containingTextToTextToIQLResult objects that represents predicted filters. - - Returns: - Unsupported queries ratio. 
- """ - unsupported_queries = 0 - - for result in results: - if result.predicted_iql == "UNSUPPORTED_QUERY": - unsupported_queries += 1 - - return unsupported_queries / len(results) diff --git a/benchmarks/sql/pipelines.py b/benchmarks/sql/pipelines.py deleted file mode 100644 index 8feeca5b..00000000 --- a/benchmarks/sql/pipelines.py +++ /dev/null @@ -1,285 +0,0 @@ -import json -from abc import ABC, abstractmethod -from dataclasses import asdict -from typing import Dict, List, Tuple - -from datasets import Dataset -from iql.metrics import calculate_exact_match as calculate_iql_exact_match -from iql.metrics import ( - calculate_hallucinated_filters, - calculate_invalid_iql, - calculate_unsupported_iql, - calculate_valid_iql, -) -from results import TextToIQLResult, TextToSQLResult -from sqlalchemy import create_engine, text -from text2sql.metrics import calculate_exact_match as calculate_sql_exact_match -from text2sql.metrics import calculate_exec_acc, calculate_undefined_error_ratio, calculate_valid_sql, calculate_ves -from views import FREEFORM_VIEW_REGISTRY, STRUCTURED_VIEW_REGISTRY - -from dbally.iql._exceptions import IQLError -from dbally.iql_generator.iql_generator import IQLGenerator -from dbally.iql_generator.prompt import UnsupportedQueryError -from dbally.llms.base import LLM -from dbally.llms.litellm import LiteLLM -from dbally.llms.local import LocalLLM -from dbally.views.freeform.text2sql.prompt import SQL_GENERATION_TEMPLATE, SQLGenerationPromptFormat -from dbally.views.freeform.text2sql.view import BaseText2SQLView, SQLParameterOption -from dbally.views.sqlalchemy_base import SqlAlchemyBaseView - -Metrics = Dict[str, float] -Results = List[Dict[str, str]] - - -class EvaluationPipeline(ABC): - """ - Evaluation pipeline base class. - """ - - def __init__(self, config: Dict) -> None: - self.engine = create_engine(config.db_url) - - def get_llm(self, llm_config: Dict) -> LLM: - """ - Returns the LLM based on the configuration. - - Args: - llm_config: The LLM configuration. - - Returns: - The LLM object. - """ - if llm_config.model_name.startswith("local/"): - return LocalLLM(llm_config.model_name.split("/", 1)[1]) - return LiteLLM(llm_config.model_name) - - @abstractmethod - async def __call__(self, dataset: Dataset) -> Tuple[Metrics, Results]: - """ - Runs the evaluation pipeline. - - Args: - dataset: The dataset containing the questions and ground truth IQL queries. - - Returns: - The list of IQL predictions. - """ - - -class TextToIQLEvaluationPipeline(EvaluationPipeline): - """ - Pipeline for evaluating IQL predictions. - """ - - def __init__(self, config: Dict) -> None: - """ - Constructs the pipeline for evaluating IQL predictions. - - Args: - config: The configuration for the pipeline. - - Raises: - ValueError: If the view name is not supported. - """ - super().__init__(config) - self.view = self.get_view(config.view_name) - self.iql_generator = self.get_iql_generator(config.llm) - - def get_view(self, view_name: str) -> SqlAlchemyBaseView: - """ - Returns the view object based on the view name. - - Args: - view_name: The name of the view. - - Returns: - The view object. - - Raises: - ValueError: If the view name is not supported - """ - if view_name not in STRUCTURED_VIEW_REGISTRY: - raise ValueError(f"View {view_name} not supported. Available views: {STRUCTURED_VIEW_REGISTRY}.") - return STRUCTURED_VIEW_REGISTRY[view_name](self.engine) - - def get_iql_generator(self, llm_config: Dict) -> IQLGenerator: - """ - Returns the IQL generator based on the LLM configuration. 
- - Args: - llm_config: The LLM configuration. - - Returns: - The IQL generator. - """ - llm = self.get_llm(llm_config) - return IQLGenerator(llm) - - async def compute_metrics(self, results: List[TextToIQLResult]) -> Dict[str, float]: - """ - Computes the metrics for IQL predictions. - - Args: - results: The list of IQL predictions. - - Returns: - The metrics for the IQL predictions. - """ - filters = self.view.list_filters() - - return { - "exact_match": calculate_iql_exact_match(results), - "valid_iql": await calculate_valid_iql(results, filters), - "invalid_iql": await calculate_invalid_iql(results, filters), - "unsupported_iql": calculate_unsupported_iql(results), - "hallucinated_iql": calculate_hallucinated_filters(results, filters), - } - - async def __call__(self, dataset: Dataset) -> Tuple[Metrics, Results]: - """ - Runs the pipeline for evaluating IQL predictions. - - Args: - dataset: The dataset containing the questions and ground truth IQL queries. - - Returns: - The list of IQL predictions. - """ - filters = self.view.list_filters() - examples = self.view.list_few_shots() - results = [] - - for data in dataset: - try: - predicted_iql = await self.iql_generator.generate_iql( - question=data["question"], - filters=filters, - examples=examples, - n_retries=0, - ) - except UnsupportedQueryError: - result = "UNSUPPORTED_QUERY" - except IQLError as exc: - result = exc.source - else: - result = str(predicted_iql) - - results.append( - TextToIQLResult(question=data["question"], ground_truth_iql=data["iql"], predicted_iql=result) - ) - - metrics = await self.compute_metrics(results) - results = [asdict(result) for result in results] - - return metrics, results - - -class TextToSQLEvaluationPipeline(EvaluationPipeline): - """ - Pipeline for evaluating SQL predictions. - """ - - def __init__(self, config: Dict) -> None: - """ - Constructs the pipeline for evaluating SQL predictions. - - Args: - config: The configuration for the pipeline. - - Raises: - ValueError: If the view name is not supported. - """ - super().__init__(config) - self.view = self.get_view(config.view_name) - self.sql_generator = self.get_sql_generator(config.llm) - - def get_view(self, view_name: str) -> BaseText2SQLView: - """ - Returns the view object based on the view name. - - Args: - view_name: The name of the view. - - Returns: - The view object. - - Raises: - ValueError: If the view name is not supported - """ - if view_name not in FREEFORM_VIEW_REGISTRY: - raise ValueError(f"View {view_name} not supported. Available views: {FREEFORM_VIEW_REGISTRY}.") - return FREEFORM_VIEW_REGISTRY[view_name](self.engine) - - def get_sql_generator(self, llm_config: Dict) -> LLM: - """ - Returns the IQL generator based on the LLM configuration. - - Args: - llm_config: The LLM configuration. - - Returns: - The IQL generator. - """ - # TODO: Implement SQL generator - return self.get_llm(llm_config) - - async def compute_metrics(self, results: List[TextToSQLResult]) -> Dict[str, float]: - """ - Computes the metrics for IQL predictions. - - Args: - results: The list of IQL predictions. - - Returns: - The metrics for the IQL predictions. 
- """ - return { - "valid_sql": calculate_valid_sql(results, self.engine), - "undefined_error": calculate_undefined_error_ratio(results), - "exact_match": calculate_sql_exact_match(results), - "execution_accuracy": calculate_exec_acc(results, self.engine), - "valid_efficiency_score": calculate_ves(results, self.engine), - } - - async def __call__(self, dataset: Dataset) -> Tuple[Metrics, Results]: - """ - Runs the pipeline for evaluating IQL predictions. - - Args: - dataset: The dataset containing the questions and ground truth IQL queries. - - Returns: - The list of IQL predictions. - """ - tables = self.view.get_tables() - examples = self.view.list_few_shots() - results = [] - - for data in dataset: - try: - # TODO: Refactor this once the SQL generator is implemented - prompt_format = SQLGenerationPromptFormat( - question=data["question"], - dialect=self.engine.dialect.name, - tables=tables, - examples=examples, - ) - formatted_prompt = SQL_GENERATION_TEMPLATE.format_prompt(prompt_format) - response = await self.sql_generator.generate_text(formatted_prompt) - response = json.loads(response) - params = [SQLParameterOption.from_dict(param) for param in response.get("parameters", [])] - params = {param.name: param.value for param in params} - stmt = text(response.get("sql", "")) - stmt = stmt.bindparams(**params) - result = str(stmt.compile(compile_kwargs={"literal_binds": True})) - except Exception: # pylint: disable=broad-except - result = "" - - results.append( - TextToSQLResult(question=data["question"], ground_truth_sql=data["sql"], predicted_sql=result) - ) - - metrics = await self.compute_metrics(results) - results = [asdict(result) for result in results] - - return metrics, results diff --git a/benchmarks/sql/results.py b/benchmarks/sql/results.py deleted file mode 100644 index 7eea01e0..00000000 --- a/benchmarks/sql/results.py +++ /dev/null @@ -1,23 +0,0 @@ -from dataclasses import dataclass - - -@dataclass -class TextToIQLResult: - """ - Represents a single TextToIQL result. - """ - - question: str - ground_truth_iql: str - predicted_iql: str - - -@dataclass -class TextToSQLResult: - """ - Represents a single TextToSQL result. - """ - - question: str - ground_truth_sql: str - predicted_sql: str diff --git a/benchmarks/sql/saving.py b/benchmarks/sql/saving.py deleted file mode 100644 index a2ab81d9..00000000 --- a/benchmarks/sql/saving.py +++ /dev/null @@ -1,54 +0,0 @@ -import json -import os -import sys -from datetime import datetime -from pathlib import Path -from typing import Any - -from datasets.utils.filelock import FileLock - - -def save(path_or_file: str, **data: Any) -> Path: - """ - Saves results to a JSON file. Also saves system information such as current time and Python system information. - - Args: - path_or_file: Path or file to store the file. If only a folder is provided - the results file will be saved in the format `"result-%Y_%m_%d-%H_%M_%S.json"`. - **data: The data to save. - - Returns: - The path to the saved file. 
- """ - current_time = datetime.now() - print(type(current_time)) - - file_path = _setup_path(path_or_file, current_time) - - data["_timestamp"] = current_time.isoformat() - data["_python_version"] = sys.version - data["_interpreter_path"] = sys.executable - - with FileLock(str(file_path) + ".lock"): - with open(file_path, "w", encoding="utf8") as f: - json.dump(data, f) - - try: - os.remove(str(file_path) + ".lock") - except FileNotFoundError: - pass - - return file_path - - -def _setup_path(path_or_file: str, current_time: datetime) -> Path: - path_or_file = Path(path_or_file) - is_file = len(path_or_file.suffix) > 0 - if is_file: - folder = path_or_file.parent - file_name = path_or_file.name - else: - folder = path_or_file - file_name = "result-" + current_time.strftime("%Y_%m_%d-%H_%M_%S") + ".json" - folder.mkdir(parents=True, exist_ok=True) - return folder / file_name diff --git a/benchmarks/sql/tests/unit/test_iql_metrics.py b/benchmarks/sql/tests/unit/test_iql_metrics.py deleted file mode 100644 index 871fbb1c..00000000 --- a/benchmarks/sql/tests/unit/test_iql_metrics.py +++ /dev/null @@ -1,65 +0,0 @@ -from iql.metrics import ( - _count_hallucinated_methods_for_single_example, - calculate_hallucinated_filters, - calculate_invalid_iql, - calculate_valid_iql, -) -from results import TextToIQLResult - -from dbally.views.exposed_functions import ExposedFunction, MethodParamWithTyping - -ALLOWED_METHODS = [ - ExposedFunction( - name="filter_by_name", - description="", - parameters=[MethodParamWithTyping(name="name", type=str)], - ) -] - -VALID_IQL = "filter_by_name('Cody Brown') or filter_by_name('Linda Smith')" -IQL_WITH_HALLUCINATED_FILTERS = "filter_by_name('Cody Brown') and filter_by_age(100)" -IQL_WITH_SYNTAX_ERROR = "filter_by_name('Cody Brown'" - - -def test_count_hallucinated_methods_for_single_example() -> None: - hallucinated_methods, total_methods = _count_hallucinated_methods_for_single_example( - IQL_WITH_HALLUCINATED_FILTERS, [method.name for method in ALLOWED_METHODS] - ) - assert hallucinated_methods == 1 - assert total_methods == 2 - - hallucinated_methods, total_methods = _count_hallucinated_methods_for_single_example( - VALID_IQL, [method.name for method in ALLOWED_METHODS] - ) - assert hallucinated_methods == 0 - assert total_methods == 2 - - -def test_calculate_hallucinated_filters() -> None: - dataset = [ - TextToIQLResult(question="", ground_truth_iql="", predicted_iql=IQL_WITH_HALLUCINATED_FILTERS), - TextToIQLResult(question="", ground_truth_iql="", predicted_iql=VALID_IQL), - ] - hallucinated_filters_ratio = calculate_hallucinated_filters(dataset, ALLOWED_METHODS) - assert hallucinated_filters_ratio == 0.25 - - -async def test_calculate_invalid_iql() -> None: - dataset = [ - TextToIQLResult(question="", ground_truth_iql="", predicted_iql=IQL_WITH_SYNTAX_ERROR), - TextToIQLResult(question="", ground_truth_iql="", predicted_iql=VALID_IQL), - ] - - syntax_errors_ratio = await calculate_invalid_iql(dataset, ALLOWED_METHODS) - assert syntax_errors_ratio == 0.5 - - -async def test_calculate_valid_iql() -> None: - dataset = [ - TextToIQLResult(question="", ground_truth_iql="", predicted_iql=IQL_WITH_SYNTAX_ERROR), - TextToIQLResult(question="", ground_truth_iql="", predicted_iql=VALID_IQL), - TextToIQLResult(question="", ground_truth_iql="", predicted_iql=IQL_WITH_HALLUCINATED_FILTERS), - ] - - valid_iql_ratio = await calculate_valid_iql(dataset, ALLOWED_METHODS) - assert valid_iql_ratio >= 0.333 and valid_iql_ratio <= 0.334 diff --git 
a/benchmarks/sql/tests/unit/test_main_evaluate.py b/benchmarks/sql/tests/unit/test_main_evaluate.py deleted file mode 100644 index 7a6a0de2..00000000 --- a/benchmarks/sql/tests/unit/test_main_evaluate.py +++ /dev/null @@ -1,26 +0,0 @@ -from unittest.mock import call, patch - -import pytest -from bench import evaluate -from omegaconf import DictConfig - - -@patch("bench.e2e_evaluate") -@patch("bench.text2sql_evaluate") -@patch("bench.iql_evaluate") -@pytest.mark.asyncio -async def test_evaluate(iql_mock, text2sql_mock, e2e_mock) -> None: - cfg = DictConfig( - { - "e2e": {"dataset1": {"key1": "value1"}, "dataset2": {"key2": "value2"}}, - "text2sql": {"dataset3": {"key3": "value3"}}, - "common_key": "common_value", - } - ) - await evaluate(cfg) - - e2e_mock.assert_has_calls( - [call({"key1": "value1", "common_key": "common_value"}), call({"key2": "value2", "common_key": "common_value"})] - ) - text2sql_mock.assert_has_calls([call({"key3": "value3", "common_key": "common_value"})]) - iql_mock.assert_not_called() diff --git a/benchmarks/sql/tests/unit/test_method_call_visitor.py b/benchmarks/sql/tests/unit/test_method_call_visitor.py deleted file mode 100644 index 12b845cc..00000000 --- a/benchmarks/sql/tests/unit/test_method_call_visitor.py +++ /dev/null @@ -1,17 +0,0 @@ -import ast - -import pytest -from iql.method_call_visitor import MethodCallVisitor - - -@pytest.fixture -def method_call_visitor(): - return MethodCallVisitor() - - -def test_method_call_visitor(method_call_visitor): - assert method_call_visitor.get_method_calls(ast.parse("")) == [] - assert method_call_visitor.get_method_calls(ast.parse("filter_by_name('Cody Brown')")) == ["filter_by_name"] - assert method_call_visitor.get_method_calls( - ast.parse("taller_than(180) and (older_than(10) or heavier_than(50))") - ) == ["taller_than", "older_than", "heavier_than"] diff --git a/benchmarks/sql/text2sql/metrics.py b/benchmarks/sql/text2sql/metrics.py deleted file mode 100644 index 770a44ea..00000000 --- a/benchmarks/sql/text2sql/metrics.py +++ /dev/null @@ -1,268 +0,0 @@ -import time -from dataclasses import dataclass -from typing import Any, Dict, List - -import pandas as pd -from sqlalchemy import Engine, text -from text2sql.text2sql_result import Text2SQLResult -from utils import batch - - -@dataclass -class _ExecutionResult: - """ - Represents the result of a single query execution - """ - - results: List[Dict[str, Any]] - context: Dict[str, Any] - execution_time: float - - -def _run_query(query: str, engine: Engine) -> _ExecutionResult: - with engine.connect() as connection: - start_time = time.monotonic() - rows = connection.execute(text(query)).fetchall() - execution_time = time.monotonic() - start_time - - return _ExecutionResult( - results=[dict(row._mapping) for row in rows], # pylint: disable=protected-access - execution_time=execution_time, - context={"sql": query}, - ) - - -def calculate_exact_match(dataset: List[Text2SQLResult]) -> float: - """ - For a dataset, it calculates the ratio of predicated queries that are identical - to the ground truth ones. - - Args: - dataset: List containing Text2SQLResult objects that - represents (ground truth query, predicted query). - - Returns: - The ratio of predicated queries that are identical to the ground truth ones. 
- """ - - exact_query_matches = 0 - - for example in dataset: - if example.ground_truth_sql == example.predicted_sql: - exact_query_matches += 1 - - return exact_query_matches / len(dataset) - - -def _check_exec_acc(example: Text2SQLResult, engine: Engine) -> bool: - gt_query_result = _run_query(example.ground_truth_sql, engine) - try: - pred_query_result = _run_query(example.predicted_sql, engine) - except: # noqa: E722, pylint: disable=bare-except - return False - - df_gt = pd.DataFrame(gt_query_result.results) - df = pd.DataFrame(pred_query_result.results) - # If filtering works correctly, the number of rows will be the same - # TODO: Sometimes a different number of rows is okay, e.g. if df has aggregated values that are expanded in gt - if df_gt.shape[0] != df.shape[0]: - return False - # Returned view may have the same columns, or more columns than the ground truth - if not df_gt.columns.isin(df.columns).all(): - return False - # Check if dataframe equality, disregarding indexing and order - # commented out way is also ok but slower. Leaving it here just in case - # return df_gt.merge(df[df_gt.columns], how='outer', on=df_gt.columns.tolist(), - # indicator='indicator').indicator.drop_duplicates().values.tolist() == ['both'] - df = df[df_gt.columns].sort_values(by=df_gt.columns.tolist()).reset_index(drop=True) - df_gt = df_gt.sort_values(by=df_gt.columns.tolist()).reset_index(drop=True) - return df.equals(df_gt) - - -def calculate_exec_acc(dataset: List[Text2SQLResult], engine: Engine) -> float: - """ - Calculates execution accuracy score i.e. the proportion of examples in the evaluation set for - which the executed results of both the predicted and ground-truth SQLs are identical. - - Args: - dataset: List containing Text2SQLResult objects that - represents (ground truth query, predicted query). - engine: Engine. - - Returns: - Execution accuracy score. - """ - - rows_matches = 0 - - for group in batch(dataset, 5): - results = [_check_exec_acc(example, engine) for example in group] - - for result in results: - rows_matches += result - - return rows_matches / len(dataset) - - -def _check_valid_sql(example: Text2SQLResult, engine: Engine) -> bool: - try: - _run_query(example.predicted_sql, engine) - except: # noqa: E722, pylint: disable=bare-except - return False - return True - - -def calculate_valid_sql(dataset: List[Text2SQLResult], engine: Engine) -> float: - """ - Calculates the proportion of examples in the evaluation set for - which the predicted SQLs are correct SQL queries. - - Args: - dataset: List containing Text2SQLResult objects that - represents (ground truth query, predicted query). - engine: Engine. - - Returns: - Valid SQL score. 
- """ - - valid_sqls = 0 - - for group in batch(dataset, 5): - results = [_check_valid_sql(example, engine) for example in group] - - for result in results: - valid_sqls += result - - return valid_sqls / len(dataset) - - -def _calculate_ves_for_single_example(example: Text2SQLResult, engine: Engine, reps: int = 5) -> float: - ves = 0 - exec_acc_score = _check_exec_acc(example, engine) - - if exec_acc_score is False: - return ves - - for group in batch([example] * reps, 5): - gt_results = [_run_query(example.ground_truth_sql, engine) for example in group] - pred_results = [_run_query(example.predicted_sql, engine) for example in group] - - for gt_result, pred_result in zip(gt_results, pred_results): - ves += (gt_result.execution_time / pred_result.execution_time) ** (1 / 2) # type: ignore - - return ves / reps - - -def calculate_ves(dataset: List[Text2SQLResult], engine: Engine) -> float: - """ - Calculates valid efficiency score that measures the efficiency of valid SQLs generated - by models. More details about this metric can be found here: https://arxiv.org/pdf/2305.03111.pdf. - - Args: - dataset: List containing Text2SQLResult objects that - represents (ground truth query, predicted query). - engine: Engine. - - Returns: - Valid efficiency score. - """ - - total_ves: float = 0 - - for example in dataset: - ves = _calculate_ves_for_single_example(example, engine) - total_ves += ves - - return total_ves / len(dataset) - - -def calculate_no_view_found_error_ratio(dataset: List[Text2SQLResult]) -> float: - """ - Calculates ratio of NoViewFoundError for a given dataset. - - Args: - dataset: List containing Text2SQLResult objects that - represents (ground truth query, predicted query). - - Returns: - NoViewFoundError ratio. - """ - - total_no_view_found_error_ratio: float = 0 - - for example in dataset: - if example.predicted_sql == "NoViewFoundError": - total_no_view_found_error_ratio += 1 - - return total_no_view_found_error_ratio / len(dataset) - - -def calculate_undefined_error_ratio(dataset: List[Text2SQLResult]) -> float: - """ - Calculates ratio of unspecified errors for a given dataset. - - Args: - dataset: List containing Text2SQLResult objects that - represents (ground truth query, predicted query). - - Returns: - Errors ratio. - """ - - total_no_view_found_error_ratio: float = 0 - - for example in dataset: - if example.predicted_sql == "Error": - total_no_view_found_error_ratio += 1 - - return total_no_view_found_error_ratio / len(dataset) - - -def calculate_unsupported_query_error_ratio(dataset: List[Text2SQLResult]) -> float: - """ - Calculates ratio of UnsupportedQueryError for a given dataset. - - Args: - dataset: List containing Text2SQLResult objects that - represents (ground truth query, predicted query). - - Returns: - UnsupportedQueryError ratio. - """ - - total_unsupported_query_error_ratio: float = 0 - - for example in dataset: - if example.predicted_sql == "UnsupportedQueryError": - total_unsupported_query_error_ratio += 1 - - return total_unsupported_query_error_ratio / len(dataset) - - -def calculate_dataset_metrics(dataset: List[Text2SQLResult], engine: Engine) -> Dict[str, float]: - """ - Calculates Text2SQL evaluation metrics for a given dataset. - - Args: - dataset: List containing Text2SQLResult objects that - represents (ground truth query, predicted query). - engine: Engine. 
- - Returns: - Dictionary containing: exact match, no view found error ratio, undefined error ratio, - unsupported query error ratio, valid SQL, execution accuracy - and valid efficiency score. - """ - - metrics = { - "valid_sql": calculate_valid_sql(dataset, engine), - "no_view_found_error": calculate_no_view_found_error_ratio(dataset), - "unsupported_query_error": calculate_unsupported_query_error_ratio(dataset), - "undefined_error": calculate_undefined_error_ratio(dataset), - "exact_match": calculate_exact_match(dataset), - "execution_accuracy": calculate_exec_acc(dataset, engine), - "valid_efficiency_score": calculate_ves(dataset, engine), - } - - return metrics diff --git a/benchmarks/sql/text2sql/prompt_template.py b/benchmarks/sql/text2sql/prompt_template.py deleted file mode 100644 index 60349f38..00000000 --- a/benchmarks/sql/text2sql/prompt_template.py +++ /dev/null @@ -1,19 +0,0 @@ -from dbally.prompt import PromptTemplate - -TEXT2SQL_PROMPT_TEMPLATE = PromptTemplate( - ( - { - "role": "system", - "content": ( - "You are given the following SQL tables:" - "\n\n{schema}\n\n" - "Your job is to write queries given a user’s request." - "Please return only the query, do not provide any extra text or explanation." - ), - }, - { - "role": "user", - "content": ("{question}"), - }, - ) -) diff --git a/benchmarks/sql/text2sql/text2sql_result.py b/benchmarks/sql/text2sql/text2sql_result.py deleted file mode 100644 index bf92ac8a..00000000 --- a/benchmarks/sql/text2sql/text2sql_result.py +++ /dev/null @@ -1,10 +0,0 @@ -from pydantic import BaseModel - - -class Text2SQLResult(BaseModel): - """Class for storing a single instance of Text2SQL evaluation result.""" - - db_id: str - question: str - ground_truth_sql: str - predicted_sql: str diff --git a/benchmarks/sql/utils.py b/benchmarks/sql/utils.py deleted file mode 100644 index 41a7f38b..00000000 --- a/benchmarks/sql/utils.py +++ /dev/null @@ -1,73 +0,0 @@ -import os -from datetime import datetime -from pathlib import Path -from typing import Any, Iterator, Optional, Union - -from neptune.metadata_containers import Run - - -def load_data( - file_path: Union[str, Path], - encoding: Optional[str] = None, -) -> str: - """ - Load data from a file. - - Args: - file_path: Path of the data. - encoding: Encoding of the input file. - - Returns: - String read from the file. - """ - - with open(file_path, encoding=encoding) as file_handle: - return file_handle.read() - - -def get_datetime_str() -> str: - """ - Obtain a string representing current datetime. - - Returns: - String representation of the current datetime. - """ - return datetime.now().strftime("%Y-%m-%d_%H:%M:%S") - - -def batch(iterable: Any, per_batch: int = 1) -> Iterator: - """ - Splits an list into batches of a specified size. - - Args: - iterable: The iterable to be batched. - per_batch: The number of elements per batch. Default is 1. - - Yields: - A generator that yields batches of elements from the original iterable. - """ - - length = len(iterable) - for ndx in range(0, length, per_batch): - yield iterable[ndx : min(ndx + per_batch, length)] - - -def set_up_gitlab_metadata(run: Run) -> Run: - """ - Set up GitLab metadata for the Neptune run. - - Args: - run: Neptune run object - - Returns: - Neptune run object with GitLab metadata set up. 
- """ - - merge_request_project_url = os.getenv("CI_MERGE_REQUEST_PROJECT_URL") - merge_request_iid = os.getenv("CI_MERGE_REQUEST_IID") - merge_request_sha = os.getenv("CI_COMMIT_SHA") - - run["merge_request_url"] = f"{merge_request_project_url}/-/merge_requests/{merge_request_iid}" - run["merge_request_sha"] = merge_request_sha - - return run From abcbca5d1aa3b7ec27fd26d6c1e5e8ba1277bd6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Mon, 15 Jul 2024 12:00:27 +0200 Subject: [PATCH 14/34] update .gitignore and README --- .gitignore | 6 +++--- benchmarks/README.md | 2 +- benchmarks/sql/data/superhero.db | Bin 237568 -> 0 bytes data/candidates.db | 0 4 files changed, 4 insertions(+), 4 deletions(-) delete mode 100644 benchmarks/sql/data/superhero.db delete mode 100644 data/candidates.db diff --git a/.gitignore b/.gitignore index 969cabb4..03fca33e 100644 --- a/.gitignore +++ b/.gitignore @@ -55,6 +55,7 @@ licenses.txt **/dist/ **/checkpoints/ **/outputs/ +**/multirun/ # Other env files .python-version @@ -74,7 +75,6 @@ coverage.xml # dotenv .env -src/dbally_benchmark/.env # coverage and pytest reports coverage.xml @@ -87,8 +87,8 @@ cmake-build-*/ **/.terraform.lock.hcl **/.terraform -# experiments results -experiments/ +# benchmarks +benchmarks/sql/data/ # mkdocs generated files site/ diff --git a/benchmarks/README.md b/benchmarks/README.md index 9bda6e91..0a549fb5 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -4,7 +4,7 @@ This folder contains scripts that produce reproducible timings and evaluation me ## Setup environment -From the root directory of the project, install the dependencies: +Before installing any package, make sure you have Python 3.8 or higher installed on your machine. From the root directory of the project, install the dependencies: ```bash pip install -e '.[benchmarks]' diff --git a/benchmarks/sql/data/superhero.db b/benchmarks/sql/data/superhero.db deleted file mode 100644 index 4cfbee0ba8b6033a7415184caff90d363fe444c2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 237568 zcmeFa33wdE)i&DIGu1skGd(TImL*x1HL@(rvMui`UL{MmWqFacco8=CNE%CHk7mTo z$hM4GdL)A(ge`;^0)#z4*h9b&k^mneKuBT;2_)g;140N8LI@!VAtbr)IjzP12;ZMP z_x|^P?sLa5^&Zt;r|X>VuIf6cY16t$DjeJ$PjrV;!8uBnB80Lc7*rHxA%3Ue=fcm2 zUl2bxenbDn5b;0YLS_2Sv8EiOcwMgv|HoFFKjr(KZoxC-zD>U0 z`BO;ySbduQ`ycAdBxN}8&p9B+i2S^g648HZD%2JYcZC!2ojvjW;lv1)yRxyis<}4U zTvffUHaHR=oa_xce?~fjbsL(4HMOg%TGllOH?*urC)FHWwXw0bZuN#>eeJg3WNs_cQ4wscT~*syyJ2N*Q!v@v<8&@K87b7vr)%o@ zZ}kjyLC}+kbcYgsbmg7SI{k}sdBw%z;`)(}5f`|9B7C3j7Da!$SO0#`hTGWr&wDq>Y>d#!z(f4h z7@S<4;aEpFF(^SJEE?j(xeqsYm#;iPi^V^b$yuaim*CwxYO zgHz+AG8Fe2*-xH(CzTrMpSR+`SUoQ{SJ+J~|5P9SyeCOL$2hk&RzpMA|6fx;RQjK( z9=ouBRZm)S)5F6bCiIg!F&zE>gz6ct>@!r4x58HQ^}K=tVejPLIa9$2wUheg#GTx! 
ze~&><4gZuyYTk*RaITvZ%9A?b#Qoptgj2&m>x5!w=kkTJPiIyPi{LP!p46A&=##qx zMey&jWw@Sy+9^f6z?a9lp`OT@lP}NUxM=ncK9+9SIP$zYn7S{NXzvOoCeNN(@u^4J z6P`7D_6{oZU_)cw`l`ll&NC}N2u;5HyxQW=8q#pd zowJ5Xi3jOl@W70JZ>sRX?7}f8Dum%$hl$VN%BSWT-*f)W$u`8|&r}E|#^$zup;&*m zerNp>MCMW!jJOJ z!EY{p3-F`w1Qy{(-wIIK9Q?-Mmxo_Ieg*iA#jg;*arhPCHy*!Y{6^sy#IFRu3HVW) z6Y(p3HvO~r2-ewFx5$B+6+ea`&La3I5h3R9VE6xZ>m}=H>pS=(^Do1J3?q_7WVzi>SC#I6sP6rxGcT@mj*T})(6&G z)_+<*ww|yaw(hlVu#Q?stOHiu+HGyO)>|vBh1N8y#LBYtz$bwZ18)X?7I-G`NZ{VU z)qzU`=LPx#vA}7lCi5@DfeZ&S9LR7W!+{J3G91WoAj5$S2QnPUaNz$R9574q7P<{@ zm+}2G8p9od2`;5{@Xu=4b}~xv=IZ?I54x1%EnP4uKp8#7cwObcjPb~_ITDYCQ=^Jp zN+sT0L;Ksp2^jyWj>cmh#yF%mMWXw{34bA~K;ao&&>V}V>9386P^>fTE5K_n{kb|3 z?(^pl=B$o}+V^UC_y`_J#u!x9(3|LqhV@*058n`p?X^bZ1=*>=sn!^UOiqM12OrX} z58=Hz6*96BT^H+!bjCed==++`!En^H&;_&?j?JLadICt`KXsEnw?{{GKYw>0?Fjh5 znd(G*f6Qqn;2r*(`{*%HmeWlSDy8b2jJl2*xOS`pk&l>j8*PXmhp z1y#Lmy@{9ys{nab-C(32O8~_i;=z?{C5YbtUDg|P{{LM-=3j;b84hGPkl{dv0~rov zIFR8$h65Q6WH^xFK!yVu4*Y+V0~(eA$Pj>mHGqOu;qDODJFp0lyB-9+{}%*eiuG6P zH`Y(A?^zF6H(8fiY3%%;X05XpSY=k8r3F3;{AO^+|LcJp0+$62p-ARmh65Q6WH^xF zK!yVu4rDlx;XsB184hGPkl{dv1E0YGKQ`ryw?v{H$rK#UjmED0jFwmgTlvXQG}x5t z?TEya0qo9~Hb+vSSg<-C-dm`<@=FPlU-_*CN&yT(OVtDvS1e+7# zy(Tv5OPb=nsjgr{D6uySoAu>Q@mOE5A<~}eO@xy^Y}pqzb@hhfS0?1graitLXrm9| z<1q^x_hlQZR|YGCYv2Ybob+SszIc749WI07yHi=%zAs-NO6&_qv2ETRX-|&AK7Qre z>A^<+zd0V=8wtmT0$G8yx>O$P>W+ zera8MJT{mm3mg39YeETl2g4RUYDs2crynt~Sg5i(l)x5#$ij|)+3IjSfr{7lwuh2_ z?D-ehtQ@RjG+y{;)PM=WHSt7psO_!EyEIlzXX0C zcsuYKRtcU9JRNu}@DR=h+!nY#a3#(MoFA}pLg4g3BoGR03v3G11*!sz0&+|8OU?65+w3z>HzQ`q+-7bv>&z;1 zkvYq(FiXq=Gut#w;s1;O_x`v2ulaxGf6o83|1tkV{(Jqm`LFjM^tx?;pND`QG)t?t97iL*Em=hkf_>Zt-31yUcgQca|^Z z+vD5i+w5ECtM)DQP4`XojrC>uH18+g54>-AU-7=+ea8E!_W|!6-W$9}y%%{0ya&BK z-re5q-bU{l?=tUP?-Xy)JI3qxs>Vmgd&V2a&yC~8lg1;)ea5ZEwZ`Sf1;#nXJ|k+h z8C#7FMvZZbG1Hi2j5Bf!kLT~64?XXAUiJLc^Q`A_&oR$ko|`;Zc`oss>pA2}c)C2N zd73?IJu5u(J<~iBJo%o0$E|;?zpuZkzpOv6Kc#2lSZUp||P{ z`YL^iK3kuxkJm@*UR}}tq`j-XuDzuFPpOKHs2jm^{26k;~*3+n0pZiw#weHK^7r4)H?{i1pZSJk^4elEEDejr>N$zp( z9Jj~ych`rmcU-T!e(HMG^|n_(#uB%*^xXyJQawS|{uG3u2uC=ZeuKBKMt_iMu zSHR^~KUUvY-&9|w39YBQ+^QkO13UF}CsW^O(bKz`y1!LVhnf2NDm~r7)P41Ox}B+e zk*|%ZyBqX$h^f0e^z<&K?nK;9rtUz^r!n<4r0!tqcBF1+>b3=X`c$TFMQSTkUq!py zn7Re6ZDs0aP+OR~3Djn$ZbXR|rfxuqW~Q#I($h^$eFdqFOkInfZer>hrY=P9S2FcE#8oqO0cxpY>U^}Zf~g~DcR5q%VI0etI*j_3GBtn_OPJ~hwV0_i zYCeT28?`KA>KvplWa@0xw}7d$P~Uu}&P2X>Or3$$xlHw89CMgDfcj=LwSS48p2buW zC1x^}05yZD(?LyVss}Y!G8IRC)0pZ;*{MuL5jTaYNU5H#V5$polbPC$TFRLUqX(0i z>Og&EOtm9*B2yvss+6f+h?~IFPSjGu)M*%Rkf|MrD`sjt>Ko70sVG~-)HbA!V`?j? 
zLZ-HW8q3sXlqg`T1ts#CYDQ`vQ%#`8Fx7}Fn9I~Av^$!q29y}Z)JEjXVQK?Xvzc0l zxGbjXL0L?#MTr1YYe1Pytp??1Y86K5W2zREm#LMg&tR$=l!vJ*l+c-4j(i$Z%Pn3{mHai)TxVoVjU((P`hijXhL)Ht-Zm#ISZc@I-#ks4vD05v=MFCVFn{u_gK z9sQS!I7k1DMx3MnMj_78e>tEW{g(~O(SKQ>9Q|jZSC0NO5$EVXKU#D2pAYpp`p=8L zIQq{3<>)^TQXT!LgL3qrhEzxYxe@2+KNs3?^q&gK(SHJ|tpAjMpk3B~%HL6z^`G)L z#IgQUK0zGoKjkl=SpO*>A(i!?@@Mpv^`G)5P^|xyKO!INKjlN@WBsT60Tk;$p$f!)WZ5t`86okf6AN4$NEqC6)4t!${R>!{ipmA^|Ag_UPoV8|0%D5L#+RlR}shh zPx&uUtpAi(P=fWJ@}Fpz^`G)GQd$2gKL^G7Px%>AS^p_7p*7Zj%8OOH&H7LIDNp$gtOLUv{pYmOlW&Ni-0gCmX@*hZL{ii&R)>!{3k3nWw|0$0mj`g4N z9h6}Gr#ymG)_=-3(FW^3Mo-2gUkN`8r0)`cJ{r zgU$L+xfc}cKjj|O$NEpX8~IrODR_FYS^p__AeHr>@-?Kg{!?y49P2*?PYO2cKjmhm zvi?(ULRr>-3Z4pV)_=-ZP=fWJaxJbA>p$fh#IgQU@GM}n{!_k;60HA}t9I(F{*)_Q zbk=?fo(Wj_DOa@UtoxKN?$lZJDVNvltofA7T69)?%B2lD>pkU?7M<0ea&e2!T2J|W ztIkSKxv)cLou_=RLuZwz;6Z>jo^oD`&I(UCT(7gfQwCafR(DFeMQ3fN*bO=>JLQ}f zE$SAAIJ-sLOL$h5wukV{1}#E(s731{JY$L0NqDec+fCS4uZ0N@G-w@!y)9ZhVX8rE zBTUw7A;Lt9wu|ueR&6I?e5ZCAVXRf#LD=1@Z6}O2Xr~hHZP8i@BlX%g!md_rD`97g zwuNwai?*3C+^V$@cI?!e3ES$mCc;pQ)=0RkRog_kqe^Qa+}@yVBs_Jewt;Y4i?*I{ zYpb@7a7(>bPq=xfwwADEr&dSU+@Y-@Y--U~6E?PLs|Yu((rO7C8nhb1jUC!b!gUQ= zHR0NwS`}gaPHhEYU5B=ua80YWjBr(pwv@28L0dvtvr}74xN?Q*%EC$;lvKDh_JL(8%H>yUMnOFwrXPui#xOe!tt$I zK4DRVmPa^lr#6PLuwKh099yrACd_ZuMiJ(fYB_{sO0{gl+!ig1a8!e45$3dL0mAGB znn{?|p!o^S2F*w4Z`Hhnz9pIg#7oYkFLxmO8>U)p!NFYMdo zTkMLtKRQ<@AQ7z`+4sn?_Tc~Z?(75JKme^RgB*nzc8LQ9x%RaoM#-sd-D#w zHP14N48P}N&)c4#dmi!J>$%c%*mJrkc->hGx@6%iK z<$8slrTtlZLp!cLuHB6H-Sf0w?KEwzHdmXVWy?Ry|B~O8$K);Ya+#Jfxkc8>>9Saw z?!UUvh+UT;B#eGygIi_}{<*(=Nv%=f>_> zB-xGy$5gV2r(~+*>?lr@|H@-&7 zwsEGbVmNkdWn-u>$Hw8VsasGCdC+Psm54{j*f`@gXVd;rq9cOkVYYh`Y@{_*8w<;& zjiC;VuPGc$Vi;IkF5Qe(WcVr%CnL!$8_UWgj9G(E#aPF1xf`P{O}$tF4ptNAx-1(j&}CcTb$5sr zUK=aWh3mu76k5x(vGP1`xHV_px;>8N@?0A$(6eChr8knoI(smL{$iOuoM&SpdL9@S z;p=k3SOPY!M>qAhRbs&#yh~U%7NlV_W*;V%V_RmFjRk2WcVamn%!(wjR^J}Zwy`K( zQ5)-`KRSZV@!s~X(KgnlXEaB;!@>0-Oj1lhhhJErF2mwHDrjoQ2)jKtoliqdD3N7j zUAnv>8j7Wc20PYPT*~70Ck+3DcKv7L3ZXNxa4M4Q&N&OyrXik4VMAeP)@7fGj**ZM zb9*sGlihiTF#HjjAY(ecN$OhO8AxajN5hbiuD%nR8GR5VsRchePm1kBY!gWn+N;9E z3}dz>ix0Sz+UEEHNJj{l3=$U(VHtjeS7Y{L9uHqHYA)$qu6?*#BUh!aySq0Afy(Ly zN4A7$(hW+2nS%ILN8*FC+e(6;jyOO-u|biYz>rtsQgmX-!Ky@eJmEVX+aD{VahmL% zX3wA=>48wkQdXQUaWvZ79f|Q38Xd#9)`TL7ldfBKH)?I}iX^aoGuW~}3hq-$(m2WN zy~r>;1)93CyAs%g>BKXjIT6AJN=^h9XhR4)GT<2Gy9dQ`x)4Vqc_NEC!JWdY_U+8oL%VGYIK{F+F-SXJ6+1shD5w=(9qIJ3^Gqcn>|ci>OwvNS;e zz1tzOtD_NU%}(=FBvo}l9-Wq2hd8i$xb=ch^=iId*ifTm6qVrxAj8lY+8ooEdF_OHWa;{MTJ zS@r0D6L#w`pB-JFzZQ2g+^$mb&P1rEi`CEWX!t-lnODbR+ujw6lij0o%C{K#2}kvr_z(2MQ6-H~v@Ta61}+s!J)s=}bFVyQ^w;Ka&VfdWn8 z=yBkBnJX}OmG!ft9-0!S~)G%GQ!QtdB0Asrc<=5gc0~iTp&X#) zSYLz7d9%=3LpX$u%ieC>;n9f>o*7v)X)bhydcw)B$Zopl1!kb=pls0{KX*D*XASN& zF}g@jPoZwCN(^Xa6q;;bypLF`Ps5kbYkRwUtf@%B%nv69RhefBZb5ukvnr4@sC%k$ zcS;5(lM;iP9?Y79$FLE;@)JigYZAK1cMcj2O`!mu#Y$`F{_Wfi3-BPeh_+>S)05Uo ziX$5jVigXXhXip99>nH23bL{dw~r1-29gDM9$Pdh#2XXz6gZq@(nDE8JekBjh}y?9 z8Sdot7~xog!{b;H-3M_k5BTvUR$LQ4fSU>!pM@u|@+#VYAG%Umc>XFMyuOa+_Tu5I z5Zmo^kI2FE*3=r}^@wK7!XsCCQ{*7#&ftxQxAJRxd!mD~mW2nd@*%#GXeBLr;2Ml! 
z6~%k{%tgBY2JpC5N)NGve9y*nR>k0mhAi6QLF*)ocD%E{E)Ej_q_#)n;cCVpx2jSY zcw28OoXo`o)~rFD)!WIsfi74!p0X+i?@72kkj8|C$E-3u3GGc{K4R|m|jl z58Hq51YQaJ7(4vm2;7L(fP;a(*wJ4Tm>ZZ77!y#<56xehFJd*|G4t!@t>zc8bAQN; zneFCQb3OL#=U`o+!1Vh6?*G95ivP#{NBwu9>CC?j2QnPUa3I5h3kd^vBFWYE#46-PX?1cVeE<|4J>dJ zYzy_K_Jne(*JbXL+&2QMftO-GfyQFIG1STVkCdEDM=f&?spR zbrT&9#e7)FC~6Kx@#>ih`>>QzG_(ie#~KDcJ5OLw0v+&T38N4zz=>2mVqp2ApeYo= zLW6+?i-N{D?Nb<7sVJcJ>~PADMT%m4ogV7IQcTQ;wTYq);XW*7g}c32mw=b{{lV4o zj%+MVRIKmCT3#wRBZxKISlGbgM1d3K$Ld7!`fw`LMQbEpEKR^DU4r`J!LkHaFe0IZ zABz#i>w0^-F!D%;iG_%g`b1w(3Tr;0m>=s9#r5$-FONG1OAk})Lwjl5sV;;xD6Bmc z*2TDiEUY(_*R|tqJRWSq=0-S{gJp)Pyg*aAxi^YE7nHED)=)Ma*B**xVX>jyi5TRL zi4}(uC#D`Y@j`z)rD2~#6J~WjUlA6vD#3v zDjH9Oum}>xGD>ffP5upL_h6+VuZAyN7A_nptl`Bwa}#hT$Q3-5ll`$Rt}1*lDK|6!LW8v;H+_Yv2IXU73)agsvB4@D5&ZV9gN4= z8$eaGrz_;x0gU1*y0HpiRnqtWy7eN-+8o#b_#^xPzHR*yo&a9Nn|)niy!i+7C47s2 zKEAvk>;I$wC;nsptNlIx&Hjb{fbYlnTK+=p+t2s;z3*bj{zC5#?`&@tzH@)f=r>x7 z`9=Z0VSmN*fag3 zC%FIMe%5`ZyUV@UjSsk7KXzT`I^sIjWvIVXkE=JSXQ~_3+3INVs<=yBCJu;G#UhcT z{ANgDd85p-x{ zim5z#D7Xu?l^FPkY#vW_B+&tB#Lgw=a;zg1&D9Ebno6nSOSU$8zBYY#;_ylUY#49jwc#eIlIgE+3T7h2hsJ8dhtGJ{xxA2ruW%bcqq zpU~Frd$Em>+>b8mhN&tyuP)nSD#aC4)!A20QvZQ#K=836I*j zaGgP%gXsuy5Jcr+j1-?FDvv1V6}5n`7O@9?po?6I1>9t+(k)HpSIV{d&74dPGIPUy zSYZw)Ds#<-CJZ`0*cTLpe#Mo9(NT>4q(6d_vF061Iy;%c`0ij=BpHM@ZHr)GH5`nE zy2BNw^0e~O)JCv=@nDU(Sm9W3)=Y$ZaHV44XQHhy*c^_<`jYXOd(0+eX0D=VlsTM= zhAT~VRs+U92XQofFq8&^b5VV(Lka9GV11C*CWGtZPuRPtsG6~RbQ zWKB2*1F8*OSo7-%u5Vu%@6&Z1^Z0({8+jW{C08MSI5PyR@9tt#xqJEe^_Yl?!*)Ye zsza&$G|_^a)j%E^x^L<_q?ZpBRFRG@V;d#04kkttIC_-Cs%_G6Lt2!>GwP9QOmtE+2V3^4` z6{i7AkiKAT6t|JKFm@v9A@4&4rc{GrgVIWq3pbZgcOyMnrQ*TOVXV-f-WxthSMcWf z*OFpfW#y8Rl%>u&h)a#5Cf-J$)%lk<+fnQ*|&A3P3lk(uonTwIU07=fYs_ISPcE1**Krg{5y7u8v zFCH}$&?zJ!%S%qdz>6t6uD?fArQ+Sey3LJv#=|}bp84GFHF=91WzJa?#mQ1vm*=in zh_V&KOdCvqT7iDUeoiO~HEpNOVH*x2sOLd?YNpKtKS^y24igU!$=XC@Z*XvqLwvBi5i{~I#0SHb zb4PL$&pso!Zs3b(<;k#@XYiBVXPL`JghZaFL_xm|RTi#RZi-SE)mEx2VELRF&it`5_S~oHmq% z;)6+f?p!=099M2Ano5%l3HS%WslsCz4ri*xqx`C=Jk~Z1TrC>ZFvNsJJMRKH>bEC^ z`!fWsQyo7BQ}WJ5OK`g=t^-$wD#P5yE>aXv4-9!QFyw#i7X#ouO-_zaOlb zIvJefwg$Tr!X|ilZ7-Mx!Ne(2+7*H-gKXZtJXns*xkFb@tvbDz#1FfOFbhH zpM24_Vgh%VU=^O`aX2^;Z{MqHn7Y3byGtjaXo;ggaQoz3TZmz@)vXE}E zC*^c*vTL#XjQ1#P#PxM^_8%WC8;>gt6@vb;PQq;tcjDe;)esQ?1MY8~R)qE|PNHiz=geY{Ag3D9DIN!J;U>5Q!nk;cH@Rz zP&x{Qi>V7vVR{`JUL|lZa;eIP%5AkdCO-8eMmW5P?#5GWuz?=f_LC@9IT6iWcw!Cr zCh@fyJxPzGI}asmCS_yNm5mG-pQ}KWzMPb{K8)`K5|j;3 z6WCV9*>Wc;zd$XraH&Qf+p0(;c%ITz%l)kd0q~BKX^5g~Pv`(HFJmB{pcuoWldo%s;Qn#u+|f|$y8x`IB; z@lhk};;hy2HvDfGm=5=Cs_;T_oS?Kh$E-Q{ssl?Yq^lf7Wh!?rodz8YrVaDH12+Jy zxP&l|@XgdP&64oY)^Q$wBSCIwPEiiXHO}!@>Y7!e^V`heOcA{Oj%Rg#G0|x*N0McXlS~%;bk9Uj`y$H?nsn96D0Tc1;(Me?~8|u zAWZxcg6fEEXbx{%iAp_h0hDa(G#7J~CYF;NhIrD#3boip!{fhn9V;*2shWvfO2Hzh zX}U*MR%0n_=qY%8B-$14#kYmfI^kpq_>AVK2-?-3dpH;2f`NGm7xr&KxG23J;VJ1t zggN#KgjsF${%={gAXLVD#`*ta)_v9;)-5;}U|R>Rq!qQounoAyYOvP84!|ka9C!vS zv&O?tV3y^9C4i3ue+axA_!YhrcrkDsUkW@P_-5e#z@33x0$&Lng*AW+0{yTb*b9$= zoq=8WdY}=f32FjM1M>qj0+V48U~FJiz#ow2-_1Xp@53g*>*g=a7w}ENlje8KW3UQv zyLpp&jd_K6v3Ue`0S=l;oH_`brbzB_z3!BW8GSOXdG9fGZZsISAf-Pa6j z0X4oQzPY|>uop1SH`?d-xnVKjL#&a!;eFZrW7rIM%zMmxkM~yZ_1-JJmwM0l+TK3z z>E4Jp}N5yK#eY zm2nwt2=p6g7)fKV(Qce-G#T~AN@KAx$CzrA8imFv!*94fpTL&DyPh{ZFMD2qHG%JX zzUBFb=T6Vfo@+c`^n4!YCC>EpVvQ#3+2LvNtoPJ_nNT$dl_av2OD>{g3)@ z^X~yQjD(xW~d0fzR!7{nhmc*Kb_Ebo~OWQO~-ba6RI>-*pG<5nS!M z+;yRA02T>SuBfZSwcXV$`zN`Cm^DWBml5X3{)vQ!>@Nk*sFnQ_2yI-IyxVXX4K(^)h{k zaI8$9L6{@c2MINq?gLirm+1q9+huw`A@c1bM7~}^L#9)N$e$!c^$Ap8p`}lE33ak^ zv6k*(;;>A|kzw)ynT`?G%5*p35}A$?7R&TrLNu_45Di2KQE?X`D(*zZleP42)K{)t 
zqNT%39MRGpOr&MH9XZQ;Wx9=UuS|ysyJUJ7;Z~X6N!TLOrx9+F=^cbiWqLaydU7gZ zxlFebqDR{Z(W9+|qh)#vA$qo%Fk7Zu2(x6mnb0TGO@z8kHxi=fn+U=A211NrBSuiJ zr8l6zlaxzjdOhMNp}*@0TV=YQ5dB(9I7O!G2rFcI4I%oonh^b2MTq{?5{{DT8bb7I zB_aAzO^EiZfMuwE1tHRx6C!;XaAHcPmlAf!^b*2tGQF5^vrL~txLl?e5rRJp3BjKQ zgtKLOKH*H6o<}%Yrson)lIb~w=;v%g^m7(rzD&;~93#^+2+{B9gy>HtA^10qFd)-Y z2{EOn5PD>~f)IS048Bd&(&gY=sdBMQll+%rCXxJ?u99hz|5Egi(FU|AbDNCi$O$@sRvaKsl2C3FtrMUra!G zQUDV)Ee!>rmM8-HAU>K=@5-^NZKnWN|Dxd@d zA{9{LmT5tVfsqObwn&>)K(IvGPyr%1R@$Tkg89-W9S|HXZPEe39BGpd2xd#0bU;vd z+t2|Luwfz-USO3(#GX{sMG|q`yFy zHc5YhCT&Q+C{W!t#NQ~8HVJ<|x@sknrcDJ0$%1=uQOJm5@;!+5}H5-0G0UXrc_G>tZCe%^TUpRjz#HfhGsWV`g#91@oW{G2Fz||6m%77Rb ziT@~!i^P8v#zEpg3jHGSAEis22m_)$68}-sZA1JGbf2U@+o$26mr%2)Jre({h$QLH z+Ac}>vrd&H`&pYMiGJ25Ns^znT9V*rt&$}7SxY6vUS!RdB=%V|B}skOG)Y3ARW3>9 zvx+5&eAZY=5}!3jlE7!>NRs!gED3QJS*9d$&+_AIX?VbAhNlJzWIlBj2?Zb-V3 zr9sYB3-gNP-0GAh=2o*LDYuqO5^`&vBpJ6VB#F2+NrniDB?MepVj z5^Wli1lqqtl05q>B*ahMl97!^(dnAdhu1S(uT}l#IEh$M}wN6Rms%?`bt=dva z!m7=dB&*tVNusJvktC_wBuRp*6-$y+EnkwDYB`dm6eqk%NHs%}jB0KTBC1N|B1s}D z*GZC4xm1#X;?OtAC*CATJn=d~(kZhg38z#g$)?+-K{WBX{Sir`>DnquGF?j~38rhd zB)N2rl_Zv~(UPRnl_NAiBweP2L<*N*l0>?E5&|h)nhfCWJnxQTg!#8k zLs)PNo#Y>T<7$M3*EJy=cg-$@MOSqr9RH<*2#dd{AslttD1^a_scgxGRCdDo3lUB{ zcNs#fe-c&-L>J7ZS%0y9kDdS5te;uWSx;M!Sr1wFTDMu(TUTc6|7Gm|W$gb6k+J_L zM8^J~5E=V_LS*d!VZE(BWB)H>|4(G>{~01<{|`LK*#8q5`+phxe;NCKWS{T4csZO(Vocsw9*xnw!wwpt%TLJ2jP1t;d!qK4!qz#foTFA5yG8S?|K` z|4Y^nasK~d>mKa)UkyKiN364~l(om&Wo?Exz-nuuHQkzMjm1vC23r6h1m40f{|kX< z0*?kB2;3340sH$G1qNUhpeL|9upK-5YXZw)8(>Nx2Z2BMaC;VOh)39H^*1y6(AG`Gv{Q3TX-|hR@_rC881?9<=wyAC!6 zFNBrB1HKsc=v#dazE#+vpABn+d)L7V;X?0p??mrdZ}?51DrKI}fjeY(5Ty~Ew)u5&MU&vQ?ObwqsThh6l) zxPIsQ7531dbA8YCE!Wpwx4FLJ`l9P|F59)=)$M9`ZF6mO)w&kDX1U5;MXpge@$nD! zkLqvK*VGr)AE^IW5*RREW)Lhkh95Nrh-33I<9A z<~S&rC>5efZO{+HNU0Ew6bCb-GI$A#rqoqQup&r0&sSv9u9}JfYQL9$zhhe%@h#HE6 z@lqjHs+Ia-m@gHgnremtQz5FTEKHaRv4U!b5mO&_eCQk8v0@Vj2r}W7H%EHVkeS6TNABLe*+P_B} zOr2tjo=U*jDQ&u=r!aR)yW|)V44%?%GfKeZDfWSh8!&o`onIOo%%0MoDr$k@Q`+^! 
zm4WF~>}L`Mou3+eq!?)VGkzHsaM^g9>xpn;06~h*y7AuMf0y-0P^BZN#gufeLZltH`&D zDOepJU>ou3D;OHvh*x2Cd|(HszKmM7GxZBlr!w_(^nh)|t1lrR+lW_RL@ir6^{40y z+lW_xvPvId8}aH3NNwTN=RmQIc=h-KeSmGmt3N_5Y$IO%0aDpUy!w5_HE@Y%Ky75| zX{2so3bw`v)-&}z^kp4WPa+@Nh*!Uhp04G%$5Dc9#H+9?KEO8O)$gFhYEJz&;@C#K z`Yn{L<+w+X$~NNFhmpFHVN5wG5d)TNvX zuOtIYn7RvnUd+^;NM#%G>K%w<8}TZKBl&gySwl8$qTnMI76RS7D`mfNjL97lUg>oO%)RjbrNbNW~FD^z;J6v5k232=cLw zc=cRRIJ<(>0kp<8;?;iifNjL9X_Uox<4Cou^Z~XJubzVvY$IMh6ZNr;c=ZrUu#I^2 z4DdONOB_U;#Z(_=Y=Ef)C~GpcAC#Y|UQlc!UQMB_m*WzMV;k`*{F4l@jd(SVIGs~t zpx8#d+Kv9XCcy$lrTpZVhd~73L?L-{Z4f5>iujgUfmAr0H>acR5li`wl?VfY%N~hR za_ZI&y`SyHt6NYX8;n;sqbysDS6dLrCgW9jD(Uax5>1G6^j{+=NB?a?2}l1mpbba= zZA6@-|KO>l-_d{T5$EW?dQgu3TZ>dj|J5PR(SK_Y=jgxHXxGtytI%3I_W(Xi`W^jO zgE&Y3Rbw2E{;LAz=)V<6b@bozD!t#)f6GvxqyONkq~Fnh3z5&!e+!V$(SPt%((mZM zdC2GJzd5V)en=d(HBSm`4{N@j{ftZK1cr<;D)3BJfIx?r=u1}|7pnQ=s$`2 z9R24)YmWZIO}yXHe**bf|A~KqV*MxJv!tK(pZFU_#QINs0*dvY_$w&Zf8t}b#`;gd zk4Zo4Kk*UzQcC<2e?}bZKk+A&VErdP1P@sMi9aBf^`H0v6zf0nKFYHG6Td^vtpCJ& zD9id!{1#%is@j6;#{U=^SEv)|po*DXC{|P)Z^t1jG|A}^4|B06o$NEqF0u<{% z@pDkD|HRLbkM*B;2^8x;@gh=L{|Wdk>1X{XegewH{QEH|)_>x8^o8}Gcn)!_|HN^4 z6=MA-egscLtpCIh;G>H5pLiBtg;@WI@58GQ>p$@f^0EFCPs3jk>py`fiZts#@jc{Y z{U@G89P2*;KPG9`e*%6?(yaf)e;}3hpLiS;>p$@*Qd$3r@4#ym>p$^r#IgPp-$ESg zKk*3SSpSJ{qBYik;$ifK^`CeM`B?vnW9TXCKk*>H|KH+jR))|1zYZIKFIYdYp0vJW z(U<>Uvu?7kv97Q#&V2u$`Tjri{eR~B|IGLQ^i5#q`~S@M|C#UqGvEJbzW>jB|DXB( zKlA;6=KKH5_y7O;38W$iqZztd3fi)0XzLIR=FhuuLqt8T#>Q=M|*W5 zWB)H>|1V?zFJu4Dv4NAZ|A&vqSS^w_oQ(ayjQu~=F&CJz|Ch1NHifsVjwfvq?>ur9DFup)2@P7h3nRlo^>aX3Mc z74Qb!unYK+`GNVa`KI|QP7=Ie{?L5Nd>p3<9x(4=+kn@aN6pL33(a$Js^FlRg8#uT zoGjRGZiaopwK!d{%v@;BW($EqbF4Yq44AMV>Ho<8zW*(pGkD2=-2W6j6CU&51N(s2 z`H$kP!4dyC{sZuO(BJ5! zJnB2j6Q>V?zI&R zzU6%tClHQ%pYlFBXdm!8?@{k1IE8SI_kg#@+l6xoTfGh5HQp6Ci7?YU*&Foc<1B*5 zs~8^{@8dMWtHw*lapNhRM>uBOW87+7hZ6~x7)OkAi~~56&}Hm0wi*pMm9WBCXv{Pw z<6J_%k!5%c1t$~U_q^qK)$svZS%@tilof9Q}aa zgVPGT^sRb>z6R$N7V0zg$$Ahc7P54Yu4o_O%)(pRtJ+K2ahzIsR6C~Kquq*g3rDp} zv?JO%IJwZHb!oe_tvI`|Mq8mR)Mn!JLQu=svNR9QFMK55mv6~eaf0Evd`dnlkI8%F zt@1i~R9+&F$aCZY*(1B;F1b}U$Tf0>TqtMC$ucPOWtQ|v1@-~ocfaL+)%}wDxce#h zqwZtwd)&9WuX7)DU*bOEKF58)-2;CVyWCsd4emAW74C)ZneNH%AS{k&kzHihN3Qo> zZ@FG|z2rLXddl^v>zL~v*RAkdanyAQd{>;~I^gOdA=6!|D!wl6W4uq?%XqK2hw&b9 zH{;#nF2=jWos4&iI~eZ}Ut|25xSjEKaU0`p;#S66#a9`>DsExCMcmAIv$%=zCUGO< zjp7Ez8^raD*Nf{IuM=Nk{EE1i@mg^W<2B-H#;e7b8NV#9V!TRR$#|tW%6L?KiSbL~ z3dSqM7a6}OE@!-4e1Y)`;xfj|#HEavic1(T5f?LFEG}ZaNPM30^Ws9r3&rOcKPN6= zyg;1Kc)mEocto7Xc%C?y@mz73@vs=G;w;9q z#F>m|ibITt#2Jidh=Ys=MIU3IIKX&7>}T9B_A%}gy^Osg#h4OF#-vCvCdBECr;8rO z9ua4Zix^`}bTf8~C}ULYW!x+FFzyi%#)#-*>=K=fonklRZV_e-<10iOXNPEKY!_{e zZ6d@N61y08iJgo)#c7PEi5-kP#CFE*;#9^{MJr>g*v7a`Y-QXkwlHoHn;AEY7RDCQ z%-AfN7@I^RW24x_xJfiHHi(Uk8^s334Prgxda;giov3H57i$^UiaN$Rv4(MtSk1Uv ztYTaxY8h)q4P%X1$+%KfGggZ##wxLbafMjUxLhn_Tqc$>E)`1{mx#rTi^VC7r-((2 zi^M|4g<=8Y0x_R)zL>{2Pt0YUE9NlH5wjU*i&>1b#7xGSVg}<3F`aR`sAQ}Z(-^0T zsf<&_6vio{g0VtOW}FP;j&z;N#U#c_qKvUjOk|uWN*PPV1jY%Xgs}uR^r&1=6f+i! 
z@r>g|5o3`U$2d+DG8T%ljALOwj>;9lZX02~$YabCV;IMXT*h26nsGFYf>FLvB8M?Y zWHV;N(iX*M35yX%gc$?EWHg1J(GOcyl+P!;j9wU@qQ7zOk&OU~ zM~pDl#0X>IjIbHbs3`v+RQ{p-o$>F=-x&Xj;cDjzd`to()XFUm)ZA1Qxk z{Il{W#y=^4Wc;J@A>)V29~l3je8BjD@_WYLEAKPDul$bjcglN=?|_>%G>vA1Xg!{DJZ;I<73LBjE^ecVf>ErZN_ga-(vih@(AN2 z$~PImsXWa1u<{V&L&`D6W6Fb!4=N8ZKA?Pq@f*thjQ1;FXZ*TyALD(>y^QxN_b}e0 z+|78mau?%W%AJgNDt9p6p?r<;Ys&46w=1_X-lp8jc&qYN#;+>3Fy5lv%y_eM6XQ+F zjf^)cH!$9yT+evDavkG!%2yb_qFl>(t#S?HHOkeDS1Vs;{IYTt<5kL)j8`g08ILMo zV*HYF1>+UU7a6~(T+Vp8@&(2(D3>u_rd-N+sd5S9CCbH&7b_PreqQ-J z7>iNZ-A!RMO5xtU6z#+)QCp6NQb96mHr?VM7Cj z8#hw8VFQKh*HgG|9fkGv6s}!MVO<@CYt~S>dNqZsR#8}6OJPk7g)3K5SY1tFRTYIR zR#3QnIfcuXQMhy|g-e!DxOg#zr<_9JqD2%gTu9-91r*MoPvN|I6waMX;hZ@X&Yn%- ztXUM!oJrw~85B;RPGMywh0~@{ICUz8Q>IW@Q9vp~pj^u2ZOK6iWUK!0jd)uOahj z-qZ6h?f>sk-dAAx@0fM2)o6_k{5J3-yq}*FSRV-DUHiwdxPOkh)0}1c{crl8hSmJE zztg`M-v3_nebaZDuM^(?JlpZDJHJ=Yt9-TE@GZv3ZlzwtR^n=!@nSIYY(M1 z!*>UKb_L;uBHEXXhSj`cxYu#PVbmfUPOFM)X*Gp5zSJSlkPVS|DiPce*&C0BBH?|J zRC^cv#U*=s;U#4p`AEUdb*v4yR`578w0G!~4Z&YTyw5$(hG#2sN5jiBih@MAiP#eA zjD-`Xp%>Y3WL3gGRFK8vSsCw)M&PswzBk~)#H&uX;mN9Gs?#qTNEGgH$V*@6kjJEE zM8LZbtwXBHeH$j(aAcKNG1wj#2<>g|gYOOU9?=zppB!=m^ZKfM8}6)fb1`08KSImV zaEN}cVsom!5Y9Bp!MUNN5WJ|uWfr+8S_wuB{{fFf)*Kr?t;*R|ki)u2Dsm7Wjo^bS z9NoZ9a>(7tnlK!y#5>rD*@MdMg*F^q4KmHZzVphiUJMLwM&N-)xv$WMXRE?NuF4mY>9m#6^={5y1VDku;JCJWYEddNyTcy@ac&PYTy|Uh139-xP8G? z8?LSjD~M6fG{asg8fw58+Q}JdkQ{iZ0eJFJ;ihVe4fj{J0 z>4=5lB@5m~?%%ZBhV!f9Z9J!VV!&ev{Kv&(HQkX^A`+~EUy?W+Hab(Y4;5f$Ho?(Y zM<4uS?SsdYWj5SmIW?d;%)EcE2DdufhTkl>>*CQdn?v33K!%->muyO&S2;Zduw95aW?VOqg!EBu|n{mb4|1n#{$Izk=l6tl>NZ>>VQgv2~t zLb#miP4rb3!y^scDuzmI)K;!gBCl?)gwJ2{b=4T|3n%%5rrVcIw&7B%sFd?xW;lk8X*g=*kCp5F)<>YU8#ImwG9tgGgi}h@jvy$ zX$U^;_Cdz#>Q)W=;2ied1CK(8g|kM-f#Th3OYIW$gn9cK84~i1*E#YseG2V%(!p%MsEwbU` zr)21^LDxAJi^tkR4ksVnRAj@!R{5a&Mdl=Ggh;M|_thk3TbT`~T17?FXLOy)!@XWI zg`0|lDtyL5<*i+1lY_1B5KGjRp|)t)85jJpkuxwjf5HSB+Er-kWtX$T8aU+afj_xU zy%=)(edXE-Hk@(6apaKLSNFovS4X_DuXk@exG{mdPCVb8XTu{GJOgqra_f_{IfDXk?ad1earCrK3bEnzlachRtMWK4& zSd{irc1Qb~;GU1%hT^{N#KBkhV>4{*uT%`qK+F$J-V`xxGj4V9_*%NmUGZQ|0(>)k zxTX)bPqN9|){v)3DuKBd3AG33ZR!mt;D;@KAOPQ9w=bP!!`D_3WD5htHB^T(Zib(0 z&K1C^S2$fguiW-RxmD5#kr^&W;|Jina($>HLQe74hIj9Vzb`0FuUTm0OVGj*t;#v; z>RcqAkHxZW50v2vmnVJz7Y2T{%0_(PRmaJrWCAWpqa8!;IdMavCn|^MZsmzku8sOm z(6%Uq>8awm0{*p%`MJWm%7}-bt9W;Fs4W8K;{FAI0>YfSc|oC#=aeBA$q;UGh!q5n z;DH$K?{aLo*ZRNMd($v0s&nnXt7_L%yY|>kLpROTKm*M@G=j*eD4-yd%qR*7GBh$H zW2+3JK&1CZ6k$)jr`|nbj#C6fR2+gxG|C}J8bOZ(#;9>Z6NkwEUU!=uJtxUO*Y#fS z`w2dD-_)?yv(~Em-Mx3MdKQ*G`}?)DWYJ1*tBo*^hCd8cm*JGzvsw?IyK*HKlxt?< zYUr{Ni)MA)Zy9QP)`ajah#iTm>NQr$5gu)^YUgoC4tZ^xl+a2Rh;1cD1@H7j%LZI4?bRjFaIL7~ zX04g4fQrRVuU%ET3d=rYfi^v>Ui+lWdUPGf6i4I2TC`;LfBObjpSx;R>D(2VG>pF| zv(9y1hd|k5>R|N2f^}G`d=9$nDgYKs6OX`OIz0I2qYKs|3Q&o`s@`olwi}A-vIcjd zx<>JjUAbU!DgKni{V^9Kw0zzwT&udTQ4=-v{hF@+(e=YR7td>hp{=^A`E7^>7qIF! 
zMUX|auy*YVb=epWZ@X2f<8OnjPMbAnE|x>RSy{@GQmj#oKWh)oDq4vS{ec=;JnH7s z)vFfWg2lgK0QrTe=g`!yD{v71?EazE=*ZRJ9Trr@>cQ$k>n(8F1;psV$%V_=?uOR= zP~?(TE4$?v7UrNApPt`inYxfv(-HO$2Vl+9IZIZfAEz~}3p{kLS`nUqaMTQHy|kg7 zf8*gI<%~Q*_Zi?w&45lz-~e>qA39VWF%C`w{7ftcu<`e&T1F8FY`qwT!AI34#359N zq6sT!b>pXoLpOA}RSj}Od_Owk*4eWr&RV<{H|AWd=!rk3(3qM_dM&~|ga^+YgTFQA zTIJu1RS$tTFfuS<9G2I;8LRbTExewk_%q?ZSNg-qAc9f+qek6#xI3_T^ZW&BwP-FQ z_{8Mq3)SC$>hAvAK;sNhX8}G?cv~!BgVa7{`whhnUxdXayJ6|pF7q*VKcWmP!(V*E z@pmDHSgkQT2aQo{r9RTP?>sc-NAB>$!D{_pbe|k>Oxd7UaSjxJF0$gmsta(^YTm5g=Jhj#}{?I#g zQ}G;}@q<_VeO%OE7!zSe-ITz>#JIqgwY(YO{O$7+?+bL-6|-k!MN_0?!zG=it<=BB z^8eZYC;xl?H~jzUe+k+D5BPtM41f{;ZvPJdCjVOhQvW=x`8SC<058LH{$f1kZ-Qt1 z2A=SLhUfcl;_3b?z88Fle2@BSd~xIfEcb2omHAfUErQut<8PdA1Tq2k@pbhTVST?G zpVNE6`<3@&x3Ccn*0U_0(V;zdfFvp1Y6}aD``~XBM8)kM#`q48j_I zU62*90MF+g*KDlYH_kP} zHQ3e1)zwvm41qcL7UZw?dHX~A9jw{+8lK5NjU0jd?UWtDihbMgM1Hls*q)0lffMYJ zcpiVL-Q8|)x4>$BEBB00-&tQETi}1;Dg1A(Ut7Pj9<^$%gtgZS;u-uM)@|0U)*S14 z>pJTiJb}N|>Tb2i`h2;T3lB{GBA*~<;2EsWcTAp@gM9iPL)O51u{Pg&SuP9k?0t%i zkzvRi*jqYF8$5aU3G@kyyKUSN3b4Wh2sIoy^bx&Ah_JI08iVe zIL0`JIR-jUWQ(I_f}43=GfwPDa2 z+2x}cB)hy617(+oqC|GNDGFtmi=vtAvMCzLE+>U8>U(aKBeF{;ur$AVE%LltGb61u+cAy0BP#Xpek>KqVgC$r-fn6IYI!SOnMOz83qbQW%T8d^8 zTtks3!POK^BzPM|BMGje@Jetcg-3!bC~OHXr!dUmG8M%D2`*I|`Xh=Z6o_Im1){i> z0#PiYK;@+rsC*#>Dqlc>I_FcMs(BQs>J|!-;9LsB3eHjAPxi~&D8ZZ6rhc;|IGbXu z1ZPnUlHg1V)NvC3d;-*Rws3r z;2^c38x9$$MAw-T96&Kmg8eBl$XEs$yD)D3C@@NwD$xb;^`$^GeU#|DK!TT0;Pl=U zlO$L|Fgn&JygXHgv*R z7gOM*4oVc4O0Ye}JPEd=m?Oa=3bdvz1uoe(6u5X=Qm@+= z6qQPV@F^NE0l)`OTn0&i^eGxB0n(?aPy(b+Q8NjUK1B$T^eIA!q)(A80n!IcCk8+t z8*3>B2%ol1B|!MJ#Yu!u+Z+iHK3Fy}0Qlf(&U^`wJ%wW>K=u^&lK|OMh_lF^La_p1 zkFK@K+A0CUrxi{kd|FMH0O8XL=Mz4yhDm_%X@%wxKCMt9;nS+81PGs2oh3l{v}!8> z!lxBlO8B%wO9`J=7!|^&72+U#TA|5=Pb-fE2%lDH2I12RZ3ld8tZx_~ds-q2vZv)` zRsihLwH9hH03W3=7^F{&)e<0mT1=Dx>C*y(LHe{n^q>!(ZgrIa@zbJM0>n=X#7g|M zK&-@13xr7gv_J;~KR64ql0PjlJmgOcTtVbd3j|I6w9q9${xnC)f5d**vp4*{L@~7!M36MWc(VOH?(~f2U{IRigVSxC_zg+^vPyQMS5I^}VBtZP+ z<80z5e~tu*pZr-8Ab#?vNr3ptN5RBT{tyWeKl$i);wQgE0>n>#cL@+b`JE&{{N&?K zBYyHbNPzgsM_&*>`REJcCm#_KKl!+hh@bq%5+Hu^(M`Y)9!;W~$e( z3&REgL15U(pM3Ne`IGOE0Qr-TK_Y+ha5Z2Z3XIpM=2+Nayv*l`IB2H<>XIpGbty3a`UB} z{K?Iea`Gp)v6PcPxs9Zp{K?Ica`GoPTgt&7)MZOK0hB9JP5|XP%yIz8#xjZJ1Ed`M!DHg?QcnEjAX4HdN0)NqCmW{|KiT7? 
zocPJUT*`@`Y!pcRWTQahC%cc76F=D~kod_imU7}J8zmDz+2{k}CmRhVezMU%;wKyJ zBYv_`JMrU3?Z6KnUZYa-$B#y>Lh%;x>XMN=$Z@YEGKETyxC*2k_`$*GDDuaJ z(IJ0a2%7wHA!zc)g{zPJvDcaSK;-s6rJkV|`oF;&0DtoT!T&bC2RP|}72gCr?|tc}cVuGvu2nRVHTz07T{Sx{5CO|X}_%{2$MF}Jy9++=NJZgUNbmTY8hb4)a~ zHZr$))&+dS(p*vs7JS${^P%x#|a z6%=!uXMKs|nA<$-JdR^-^QRY*45a^+~yb8|GT*UpSp@K zuK$NVxOck6gyva4n`1GbdDf0O7Sow$Z69Ybn|apVlPo4P&$?@r#a!lDTc%q~WuAov zHJHgfYvVYJiOjR^oFt3XdS!P^l2Y~i_A6we`dv0o7O3AFCdqvDd;LI}r+%*+CAX;G zYe&gk^?S`YnWKKMnj|-?-z&$-Z1sD^D4C^xFP|hc)$e7~vp`R)?@Y!+1S8yA%K#m}ZcTZy0f+b_etLe0_aA@TPwo%n!)LT7f3sJ@~WtQ||}fw=qZHg!g5v82GgJ0NzHZ@us{H?*ra) z?>*j)c*B1MRt}ttxdK!0e*ZPzq22-BOT1mNeqf=usW->#@*1A+JYRbL^W5k8nP-bAZVgA%1qN?uwsXE7d3YF3A!2*|}l@es+q?#LtfVZpF{yJ?rqZ!~KGv z?RVwkXS;HBZqd)wxox-g!_PKb2jge!%^lSH{|Wn%te?#Pf5!g{eBw{|AN21;zQ1*t z{eO%92IK~~+CSKT33B|k_UB>lzwY}Ba{ImOtH-DOUt{L~BfctM6#4wNBX_`UzD1b# zKh-w|pX~eldipvbe_tca`v2a0-up-7?K|oH4RZD!z?}b-cb_+aTzwn7%aNz=M(;%A z=)2t87oXtU;`6%?^Zma;X1;TtH}J{*MbA^7$1vN!!t($=vu{F9zQy>&J{=kPhU4>k z3G(r^!l!kYC(Hd6KC8ctTzs#%pLairOnjB@Fy{H+$Nvwmv#t}amymt$7p^*20yF$~y0*C1A@klXt{Ys}A@AN`*Cnpb$hw#3 z^0;*5-20RLE@t#^$(D_Z9gWuDiyZ&W2UQ8L2Eyzy6XCSYA@!x>iXMi zFDAR{`YE*yvt4!l7_!3c$8=YvqSk)Qch&Wm)HY0b)%6#ZQZrtmuop94b^QqP!0pGB zS6x4>O2M30rBE^^z3Tcwbp~d=DupvJ?N!$gs8}%XRoC|;_uPI=d{qi9!pv7)&ma%o zeoTE;YLB%ab6=IZ&)Sd4uezR6bzt_ZQYZz}Uv)jHwqgFOQrLzGu)3a5+b{!GDQv?O zSY3~+ZI}bA6t-a!tgc6prEY(rQXv&{s|JO7TPn3rozbE}VQ=#Wg}u!h6iO*jYOgxC zX@kPv{04=+c}hK~_BLry*xOjC`&D~$mD-~ErIAvbmC8|SlTz7Am8stFHz=IzQ)+|i zQ*VPp^d6U8(-64o#_Ib#9hYg($i1js~^Ys=HmOmTFsBg9=-98yeIet8TqgdB{Upx2{3m zZ`G|;%0hW{YZ?^JSgn*GpJCl?4XV$3- z)IJr<^$iNOO;_qZ+!QWJ4s){R!`TI6!9yH=?oD&|p2^}|(CH?l$D zjBAuCR*_z#0SN_fS{|kJ6pLqX@ZvsBVv;E(DU-cgGKIMJf zTjx#UyMQ2`>fhnL&3mhNj(56uy!UGF5PTcZ&0B0Me$-v#jw3&RxqGX-4Brcsx@WtmAv^yF_h5G)cUN~2z8T0tZo><%uUsFy-gVV8 zGyl{0ZeYJFvi?6aNzDEzTLviOx~Zq0Y;Yhrig_%Gt!}aT?aQ*5}p- z$in}+^@{a^bqMeO*I04n;4infT4mNstJIopO+yC$;nrZQkJZ&GvYJ^rmQyasSMssE zE2rhS97Xp1C*&bXOIYrc?XnTM_m{{mGD9ZHC>bi3A@hE*w2~&`5ySk({0w>b-!xB} zFPqPszce2ZozVk!`=+vDH!LSm`Kr%yvveuKf{?!H&L;ZjN@2=8i@VU+KVvp1{gUD683LvtTXi9;z@*ylm4LbaJysB0Qhu8G; z3#ASQ4>3jR;O`KHQU`n2^e>L7h!ObCZqz+~dkuA0GatN=~!pb2$QVS=Ca7itU9Kted;p4XcxzxhNHT|;^sfCF{ zgoF;WZPh8QNbux*IGQVZ9H=p(f-ZHSIi3(tn=Ahob;h~`oY z$A)MuwJ>Z5+p1N5t$&mikXrb)rhl|XYGKz9tECoh4N)q!Fl&efQVXw!m@2ifYKZYt z3#Wz{D77$Zh^|r#pN42AwXkUjbO&4-0^I?VhCr{vqaiG*g+)W4gW%8*x>*Z@wy~>L z*)#rd4_LLzo}qSFwaT8Mwn{DRS<^p2uyAJx1PgP9K(O#;h>21QYlc9KaAt_HQVU~- z7$UXsWe7wKTZR}QwQyyK?ota=hA5U=crrv=sf8s&U|itH5QS0;Lxw;T;l~i@PuMX8 z+6p&@$dOu@F$5Y7FNVN11uKR?>*2%@7;zXe1X>RthR~%JHmvD?xLj)C!VrU`7A6dV zP~pK4s2vszf!g7~5V%fYz!12W;J*+E1NIAn+Tp$ss0-!`fx6(mn*KgYhV?=eNiCcg z0wu$EA#gf;7Xqz-?Lwd&xGn_Bf$2hsS=(L>)caO#J2fch)D={ucAdkZw57nHw4uPD zw5GrnR7f#PYFkm@Qff(oVQN7!RBD@3pvlcBa7`6ZU{IP;U@Y<}&HMv4|Nhi@AZ^g0_Z)|NdUcv?jeBQ zLsbOOdw!`UfZpjPDml5Pzk?AVfZo9|1kgJOhX8s9!%hIbJx^*0ptmss1kl?UKmzFP zJgFgo-fko{1kl^I)DS>#JEevIdTXH606?1lRx_y~f!^{;4GHv?W!8W|w*HpX5I}Es zkQxH$&9+iQ0KHi#H3ZO`sEPo36IBsFZ|Y_Z0A%ZLS~cL0uAj}iTWScPvt?340G(Yg zH3ZPvMN&fmot-B&1kl;>QbPcp9VRsd(Al9C4bH~l^XKrY-6b*f6nGe4f%5xok9Mbbw~~Qa|Z1qf6la#8uI51PA7lPpn>Gi z88ndmIfHV@pED?j{5gXT1AlD&j8y~t==vMDh-<(fO@Cvx)Br#b^Q8v-fj|R+9|(*9 z>GK9Ulk|B5LqYnyfx1YaH$-YkpEpdi2K2G@H>8H}IgN%AKBo}{;Zu(;Bz)>oFyT{= zf(f5`6ioQkqcwz2J!&9)>Tx>ZQ!i!>;A89cRt?yrzRd_&HGq#&7(vqK)E!bo`kX?$ zNS{-;Ne$_9YKGL1KBq7!q|d2NQbYQjLidtBrw|b7a|&^gKBo`|>2t~>HKflex73h6 zrw|$Ga|+ii>2nIbLHfMDKx#;z*C$I2>GL|uA$?xQF{IDQIZ{LVoSZ2&q|ZqVF6na; zVURv2aTe&K=_gS;@B?9+HK32JpR{TKA6-9@RW3E8&xum0A$?AucGBm>RH-3-PGC$) zpA$o*hV(hnQ))<`6R4E*Ie|(^pA#4i(&q$fCw)%%rH1r5;gTBC=LFhL`kb(&hV(gs 
z+Y0ot^%GJ<_#B@iHGq$%A4eaMKF1LS>2v%VsUdxi<3=HUj^mObeU4*rNT1`Kq=xi4 zj<`slq|b4LO!^#mN;T&b2 z=eStappUMh=znZmKstKT1 zdrCC{^lD?NCV*aTB-I4at0L6|&~H~sH2|dPznv-7AP~fKsV0Dai^>R~-(Dis1ki6g zNi_lVTU)9Lpx>e#0_f#YQcVE8e5F(qKreTfY69rx4pL13y__f21klS4sV0DaGt8<6 zfYcX%z_25Ih<55<(?|Ug|B?WY9}( zq?!zR34xJ8FCj28=p_V32EByZ$)J}|DH-$<>LP<)vZ~3TqpB`2NYjs^3L@wzsvv@n z;#werj-oCi=qTzUf{vmJBIu}?)j*J~AGNAMAoYbS3M7JFMDRq=izt~0dJzQ@K`)}8 ziJ%wJy+qK9D47U)5v?GCUPKi{(2K|qt3LmK^538TkNb~e-M=TW>R%db{@sTa|2AU1 zza^LfFavA-jlxQQ{rufA-@m25vEPk30AKq)^}Ua|{=f76#`m1>N#Dbm1rYN+;Jeqi z#kbzK+_wO8{HI_Zz%buHUvFP0U!kuFX7?N3Z@iy*|KNQSGXY-qKJWdd_YrTkw*s^J z@AYo+uE$(}1>TvM(LcsJ%sbHA+uPaOhO+_0^H+?O~e#o81 ztbqI6+ua+nI^SaVE$$iai9gEz*UH_*?Qt8fZ(N^YX26@SldhLt&%1u3$QBR6xSHnFxNm=Z_Ey8gEjg5$o}`e{iXfUzm@&(3arN0!|q_Wv>V%Q zAcHX=3MD4bU(d>NqJeGmtV>wQjN9u_Q+1TOE$=gf0_O7L-TF(_gHoBi20QH3v<7jGDFC6x6QoM zTy5TJ&M~hyuQRW~+76eR-Ocu9OS7@*Hg(6>Sa0ur$62iQ@EgZ-jwi9^LzN@;lk9&t zIVL%-bzJG_@92e<_6i;O4j-1U_|Evk_@nV(SZD9I#;=X1jK_^SBWdh2f>>qm4&yfC zR%4EFy>Xp!jd8hgsnOkNZ?rIS4HrJE{{@%1RMT-^#7)~p-I*`yQccG_0;bb(kIa^8 zI_{C{rJ9a=1O%qz9vLsyblf9Dq?(RfZ=(vYFNfjOU zaIsXuadB3mRMBt`H^Z)>9&XZNfq7pPdYWgp+Ym^dC=1LXa_DR#Kg4?P`faOv} zyFIu-s%WetQ7X({B$TR{HIMHc~~uJ%A7ipaY1E z06O58Dgx*L0wRD8ARqw9)(=P(`SS!CNd7#5lgOVZ(7)u*FLI=c{CONzkw1^)iX?v? z$64ghyZ3;tW>JVpT{sLB7`1jmv;kKkDH=Me+}{%HCm*-{1k zK%j2Y=Mh|?q|d`>C+YKWo@7X$hi%D_J`X!3L;C!@P%@;?&znhx^!a%s$&fxj7s-%5 z53P|5>GRMa$&fw|4U`P&^H7OoNS}vL4(am{${~FoLOG<*Lnw#z*^hEapZzF@^x2Pc zKp#!te~Dy>pZ%RBL;UP7mJIQ;zk_6mpZ#qmL;UPVUBu6R)J6R4M_t6v{%pwrKPV6_ zC4KgzcG73RZe~CqTi-7k!l!P&WC)+S$&w*_>c&cj@To(9fRCovp$5{Y4mFTIb*O># zsYAh}Pn{_l(x=8N8Pcb^t(5_N)bso;k|BOFWs)I&GV3Kn{AAD@#82in$q+vo1VsE~ z=1GS5$xM?B@sk-M8R92{$cUc|x{mnCUYNJ{dHg^vR&{q)!Ij3;Ni4#>xOb zx?Y)euVhG{N(2x3XnJKo$pAkPxsm~WAW$ITlb#_N!Y4gaGK5bW4J3Th7!II{-jDJ zL;j=|NQV4LVKm5})GW!6KdJGOA%9Z1PRO6sHIgBJQWzHUCxxiNACx>mG5`=nZ^@89 zsh*M{e^Q+!L;j>NKIBgdEhB$YXc_sF!myA(DU1gBlfr0_KPij``IAEH$)6NjPyVDV z$&f!Oj1T#f!uWtc2m|8-0NHxV%78y=4oN^V1W*#4KmaAtWB^FhlV~Xkltcqbpd=bd z0wvKd5-5pKNuVSmB!Q9$j08#|FcK(c8@#85m(Dv6;uT1^ba(Q0BS?vYAh2z8hqicFSDa7fc5mrEr;1cBwyvb5#;m;e3w|Ht|N!~Y=v|4*O)o4)Vxw*Mb} z|K&S{RRe$R`xV~wuk|H-dwl`l-M-uLmj5E(&A#cr@xH5lL-2-wH(xtnbG!*)`?BzM z|DU|?dEdYagD-gx<6VHCdn@r?|8DONU=p7cD7_xNL;`#tyaE&k=61)iCnDV{N&VR$E?x2LnGji;#x znIPSN#T)z|;;n$+yI*x5aX;mL++F8Ry7#$*crV}%_igS)c>X`#J>Gq_dkEGv?1nc3 zn!6jhZFd%)|NqJLp6d$9apS#koh-?fk{ReE#3d+0j|(%yW93j{oue|CiPyR<%`O?Xh-R zcUc>(71qKZp8t>dKYIS(Lpn%HX)JEh&9BW*&G*f-=I_kkn9rF9&Hupj|LDJb{-1~J z|N1}4{=dhu({Yz$gJT8Of1Kr*>KKdc{{!&(zmuc&Km7dvE8{Vv)<_t8jev2tal5g~ zSY+I6OgF|GBaA`DB}Nyctx;fP8;n<_PY;Fr2XzggJ{3|&>-6HJ~W8-yAKVb{q949 zV86D$&q~96)%?w!RvPxJ)DA06|J{p+>}mS%-qn((|L!f7H2rt)3`x^}_fC{F{dezp zNz;G#4w5wecW+lo(|`9COPc<>w@}jb-@VNwP5<57NYeD*y*ZMm|L)C}H2hc7_u7(% z{X#e;4fll*Nz;7y>Q)-&t0tE2lr-J?cMqzk{q8~awBJ3tq+!3B{yn1 zprq-(yGtZZ_uY+h=)SwrQ*__mh?(xY8==yDccTir?{173-FLTF(r{l*-)%{n=DS-r z(=cCK-z{l+?|mqb-g_U~M-1({P13~BF0_Ui+J$S27}|xV6GOXDCo!}Ots#bXiIoP1 zbUlbCG-+^1(}VLQ4Gcldku)h3#1%ja1<@Q*C|D$EQYeU?CxwD&1t}EtNSYK1q7G6h zh%0~;3Zmypp&*(A3fX#4(u7a|brM2>*^(xN0y8B|2nBE<5JCY&K?nr~OPUZ0pyLUl z0D6NE3ZQohp#b6{gaT+XArwFqgit`tG$9m_G$5quNuZqwnFQL2kV&AO zsFVcSiAqVJowy)Kpq;3l1loz(NuZsmodmkKm!v@;O~1F9q<|oZ97%yd5ZRIffFQh* zB7g4nNQ(TqSIiXnW9#=?Dd0!fci;=P6#27bfuzWv9n&R6{_L16De`9rsvv)M=#nCT zwwFnY{MkNUQsmF}VUi+$w&P&(XM1Bwkw4oTNs9d0jtI%0?T8Tk(e&+>l>&ZreVdAd z{Mj~9QsmFJagri`wvCn)`LhjmkU!fBB}M*h%aauOvkjGzKig0l`LoS7Q{a!SZ?jUs zkFMXNssMj9{hoo6B7W{cImFLB2%7l02L%#8_n<)H=N=SD{M>^AfgfAH$4Y@dy1rEv zNc?PFBPrr%>s(0@KU>i+#LrgLMf_|n*^u96~uwiHW>{Mph*QsmDT z1VsL9K|tis7Q{jRZ1GEq{MmvF2mI0WEiOqBKwB(H5kOl+QUuTz-An;Mw!Xzmfj{ak 
z1Vl&xZALr<&}PI#0BuG*1kh%L0RUn-C8HvDqAQiGN^2hq{yJM*^(lI%4SN63@V#0DKe;RlBCF>GTeq_P#Nw% zGN|lYNs&Qi!zD!qmEnFQgUW_Tk_;*vEJ-q`Y>*_$pt63FB!kNON|FpJ>mx}pNYl$o zBnb#Xbe1F;RMtU~WKdaqNs>Wj?IcMCl@&>n3@U3aNiwLcP?BU&Su;tJL1hJ!B!kMD zN|FpJ%aRF)@6GN`PvB*~z%Mv^3h%5o$L1|f`WNfJS29!U~GWws=VpfaZHZ;AZ8K-vh@vC5&+Wm^;vgI zk_1|Rha^d$_0uIu0*Qu|}<0 z|L-sA^ZzIQbN(~_6aHiVXZ=t5ANE)IWBvz_0pKqG2LDQbseiVA8gl-PKn{RD$oSXR z-_-B-i|>2im&o?_4zd6oN3OqTd{6iu@}+%Y-+jmfu+g{1x5RggZ-#Foa{LYTU4~2m z#lBXY0pRd{>-`+r{oeAv?tR7kg7=X3QDpW@c=sY3z}?>4y{nMb@8+Mp0npCd+}p@& zd$T-$MmE3qkP+ZNJui68S>~DVxyduxGa6a^1|TayCr@in zKHdN@{{9BQ>wkX(p!(l=0|4Iu{P`!}06c+j04jg@24FMt11xjRcirTg>>BO5$~C}M z;_8G90r@VU%e23HqNF_zf-KgTIa0~t+%lP z;H%aV>nZDTtIkSV`+l?n;9TnlYl1b>y29#b^{_fvEv?3w1E9;-@~OQ4!wP`U$&>Oh z9<2QTuK;M7-d}&9pNdx|M|O>iWv8%~le=tJEe* z(s);nmn4mMCCaApu9zrE8t)2}MB`n7l4!gu+*T6CtLw}0hfxyFtJFP`r1vggB}sbk zauiALU5;^~_b%@)NqX;cL`m;mj*+7GE(d1my~|Mvy?41=lJH(lUk@zPq%OBn@%tiG|!UO%e+zrbvRWyLgZ!=5b3O zNzio{mq>!HySRfS=(>v$#mzhhEvM}+M$2b$3*wqdfd=11fd=15;grM-3XueDcd^4v zz;KsAQ zT@u47iX}n(EJXJZKMPU$mE48OiJyh2{0eT#mc->0UP%n0Kvjb&P}Lw+)j}&VP&Iu% zW}ha=pZRkn(O(@if0!gLqZld)@@IZWNnFY;EhN#G0z=V<0v%8O%$p&J-fGJ{>?)x^ z?Y$__2R)UzWxgbOP$1Oq6bP&vg(-=yO3cBf(S@R`Bsx=|ft@IDR!52)Nfc9{r5z|x zPJ0Sn677_jjs7X3Ks;?JaC#dG9Nd}$y-}!oW40t(sSUH%NTMYLMxX@+DhGS83&Ylo zLd-;gI%$?9nyL*m*GnRwVwoiJDA1KnC~(1(JTq~f=WllUr%#u6ucZfGR&72J{|@#Pe@#FtU%W_+n?=?xNJqBdMVPvVOy zaMG<5PKhs~5Hnt?j=kQBFI4TDjypTPK<%17L*nx(5b`{Vwi3UE0s+pYK!I~8ToS*T z0tL=i1x}avEVW@8x?m;+&c2BPXA?cs(5e~Sf?{q^V(KD^Ur#Yd;?pT^k~rBjb+E*z za!X%{Pod~6@yQhE!ATSyBu?~9MadJmC0pXxQJ{CmQ#j1{I2GVjD?V1Wc#3NA7`1B( z8akSyjl{2|K&YcAFcL)16oh&WxA-M~HHAog1O@tixGH&y6(6Qbo(vz3U)7*c$Cc`! z$qOVtlmeAqLD5y>ms4Q;hftt;!e=t7AH*%Fejo*^A3%ZX`%|Fj37^S`qaU{*j!RV> zlO^6)ZJ0Dc;(aKFNu2DNI7{Na)s~54C0;^-D0)$#hMp8CkL;PKOPuVPfHCc+c1@Tn z@vaoO(8-<&xW37r3H>DAiMvW9PWDXbDe+=%!FY9`XeaUZ6lh;N3be0?qN&8&QeXtz zP#~Vx6u8g}DKMsl&jhc;37-jwknkCgRy0?~j7MJ*KI2g`;DaqFhx8ebD>Pqq4`R<# zd&Xf%no!J_IN>u6-IL2LQzYJq0)3f7frb)3I@o$wjg$czI%wmwec zWY5_35(j%Uee5+7w<&r^obVZo8=CML>ytR)GiHs%37;{g5+{7dAV5Q%HD;>Bbqd5z z_Kd+*MD~nnBNb%Nm_n%_d&V@A3bJQRBdH*J#$-zc*)zr~6=csCk5rI7V=Sp4d&Y=V zkUe8esUUmC=w=1jW9wt2g6J8If{C8dIZ{FNj7E7x&uGi6AbLhi1<^BVoKz4!qnuU+ z(4*@kaZ^-)Jxbjr6@ZVXk1Ul6&=}XL$es~67VNS05mG_) z3~wtHM9**(NAwKGu|&^sR88~@$4Nkstq+$9l4n@6fAIeQQUBA(|F_?t@`wDp{M-C@ z`d9lGBj^7O{t5n($oSvS-^1VG-s&BcboT4?`k~%pXMma((am()E$+9lZbln(IZ^Pv8H)!?nt_$aS-8x@$bL0}gTZ zb#-&Kb2WE0a@npd`_IS^c+Ng!pRkYF&)HAf58G9C%znVW*WO~Ux0l-s?3wlyd$c{w z9%%QrJKJsW{=eTA=U<)YogX^icK+V^s`H5RDd*$PI%m?k&lz;yzr3Rhd3{Fc6YXSws7V;UCu1)&(@!;bJiK_gmuh%);ehYhn2CS*8SGctj(DJzvM^r z{|8tlRwt{qm2dehQ@)cgOiBG7p;v%%7W;c>jO5xx?IKt~HmM^UNE~N#?cYP=5CR z+x!2f<2%O}jt?DgJAUtY)p5k}l;d$nog?Yk=LkCP{$KC^_i=P}6gisx{r&%sjdzXH z#&P4Q@r?0=@sN==!p42Zc4MQl##mz9V$3im7$c1b&Tb_f_rryXKi6>wUW&gV|4pw|ut zCF!+;J4yw;c5pMZ0$ywDgQbF2J8+&<&}s*wWLoV&luWA~XiEjHb|9*t)egYHwAuk3 zq@s&z*8rSFs~wOh6|~v`IGt8IK&%Q_tx76%uT=rBRqAf3pxO4HB^5N={^O;hNR`z8 z8mXY!_P<;zXtw=_NCnNdKUz<-?cYKwXtw=PEzPz+YNOfqx6KNet*!T$3VQ8j1Ehjp z+YiUmYx|+c;kBCH52He}?T6u|+4jT50kgIBeo{fNy%hZfueJ3{tqNGJuJ^?w@d|ov z-!)P}ukCxARM2btUL_Uu+P=6-;I*3G7ZJNDP&v)EF9N06_66%{wtbyaL9^|P?xorG z)un=F+ozXQ&}{qQbX?k+-UqSMZ2KT~nr$D%PP6S}nK78Ht@n`_z4nr35~J5%;*}V^ z_7ac8=(U&NSa_|iUm`JDZSP(Zqt*6qYsFx-x?X}YiqUIJ7D)_VtLY`PB}TI?nIbWo zZOPRVquG`Wkr>UkWU$0wwm5i@#OSsqog_xLEh&^3+!nhK7|a&JmRKo;Q)2Ym5=&zA z+7gi%y|zS`7`?XFP>IoNd-atVy|!0JiP39&b&%N2s+?Z76@%I8dQW`E6r#7_~%0{Fq{2%h{Y!k~aZ*n(>d z0J8NWi4i|-7f6iwX^TUMpSB1P__1}giu7rNAt8O*pyi}b8w`N0ifeC^f$iIG075jyG9x`)I_pVqA<3i@bz>&6l#ep=^PQQ${M z;<%s{1%H$ZNR$97Tq98cNYe|KNt6UCTp&>rsBnfvK_KiJC{Y5au(w1Bpu(;aC4dT> 
zNt6I8Y$QQ)p0QW@hjzqC~GzB{m-+Q8K7`dx?@k%@I5pgk9)#LZ~?+1cYq8xkSMrO>dSb zQ6i{dy+nzig5eS+f(p=cL{LGAM2Vn+o)QIuG`*leqGV7(Q;C8>*n+BvpaM&xL{I^G zg$OD@oj{PS7g$jcNZ0dKYlxtHR6zvg<8&e@AEy&R`8b^j%0pm8P~IYm5VDjkOv(_1bJ|g0707WE|Vx3 z^K zgKP{J8Dw{tC>dmTlPDQv<18}BMpa~x-AE#2kewqDGRU?i0tRWiEfN8OG~J0^B#?E7 zL`WcOy+lYLYmr1qAnQhnkU-WHiI70nIEj!z))GbR$qyb zKvr*wkU&;PiI6~6bBT~ZR#SZe3&;S2q{(t0O&i`+MH2@5(_V*d`|Gnuu z>3iAtJXZR9#8>UBz&d~BzOBA8-%4L8)&ZF28|NG08|>@j>*_1Q`hGcB3E+bFEAPkN zcd@qLaqm&@Gu|h#7C`#n`uzXX`TxK4JmRUwiU4~&J3V)K$~-GQrJmWIX;>3rglDj) z57zK2@-*}0c$~-p@YTP}0I=krWB_ouzQx)AAGqFfz3zI&^@8h=>rq#YD~{Cx%3WJs zWv-R3QrB$PG}k!S2&@m#$JNzU{rt^2s-#DLhKIweeS>=p5 zA8_95+~QpCT;W{koaLPA9P1qJ9OS&j*~MA(_Y44E{lg3Z_gUMmjn*1#vGqTn{}>n793d z3;-?6T+?N0j=wlQah!9Uahz}*b3E%f==cvu#u0Jc=h*Jp=vd=e;<&{z!!gk@$}!Z@ z&(Xut!O_yu*x`0)#@EKD#{0%u<9EhyjOUCejfahDqr%u@>@@B&HW(|6g~lvnsxj6W zZv3-103<@wmC+KR>B{92q3NP+5t^>xR;B6U+l~lL7mbe4bfrWhG+pT~5t^=amIzH3 zuSG;)x|%NSB|_JgLW$6I1@|>wS8zMib@Ba4gsv;No$0#bln7l{042JvI3xnsMS;2* zf$7>h_Rw=pz=@t~B4m25IZz_>T(ejr^js5_({s&c5~1gss2rZF=@@_rOxM;;D+15e zbqD@tjnH%*D4C|~0PSeH4m69V>j0Bzx{e$x0@KAmbsAk4Gbkc-U46bp=(_lpF+$hX z5h7ey!)Hf{z;+>eN`$Vf7f6JztD}GDx+)^LF7mZ1roncV0@LWbTA4)XyBd&8-_-y& z`mP4J(RVeBDt%W2fa$v$>Zk8&=py>AhWhEd8itO(tAWJyT@8(;?`r5h`mToFgYRm( zh8StQTAoB`yc#-$#;fH@gvP6(hiSYThL^^xWlMy{t6_LyyeP+#2z^(>Xwr8vAQAYk zja?Dgu8!=*K`R2^RVpA6WxU#j)e=$0t6e}3E92EJpv}s7wF|Q)qKsF&FkK?bc(n`H zNkkd1c44eUl<{g8&rG+KTyKT zc(v~fC9I5B`(7lhj92?^jD(f(YTpf$urgll+eQ*r#;bkPOTx-{wQt%=SQ)SOO*08A z&k+3pe?HjLzmGNrdcqFWhSNp~?!!TYOA`IWvwXgAQei+89RJnwe^J-rsAmzN; z*S#gIoLBq0gM^jyYG31gIIpdJEn#K6+F!CItc+Lt3+jULYTBPiOBlWj(OAODcD1h% zv$9?7D}^P(&g*6v2(q>F5(a@Z?F)36LXh?a zYETH$zCaBMLE0B6SRqLJ0tG7sXE>^$RA`34Us>{4;mtW{vc)u{IRt^NQn4(f3}2(pZBLpi1>MbiiC(C zqz4TVKkw&Bi1>LQ#SlO5+Y%yv-WLfGKj+p<2>8*oa|0y={y?B|;0FR{kv`|V5+Z%h zc_c*oyf;!pq|dtxB?S6t+Pl|Ci1>N;atRSX?+%d=@$>E=2@yZ>LS^K4)2nsflRn6486tg<(=tT* zoJAO<&)JR=B7M%HTSy-yt_+brNL(2rea>b}i1ay&CX+sAZ8HS=*xFeM5k6-S4&ieK z#}Ynga4g`XX=f}6kv?Y-JLz*qH$$M0t(}n&;dA;%2@yW0F>Hj-=`j)_d`@2}A;RZ0 zdJyo@w9^Qk^f`^OA$?Bwlo0828jT@+kfkz2`kZbfA=2kGhM)8~je#V6PB)bh>2o?? 
zLZr`WbOGptD$oUh4+NS;_MApvkUgi-KChUALST#mXj=VX36VbaxN1nB`hF53 zed_y4i1exNBO%hKzC=Q#Pd)mD^r`PIA=0P5vxG>Wdh`_OQ;(h^ed-G&MEcaDJ3t>) zitYe@AaW%{`qZNrNS}K20_jta#*;qv4hfMy^%z{x$JXj4MELw3ClNls#}z^NAZ2HW z@HvIKInpV!eVgwN|}7UA*=l^mI>mGZ;n@5ls|v0bD?g3uuC%BDiZ5(Wn?>B6-~2-}!Z(JQ_`&eBRHyeBSqi z|J-9!b?>>i>U@89ZdIKt1<=Q~kY0WNP+lyv7;1-pBHfq@$(}7iTHV; zrxb{v7rIG-_(7`90`Y?kodx3O1(5>r^ZZmP5I@fkkOJ}ZeCOid|H%LM*XW1QKSW=S z?v6hFgU|oVqEoN~;E3qp=&8*AcWm^iXdr5155Pl_1CiGvFGQY+Y>#Y?+!MJSy8vb) zwULU*)saj8DgR&B$nlZ3|H%LUVfYW>m&3cmPlvaL9|-@a&;P&s_y3l^fB#>6{(nvA zhrj>-_VfRL_5J_F!Ls0#;CaCjc>aGXb_P5tcx>>fU?6A*4hIee4g_Aq-hj^pwg)!j z?f=^YHwQ9-+Q5JE_W#@Ne(VtVl>4ZAzk8Q^t6Okya^vm|?v?JvZkap9J*_n+ z*VY&HiT5k-XWl=0-}dhJKIeVP``!G1H+mO)FY(UxPW{L8|L^7hD^P8pNBB`~pGWvz zq~Z7cQBt7VK98`W+CGmsq}o1@_@UbFY3mfAwjO&orjrHA?e0=3P;Pf4q$sz$5r~xA z-H3k5?e4G?D7U*qQlQ-KcBDYL-R&_2$gOMdmIBrGxe-#J+U~kg3RK%&2!5*Vu9i}u z+U{y01*+{Xcu2MVUAYvfw!ecrRNLR3Ed{FW@48EYYWwUgDL`#)`&l$WxqWt+6ezdP z;uy;9vt6V>xqTMCQ*NIHutxGM3=8BIr;Ag7-g@k3@W)gEf~yvWgNpmi6;hz$J~K`V zR9qy2El_cvIYkOoTqJ@mP;rs=wLryv1{Z;fi?pu=D(*8uDNu2d+qFQ&MQ+yu755n< z1uE{&B~qZ`?p!DZD(=onQlR4Q94rMY?#{kapyKZADFrI-PV|+EyA%FUad&o=0u>k8 zUJFoM+eWt60tI&`ZW9XbPB=!v-HEZM;3Dm7fr7i!CxxH!EHIgZyJNl-D7ZU@OM!yB zV~`XmxH~Yw6xh05)NP&8b)UE~U?bDs5K)ppG*8=qxiChcR zTO@KVP;ZgYvOv9kYOWNhw@;lV1?ugS)1^SYeR8xEsJBnTFY4`+B~qZ?K8Yrvx3>Kx znxNo5i6$txPofD5?vt(*D7a6eEeh@vCrE*U``gw|0fOtXAIBqw0u}f1#ZsW+K8`yf zs8v5cPzqGs$8iHtaUVykRNTkWDijyTU|1-)kE2xz?&D~cg8R5%3KZPO(L4qBaSS*G zcRRX4!QI|V@)X?dog`1e-QGd+6x{9YB~QWK-cIrqT;!|FQ*gJ3Bu~NJE|RCaH7fPObyKTPYskhsPOP+eWtwi$F+im?MPrcpN zSMmhVw$73#fVQ=fJOQ+=wd4sPII9stq}r*3lcAdp%soIDY<6^|bCK#=~H zbn;-3S~1BJLR+tqJR!7oiR1~Pt&1g32yI;`c|vIGEXfl>TPH{!5VGy9<0MZCZ5=In zQfTX7$&*4`2TGn4+B!h;q|jD$g%sM_NAjf5*6xxgg|?z|q|jDe22yA%MxPYgiqR*9 zwzic#DYO;+CWW@Pk~}H2wWZ`qp{=+uq|jCj2`RMIkvu81RZJcfa_y~7o(y^v5AyP4 z(4*)(8T2SRNCrJRO7djTqeCT620e-kLIypGUXVeL!e=t*QTRg!JqkC-pht1d$)HE! 
z1{s7*oOv?nQQQS&&?9A%CxafDBzZFE5sV=j^a#d~40@!CTpcp#VT>vn^ziYLCxafw@RC8u(wQfN9>!pjK@a0r z27_GtVaXFg58;j=f*vZDJQ4KJT*(7Lw*Amqk|%>6!Z?vZ58<+tK@TAS$e@Q%F&Xqw zQ1WEZL%1en&_fs%GH45GCWE%jkvti+1-_F(TX6f3L0fRMkU?9}2{LF4Iza|)K@()q zmZ;>(pe?S+gF&vn#mNIfy0u!RhX~p{Nb*F`W(*4vv>CS#5wsaW zKm={>Ejc1+b5F?;L7TfujtJV^S#m_s=8lphf{?m2M+9wdEjc0x8B23S(B_ts1A@?< z7Lp@_ki#@b25t6BjttuDlN=ee*&{hJXw!9)BZD@LkQ^Db=}gIyL7U(Y8MLXpOk|TZ|Kt03{l9}d+p9j!7@$&#`CVuYkAUWa( z$xL&^&;2-y__-ek6F>LkVB+UK98CNmKWUEmL0Zxr@pB&zCVr5OG)Mew#Apyd8!-sP z&qj2R__=qu2vo$$&o&H!wb>}8BcSh z4>F$SNFQW8&5=HLdn8Bt{CbAuNS|M$qofZ~mgYzwWFgIwKFC6v1AT1!*R4$s_;KxD zOOEup>w3wNK6l}kC4KH%B019MF4RZ*+=XjH`rL&OB7N?{ZAAJY`Dl*xLGsZY>2nvx zg7iTa(j4h?7cMsGgLI@h(&sMtPWs%5MoFJL(J1M2=Pb#QK6eh59O-i>Muqgbv!~=h zAKShYmjw8Mf!0Z%JCBwe>2oLCAbswH8>G*jaD()@Q=A;=v1iJpY>>!_*s9vR0|J8;o!dHhc4KE1K2$zOOg@=Vt5BCUn4z~}t35UbJ&=;Xk zL+^*)480WkedvkM!=a6#^`YiaF0?#U{l7f_|2+6{@ZI36!RLcJgO3F_aR-1^!G>Tm zxGZ>8a8YnUa7M5+I4U?S*gx1a*d=&e@aSM9D1om6p9TIHcssEF2fhF(?f_U8m=ZWI zFd{HGaO%JO0${tl*}ccT-M!h(xV3JDd$oJ1yTF~{mf{P5VQzo7r`yFn&OO?VxW@mb z|Ic{-|EB*X|L^@z_#gIf^so0f`*Z#k{+R!I{}uj){yF~1_yS;rfADv{0I;3I&Y{15 z0k9K006c)5Ue{nbtKO+`ZgduZ>-qms=QO9g)5-at=l}n12Y`S60^qwV03ZKHUjY2i z3c!zk;OqZ~eE0h9@U8ZJKLg;IzJb2pzMuIz_>S?l@HyUZynprn?_U6n_m1=q^`7SK z?)~l#fYM0CU3Y^tQgPQ^Esa#%b&I5tio0&1G*WTbT_}xI+;#J%k&3%+wlq?4*Ugkh zD(<=&(n!T!S0as6+;xMbk&3%+pfpl(*Y%S|D( z(n!T!=SU+Jcb!ifskrMrrV)zk+Uul|f{XmdjTBtuH*Tch-i~T0xJYZzy?rr#mg1ZKzLcv{wUnsb1a5@F|)@jm6!MzpZM8Ulk<3z!|6_k1_c*sa~mnRx1vG{?ycf9LU29yEm(bMq~hLEDvea!TLwrY75A2I(n!U+~zWAq~bPDmqsdX z^Au^M;xCZD6VZcpCOHu+-7v1lG}`1f|A>eTY{2{6t|6(+~&^G zNXc#PD2l13_S6ULN^+k~N{;x@q@DsB@lHWjxCy@BG|c2g&5q~JEeQwnYqZZQgO6FN%4 zZGy`b+$Ok8!EM4VLcwi%0ZGPd{%amEa9`Vy<`cWm2D(T_^fOtS;7Z-I!5`aBqZhE6o%lh9$SmewoJ9KE)LOEn&rLX+^tlP1l0HcJm?eE~@=BKUxyj>XK_8F30;_yk;%5b1 zC4N>MC0XJJ$r`i7&kCGR{H!oe7Wnbl%XR3%AKPAzt`b1YaXtaG90N@NEk~aTpylW@ z0kj-95COCteI|gG!+in>NglHV5K=m3383Y;fdC-aUM^YkCxw&9pA_z5@~5_gWWgWX zu0=HjP%Xv>0CMeG$&x?GNs=Xhl4B)H{v^9gmi$SgKJo`C6|>||@)*gIKS_8){v=(= zl0Qid2KkeOTi}mtCnZb#B+xtJ2gw$*#7`nDS>h*wcqe`mj>!T)uAPu9=~L6v$$~x} zJC3*RvcwM(A!dLd+eRY94Ecj>hZ*uG-cmB;PdqFc@+a;}hWv>;CIkMsc3d*VPwYa; z5I?ap$q+xWA(A0}VkMFxeqtv`hWLrKkqq!-+c7i+`oL%*8NdgINQUgGE|m<~Q{7H7 zWKVTSGGtFRjv;$0hf9X+sYDZGPi1GxkUhw3m?3*A(H7ZL3E#<{iW?+D_EgN04B1mL zP%>ms1x^QhY`X%D58wDpJjt3L-;I1>x9oT)C2g~_A<0i`Yb~| zqz@7ZW=Nl9sE71f=5aEhkH=n$_wzEu&(g(`A%2!llnn8+6jz7%S=vJ~z>jS&?I0P_ zXDO;CeU{?tkUmR8CIkAo_EO0ZJ~z&n4B>Ml27&Oo(JvXo=LS?v_}lMF zTqS(2M^6c#>(7@A;d4FOBz&&NtwH!)kH!g~>v44mpX(jT5I)zVr-To31ZD^yG6+Ch>be6H;x8NvtI@G^jpZC?uyNuO(fBpK4@TDVI3Tnm3l zAEd?0kUrPKAJXSqTzAsvTKEI{pdPPe2%l@NkqqH;%|gi#KG&Qs8N%n9u96{quJK3) z@UiWyakwuy$LzQ`AmPb2R~-i*8y z`F-Sx$ix3}{(n1sID9C4ApBbRh43@s?cvSgd&0MeZw_a|-~ao+f4FD3OZd3((cwt= z+dBa63OyNmBy?Zs&d@EPd}u`|7P>xkMQCAYPH1vyTwV3>oNu}BpP&B^^8NSs|1Ev~ANcG4Ztq&}Dt!N+^e*#W$b`jV`yE*a>pZ7&)v z84B(qpbvtJUl3i?+eLk)fqJ{Bvoug|7qylK>g^)GG*E9Z86XYR+e_L>1NHWjmeK&d zwe3s7(m=tzBqR+K+)Es3px|C2rU8QM+Lt&D&|8mvF{Vom6x@q1k_HOy#g1u!;JWt3 z(m=gkh!&`~3(*4gb|G4z-Y!H7)Z2wTP)+X`tSgcb5k0 zZ8=7cdRyK}8mPDB9i@SKTi#w8sJG?ENCWk@yp=RiZ_8cN0KIkXa%rI2E?6K9RNDnO ziE6t5-J;qqXd?|&+XXnAYP$d(rP?lVrGaX@0NsMx+V%n?4V2pj9;X3v>#^tIQDOu2 zc3y93px(}d&(zy_XpDM04~!}K zqTbHMS=8IPIE#8aSDXgut;a6I!`=o6u39l^pyHO@APrR9vc=Ls#Vxy18mPEs)1-ll zTUH_sRNOLLa4K#Ynx*2Fb&v)sZW;PP#VtcWsJLYqG%9WxT%+Qa;a;HPmIbAOidzOB zskmi+X`tekp^H@9GK?M+*JIDYo(T<<+&N36fs#9Cp)^o(=ir)Aa_0<}21@Rnq0&If zodaJfxpR6*10{D3ZVyWC9QaDfodaJfxpUwvC3g;drR2_OAq|w=Ik=N4xpM;2K*^m0 zUn#kB;439}4lX$*caFz2KyqDsj?)0e_1Lp@peebtako=)XTw!Uu5Hi8uu^kp50VCI z?rdB;YVK@D8mPImu9XIA?yR}eK+T;6JWz9I!BuMREDRzEGz)hH2{aQmlRz_3GYK@a 
zqco5}GtmSIG!ti$Kr_9PCV^%QkTeN2qn)HlpcyTlGzjFerz5FAnh2VHk)(;B>Ek3# z1Wg|#X(DKPe@PQT)B8x82%6qW(m;@HPj4@2FbD>!CW5B7kTelA-IX*Dgn#l#nhctD zg`~-#X_F;Q22DF#(qz!Ifs!VJrlB4(Xj)rIlR?vtmNXeOt&OC~plL@*nhcuOO44M| zw2-99plKp$GH9BSG#NC_W71%dYfp31K#<2qraLDM2B}r&qyZtdl9DEcrcRVJDKr(s zKnhKTYoyTBE|MmNrlR+x&{XuE6q*XJNujCmh!mRIQqrK1ZBNB`5JOWj9>ma8F==4P zwWms&6qLlLmr3_5`deq{*NO3nWbjO+XuD(1bCPCW9vQlQbDL0d0^$ z6L1HSK@-pmFvzwiz3@EQ0Wp$6GEj| zN}3QVg(HMe>0n6{LZ!HNgitAloDeF-024w;fR`qON-@BMP$_y%2$kY~A%sfNeL|=d zw=yAAis2-La8Usv+a3>t3>rUH(qz#1&XOjB#-pob(D>GpCWFTNB~1pMe~qNcp!0FN zkU{68DKh9hcuEGH2M5WZ^WY#EbRHZegT~<)GH6@}Ns~dyMVBUn#^I(XgT^@~4F8alSL(^1k!YR#8Hv##e@31p_27?fkHmlzKqKJ~0W=c+5I`g0 z4*@h1ZV*5tJ*FN2a_x~)PyU?KLF&OD+a7+r)Du6$7E3+xGi-p=6F6bU z!*B(NpJAw&_!(xTp7Q^7gS=3kdkQ_}I0c>>0E~>dBr#xLjnF$fB5hJjgj?{<{$k2e_*87ck=)JC9LoN?+-s0ek%NE`2O%+;akIn z@J-=(_=fP6;fues|KG{s6aJC?|8VF~=s@VT&FfB`{uW@|KI7}^1bZ;Km7jxC;s>T*X;lQ{{8(wn@dbQmX)kRgEI#vv`P95`-ZU?n-q#6ro5HjYrL0v%e^zb6TG9n z!@UE%y}Vt$$9vm)qh9fR<@v0b{a=y;mNUNmIE(S+?hPd0duUzVGpfNz|uBXvW>aL^FN$Rep zalF)BL!-UaT}`8v)LlixW9pVDkbF+vVkM;)Q?9xzi&j$VuF!+Li>2;z8snvo^6nic zb(DASK&hj=d(kT8-HTQ!?_M+pdB^GPq>k$DMXOYIZ%e78x_etl9o60IN*&eR>z6vH zJI)fRqr7`PQb&3BER;IRyXQiwqr7|OOC9CiGgIm)@1DU@M|npUgF4DP^61sg&PUYSD2HjD4x4xMrbt>=HH`ApK@(yFT)TzE(-}IF_)pzTg z&QhoPZhg~H>Qvt?eCl7P`fh!LCRE=oeCl7P`fh#Waq6J&9_wo)6t7c(x4yrDghtHfJhp;eW5>nq$HD)H7=xEEC7t*^pTrxI^{6_PrY zcQv&buY6Lc5^sHl`xX-KT3<;W6yCPJESEYJc1KU!W68AnOZwssysW!0oF9 zvc7<)N+9bCc&Y@lzJRAnAnOY;bs&%nqfP;2{cXO~DS)iMogj4zAnR`!T?LSJ7J&iM;XzWT0J09FaRrcdxS!N1fULu~pA|sX;oefG0J09FTM8iSFuJ7xvJPXE z6+qTubV~tb9Y$LUAnP#trvS1J<4OWR9_w?xl1d=!b6iO!ko7qnQUY0@mr9)y$ohP= z)G2|i&v9!ifvnH_N}UqO`n;EFWPRRFQc57}^J65X1hSBCA*BSeK5r!{ zC6M*GNJPiXZFKfs#`ESf9dU#gFwVJXZWz$b67e{8*pDb>PQ^ zky83tNLP?j`dCO#kW%_sfAUL8>0^Czk))J9)+Y-jrS!2r87wKKkM&6>Nhy6Sq!&mj zeJrFGNGW|RBojy}eXLI~l1d-z6C)|5kA-9cDbUBYK5&wmyc3gwMwjCk6O;tdFn^lp=jTDwh=L z^U(lFkv<=xdD7=2R89JPgsMp&q+LjnJ|E%Afj+MFk)#Np53iOK;qxKtBYZx@O+xs5 zI7?E5&xcbaMfiMJDk;L}!?BVAd~EAOTsYDPX&h3d&xde_^g%v{6zPLB4k^;-L%2iw zAdN$c^!X6OOZp(6LyGkI5ce4A^C3J1eO&89NfADOL^XuZA8|I}^GDZ70X`n<13Vi^ zkv<>5L(=C1v<~{%)(2>m@c97uGvV_AqLlFYz>yT;^8wldd|c}TNs&G8&yW<^^Zo#- zC41fvnOd;NwcdAXfgX=_5U)+wf<0_4mPjq&V_OGLms-;2U|*>veGc}KTGHp>DN;-N9PBE!q|d?5QcL?pOQ&%riQ zOZpsaEw!Z2!4^_W`W$qn7W6?AKBpG=@mTNS{;4H@-kT=1V*LkHl>~a{1gRx~-oZeVK<~hL66l>D zODzfX4n~{=dIznOKuG0LO9H)v)=8juL~2Q(Kj2^z=npuC1o{L1i3EBZLq`I=6_#2O z=&g{{l0a{kNi7NVCN2R9^k!SBC4t^-Q+)sbuiyUrU-$p};phJseCz%HJQ&X1fE8S)?O|JNMOg_nn` z!`Fo`4__Fb9i9{(8$K&MDBLI94J!cc!Y#x8&^P~P|G#CSt3r!H^Fz}^7lh6Yoe?@c z)Fae6)IQV(`~UfVVE@1M-`oFhVsK3G%;3Oa@8Hk=qa6T>@BhyYObwhLI43Y9&@b@w zK*zvO0!Lv70NXw69{R@)0RR2||F6Ec1HfqR0PwHg|9{f|i2pwSo&H<=`G0!}#{#JhX?>hjz>%8hb@9cCQb2d45e`^PTtDQ@o1%Ky95ip3z2`@ zV1qB|TjsmUx5zi&H{ExE?_A#*zSDi(eVu$i_5H{f`horb?)9$w7a0HtdHZ;~c~9`R z^S1Q*J>Pi#`u7!pTIw$H-_%lf-zb+_>h2rBKXli&-as@{cHcnAQg+`!@KJW(0B2Ws_yHk2C8dYuXmDK%I@nx0A=@e)JNHU9gS0VUk9xyyU2`F zOWA!LQBB!>-D7GYyRP-RQw!DgSg+wRS1n{$t)$dacVAm9wbb3$&? zQFmX1^VD7B*r}!Nz6P*UcV81zOWl1#<(J z6u*|b`^q&^OWl11ou}@;GD~WyyU49mOWl2?r_@q+ky57?x@%jnU^pne$fQ$C;YCiI zB!%~tup}Y8I3^@XC@&00k`P`PK1ot{U-6hEbl1f{B_X>W>t!s*CaJr~jgzGAzC1ya z)LrDXNm6%_(isIG18yH1i2T^JWhlA613fh4K9`zA_~n!9hDB&oUk zMo5yHyKjIbsk!_5OOl$quahLHx%=8nlA61(l_aUT$fJ{_<|2H! 
zaC4J9doUOz&mK5N^6bHtAbIv+Fi4&~Z6yivphEae^z6a#5IuV^JVehPTqL4r4~7Tm z!NKSk$+O2NNs?y|E(yrvT6-i(@a)Ff1kY}qP4Mg%CkgO)tmm+Rl_YtdTP#VE=ecQ; zAbFlEkp#)}Tn9;zJkPa}1j+N9D+!Y4IdKvokH^}DcPtWMk8SO`ND@TPt~rt*dUlPI z1ktl=fFy{XU7aLB^z3RU38H6LYe^72$m^0IdUk~+LG7$^zC zXGc#-5I#HFOM>v(fk7aAcC?ZN;j;tdOZe>YOM>v(A(9|`p2p=Ne4Z|q1mI&^Poq)N z=V>%b`aF%{C4HX8l>>cn4B8@okSirY{5*}eh@Yp?7V(3mDGB1|Dbz#!JcW9QpQms- z@$=MABtiT#6*N`e5|j?)RCZDS=t0B!3f2?A(acS#UH+i)WiK-(Ni5I|cO zNCE(|t*y8QBoH!#BuF4+21$@W$nlXNfwsbV5(pVR5+u-8ct`?mHIg8K9>!TD(8E0? zK>}@=APEv^3!Eo`wse#P3AE)zNsvHWe3Br6HqDX*3AD*02@>c*I7R|Jh~XuH9_%Iw z66irVMgk#YL4pK&P)q^@a;*m?K>$6_LlOkg{nto>0JDEu<$%kU;n2a*;syw=xM3$hGcw5(Lmjd@qn7fHr~~1klEbQbPc3 zyg+IQppE0Ch5*_)KxzmejNfkQ&nG9`uFuxu>(#fIhZ$Piv_G zer)S*{1fP7Tfc!p`22dh)DS*MGf+eL{JM+O5I(=gS%lABKbIQ92Ve2m5I%RFA~l51 zogJly@VR5C)DS**^pG0D=Z>F94dHVKJSBYYz~B%*cZ8&d@VNuN13s>Ght!Zgw`2Ip zp4&0}WY6t5o9wwAV*>WL*6mIW(BrYzV(q1d>{(kbHDu3P3=`S2wwKh9J!{cXvS%%7 zCVSR)kQ%aQEry-!S&Jsfp0&81WY5}w)Q~-EMQX^NwH{Lg_PEwssR4Ry>$U|_L-O1< zUusC6+a^g3$#WZgCwXq;-~YIstmxtBC((n^H>3Na&mjZgHmm?_h~5!xj^?8`VGqEi z(W|11kON?5bYgUD^lW4SJT2M-SpbfY9usXDb&&_~Z`=pq&B#8i13Vqs7TJVMfa|yu zz)g{w$kNDF%mp|LdjX7#jNn>8k4P6}13U)10k{!6{I~F*!taOQ!utTbu^+%=_#R+G z_>ORMI3Hf|gKq?!@~s^Kznc;8+dBgM`-}iz2R{q`G5B`y-(>`x${hiR1p6T)V8`H3 zf~|wWpeOLRz$bzC0>`5unVO?2L8Jc1oPS&dJV+&X1i| zj_X+RxqK||%B%9c?3Bl3liV%0V;7DL{>uBIHv+og(e%+05o5kIe>NYOx6D4X%RFfw zG547}%`GO6Hv+2uFL)#1Jl_c4VBe{}Q+y}+j`bbo3t&fp!`?&Q1K!uXFLW~rBeg>^;pe1deC3Bs-=bk z+>Dwaz_!)AL~5wO&7c((xEZvf0^?Kr8eAkCgVw0P%@{>0aPugsp#nD#mKrK>^B}3A z0yo1WDsVGMO9gK3FEvnL)Pv!K1jB%{RN&^WQbPsCr}l9wFut&lQ-Se?eVhv1+(zP5 zV0>5~rvf*(lsJ%ydRj=F3fvr$I2E`#AaN=%KE97rft&pjrvf*N#Hql|M&eXpe2yQ7 z0=rhT6NdnMtkoE)I3!rDYKc>WS6?S_YVhhM5~l{QULbL5@ap*zzexR9T_$mg@apjr zrwFeeCvl4K>QNG>2(KP2afmPu9wc$9@almQrwXqgAaSbj>i!a^3a{=dajNj@lO;|S zUfosVRN>VaDXQ>lj1*OPHAadmyc#1#6<&>zn#{H^QdHs97%8goYK#VUrwZTPN#aytq#%e>g>ObRRN+79zoER!hk~lDggC|NH6oOGIaX<*h2#J$Hg<%pWg9^CJWKaP;BZKhGf1C^| z^pQ9jRKV3Gg9_*?8H6kVaWbfYOGpMG3qYIL{J_T6G3_05=2lQqe%qiF`7V-YvmSt;UcOCxEi}<2DWed8{m^W^ocI z3#Un-EJlL_%A)TiP!=5ofm|!=!~r0WmBFfKoCHD^f;b73xm4mLP-daTNuUfaB?*+7 zE^!hlgDXe^WkyS!1j-;RNuUg_APJNiE-?})Qz9`EDAQMBBv7Wa#7Lk_M~RU@nf4MR zfigdl7zvbVCovKz(?((>P^PuSNFd}uh><{f1_;_^F4-#1AqU#E74II7t8ES4DY zvjRg&{Hz!tG2&-Me~A%4D==`x&kDFf{H#Eu#LtR=#E732Xp8tk27?&!vjRgv{H$mjTj=LvD{w$v>G4cmF31Z|AauUSIpXH8;fj_PuOZ=p|N{sla#U&)PB}V#G;Zl-5 zRTwGKrwT2QK2?4bBYmnQM*39ZUr3)ykHkoyW#|LxvkbQ(>9Y)-A${;=e~k252LDK( zWpIo1S%y0c^s%jF@Q?UehARpDxY$4=M*1vWATiQsDI6tzmZB-rXXyxukv>Z?N~F(H z_yzjd)>4cK-~$5{lRZm~#K@kdxWm967n8~u(Q{)<6AOB6`-V0q7SOVliMd+3Cgw+Z z{Xi3QwCrkPqGe|jGg@{uF`t$lOw6k#&i81E^KC5=yq1=JQ+)%<>z0`6>$SYnR9~lM zxv9QZ%UP!S8ZEn<>Z`TvWU8;y(lymfP+oh3sa~w*KvR9CmIF-n6Pxk3XQ~%z*~V1=Ld&B}^(9)iGSwGrDZc83UfaFaR9}Suy=JDVzEDg2 zbGepcsu!TV8mG-ic@?UkrzQS>F3KfnwhZNBbYqT|rKWnemLk=&ytd~GYlW$vi65?5 zY^rByd6B7}uH`sWJxxpWbE=l`cZ!zqce0jnV3L+$Q$0~jkFRI zpe4FBUP~N)zLxO!Je0p|ZK}s<=}Prj_;H!~F$O;!jb3ydMg}x{*TB0N1p#5dH!$xfA;x*&O7+~&;K_e1K_&o z>S!*yA{vk07+vyD&;R>HyCVzWarg?LMbsa)un*uTk%Pzs`2FYqEAS=2jgcjhMUe%O znURT+v5~VQgCqSS-6NeN$6+_X77>5MLN>rp!Uw}|hW8;Kz|-Mv;Z4W~xGuaJ-vj9L z{~N7`Tlgm6Q0TqT8`u?KSLmtG*3g5< z3vherSD{Ai3sB4pcyVZMXd1o?7!?{G8W`#mIyrPw=%@HHAQF<`*TK(`8}Pm08^L|S z=YmfMw_$gHdxGnNtFbz;A{Y@st#+@+Zh@D$^W5p!Enu{JraQ>(i~R!i?SMAe zFF^cX`~T|y81DzXfgJ;O`JeJ{^*`vp8+!)+%HQZ;jy(fz@GtiN!apCo22Svg@t@@{ z!L9+_{hhIIU|YN;;Kz3Zhn-KHgU*}SIq*5WC$J4)4s3AlaGIUGa})LsSn6DbZwJbq zSce9UY{CPDpvA(nM2El2*9=#~%llOh^Ti91{ zw|9s4G4Cet-TbN`=Uw5AdvEkEL1w`P$Sg3?JJx#^0_N}k`KkwD8E%nO55%(GFLYUN z0M^xhfn~k^T4H(ebS<&Gc$$`2r|YL>e^Y&`mRPUrt0k5l`)G-EyWU!2ovxRbSf}f$ 
zrQ@safp&f&)!os~C3uOo`sXy_ruq~da0wbbS<7jrx|^0*NBo(VXrQZ>-Ar{CEwQfH zSxYq3NlPqPcGMEh^Hx5>x$C{O`pJ zP4!Q-9B-568+Tm^K$f4*U!t* zKUYgMr0eJ97*MRAyX8`i<#W#h>n5bD!0!uYm}*@=Uw{jx>*otlkFK9DKs~yCzMz$_ z8tdoo0;$6Cxo4hLVXAcfeBJ<4rR(SOjx|-fem<|AsnYfHc`Z$quAk2fn<`yDpBFM! zx_&;-@l|2{+?^*?SU&g6)p~UOeC{MurR(Q&QJ=1#&uwq2bp3oT>eKb}xou39uAk3s zZK`zrd~PdKrR(Q&TbL?cKcDNGDy*N|bN!}D7trU5uL=w3?p#w5=&&y6WRl0s&*40$$`gvJrQ>E+YWoTa4&&$v}*3a!SG_T9&WoTBH&&$v~ zT|O^E^SXRqhURtoyv$>&boqSFbW^3v=W|AzDqTLGgL-xOd=Bct^0_^ywNzpK+%p@Q zzpAi+4kKx*bOn9(VpF9n=(8u9DqTUJJ>FF53i@pH04wPBZ1g}E&}X9ux_~~ro2k+T z^x2(El`f#qhUdC~J{z9v0{ZNuOw|Z2;khoL&xXfXK(}Y(g6InRY{zr3>h@E;LoTfIbV&0|53cGy?+Iv(PaGz$`SQ0GNg9 z6acd@APRt)C8kONFtewrQUJ`vc?y7;I8OmE6Xz)aW@2C!05jpH0$?WG0|4xqeqR*` z;LbEvh<|&=G*hMFKLh`);Xl3HRB8B6Ki*Vn_)kZ58vfJq?;8Ho@b4P_)1szI!+#on z*YKZ;QPuFDic!??pNi4b@Sh6zH2kNc9u5DgxSbIG?o?BS=(nffFb)4H@Lt1z3J%lo zpMq*N{HLJz8vau-m>T|*k26&o{*#uNDh>ZhIA6nm63)}`pNNa0;XeV*BK+-A9H-G= zic!|+FGW;n^q2NDRT}-JI8LL#6z6O7mtwRu`b*KiMt>>XMfAI+rb>f<{327O!9RY! zsY3AE+vD4s zN)7(;ZA_&G|M=FXQiFedD^sb#Ki)N!8vNt^rc#4{yw6l>@Q?SHN(8@s{t{D(=(o=w zXeu@M&+lm}HTcg*{Tlq|w=|v^q+%fH2Tj$GaCKppc#$+a~xkK zqTfA7DiQpi5s>LhL_drbrc%Ry#57Z>;XeZX(C{Ate>D6@pdk(a5ok!me+2x~@E?H& zHT*}wKMns8Xc*ydk3hp3|0B?_#{Y<*snqx%fqrWIk3c^V|LzD=so{S%s@3p68`Wv} zpB?g5BK+O6O{GTvnYa%%`p<+H8vSRYc8&fs(H%s;d#0(>;6DS-BKYkyP@6{o8K_O8 z{|q>#(SHU`)960~PHFU?0cSM&hvGPm{-NlEM*mQeN<_bBh*cq#2!9yMOr^&Eka4C` z<9|qrsnqx%a;&M;_#cAHt?@q?9%%dz?r$nJ{s&{2H2w#p1&#l~XhGwDu$W4X{}Kci z;@>X8aSDJE{JR36#Fa__z%$5-nMx(VpoOMV2{34asZ;_C8f7Y#0E1A05@1kQQ)z42 z(Nrn{2DLX8N`OH>HWf;MLG4V15@1kkQ=tSH)XG#S0S39ILJ2UigQ-vg48(a#fPpw( z2`~`HDFFtCO@$I*z${at1Q^iWR44%koMI}J00TOj3MIgR7N$Z8Fu-FflmPv&H5DL$ z-TxP+0syf4&o&hr|NSSK3XT8%15Aa+e}A;A@!!9VsnGcE55F}2`-`d2`0tPUH2zO- zX(}}SPit!`H2(WdH5D5F{f;pev$VwT0D#>Oek1ecw~je0fyd!t^B|K6xq@0Q5q=0D#>K z^@0F)FVqVF*u7A%#(yu=tMT6p^=kb0LcJRQJyEa5e^1n_@!u2mYW(*^y&C^LQLn~- zPt>dN-xKv}{P#q?8vi{}uf~5*)T{B|6ZIng?H<>e3I#xq#il|5&|{pbPyqD6{RjZq zJ=){>zjd#FvK9SH^n>Ud(HEjSqK`&5;xGSI(fVjCdM$STn-`secm9V*PmlgQdLrKU zk3_xrYyTsB^Z!!h*~nv&2O@XkJ^xH3f$#kk)uv^KO7x&5m`SBHKPDho{tjmCHV{qWa&htScX5VHFJ74PuB8GI3Y`fUx~7rX-* z{nPl1{W|3HpC6nW9EY#@`vb=bRaiF>g-+nwN^ z>kh$R;y=T_eLr#o{%`!BVb{Lb{m=WK#y9)-;_vS!|4sgC|26(a{<#?BBmW&4fg>Yu zWCV_kz>yL7|NRK~SGDtLlFZMf%)jawnxD!E{#9*h{z-o7Uv)IiPxRL|G(VD4{i}XN z^Fuk=zp6FOKT6cU>L{A;%UJ)aRx}Ulu`Ox7C&&0#wV-)GdiqyIX}+uVMrgjR2ZU+9 zrS*nrzNrTUX}%#t{Hp>qUzaoet6Z9|$pHT3W*G(6^@RX#P%G`j6t>J|#2!O^azhiH4i5r1^xJSJ2$9tzAxYn^yKqnp@SK z%V<8T^Wl%xn4(SHqCWf?<|_Pt2vYA zT3mpp88mNGAEwh>qpeM&d8^htmF6v4*%X@1TEk?TtM!0MG=HU|Gm+-aTG<4etMq_U znoT+m7tma(PL8KpQ1g76c{R_YnbW?FquHpfjis5zz0)*?W=5MBO|wCd9Yr&(gL*E_ zdNoJVyh*!x4$T#M+6bDyQkiS*?|wL9$J6jG_O^62GG1l59m+xYHjUwnpf$doe&f2r)cLp(44G;dOXcZTJLc*C+cbKX-?1seoC`c&7aV`Kx;Ub=6D^4AJaTv z$DzpI^K{IL{2iyB75O_>e=YKNjMh-(?`Y}lZz}S4lr~)C??^qM$lr6ch9ZAQ=&wco zo~^$Y`FoZ&T;%VWT0@b)!__SEcbJ+*{tnf9y~y7oT3M05gSE0Ee@k>6iu@g znjTx^Z$GW9$lp`-fFghUYTt_d?W4aI`P)lxm?D3BDjbXa?SUFr7Wvy9r>!jV_Y`=x zvdG_)(V>+^{&qvdD~tU787|?$g=J*~*!leFFR?mw3 zZKtOd`Fo6-MgATw$LRb2r+bzi`TPI>fxrLpZ5L-G#t^Zi~P$JhA3#$wg!cyT<;cE2-Fv?GZ0|sCPkd>AEb{IN zcHaBM^RDM*&o0kzJ)1qh@!aakc~ZFP_@94eaR$%^8n?1I1Nf2NqQx0NYc-2AfTJ`L z%ZRsDdcPNE04+5(iZg%~pzX@y3?Qth6=wh;jl|*%AgDDIX8!izHij~-i`0od9wX8`s$(3h2*0oY$_hd2YUztT|U48Zam;w*zak>oB`Mev@*^BY@K|szordy24KIcr*Q^gzoHFu24KIeO>hQa@7Hc}24L?~*Es{QU(#PW z1F-k%mFEn=eo>p?48VR~D?6E;-=o;%48Y#4zj6j(Kc|&(24L^fUpWJ?f2Xc<24Fv{ zm2n1OKLe>*$r*sH6ZMsx0oXgVh7;Mu)A}oC0QOV*D`x=qllm)X0QM95D`x=qZ`I@s zzI)>;2b&H(IN&{}~r0DCoty1*HLtyBF1 zX8`uiaJ|48fV~PAw!j&H-Gs^toB`N6zb|kGfPL$a@fSD)up2R7E^r25XY^Rk0PF@m 
zfHMF)tqpSqVAms#3!DMiE3{$G0I-KSPU8%~*6DkJGXOh{GFwh75O_;2dc>58TxCHztgqeB7djhPAwGqtMmIpk-t;4 zi6Va|X}v}MPSjtE{GFh8Ns+&$Y8Lr>fmT-J?|2>5B7e`MgE?RfGiaGdlvrADHQp8rv6&w?{KZG z$lo*cY8LrBRO>DBcZhbg$lntEwaDK=`fHKD1NE{L`8z-xF7o#@?R=5H{q)!(e@|5> zi~Q}YcS(`Iebg-Sx3>;dk-xpPwIY9es=r14;w3|Wp~&A;wB90rPu4*#^0%A*TIBD~ zw8usMcGX{t{OzPSOp(7G)tw@LPtt~q{MDI!p~&A8^vW0ct26mRk-x{O4@LgA*DF-y z?@zSeB7cw7(~A84u?}jHzwLBLiu^rBdtBsiTdlXq-!|$_k-tAue~bKWEyiCc^7kk` zpvd1=+T$XBTWV`X{Eb=#`7putMpkDbRe*TjZ|?7b{=nuML0m`GYkiHr<;h>{&4L5SUu>(= z+Mg$Xt-omv_Ay58dhYmDB{#t+5ddXjGuYqI9 zU+Yg=FZpZfJStEAT8Ge!Jo#&VtOt<4*j{0PKTrN*|AI69dGgo#qaHy1Vy6K#LH=6r zYs2KPbx_?Qf35elVe;3~c~qYKwcf?}=E+~{9qlpsYyClwC4a5AwFdIndP`3uf2}vw z9r72+so^jAYrU?gk-ye!>I3;}y{e~?zesV61IS-%znbJPKL5i3_g9TD!Fd@)sXa;Q;d2`aSOTJo#%qtG|-J)-$+w^5n0z zQ%@s*tsUAh`D;C`9U_0Nr*IqP$zMxnOnLIx`mJ`J{Iwp}%E(`9yS7IDT90WBC-L0*Wzt*qSB!8{D^o}BbEu9eM$zP--L~G=)bvu+H zPySlB=|GXc)*2lh^4GdWFE{yX{Yozj`D@)Q7wF&rLp_a0{{H`e_wWDhc;Egv&KjrS ztZ=HGYn{um0x;bf@0^2u`uaNE@Sc5JC*t_92Jnd-Ku*3r@-%Yv--kEsS4$SF0L$=x zJr+D=GTyE~QwE6c(Dzep4Hpp0{MCGD-bOaQ-y=K!7OVuUHBF}8#Le~Co$o?3)0CS3 zgT42FvZ`3SMXPGBz1P|q$(bg{MsiSsP0l$eLD>E^ z?gb3;_w;v=-{!054>-R$-#ec=Z_C{PyqEuWr_R|YdGzJZMb3K3qF?4LaArsj{V=DW z)6Ge98p}KRjocCV+V`RFb>DNoM|?+pcq9K_UzM-acdjpA^5z%%X86X+@A38ab@H|F z)${qyzsxu0L-U$>#yn{5FbB*&S%iQ6dAlhK)TLO$lm%)J_CpUSTy+UHB4vTRdrDR_ zWr4gcO7=Qsf!c)~MOmO~04WRP4MqYf3*_BJLIxe_Y$r-73nX1nAZ39hILh{R-jyPG(2&Oa|O&E4G?=-fbtySow4#y}r;cSE3a0`1-14S+TT znz_5{1Ib-=clSv^g@GaN?s`D$0u9~WaiBGUF7ECaP(dK%?v4WG!%GAx6-ak?hk;h3 zU3smdJazzo@^A$5{-s?HNVK**69SZjo(6zcz@Hx|JCNz_c7U>A=L1>}WPp|dX`m$R z6i{Yhkz0En&=M)DJr^h;BdXm9v^X%vtvv^5q13J20JH#TJ{U)vf`WEpe$W0Gfrm`9N~7+^tOk%|O{|py?9v+EqYuqui~{1DY!F ztIY+Pgc>V>CIICCO_W&Gt^gVjbQX}@7k6v3f#hDeTbl(m8Y5Z`GzxW>0gXf(Ngxr@ zZfzz|270;_NN$L`wM&48qecQ~7^1cqXsFC@?IIxA^=|E%K&N6H3xQ6NIjUU%B&yo2 zoev~;%H7&|Km!63-P$vN`pN9qo(|LpXfBZ4J9lg60Ex7AYi9%XLa$~4os8(t1nLPi z1E>f5O$U;@=5FmYAh}cS)=mZL1}{^9y28t3Ah~1i)=mQIj5(SJ)CoP90Mrql#{+eM zzi~kAF*9R<+F@*CfYK0y(Li#0+^ro2)Eb^g0<{7f0VF!xt<3D1uM> z1BEf7en27E^#ux|jXppgKJ5(@Kx@5#{21lQKn_q(ARlU^0~z7?+8$0&YNUI$q(t+h zKhvuvC7SR3?Y&x3qWR9>+N&icnr~4`N;F^j2YR)nMDwLzJdhI27yh_cOG-4K`KNic zq(t*6Y)Ogc6Mr+WmXv5d_V@8>Nr~no_$4Kp_hCRvH1EMLDbc(O15%=S$DiTVk`m3^ z{xM!HDbc*;AL7-L63v_NOG-3vz!^_U=5=(3CnfVLJn*DsUh#*#TAq~5%V>=!CG(Ph zqF2k4l6eu%cv3RYV{AMrndjh_CnfVN#>SJ9dD`F5tK~__Jc%ac;wEu^0$mRiJ`QIg z!pC3`Bz)Ar$gA}TA3?(|;lqFd!UxcrpYT37a|rK6cYK8Rz^@^^8@8J8sDHLss|fFu z)^?poc!vaP*SUnZi|wwBgty5kcAZ0bNSy83KzOUPwrf4%K?(J)Lc*J5J?vUXcq2-A zQZfgmwOu?ZnL7C8Ny*$GD{2={O6Gcr&Muym%(d{#lajdxkS8T`wM@V+o|Mdf^noWO za}`?SNy%IZzdR|KeQ=h;K3oA0D+n)#!C8ctNpyB)6YhnxEW%4+u$*uY;;@YH5_B?2 zcrjom;cmdCgtZvk62e_*EkRf#YiHMD!fG_Jh_DK_XA)K-s0#^q0xlrjfnm-k+>UDV z2rJmc2i@UG$!vhZB=%uFTAN5%h@eg&T!%=GCtM4IafE9C#}XF6 z*%-oZYNor=p10|`Mg*Z@>nwf}$<=+{IndRT<=$Yl; zX(+Y)I~5)*|4#9D@pf7Mos5Pp|4xK~<=+YLVEK1EN-h76L#gH8v1r)x?-+Qn{5u-Z z^6w}>%fBPA)-C^Lz?tRW)8NeV?{LJy^6xN|TK*jjTg$(L;LP&xKp0s59e}Y}{_T%a z%fEfGU@ZUk5pvvR`L{PbSpMyWYL50-y}vVLnU|9Y^s{OkJL|NZ^{|GE7C zzrX)C|99X2*H{G*z#_K_z%O)BV-v-3PArG9;g8HzW}KK^si`w3PAsYy+H+_e?~Pb0R0p892J265v@@H z=pRsx3P67k3ZMee-@*eGfc^$MkqSV6jZ!KA{S_b;fc_GFpaRfepfxH0{W)5r0??mf z6jT8EQ`k}g=uglv6@dO2&Zq!1#CHu9fc_AXqyo?%z<>%szYhZ{0R0}qM+KnYLCmNC z^xLqd0?==vVJZOqCaO^Z=r_of4~_PfPMkANd=&vM=2G6eh$v40Q9pcr2^1Tqw7=v`YAZ00?<#&rKW}oKtrh4 zPyy)2;eiT3KZa5&0R1RRsQ~mND5V0>52KU{KtBXX1)v{9bf^II1F)q6(D$J`Q~>&3 ztW+uheGd$%05s%#4HbYsic%^7eHZ4K3P9fp4^#m92qH-Zpzpxgr~vd~c<8|R-j3){ z0qEP{j0!*>LQtsy^sR741)vY2zf=JF7OY7s01c^MLj|C3gaH+RK7djx09^+UQ~>%0 zku)__0Q!0~K?R_%gDn++z83wZ0?<(XHBptoU;sQ~m=aaK(Qptnf1YAOI- 
zhEgg3T?$AApf}5GR#O4!5{YCr6@V_5K2%cy=!>MaYAOJIq3o7wDgb?f48EEQKo`j_ ztfm6c=ff`*fId&=qM8aopNl?F0qBizMg^cZ$U3X00?>tkQ~(-MznTg_uSFA706JfW zSxx@cDR?0N>ec8D`B$$(HS({{10?_Im9Vw^n*&?Rzu6eE<=-s$wfwtGLS1e7Hwh1x ze=`vh%fCzE%<^vn&Mf~fMxZSJE<&m0-!tLY^6x@fsnwQ$q3^3L|ISAfmVeKHU(3Iz zqtx>6Tv;X6mVf5}TK=64XO@3wA*hyrXUgiUw){H-{k8l%9TQ;rcbY7XYRkV<(O=8I zQ($ZPcM=-5{5uf_mVYN;K$d^U$%d@9{5w|8In|bb$Dq{m?`Sx){5uk}Y58{q{969a zz%VWU4o9iw-(e`V{5urWV)=In8n*m97=yR`I|#Oxe+QzImVXCem014m4`}(f9|C3h zw=Y_={M!fpwfx%)i`MdQPx!U`n~tDb{_TNM%fH<*V#~kXP-^+NGZu{H-%jX`<=>8I z*zzxwe6{7@_LzCgzfkhkmVeXG2g|>0FhiDqTfwj8-jM(z;NodXTZ#_WEzi~jzzfnNTzY$E9 z<=-%7-tun<(DH8(aj^XBVL+CDU5wcBZvg$Z{Oe#8mVbS)wft)^S(bk_`b+**e_~aW zf7Krt8~IoL4g>P9`VDJ`{HuP22lB7_1+9^P)z7F#{#8E#l7H1d5eM?G`Vmc#f7K80 zOa4{3M_En&Ro}rc`By&{0+7U-gm1w~G9$K9mtxk$=?(uqFSh_u+y3 ztKLI3@~?Uq-68*~cci~nuYn9!JCEU-cLUME+He$}*}V|0*b^D)O&-2!kj8st4hW{Hq>7Yvf;bKRl3s z)qQA!{HyLoAIQJzC_IpV)m?}>`By?7fDIg)RA4?LoujUv&vaO#W3D!{dAr_ufi?0BKq4?VFgY+X zFgS2>pkts#;G}>nPXYYk|IGi6|0VyE@=m?O{saCikNpzg26>;}a{ogAOnIB$aDRV) zcX<+^i9hQ1$(!`PaXxb1koV|4>f9~Q0$k@@=2SadoFZqfv(j1WoZ(EBrvXlt_vdwX zTFEc)1$}?|{^|Qd-ktY~?`hwIz9aJHy#2mQd^>z4zK!zjf42Pk-fZ7QUxsg>FWuMP z*UT6HfByczin2g$!+xMFP|)60lm+sRunez?vOqz5S5X$oyIf>pQx>RVPy}Uxyg*KN zDrJGZWk$9UWq~RJ-%u8)^Zf(esxJ64?L7Z9x2iMHx&92dsuR#ge;>E1BhWei_HI=N zpbh?JZdH3AxvTD0wF4^j4{@v70b!Qv(+J$GBBY4Oob}Obu876h__osKL|#5!Y@NQv*a> zyH!jLnCp+r11zwcgAp+`U^e2y)PPy2%hUk5SMFB%U^fG0ObwWhh%+@nZj`%KObwVS z@vCHNz$B?r$<%-eKuisoD6y(!YQT6PrUuA;akrAG0dgGzXci3FQ5E<=OGBqHAPnjAJ#)z025P}_31A=ISsR157WokeGtuZyg zk5Mu;zyV@vfDbj88esel1C>MZg}bm#C9l-xM<>&()b##X(CG#pg@T6p3aY9}tPfF%xw8oQ?dC8gRRq~`{UW7BAl+5!O8&68+Ir!yC z$vlg(@uXy)b{cw>JSmwc(F9LQ<_UDYCGOCe$Kk96;bSmpPWY&^$g6Bd_y`(qO878f z6T%13T4Tcd;H(khz35Iu!h7Ji0pZ=StxtH=neA1cM0h7!t4DYT0u?8`9kwyT+c1hK z;UPGS5Z;Q`!h{DA>JZ^gSPwzM8&S%Wk~x6Zcv3QT@XM2uxdAJRCna+|qQjGtxfXtT zQZm;7@}y+0#su)BWcH&EJSmy0q_v$qDVZz9?@peS%sz3plP4u}g?QL`4!d(X3^ov6 zCehiso^Y=?+gV6>DGb&T?vXg`TuXR~baLm}gck#@A>0jEKv*kd+nG;0 zTuoSwCRPzv!8VVuQi8fOmvATGO2Qp7%$+%e+fi)=VFmo2MOco8vkAAMlqV%qh7t3m zWHuutJSmw?aK@98xk%<>Cr?V|LYeNJJSmwAFif75Oc4xtQZnbmFHcHlBUFq3d4Iyr-Ih3tTx(+ST)=`_MDG(44XIp7q+Wq^|jGZB(WgiD=y-p+}HOHew2 zFo8~vCtQqbJSmw)Xo4ptb0!RUQZftBGoF;pd<>o^B{L5}(m z84X+hod^TVzZ2lW^6z+*TK*k}Qp>+%(Xi#;G4NpdcQl~o-%)^;e@9@gTmH>}Gt0lH z!I|aX;fRCf-(e`V{5u%7mVXDqndRSsFtGeP0AsWK+aINtfBRy=SpMxJ73Gj^8c-%hAz`L`qdTK<)1 zg}t4Yf7=6E{%r?n`L`{au>6|_50-!1z`*ivYfP5q-&U}-{M!;Sv;5lvJ+u7V9MvrU zHUqT$+XT?^Z(}fw<=;jqwfx%<(DH8s#KH1!eY9rz_ayYp@^3w?b<4j|RI~gWK^!dq zhT*~TZ&23n4$Hru*zU0W>x#h+%fA6>Vu$5lzf9H+%fCK}?+(kq2DX-ewKTkg{Hy<;y=Z| zjK3d$CH_?W{`l?j8|2OZ)$y|UdGUgHcKpov%=ozY(0HGC=XlHbN%27J-}1Kqk7BRK zo{c>eI}*Dwc4e$Kwk>u+Y;7zjmWa)XO_VqM_m6dpwTU&11*3mNe~@?kzZHEU`e^iM z^p@z=(M#m5{>9Pt(Y$D8^o;0~=*Z}xXnM3=v`I7))sdefUqs%GycBst-s68La$RI^ zq%yKOvN4j1ERQUROplC-oEqsRZ|`p&iAS99ui>x5AIN+ApAJ6|J{+zKUlFbeZ;?0l zuL++OUKE}c9v>bS?i=nBZWXQ{c0<2~z6*UEdPCmQ|8VHe&`qJM2CRl#I%UT|t~ zRB*7oiNAfYX)qcz-p}5b-h1B5-jm*a-fiCX^8WoQuhcu&%lERph29K#^ZpR8x7W#Q zA#dIHyZ>^(aX)llbDwb^l>Gk#?ml;yyH(z`f3~~AUF^=5?Em3zKRN%mb{n`};CFe$ z{wI?A|9s#PIsM-p*e~zamlyvA3MKP@sk~Qza$rPYV4z2!ZJ===EP4O`^nWfV{}<(L z`uF&6^g{{O8EIf0=*2f0}=^{}lhplJnopACpu6FV0ua`_3!QQ_lU8@qdGJ zxt#gSob#LlC)+vGnJM}HL!CZy;%_PM%MbYe?fcgEk!1Tn>w8Gf`#1Wo^ws*d`7ZFS z_2u{yzBzK*Kh4+Q*Ui_)*U%RdpTd?RK!1XUDFXDza7GcJp}%)f1n3X3ohbtJ z2QZ)r(C@>5B0#@~Jxmdx-@(SE2+(iCmLfpEg@!2t^qZ(g5uo2d!xRDfbu>W{pkKo< zDFXDX=sHD!eg&lz0s3Xo4@H1}2~AK0=zqW&MSy-mm~#h3fPNmOlNsOV;EW=7z75VO0`wsSl_Egj3TG4n`XKsC5uk6unxqKO zQ2RS50`!eANN0Qxpp+s&*TDltfWARg%?^qHeLb3>2+-HTmLfo3i~dpsXbAru6ao5b 
z^p_$)?+2s^&{x5LB0xj<@1O|K`_LLifW91|rU=kb`#UHC^j>tGB0yh?iKPh8d*F;B zKttm1pa{^l@JkV(cZp=)K@p&m*YfW| zS*hDC|3cz#xBNRFO<4Xt1AZ<4o{mz>zjI}kY`6S72hj5GY&f(0J4=GP-SY2DS$*3r z|IR>vE&ooJ3D|D=cbY7X?UsM1qQ91Zr@+?o?<6#A`FA1=EdNfxfGq!xlMT7u^6ywV z>TI|CI|ikee@DZa<=>Gqo7*k_j(}gwzZn>&<=^2bwfs8_rIvq(Vp=T!4nf0~e+Ogm zmVXDq*7EN_bkg$g0IU+rzx@F%|Mo+mEdTaJYnFfepud)XdRZuz$-{967^M^G*Q z_CTrS-|iT(<=<{7wfx%|3&!$qCv?a1Z$~t2`4?h-yXD{Zn0d>;5cAtD|E8f2mVet| zhAjWKf?vzOEm6(#Zwmy~@^5p5#PV-5jM(yTQ~0&~+Zf%k{M!iqwfx)A@q61X|2BYu z<=^@kvE|>B(3<7ndVrRHfGq#I7_sHw z0QzhB*TE<(|N3BS`PX2wEdOfsm;9^##HuF$sy{F`@~`?G2IOD$8`ci_SN#ePT4K~f7MqA3HeujiI|ap)fX@z|EkZ?1o>BehK9+%>Qi)w{Hs1e!{lED9kreOt3E<} z$-nADjF|kZK7cLxSG^Apj{#B2oVe+qfOa@dz{#B34GO8f|Dk!E3 z@~?VG246w`RS&`$`By!F*2ur=et01Ns{7Cc`B&YGK9GOaQFtK#s=FlW735z9nN&gk zRY%}~{HyLj!{lFe7#_&K>Ja>rf7Pu*D;4Blbr99azY2n=g8Zv)hAsJ5-GtW2zv=)W z`B&A+imD+0DhQ$q@~^rMt&xA#HF9OBApffU2o(8OU4`h7f7L!rH~CjxhG`-Hs=a{Z zUv(*L$-in38Ycg$OE6;cueunvf~Rw6Q$%| zwF8j+tF|K!Y1{HwMIBUX@q6+})2`B#;|1Nm1KV=lLP@M z{HrcNf62e92op>GRp&bc-3mUTc%C!Ot>6=i=QhpL zsR1)k#?*l6U;(BE$c=Khf~f&h5kIB|OhOH&2222AYQRLqim3tPftVT~_r=``rUuBp zaJPb~0i!V@rUr~cU8V+%L>o*E5EboKFf||pJ!NWu+z@vwm>Mt~HJBPO3{hihz);LD zQv-&;3sVD5m2s3aHQ*GPqjIJOh@^JQnHnH>%H49N1`Kc}y5&p_=qIyZ&eVWDKuisg zd*^OBQv*a-yX8y`=q0@>XKKL765Vp92J{4CYCsS8V`_lhHFwLI8X$Md-EyV|bb}YB z26TlNrUu9zbGMwS0i9)z%9$F_2|ZwHKu36HYCs40V`@NqnVE8?2DHQ2m>Q5KF(_wh zfZQH;%b6O`8lIUN&`#u0kp={06#{_)Bp#FsR2IJU}}JI8V1Uz;0t%T0+#bKZGQA+dgZ)Ko9}(? zy>ecr&3C@mUO6w*=3A8VGHt%{4fM)+nKobg!~-wW<_llkE9Yg}eCC_xmGd%fK7}nW z)8-RjGq0SNY4fqKk5|siwD|~rd6_ov!+@7*^B(;2GHu?40WZ_$9bbl5&daoU+c(B5 z=VjWwas9O-;g!Ryz@J5vKm}Cy1H6D{p z9sKf`WNyHU;xWlwkLd82WUhr@9+S*9fIKFdt1$sQCYk-{1CL4ODzwI9lDQIoc}z0< z;Ecy4a|Jv^*`3Q_5Fxw_(Fqgog|iUhr7#E*?m-+p!b{Lem+)f10O4*xKVdD#<`C{e zYd*pntQ|vGjV3f<6>Jq@r37``d4xLw&n4U;!`!x!aJy97b`D{M_}#XFuv{A6ww`bs zN_k8&WisMzJSLgV5|V8^CYeoe#$%GXNakW2k4ff2neJ^oCYcLlnA>By$eB!()=!00SPA%zCu8lHDnkpl-_{Tqlv-wt{di49+541DH)%0B2c* zDcO44mJ_Z<9F`I0p@}46F8Yv3xDuUQO1MIHz_ulXXQ4Dfn1zNH6D|i_M7Rv_Ou|fr zWFg^F-#l;I0>ULIollrRC+86^Ml~Lj%px?wW0E-&20SL21?U-%NoGC<&tsCAhoJJ9 zWX^zJ9+S-JsK#TGnF|9Rlgu1I9+S*$c;GR~%t9P^OfoYO2g|=R5Hria)6p}_ztd1^ z`FAQjSpJ>j>*8&*{5u&9TmGF01Ixb?;KB0mc$8ZH9fwlOzhlv`<=-*zVEK16pyl6D zfR=wp$XefK`8Na3EdQPcXO@45BMz2-hoRK+?_k(k{v8BomVXDr!1C_^jLq_If0SDO z?TZCt`L~ad<2K8`z2U*~Z!c7{{M!>fv-~Sbu--Pyzdg{f<=^g@Ld(D1;KA~57kIGz z+Zj8{@^2?pv;5l;el7oYK+i1yN|vp+&GK(MK+C^v(S+sSGxdAh?(Wz7U-Gf-{z=h`L`LM<=-ZNmVX-y!)&ws+X$tWe;Wc?{%wFbSpKb#)-3;? zgq~Uct%tR4`8SGcmVYCNgXP~aJXrn>V*OhF^3H}@~{3kJdl6&zW~X<`d2hT{?)&rXXIb~ zGpdn)^-nV5t>j<*qqMe_{HuSEYFo*_`g@sxt>j<*t$5f<{?*?|9JZ2w_17pR|LU&* z$-nwb>BCm?ul_<>+e-e`pQAPMul~%}1?T@$&F16Z|Gyf4CjL}c$k*wwL1V%uZIvGuXM zSZ3^u*p%4F*q~T?tX-^0EE3bvpQ2wx-;KT$eIj~q^icG==-y~$baQlLG!I+B_PMI+0%^Uq?QOyc&5r@<8Npq%Lwrq$aW@a(-k@fNd1T#{w@4n_~Y;!;pf5+hwlvE6uv6FJ6s;VFuX3jGQ1=_H#{kv5grik9!?84 z3Wq{}hJFlv7J57MkI-YGyF&*<*M#4p#+k)2zFAG)$OM~YI^MhHzg~1uYvB4q1-oZ}67QuQ!zxOZi8}CE! 
[GIT binary patch payload: base85-encoded literal data for a binary file, not representable as text; encoded lines omitted]