From 893d2d4cd2546245e26f057df0f2f1d835368f89 Mon Sep 17 00:00:00 2001 From: Massimiliano Pippi Date: Thu, 17 Nov 2022 11:09:27 +0100 Subject: [PATCH] refactor: Generate JSON schema when missing (#3533) * removed unused script * print info logs when generating openapi schema * create json schema only when needed * fix tests * Remove leftover Co-authored-by: ZanSara --- .github/utils/generate_json_schema.py | 13 ------------- .github/utils/generate_openapi_specs.py | 7 ++++++- .gitignore | 2 ++ haystack/json-schemas/.gitignore | 3 --- haystack/json-schemas/generate_schema.py | 23 ----------------------- haystack/nodes/_json_schema.py | 24 +++++++++++++++++++++++- haystack/pipelines/config.py | 6 +++--- pyproject.toml | 4 ---- test/pipelines/test_pipeline_yaml.py | 2 +- 9 files changed, 35 insertions(+), 49 deletions(-) delete mode 100755 .github/utils/generate_json_schema.py delete mode 100644 haystack/json-schemas/.gitignore delete mode 100644 haystack/json-schemas/generate_schema.py diff --git a/.github/utils/generate_json_schema.py b/.github/utils/generate_json_schema.py deleted file mode 100755 index d2eb3c5533..0000000000 --- a/.github/utils/generate_json_schema.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 - -import sys -import logging -from pathlib import Path - -logging.basicConfig(level=logging.INFO) - - -sys.path.append(".") -from haystack.nodes._json_schema import update_json_schema - -update_json_schema(destination_path=Path(__file__).parent.parent.parent / "haystack" / "json-schemas") diff --git a/.github/utils/generate_openapi_specs.py b/.github/utils/generate_openapi_specs.py index 0479d46413..3010e69551 100755 --- a/.github/utils/generate_openapi_specs.py +++ b/.github/utils/generate_openapi_specs.py @@ -6,6 +6,11 @@ import sys import shutil +import logging + +logging.basicConfig(level=logging.INFO) + + sys.path.append(".") from rest_api.utils import get_openapi_specs, get_app, get_pipelines # pylint: disable=wrong-import-position from haystack import __version__ # pylint: disable=wrong-import-position @@ -17,7 +22,7 @@ os.environ["PIPELINE_YAML_PATH"] = PIPELINE_PATH -print(f"Loading OpenAPI specs from {APP_PATH} with pipeline at {PIPELINE_PATH}") +logging.info("Loading OpenAPI specs from %s with pipeline at %s", APP_PATH, PIPELINE_PATH) # To initialize the app and the pipelines get_app() diff --git a/.gitignore b/.gitignore index f40a950b93..a425b6e2d0 100644 --- a/.gitignore +++ b/.gitignore @@ -150,6 +150,8 @@ saved_models *_build rest_api/file-upload/* **/feedback_squad_direct.json +haystack/json-schemas + .DS_Store # http cache (requests-cache) diff --git a/haystack/json-schemas/.gitignore b/haystack/json-schemas/.gitignore deleted file mode 100644 index ca458b7d3f..0000000000 --- a/haystack/json-schemas/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -* -!.gitignore -!generate_schema.py diff --git a/haystack/json-schemas/generate_schema.py b/haystack/json-schemas/generate_schema.py deleted file mode 100644 index 427c2a85e5..0000000000 --- a/haystack/json-schemas/generate_schema.py +++ /dev/null @@ -1,23 +0,0 @@ -import os -import logging -import sysconfig -from pathlib import Path - -from haystack.nodes._json_schema import update_json_schema - -logger = logging.getLogger("hatch_autorun") - -try: - logger.warning( - "Haystack is generating the YAML schema for Pipelines validation. This only happens once, after installing the package." - ) - update_json_schema(main_only=True) - - # Destroy the hatch-autorun hook if it exists (needs to run just once after installation) - try: - os.remove(Path(sysconfig.get_paths()["purelib"]) / "hatch_autorun_farm_haystack.pth") - except FileNotFoundError: - pass - -except Exception as e: - logger.exception("Could not generate the Haystack Pipeline schemas.", e) diff --git a/haystack/nodes/_json_schema.py b/haystack/nodes/_json_schema.py index 218140c0c5..0c1946f86a 100644 --- a/haystack/nodes/_json_schema.py +++ b/haystack/nodes/_json_schema.py @@ -1,5 +1,6 @@ from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union +import os import sys import json import inspect @@ -176,7 +177,7 @@ def create_schema_for_node_class(node_class: Type[BaseComponent]) -> Tuple[Dict[ node_name = getattr(node_class, "__name__") - logger.info("Creating schema for '%s'", node_name) + logger.debug("Creating schema for '%s'", node_name) # Read the relevant init parameters from __init__'s signature init_method = getattr(node_class, "__init__", None) @@ -405,6 +406,26 @@ def inject_definition_in_schema(node_class: Type[BaseComponent], schema: Dict[st return schema +def load_schema(): + """ + Generate the json schema if it doesn't exist and load it + """ + schema_file_path = JSON_SCHEMAS_PATH / "haystack-pipeline-main.schema.json" + if not os.path.exists(schema_file_path): + logging.info("Json schema not found, generating one at: %s", schema_file_path) + try: + update_json_schema(main_only=True) + except Exception as e: + # Be sure not to remain with an empty file if something went wrong + if schema_file_path.exists(): + schema_file_path.unlink() + # This error is not recoverable + raise e + + with open(schema_file_path, "r") as schema_file: + return json.load(schema_file) + + def update_json_schema(destination_path: Path = JSON_SCHEMAS_PATH, main_only: bool = False): """ Create (or update) a new schema. @@ -413,6 +434,7 @@ def update_json_schema(destination_path: Path = JSON_SCHEMAS_PATH, main_only: bo # commit from `main` or a release branch filename = f"haystack-pipeline-main.schema.json" + os.makedirs(destination_path, exist_ok=True) with open(destination_path / filename, "w") as json_file: json.dump(get_json_schema(filename=filename, version="ignore"), json_file, indent=2) diff --git a/haystack/pipelines/config.py b/haystack/pipelines/config.py index 3cc19c9d97..364219f237 100644 --- a/haystack/pipelines/config.py +++ b/haystack/pipelines/config.py @@ -14,7 +14,7 @@ from haystack import __version__ from haystack.nodes.base import BaseComponent, RootNode -from haystack.nodes._json_schema import inject_definition_in_schema, JSON_SCHEMAS_PATH +from haystack.nodes._json_schema import load_schema, inject_definition_in_schema from haystack.errors import PipelineError, PipelineConfigError, PipelineSchemaError @@ -295,8 +295,8 @@ def validate_schema(pipeline_config: Dict, strict_version_check: bool = False, e "and fix your configuration accordingly." ) - with open(JSON_SCHEMAS_PATH / f"haystack-pipeline-main.schema.json", "r") as schema_file: - schema = json.load(schema_file) + # Load the json schema, and create one if it doesn't exist yet + schema = load_schema() # Remove the version value from the schema to prevent validation errors on it - a version only have to be present. del schema["properties"]["version"]["const"] diff --git a/pyproject.toml b/pyproject.toml index f735d368c6..998a271533 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -239,10 +239,6 @@ packages = [ "haystack", ] -[tool.hatch.build.targets.wheel.hooks.autorun] -dependencies = ["hatch-autorun"] -file = "haystack/json-schemas/generate_schema.py" - [tool.black] line-length = 120 skip_magic_trailing_comma = true # For compatibility with pydoc>=4.6, check if still needed. diff --git a/test/pipelines/test_pipeline_yaml.py b/test/pipelines/test_pipeline_yaml.py index 3b7d7a4ee1..c2cbd0e8c8 100644 --- a/test/pipelines/test_pipeline_yaml.py +++ b/test/pipelines/test_pipeline_yaml.py @@ -42,7 +42,7 @@ def mock_json_schema(request, monkeypatch, tmp_path): lambda *a, **k: [(conftest, MockDocumentStore), (conftest, MockReader), (conftest, MockRetriever)], ) # Point the JSON schema path to tmp_path - monkeypatch.setattr(haystack.pipelines.config, "JSON_SCHEMAS_PATH", tmp_path) + monkeypatch.setattr(haystack.nodes._json_schema, "JSON_SCHEMAS_PATH", tmp_path) # Generate mock schema in tmp_path filename = f"haystack-pipeline-main.schema.json"