From 6b018985cef04d1439d97a8015680adbe5d30e63 Mon Sep 17 00:00:00 2001 From: Stijn Caerts <stijn.caerts@vito.be> Date: Tue, 12 Mar 2024 15:01:17 +0100 Subject: [PATCH 1/5] implement the use of index templates (#208) --- .../stac_fastapi/elasticsearch/app.py | 2 + .../elasticsearch/database_logic.py | 34 ++++++++++++-- .../opensearch/stac_fastapi/opensearch/app.py | 2 + .../stac_fastapi/opensearch/database_logic.py | 38 ++++++++++++--- stac_fastapi/tests/conftest.py | 3 ++ stac_fastapi/tests/database/__init__.py | 0 stac_fastapi/tests/database/test_database.py | 47 +++++++++++++++++++ 7 files changed, 114 insertions(+), 12 deletions(-) create mode 100644 stac_fastapi/tests/database/__init__.py create mode 100644 stac_fastapi/tests/database/test_database.py diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py index 0d896534..8a4ed2f0 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py @@ -14,6 +14,7 @@ from stac_fastapi.elasticsearch.database_logic import ( DatabaseLogic, create_collection_index, + create_index_templates, ) from stac_fastapi.extensions.core import ( ContextExtension, @@ -73,6 +74,7 @@ @app.on_event("startup") async def _startup_event() -> None: + await create_index_templates() await create_collection_index() diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index 87ca8916..49c44a60 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -171,9 +171,36 @@ def indices(collection_ids: Optional[List[str]]) -> str: return ",".join([index_by_collection_id(c) for c in collection_ids]) +async def create_index_templates() -> None: + """ + Create index templates for the Collection and Item indices. + + Returns: + None + + """ + client = AsyncElasticsearchSettings().create_client + await client.indices.put_template( + name=f"template_{COLLECTIONS_INDEX}", + body={ + "index_patterns": [f"{COLLECTIONS_INDEX}*"], + "mappings": ES_COLLECTIONS_MAPPINGS, + }, + ) + await client.indices.put_template( + name=f"template_{ITEMS_INDEX_PREFIX}", + body={ + "index_patterns": [f"{ITEMS_INDEX_PREFIX}*"], + "settings": ES_ITEMS_SETTINGS, + "mappings": ES_ITEMS_MAPPINGS, + }, + ) + await client.close() + + async def create_collection_index() -> None: """ - Create the index for a Collection. + Create the index for a Collection. The settings of the index template will be used implicitly. Returns: None @@ -184,14 +211,13 @@ async def create_collection_index() -> None: await client.options(ignore_status=400).indices.create( index=f"{COLLECTIONS_INDEX}-000001", aliases={COLLECTIONS_INDEX: {}}, - mappings=ES_COLLECTIONS_MAPPINGS, ) await client.close() async def create_item_index(collection_id: str): """ - Create the index for Items. + Create the index for Items. The settings of the index template will be used implicitly. Args: collection_id (str): Collection identifier. @@ -206,8 +232,6 @@ async def create_item_index(collection_id: str): await client.options(ignore_status=400).indices.create( index=f"{index_by_collection_id(collection_id)}-000001", aliases={index_name: {}}, - mappings=ES_ITEMS_MAPPINGS, - settings=ES_ITEMS_SETTINGS, ) await client.close() diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py index ebb2921e..2c513e31 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py @@ -23,6 +23,7 @@ from stac_fastapi.opensearch.database_logic import ( DatabaseLogic, create_collection_index, + create_index_templates, ) settings = OpensearchSettings() @@ -73,6 +74,7 @@ @app.on_event("startup") async def _startup_event() -> None: + await create_index_templates() await create_collection_index() diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index 0f4bf9cf..f9f7a3c9 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -173,9 +173,36 @@ def indices(collection_ids: Optional[List[str]]) -> str: return ",".join([index_by_collection_id(c) for c in collection_ids]) +async def create_index_templates() -> None: + """ + Create index templates for the Collection and Item indices. + + Returns: + None + + """ + client = AsyncSearchSettings().create_client + await client.indices.put_template( + name=f"template_{COLLECTIONS_INDEX}", + body={ + "index_patterns": [f"{COLLECTIONS_INDEX}*"], + "mappings": ES_COLLECTIONS_MAPPINGS, + }, + ) + await client.indices.put_template( + name=f"template_{ITEMS_INDEX_PREFIX}", + body={ + "index_patterns": [f"{ITEMS_INDEX_PREFIX}*"], + "settings": ES_ITEMS_SETTINGS, + "mappings": ES_ITEMS_MAPPINGS, + }, + ) + await client.close() + + async def create_collection_index() -> None: """ - Create the index for a Collection. + Create the index for a Collection. The settings of the index template will be used implicitly. Returns: None @@ -183,8 +210,7 @@ async def create_collection_index() -> None: """ client = AsyncSearchSettings().create_client - search_body = { - "mappings": ES_COLLECTIONS_MAPPINGS, + search_body: dict[str, Any] = { "aliases": {COLLECTIONS_INDEX: {}}, } @@ -203,7 +229,7 @@ async def create_collection_index() -> None: async def create_item_index(collection_id: str): """ - Create the index for Items. + Create the index for Items. The settings of the index template will be used implicitly. Args: collection_id (str): Collection identifier. @@ -214,10 +240,8 @@ async def create_item_index(collection_id: str): """ client = AsyncSearchSettings().create_client index_name = index_by_collection_id(collection_id) - search_body = { + search_body: dict[str, Any] = { "aliases": {index_name: {}}, - "mappings": ES_ITEMS_MAPPINGS, - "settings": ES_ITEMS_SETTINGS, } try: diff --git a/stac_fastapi/tests/conftest.py b/stac_fastapi/tests/conftest.py index 01160ee1..227509c9 100644 --- a/stac_fastapi/tests/conftest.py +++ b/stac_fastapi/tests/conftest.py @@ -23,6 +23,7 @@ from stac_fastapi.opensearch.database_logic import ( DatabaseLogic, create_collection_index, + create_index_templates, ) else: from stac_fastapi.elasticsearch.config import ( @@ -32,6 +33,7 @@ from stac_fastapi.elasticsearch.database_logic import ( DatabaseLogic, create_collection_index, + create_index_templates, ) from stac_fastapi.extensions.core import ( # FieldsExtension, @@ -215,6 +217,7 @@ async def app(): @pytest_asyncio.fixture(scope="session") async def app_client(app): + await create_index_templates() await create_collection_index() async with AsyncClient(app=app, base_url="http://test-server") as c: diff --git a/stac_fastapi/tests/database/__init__.py b/stac_fastapi/tests/database/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/stac_fastapi/tests/database/test_database.py b/stac_fastapi/tests/database/test_database.py new file mode 100644 index 00000000..74b09ad7 --- /dev/null +++ b/stac_fastapi/tests/database/test_database.py @@ -0,0 +1,47 @@ +import os +import uuid +from copy import deepcopy + +import pytest + +from ..conftest import MockRequest, database + +if os.getenv("BACKEND", "elasticsearch").lower() == "opensearch": + from stac_fastapi.opensearch.database_logic import ( + COLLECTIONS_INDEX, + ES_COLLECTIONS_MAPPINGS, + ES_ITEMS_MAPPINGS, + index_by_collection_id, + ) +else: + from stac_fastapi.elasticsearch.database_logic import ( + COLLECTIONS_INDEX, + ES_COLLECTIONS_MAPPINGS, + ES_ITEMS_MAPPINGS, + index_by_collection_id, + ) + + +@pytest.mark.asyncio +async def test_index_mapping_collections(ctx): + response = await database.client.indices.get_mapping(index=COLLECTIONS_INDEX) + actual_mappings = next(iter(response.body.values()))["mappings"] + assert ( + actual_mappings["dynamic_templates"] + == ES_COLLECTIONS_MAPPINGS["dynamic_templates"] + ) + + +@pytest.mark.asyncio +async def test_index_mapping_items(ctx, txn_client): + collection = deepcopy(ctx.collection) + collection["id"] = str(uuid.uuid4()) + await txn_client.create_collection(collection, request=MockRequest) + response = await database.client.indices.get_mapping( + index=index_by_collection_id(collection["id"]) + ) + actual_mappings = next(iter(response.body.values()))["mappings"] + assert ( + actual_mappings["dynamic_templates"] == ES_ITEMS_MAPPINGS["dynamic_templates"] + ) + await txn_client.delete_collection(collection["id"]) From 8ee27d99172cd0cc677f39304e7787b8a61c10d7 Mon Sep 17 00:00:00 2001 From: Stijn Caerts <stijn.caerts@vito.be> Date: Tue, 12 Mar 2024 15:50:07 +0100 Subject: [PATCH 2/5] document the use of index templates (#208) --- README.md | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index fd0ab0fe..a6d04609 100644 --- a/README.md +++ b/README.md @@ -116,10 +116,12 @@ make ingest ## Elasticsearch Mappings -Mappings apply to search index, not source. +Mappings apply to search index, not source. The mappings are stored in index templates on application startup. +These templates will be used implicitly when creating new Collection and Item indices. ## Managing Elasticsearch Indices +### Snapshots This section covers how to create a snapshot repository and then create and restore snapshots with this. @@ -219,3 +221,52 @@ curl -X "POST" "http://localhost:8080/collections" \ Voila! You have a copy of the collection now that has a resource URI (`/collections/my-collection-copy`) and can be correctly queried by collection name. + +### Reindexing +This section covers how to reindex documents stored in Elasticsearch/OpenSearch. +A reindex operation might be useful to apply changes to documents or to correct dynamically generated mappings. + +The index templates will make sure that manually created indices will also have the correct mappings and settings. + +In this example, we will make a copy of an existing Item index `items_my-collection-000001` but change the Item identifier to be lowercase. + +```shell +curl -X "POST" "http://localhost:9200/_reindex" \ + -H 'Content-Type: application/json' \ + -d $'{ + "source": { + "index": "items_my-collection-000001" + }, + "dest": { + "index": "items_my-collection-000002" + }, + "script": { + "source": "ctx._source.id = ctx._source.id.toLowerCase()", + "lang": "painless" + } + }' +``` + +If we are happy with the data in the newly created index, we can move the alias `items_my-collection` to the new index `items_my-collection-000002`. +```shell +curl -X "POST" "http://localhost:9200/_aliases" \ + -h 'Content-Type: application/json' \ + -d $'{ + "actions": [ + { + "remove": { + "index": "*", + "alias": "items_my-collection" + } + }, + { + "add": { + "index": "items_my-collection-000002", + "alias": "items_my-collection" + } + } + ] + }' +``` + +The modified Items with lowercase identifiers will now be visible to users accessing `my-collection` in the STAC API. \ No newline at end of file From 292d4bf7656eb3d375fb9a210eb5e7d22a38307b Mon Sep 17 00:00:00 2001 From: Stijn Caerts <stijn.caerts@vito.be> Date: Tue, 12 Mar 2024 15:53:24 +0100 Subject: [PATCH 3/5] update CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 63e8b796..1b15aa16 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Added +- use index templates for Collection and Item indices [#208](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/discussions/208) + ### Changed ### Fixed From b3804b54d0cb1037b56ad10f8a4baabdd056f475 Mon Sep 17 00:00:00 2001 From: Stijn Caerts <stijn.caerts@vito.be> Date: Tue, 12 Mar 2024 16:38:14 +0100 Subject: [PATCH 4/5] fix dict typing errors for Python 3.8 --- .../opensearch/stac_fastapi/opensearch/database_logic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index f9f7a3c9..d174e295 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -210,7 +210,7 @@ async def create_collection_index() -> None: """ client = AsyncSearchSettings().create_client - search_body: dict[str, Any] = { + search_body: Dict[str, Any] = { "aliases": {COLLECTIONS_INDEX: {}}, } @@ -240,7 +240,7 @@ async def create_item_index(collection_id: str): """ client = AsyncSearchSettings().create_client index_name = index_by_collection_id(collection_id) - search_body: dict[str, Any] = { + search_body: Dict[str, Any] = { "aliases": {index_name: {}}, } From 3f70857ac5e579fdb0f0791feace0d1f5084d6cd Mon Sep 17 00:00:00 2001 From: Stijn Caerts <stijn.caerts@vito.be> Date: Tue, 12 Mar 2024 16:58:42 +0100 Subject: [PATCH 5/5] fix test to be compatible with both elasticsearch and opensearch --- stac_fastapi/tests/database/test_database.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/stac_fastapi/tests/database/test_database.py b/stac_fastapi/tests/database/test_database.py index 74b09ad7..3f7fe5a8 100644 --- a/stac_fastapi/tests/database/test_database.py +++ b/stac_fastapi/tests/database/test_database.py @@ -25,7 +25,9 @@ @pytest.mark.asyncio async def test_index_mapping_collections(ctx): response = await database.client.indices.get_mapping(index=COLLECTIONS_INDEX) - actual_mappings = next(iter(response.body.values()))["mappings"] + if not isinstance(response, dict): + response = response.body + actual_mappings = next(iter(response.values()))["mappings"] assert ( actual_mappings["dynamic_templates"] == ES_COLLECTIONS_MAPPINGS["dynamic_templates"] @@ -40,7 +42,9 @@ async def test_index_mapping_items(ctx, txn_client): response = await database.client.indices.get_mapping( index=index_by_collection_id(collection["id"]) ) - actual_mappings = next(iter(response.body.values()))["mappings"] + if not isinstance(response, dict): + response = response.body + actual_mappings = next(iter(response.values()))["mappings"] assert ( actual_mappings["dynamic_templates"] == ES_ITEMS_MAPPINGS["dynamic_templates"] )