PyPI - stac-fastapi-opensearch - Versions diffs - 4.0.0__py3-none-any.whl - Mend

stac-fastapi-opensearch 4.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

stac_fastapi/opensearch/__init__.py +1 -0
stac_fastapi/opensearch/app.py +150 -0
stac_fastapi/opensearch/config.py +121 -0
stac_fastapi/opensearch/database_logic.py +959 -0
stac_fastapi/opensearch/version.py +2 -0
stac_fastapi_opensearch-4.0.0.dist-info/METADATA +379 -0
stac_fastapi_opensearch-4.0.0.dist-info/RECORD +10 -0
stac_fastapi_opensearch-4.0.0.dist-info/WHEEL +5 -0
stac_fastapi_opensearch-4.0.0.dist-info/entry_points.txt +2 -0
stac_fastapi_opensearch-4.0.0.dist-info/top_level.txt +1 -0

stac_fastapi/opensearch/database_logic.py ADDED Viewed

@@ -0,0 +1,959 @@
+"""Database logic."""
+import asyncio
+import json
+import logging
+from base64 import urlsafe_b64decode, urlsafe_b64encode
+from copy import deepcopy
+from typing import Any, Dict, Iterable, List, Optional, Tuple, Type
+import attr
+from opensearchpy import exceptions, helpers
+from opensearchpy.helpers.query import Q
+from opensearchpy.helpers.search import Search
+from starlette.requests import Request
+from stac_fastapi.core import serializers
+from stac_fastapi.core.base_database_logic import BaseDatabaseLogic
+from stac_fastapi.core.database_logic import (
+    COLLECTIONS_INDEX,
+    DEFAULT_SORT,
+    ES_COLLECTIONS_MAPPINGS,
+    ES_ITEMS_MAPPINGS,
+    ES_ITEMS_SETTINGS,
+    ITEM_INDICES,
+    ITEMS_INDEX_PREFIX,
+    Geometry,
+    index_alias_by_collection_id,
+    index_by_collection_id,
+    indices,
+    mk_actions,
+    mk_item_id,
+)
+from stac_fastapi.core.extensions import filter
+from stac_fastapi.core.utilities import MAX_LIMIT, bbox2polygon
+from stac_fastapi.opensearch.config import (
+    AsyncOpensearchSettings as AsyncSearchSettings,
+)
+from stac_fastapi.opensearch.config import OpensearchSettings as SyncSearchSettings
+from stac_fastapi.types.errors import ConflictError, NotFoundError
+from stac_fastapi.types.stac import Collection, Item
+logger = logging.getLogger(__name__)
+async def create_index_templates() -> None:
+    """
+    Create index templates for the Collection and Item indices.
+    Returns:
+        None
+    """
+    client = AsyncSearchSettings().create_client
+    await client.indices.put_template(
+        name=f"template_{COLLECTIONS_INDEX}",
+        body={
+            "index_patterns": [f"{COLLECTIONS_INDEX}*"],
+            "mappings": ES_COLLECTIONS_MAPPINGS,
+        },
+    )
+    await client.indices.put_template(
+        name=f"template_{ITEMS_INDEX_PREFIX}",
+        body={
+            "index_patterns": [f"{ITEMS_INDEX_PREFIX}*"],
+            "settings": ES_ITEMS_SETTINGS,
+            "mappings": ES_ITEMS_MAPPINGS,
+        },
+    )
+    await client.close()
+async def create_collection_index() -> None:
+    """
+    Create the index for a Collection. The settings of the index template will be used implicitly.
+    Returns:
+        None
+    """
+    client = AsyncSearchSettings().create_client
+    index = f"{COLLECTIONS_INDEX}-000001"
+    exists = await client.indices.exists(index=index)
+    if not exists:
+        await client.indices.create(
+            index=index,
+            body={
+                "aliases": {COLLECTIONS_INDEX: {}},
+                "mappings": ES_COLLECTIONS_MAPPINGS,
+            },
+        )
+    await client.close()
+async def create_item_index(collection_id: str) -> None:
+    """
+    Create the index for Items. The settings of the index template will be used implicitly.
+    Args:
+        collection_id (str): Collection identifier.
+    Returns:
+        None
+    """
+    client = AsyncSearchSettings().create_client
+    index_name = f"{index_by_collection_id(collection_id)}-000001"
+    exists = await client.indices.exists(index=index_name)
+    if not exists:
+        await client.indices.create(
+            index=index_name,
+            body={
+                "aliases": {index_alias_by_collection_id(collection_id): {}},
+                "mappings": ES_ITEMS_MAPPINGS,
+                "settings": ES_ITEMS_SETTINGS,
+            },
+        )
+    await client.close()
+async def delete_item_index(collection_id: str) -> None:
+    """Delete the index for items in a collection.
+    Args:
+        collection_id (str): The ID of the collection whose items index will be deleted.
+    """
+    client = AsyncSearchSettings().create_client
+    name = index_alias_by_collection_id(collection_id)
+    resolved = await client.indices.resolve_index(name=name)
+    if "aliases" in resolved and resolved["aliases"]:
+        [alias] = resolved["aliases"]
+        await client.indices.delete_alias(index=alias["indices"], name=alias["name"])
+        await client.indices.delete(index=alias["indices"])
+    else:
+        await client.indices.delete(index=name)
+    await client.close()
+@attr.s
+class DatabaseLogic(BaseDatabaseLogic):
+    """Database logic."""
+    client = AsyncSearchSettings().create_client
+    sync_client = SyncSearchSettings().create_client
+    item_serializer: Type[serializers.ItemSerializer] = attr.ib(
+        default=serializers.ItemSerializer
+    )
+    collection_serializer: Type[serializers.CollectionSerializer] = attr.ib(
+        default=serializers.CollectionSerializer
+    )
+    extensions: List[str] = attr.ib(default=attr.Factory(list))
+    aggregation_mapping: Dict[str, Dict[str, Any]] = {
+        "total_count": {"value_count": {"field": "id"}},
+        "collection_frequency": {"terms": {"field": "collection", "size": 100}},
+        "platform_frequency": {"terms": {"field": "properties.platform", "size": 100}},
+        "cloud_cover_frequency": {
+            "range": {
+                "field": "properties.eo:cloud_cover",
+                "ranges": [
+                    {"to": 5},
+                    {"from": 5, "to": 15},
+                    {"from": 15, "to": 40},
+                    {"from": 40},
+                ],
+            }
+        },
+        "datetime_frequency": {
+            "date_histogram": {
+                "field": "properties.datetime",
+                "calendar_interval": "month",
+            }
+        },
+        "datetime_min": {"min": {"field": "properties.datetime"}},
+        "datetime_max": {"max": {"field": "properties.datetime"}},
+        "grid_code_frequency": {
+            "terms": {
+                "field": "properties.grid:code",
+                "missing": "none",
+                "size": 10000,
+            }
+        },
+        "sun_elevation_frequency": {
+            "histogram": {"field": "properties.view:sun_elevation", "interval": 5}
+        },
+        "sun_azimuth_frequency": {
+            "histogram": {"field": "properties.view:sun_azimuth", "interval": 5}
+        },
+        "off_nadir_frequency": {
+            "histogram": {"field": "properties.view:off_nadir", "interval": 5}
+        },
+        "centroid_geohash_grid_frequency": {
+            "geohash_grid": {
+                "field": "properties.proj:centroid",
+                "precision": 1,
+            }
+        },
+        "centroid_geohex_grid_frequency": {
+            "geohex_grid": {
+                "field": "properties.proj:centroid",
+                "precision": 0,
+            }
+        },
+        "centroid_geotile_grid_frequency": {
+            "geotile_grid": {
+                "field": "properties.proj:centroid",
+                "precision": 0,
+            }
+        },
+        "geometry_geohash_grid_frequency": {
+            "geohash_grid": {
+                "field": "geometry",
+                "precision": 1,
+            }
+        },
+        "geometry_geotile_grid_frequency": {
+            "geotile_grid": {
+                "field": "geometry",
+                "precision": 0,
+            }
+        },
+    }
+    """CORE LOGIC"""
+    async def get_all_collections(
+        self, token: Optional[str], limit: int, request: Request
+    ) -> Tuple[List[Dict[str, Any]], Optional[str]]:
+        """
+        Retrieve a list of all collections from Opensearch, supporting pagination.
+        Args:
+            token (Optional[str]): The pagination token.
+            limit (int): The number of results to return.
+        Returns:
+            A tuple of (collections, next pagination token if any).
+        """
+        search_body = {
+            "sort": [{"id": {"order": "asc"}}],
+            "size": limit,
+        }
+        # Only add search_after to the query if token is not None and not empty
+        if token:
+            search_after = [token]
+            search_body["search_after"] = search_after
+        response = await self.client.search(
+            index=COLLECTIONS_INDEX,
+            body=search_body,
+        )
+        hits = response["hits"]["hits"]
+        collections = [
+            self.collection_serializer.db_to_stac(
+                collection=hit["_source"], request=request, extensions=self.extensions
+            )
+            for hit in hits
+        ]
+        next_token = None
+        if len(hits) == limit:
+            # Ensure we have a valid sort value for next_token
+            next_token_values = hits[-1].get("sort")
+            if next_token_values:
+                next_token = next_token_values[0]
+        return collections, next_token
+    async def get_one_item(self, collection_id: str, item_id: str) -> Dict:
+        """Retrieve a single item from the database.
+        Args:
+            collection_id (str): The id of the Collection that the Item belongs to.
+            item_id (str): The id of the Item.
+        Returns:
+            item (Dict): A dictionary containing the source data for the Item.
+        Raises:
+            NotFoundError: If the specified Item does not exist in the Collection.
+        Notes:
+            The Item is retrieved from the Elasticsearch database using the `client.get` method,
+            with the index for the Collection as the target index and the combined `mk_item_id` as the document id.
+        """
+        try:
+            item = await self.client.get(
+                index=index_alias_by_collection_id(collection_id),
+                id=mk_item_id(item_id, collection_id),
+            )
+        except exceptions.NotFoundError:
+            raise NotFoundError(
+                f"Item {item_id} does not exist inside Collection {collection_id}"
+            )
+        return item["_source"]
+    @staticmethod
+    def make_search():
+        """Database logic to create a Search instance."""
+        return Search().sort(*DEFAULT_SORT)
+    @staticmethod
+    def apply_ids_filter(search: Search, item_ids: List[str]):
+        """Database logic to search a list of STAC item ids."""
+        return search.filter("terms", id=item_ids)
+    @staticmethod
+    def apply_collections_filter(search: Search, collection_ids: List[str]):
+        """Database logic to search a list of STAC collection ids."""
+        return search.filter("terms", collection=collection_ids)
+    @staticmethod
+    def apply_free_text_filter(search: Search, free_text_queries: Optional[List[str]]):
+        """Database logic to perform query for search endpoint."""
+        if free_text_queries is not None:
+            free_text_query_string = '" OR properties.\\*:"'.join(free_text_queries)
+            search = search.query(
+                "query_string", query=f'properties.\\*:"{free_text_query_string}"'
+            )
+        return search
+    @staticmethod
+    def apply_datetime_filter(search: Search, datetime_search):
+        """Apply a filter to search based on datetime field.
+        Args:
+            search (Search): The search object to filter.
+            datetime_search (dict): The datetime filter criteria.
+        Returns:
+            Search: The filtered search object.
+        """
+        if "eq" in datetime_search:
+            search = search.filter(
+                "term", **{"properties__datetime": datetime_search["eq"]}
+            )
+        else:
+            search = search.filter(
+                "range", properties__datetime={"lte": datetime_search["lte"]}
+            )
+            search = search.filter(
+                "range", properties__datetime={"gte": datetime_search["gte"]}
+            )
+        return search
+    @staticmethod
+    def apply_bbox_filter(search: Search, bbox: List):
+        """Filter search results based on bounding box.
+        Args:
+            search (Search): The search object to apply the filter to.
+            bbox (List): The bounding box coordinates, represented as a list of four values [minx, miny, maxx, maxy].
+        Returns:
+            search (Search): The search object with the bounding box filter applied.
+        Notes:
+            The bounding box is transformed into a polygon using the `bbox2polygon` function and
+            a geo_shape filter is added to the search object, set to intersect with the specified polygon.
+        """
+        return search.filter(
+            Q(
+                {
+                    "geo_shape": {
+                        "geometry": {
+                            "shape": {
+                                "type": "polygon",
+                                "coordinates": bbox2polygon(*bbox),
+                            },
+                            "relation": "intersects",
+                        }
+                    }
+                }
+            )
+        )
+    @staticmethod
+    def apply_intersects_filter(
+        search: Search,
+        intersects: Geometry,
+    ):
+        """Filter search results based on intersecting geometry.
+        Args:
+            search (Search): The search object to apply the filter to.
+            intersects (Geometry): The intersecting geometry, represented as a GeoJSON-like object.
+        Returns:
+            search (Search): The search object with the intersecting geometry filter applied.
+        Notes:
+            A geo_shape filter is added to the search object, set to intersect with the specified geometry.
+        """
+        return search.filter(
+            Q(
+                {
+                    "geo_shape": {
+                        "geometry": {
+                            "shape": {
+                                "type": intersects.type.lower(),
+                                "coordinates": intersects.coordinates,
+                            },
+                            "relation": "intersects",
+                        }
+                    }
+                }
+            )
+        )
+    @staticmethod
+    def apply_stacql_filter(search: Search, op: str, field: str, value: float):
+        """Filter search results based on a comparison between a field and a value.
+        Args:
+            search (Search): The search object to apply the filter to.
+            op (str): The comparison operator to use. Can be 'eq' (equal), 'gt' (greater than), 'gte' (greater than or equal),
+                'lt' (less than), or 'lte' (less than or equal).
+            field (str): The field to perform the comparison on.
+            value (float): The value to compare the field against.
+        Returns:
+            search (Search): The search object with the specified filter applied.
+        """
+        if op != "eq":
+            key_filter = {field: {f"{op}": value}}
+            search = search.filter(Q("range", **key_filter))
+        else:
+            search = search.filter("term", **{field: value})
+        return search
+    @staticmethod
+    def apply_cql2_filter(search: Search, _filter: Optional[Dict[str, Any]]):
+        """
+        Apply a CQL2 filter to an Opensearch Search object.
+        This method transforms a dictionary representing a CQL2 filter into an Opensearch query
+        and applies it to the provided Search object. If the filter is None, the original Search
+        object is returned unmodified.
+        Args:
+            search (Search): The Opensearch Search object to which the filter will be applied.
+            _filter (Optional[Dict[str, Any]]): The filter in dictionary form that needs to be applied
+                                                to the search. The dictionary should follow the structure
+                                                required by the `to_es` function which converts it
+                                                to an Opensearch query.
+        Returns:
+            Search: The modified Search object with the filter applied if a filter is provided,
+                    otherwise the original Search object.
+        """
+        if _filter is not None:
+            es_query = filter.to_es(_filter)
+            search = search.filter(es_query)
+        return search
+    @staticmethod
+    def populate_sort(sortby: List) -> Optional[Dict[str, Dict[str, str]]]:
+        """Database logic to sort search instance."""
+        if sortby:
+            return {s.field: {"order": s.direction} for s in sortby}
+        else:
+            return None
+    async def execute_search(
+        self,
+        search: Search,
+        limit: int,
+        token: Optional[str],
+        sort: Optional[Dict[str, Dict[str, str]]],
+        collection_ids: Optional[List[str]],
+        ignore_unavailable: bool = True,
+    ) -> Tuple[Iterable[Dict[str, Any]], Optional[int], Optional[str]]:
+        """Execute a search query with limit and other optional parameters.
+        Args:
+            search (Search): The search query to be executed.
+            limit (int): The maximum number of results to be returned.
+            token (Optional[str]): The token used to return the next set of results.
+            sort (Optional[Dict[str, Dict[str, str]]]): Specifies how the results should be sorted.
+            collection_ids (Optional[List[str]]): The collection ids to search.
+            ignore_unavailable (bool, optional): Whether to ignore unavailable collections. Defaults to True.
+        Returns:
+            Tuple[Iterable[Dict[str, Any]], Optional[int], Optional[str]]: A tuple containing:
+                - An iterable of search results, where each result is a dictionary with keys and values representing the
+                fields and values of each document.
+                - The total number of results (if the count could be computed), or None if the count could not be
+                computed.
+                - The token to be used to retrieve the next set of results, or None if there are no more results.
+        Raises:
+            NotFoundError: If the collections specified in `collection_ids` do not exist.
+        """
+        search_body: Dict[str, Any] = {}
+        query = search.query.to_dict() if search.query else None
+        if query:
+            search_body["query"] = query
+        search_after = None
+        if token:
+            search_after = json.loads(urlsafe_b64decode(token).decode())
+        if search_after:
+            search_body["search_after"] = search_after
+        search_body["sort"] = sort if sort else DEFAULT_SORT
+        index_param = indices(collection_ids)
+        max_result_window = MAX_LIMIT
+        size_limit = min(limit + 1, max_result_window)
+        search_task = asyncio.create_task(
+            self.client.search(
+                index=index_param,
+                ignore_unavailable=ignore_unavailable,
+                body=search_body,
+                size=size_limit,
+            )
+        )
+        count_task = asyncio.create_task(
+            self.client.count(
+                index=index_param,
+                ignore_unavailable=ignore_unavailable,
+                body=search.to_dict(count=True),
+            )
+        )
+        try:
+            es_response = await search_task
+        except exceptions.NotFoundError:
+            raise NotFoundError(f"Collections '{collection_ids}' do not exist")
+        hits = es_response["hits"]["hits"]
+        items = (hit["_source"] for hit in hits[:limit])
+        next_token = None
+        if len(hits) > limit and limit < max_result_window:
+            if hits and (sort_array := hits[limit - 1].get("sort")):
+                next_token = urlsafe_b64encode(json.dumps(sort_array).encode()).decode()
+        matched = (
+            es_response["hits"]["total"]["value"]
+            if es_response["hits"]["total"]["relation"] == "eq"
+            else None
+        )
+        if count_task.done():
+            try:
+                matched = count_task.result().get("count")
+            except Exception as e:
+                logger.error(f"Count task failed: {e}")
+        return items, matched, next_token
+    """ AGGREGATE LOGIC """
+    async def aggregate(
+        self,
+        collection_ids: Optional[List[str]],
+        aggregations: List[str],
+        search: Search,
+        centroid_geohash_grid_precision: int,
+        centroid_geohex_grid_precision: int,
+        centroid_geotile_grid_precision: int,
+        geometry_geohash_grid_precision: int,
+        geometry_geotile_grid_precision: int,
+        datetime_frequency_interval: str,
+        ignore_unavailable: Optional[bool] = True,
+    ):
+        """Return aggregations of STAC Items."""
+        search_body: Dict[str, Any] = {}
+        query = search.query.to_dict() if search.query else None
+        if query:
+            search_body["query"] = query
+        def _fill_aggregation_parameters(name: str, agg: dict) -> dict:
+            [key] = agg.keys()
+            agg_precision = {
+                "centroid_geohash_grid_frequency": centroid_geohash_grid_precision,
+                "centroid_geohex_grid_frequency": centroid_geohex_grid_precision,
+                "centroid_geotile_grid_frequency": centroid_geotile_grid_precision,
+                "geometry_geohash_grid_frequency": geometry_geohash_grid_precision,
+                "geometry_geotile_grid_frequency": geometry_geotile_grid_precision,
+            }
+            if name in agg_precision:
+                agg[key]["precision"] = agg_precision[name]
+            if key == "date_histogram":
+                agg[key]["calendar_interval"] = datetime_frequency_interval
+            return agg
+        # include all aggregations specified
+        # this will ignore aggregations with the wrong names
+        search_body["aggregations"] = {
+            k: _fill_aggregation_parameters(k, deepcopy(v))
+            for k, v in self.aggregation_mapping.items()
+            if k in aggregations
+        }
+        index_param = indices(collection_ids)
+        search_task = asyncio.create_task(
+            self.client.search(
+                index=index_param,
+                ignore_unavailable=ignore_unavailable,
+                body=search_body,
+            )
+        )
+        try:
+            db_response = await search_task
+        except exceptions.NotFoundError:
+            raise NotFoundError(f"Collections '{collection_ids}' do not exist")
+        return db_response
+    """ TRANSACTION LOGIC """
+    async def check_collection_exists(self, collection_id: str):
+        """Database logic to check if a collection exists."""
+        if not await self.client.exists(index=COLLECTIONS_INDEX, id=collection_id):
+            raise NotFoundError(f"Collection {collection_id} does not exist")
+    async def prep_create_item(
+        self, item: Item, base_url: str, exist_ok: bool = False
+    ) -> Item:
+        """
+        Preps an item for insertion into the database.
+        Args:
+            item (Item): The item to be prepped for insertion.
+            base_url (str): The base URL used to create the item's self URL.
+            exist_ok (bool): Indicates whether the item can exist already.
+        Returns:
+            Item: The prepped item.
+        Raises:
+            ConflictError: If the item already exists in the database.
+        """
+        await self.check_collection_exists(collection_id=item["collection"])
+        if not exist_ok and await self.client.exists(
+            index=index_alias_by_collection_id(item["collection"]),
+            id=mk_item_id(item["id"], item["collection"]),
+        ):
+            raise ConflictError(
+                f"Item {item['id']} in collection {item['collection']} already exists"
+            )
+        return self.item_serializer.stac_to_db(item, base_url)
+    def sync_prep_create_item(
+        self, item: Item, base_url: str, exist_ok: bool = False
+    ) -> Item:
+        """
+        Prepare an item for insertion into the database.
+        This method performs pre-insertion preparation on the given `item`,
+        such as checking if the collection the item belongs to exists,
+        and optionally verifying that an item with the same ID does not already exist in the database.
+        Args:
+            item (Item): The item to be inserted into the database.
+            base_url (str): The base URL used for constructing URLs for the item.
+            exist_ok (bool): Indicates whether the item can exist already.
+        Returns:
+            Item: The item after preparation is done.
+        Raises:
+            NotFoundError: If the collection that the item belongs to does not exist in the database.
+            ConflictError: If an item with the same ID already exists in the collection.
+        """
+        item_id = item["id"]
+        collection_id = item["collection"]
+        if not self.sync_client.exists(index=COLLECTIONS_INDEX, id=collection_id):
+            raise NotFoundError(f"Collection {collection_id} does not exist")
+        if not exist_ok and self.sync_client.exists(
+            index=index_alias_by_collection_id(collection_id),
+            id=mk_item_id(item_id, collection_id),
+        ):
+            raise ConflictError(
+                f"Item {item_id} in collection {collection_id} already exists"
+            )
+        return self.item_serializer.stac_to_db(item, base_url)
+    async def create_item(self, item: Item, refresh: bool = False):
+        """Database logic for creating one item.
+        Args:
+            item (Item): The item to be created.
+            refresh (bool, optional): Refresh the index after performing the operation. Defaults to False.
+        Raises:
+            ConflictError: If the item already exists in the database.
+        Returns:
+            None
+        """
+        # todo: check if collection exists, but cache
+        item_id = item["id"]
+        collection_id = item["collection"]
+        es_resp = await self.client.index(
+            index=index_alias_by_collection_id(collection_id),
+            id=mk_item_id(item_id, collection_id),
+            body=item,
+            refresh=refresh,
+        )
+        if (meta := es_resp.get("meta")) and meta.get("status") == 409:
+            raise ConflictError(
+                f"Item {item_id} in collection {collection_id} already exists"
+            )
+    async def delete_item(
+        self, item_id: str, collection_id: str, refresh: bool = False
+    ):
+        """Delete a single item from the database.
+        Args:
+            item_id (str): The id of the Item to be deleted.
+            collection_id (str): The id of the Collection that the Item belongs to.
+            refresh (bool, optional): Whether to refresh the index after the deletion. Default is False.
+        Raises:
+            NotFoundError: If the Item does not exist in the database.
+        """
+        try:
+            await self.client.delete(
+                index=index_alias_by_collection_id(collection_id),
+                id=mk_item_id(item_id, collection_id),
+                refresh=refresh,
+            )
+        except exceptions.NotFoundError:
+            raise NotFoundError(
+                f"Item {item_id} in collection {collection_id} not found"
+            )
+    async def get_items_mapping(self, collection_id: str) -> Dict[str, Any]:
+        """Get the mapping for the specified collection's items index.
+        Args:
+            collection_id (str): The ID of the collection to get items mapping for.
+        Returns:
+            Dict[str, Any]: The mapping information.
+        """
+        index_name = index_alias_by_collection_id(collection_id)
+        try:
+            mapping = await self.client.indices.get_mapping(
+                index=index_name, params={"allow_no_indices": "false"}
+            )
+            return mapping
+        except exceptions.NotFoundError:
+            raise NotFoundError(f"Mapping for index {index_name} not found")
+    async def create_collection(self, collection: Collection, refresh: bool = False):
+        """Create a single collection in the database.
+        Args:
+            collection (Collection): The Collection object to be created.
+            refresh (bool, optional): Whether to refresh the index after the creation. Default is False.
+        Raises:
+            ConflictError: If a Collection with the same id already exists in the database.
+        Notes:
+            A new index is created for the items in the Collection using the `create_item_index` function.
+        """
+        collection_id = collection["id"]
+        if await self.client.exists(index=COLLECTIONS_INDEX, id=collection_id):
+            raise ConflictError(f"Collection {collection_id} already exists")
+        await self.client.index(
+            index=COLLECTIONS_INDEX,
+            id=collection_id,
+            body=collection,
+            refresh=refresh,
+        )
+        await create_item_index(collection_id)
+    async def find_collection(self, collection_id: str) -> Collection:
+        """Find and return a collection from the database.
+        Args:
+            self: The instance of the object calling this function.
+            collection_id (str): The ID of the collection to be found.
+        Returns:
+            Collection: The found collection, represented as a `Collection` object.
+        Raises:
+            NotFoundError: If the collection with the given `collection_id` is not found in the database.
+        Notes:
+            This function searches for a collection in the database using the specified `collection_id` and returns the found
+            collection as a `Collection` object. If the collection is not found, a `NotFoundError` is raised.
+        """
+        try:
+            collection = await self.client.get(
+                index=COLLECTIONS_INDEX, id=collection_id
+            )
+        except exceptions.NotFoundError:
+            raise NotFoundError(f"Collection {collection_id} not found")
+        return collection["_source"]
+    async def update_collection(
+        self, collection_id: str, collection: Collection, refresh: bool = False
+    ):
+        """Update a collection from the database.
+        Args:
+            self: The instance of the object calling this function.
+            collection_id (str): The ID of the collection to be updated.
+            collection (Collection): The Collection object to be used for the update.
+        Raises:
+            NotFoundError: If the collection with the given `collection_id` is not
+            found in the database.
+        Notes:
+            This function updates the collection in the database using the specified
+            `collection_id` and with the collection specified in the `Collection` object.
+            If the collection is not found, a `NotFoundError` is raised.
+        """
+        await self.find_collection(collection_id=collection_id)
+        if collection_id != collection["id"]:
+            await self.create_collection(collection, refresh=refresh)
+            await self.client.reindex(
+                body={
+                    "dest": {"index": f"{ITEMS_INDEX_PREFIX}{collection['id']}"},
+                    "source": {"index": f"{ITEMS_INDEX_PREFIX}{collection_id}"},
+                    "script": {
+                        "lang": "painless",
+                        "source": f"""ctx._id = ctx._id.replace('{collection_id}', '{collection["id"]}'); ctx._source.collection = '{collection["id"]}' ;""",  # noqa: E702
+                    },
+                },
+                wait_for_completion=True,
+                refresh=refresh,
+            )
+            await self.delete_collection(collection_id)
+        else:
+            await self.client.index(
+                index=COLLECTIONS_INDEX,
+                id=collection_id,
+                body=collection,
+                refresh=refresh,
+            )
+    async def delete_collection(self, collection_id: str, refresh: bool = False):
+        """Delete a collection from the database.
+        Parameters:
+            self: The instance of the object calling this function.
+            collection_id (str): The ID of the collection to be deleted.
+            refresh (bool): Whether to refresh the index after the deletion (default: False).
+        Raises:
+            NotFoundError: If the collection with the given `collection_id` is not found in the database.
+        Notes:
+            This function first verifies that the collection with the specified `collection_id` exists in the database, and then
+            deletes the collection. If `refresh` is set to True, the index is refreshed after the deletion. Additionally, this
+            function also calls `delete_item_index` to delete the index for the items in the collection.
+        """
+        await self.find_collection(collection_id=collection_id)
+        await self.client.delete(
+            index=COLLECTIONS_INDEX, id=collection_id, refresh=refresh
+        )
+        await delete_item_index(collection_id)
+    async def bulk_async(
+        self, collection_id: str, processed_items: List[Item], refresh: bool = False
+    ) -> None:
+        """Perform a bulk insert of items into the database asynchronously.
+        Args:
+            self: The instance of the object calling this function.
+            collection_id (str): The ID of the collection to which the items belong.
+            processed_items (List[Item]): A list of `Item` objects to be inserted into the database.
+            refresh (bool): Whether to refresh the index after the bulk insert (default: False).
+        Notes:
+            This function performs a bulk insert of `processed_items` into the database using the specified `collection_id`. The
+            insert is performed asynchronously, and the event loop is used to run the operation in a separate executor. The
+            `mk_actions` function is called to generate a list of actions for the bulk insert. If `refresh` is set to True, the
+            index is refreshed after the bulk insert. The function does not return any value.
+        """
+        await helpers.async_bulk(
+            self.client,
+            mk_actions(collection_id, processed_items),
+            refresh=refresh,
+            raise_on_error=False,
+        )
+    def bulk_sync(
+        self, collection_id: str, processed_items: List[Item], refresh: bool = False
+    ) -> None:
+        """Perform a bulk insert of items into the database synchronously.
+        Args:
+            self: The instance of the object calling this function.
+            collection_id (str): The ID of the collection to which the items belong.
+            processed_items (List[Item]): A list of `Item` objects to be inserted into the database.
+            refresh (bool): Whether to refresh the index after the bulk insert (default: False).
+        Notes:
+            This function performs a bulk insert of `processed_items` into the database using the specified `collection_id`. The
+            insert is performed synchronously and blocking, meaning that the function does not return until the insert has
+            completed. The `mk_actions` function is called to generate a list of actions for the bulk insert. If `refresh` is set to
+            True, the index is refreshed after the bulk insert. The function does not return any value.
+        """
+        helpers.bulk(
+            self.sync_client,
+            mk_actions(collection_id, processed_items),
+            refresh=refresh,
+            raise_on_error=False,
+        )
+    # DANGER
+    async def delete_items(self) -> None:
+        """Danger. this is only for tests."""
+        await self.client.delete_by_query(
+            index=ITEM_INDICES,
+            body={"query": {"match_all": {}}},
+            wait_for_completion=True,
+        )
+    # DANGER
+    async def delete_collections(self) -> None:
+        """Danger. this is only for tests."""
+        await self.client.delete_by_query(
+            index=COLLECTIONS_INDEX,
+            body={"query": {"match_all": {}}},
+            wait_for_completion=True,
+        )