stac-fastapi-opensearch 4.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,959 @@
1
+ """Database logic."""
2
+
3
+ import asyncio
4
+ import json
5
+ import logging
6
+ from base64 import urlsafe_b64decode, urlsafe_b64encode
7
+ from copy import deepcopy
8
+ from typing import Any, Dict, Iterable, List, Optional, Tuple, Type
9
+
10
+ import attr
11
+ from opensearchpy import exceptions, helpers
12
+ from opensearchpy.helpers.query import Q
13
+ from opensearchpy.helpers.search import Search
14
+ from starlette.requests import Request
15
+
16
+ from stac_fastapi.core import serializers
17
+ from stac_fastapi.core.base_database_logic import BaseDatabaseLogic
18
+ from stac_fastapi.core.database_logic import (
19
+ COLLECTIONS_INDEX,
20
+ DEFAULT_SORT,
21
+ ES_COLLECTIONS_MAPPINGS,
22
+ ES_ITEMS_MAPPINGS,
23
+ ES_ITEMS_SETTINGS,
24
+ ITEM_INDICES,
25
+ ITEMS_INDEX_PREFIX,
26
+ Geometry,
27
+ index_alias_by_collection_id,
28
+ index_by_collection_id,
29
+ indices,
30
+ mk_actions,
31
+ mk_item_id,
32
+ )
33
+ from stac_fastapi.core.extensions import filter
34
+ from stac_fastapi.core.utilities import MAX_LIMIT, bbox2polygon
35
+ from stac_fastapi.opensearch.config import (
36
+ AsyncOpensearchSettings as AsyncSearchSettings,
37
+ )
38
+ from stac_fastapi.opensearch.config import OpensearchSettings as SyncSearchSettings
39
+ from stac_fastapi.types.errors import ConflictError, NotFoundError
40
+ from stac_fastapi.types.stac import Collection, Item
41
+
42
+ logger = logging.getLogger(__name__)
43
+
44
+
45
async def create_index_templates() -> None:
    """
    Create index templates for the Collection and Item indices.

    Two templates are registered: one matching ``{COLLECTIONS_INDEX}*`` carrying the
    collection mappings, and one matching ``{ITEMS_INDEX_PREFIX}*`` carrying the item
    settings and mappings.

    Returns:
        None

    """
    client = AsyncSearchSettings().create_client
    try:
        await client.indices.put_template(
            name=f"template_{COLLECTIONS_INDEX}",
            body={
                "index_patterns": [f"{COLLECTIONS_INDEX}*"],
                "mappings": ES_COLLECTIONS_MAPPINGS,
            },
        )
        await client.indices.put_template(
            name=f"template_{ITEMS_INDEX_PREFIX}",
            body={
                "index_patterns": [f"{ITEMS_INDEX_PREFIX}*"],
                "settings": ES_ITEMS_SETTINGS,
                "mappings": ES_ITEMS_MAPPINGS,
            },
        )
    finally:
        # Close the client even when a request fails so the connection is not leaked.
        await client.close()
70
+
71
+
72
async def create_collection_index() -> None:
    """
    Create the index for a Collection. The settings of the index template will be used implicitly.

    The concrete index is named ``{COLLECTIONS_INDEX}-000001`` and is given
    ``COLLECTIONS_INDEX`` as an alias. Nothing is created when the index already exists.

    Returns:
        None

    """
    client = AsyncSearchSettings().create_client
    try:
        index = f"{COLLECTIONS_INDEX}-000001"

        exists = await client.indices.exists(index=index)
        if not exists:
            await client.indices.create(
                index=index,
                body={
                    "aliases": {COLLECTIONS_INDEX: {}},
                    "mappings": ES_COLLECTIONS_MAPPINGS,
                },
            )
    finally:
        # Close the client even when a request fails so the connection is not leaked.
        await client.close()
94
+
95
+
96
async def create_item_index(collection_id: str) -> None:
    """
    Create the index for Items. The settings of the index template will be used implicitly.

    The concrete index is named ``{index_by_collection_id(collection_id)}-000001`` and is
    given ``index_alias_by_collection_id(collection_id)`` as an alias. Nothing is created
    when the index already exists.

    Args:
        collection_id (str): Collection identifier.

    Returns:
        None

    """
    client = AsyncSearchSettings().create_client
    try:
        index_name = f"{index_by_collection_id(collection_id)}-000001"
        exists = await client.indices.exists(index=index_name)
        if not exists:
            await client.indices.create(
                index=index_name,
                body={
                    "aliases": {index_alias_by_collection_id(collection_id): {}},
                    "mappings": ES_ITEMS_MAPPINGS,
                    "settings": ES_ITEMS_SETTINGS,
                },
            )
    finally:
        # Close the client even when a request fails so the connection is not leaked.
        await client.close()
121
+
122
+
123
async def delete_item_index(collection_id: str) -> None:
    """Delete the index for items in a collection.

    The collection's alias name is resolved first. When it resolves to an alias, the
    alias is removed and then the indices behind it are deleted; otherwise the name is
    deleted directly as an index.

    Args:
        collection_id (str): The ID of the collection whose items index will be deleted.
    """
    client = AsyncSearchSettings().create_client
    try:
        name = index_alias_by_collection_id(collection_id)
        resolved = await client.indices.resolve_index(name=name)
        if "aliases" in resolved and resolved["aliases"]:
            # Exactly one alias entry is expected; unpacking raises ValueError otherwise.
            [alias] = resolved["aliases"]
            await client.indices.delete_alias(
                index=alias["indices"], name=alias["name"]
            )
            await client.indices.delete(index=alias["indices"])
        else:
            await client.indices.delete(index=name)
    finally:
        # Close the client even when a request fails so the connection is not leaked.
        await client.close()
140
+
141
+
142
@attr.s
class DatabaseLogic(BaseDatabaseLogic):
    """Database logic backed by OpenSearch.

    Holds one asynchronous and one synchronous client (created once, at class
    definition time, and shared by all instances) and implements collection/item
    CRUD, search, aggregation and bulk operations on top of them.
    """

    # Shared clients: these are plain class attributes, not attrs fields.
    client = AsyncSearchSettings().create_client
    sync_client = SyncSearchSettings().create_client

    item_serializer: Type[serializers.ItemSerializer] = attr.ib(
        default=serializers.ItemSerializer
    )
    collection_serializer: Type[serializers.CollectionSerializer] = attr.ib(
        default=serializers.CollectionSerializer
    )

    extensions: List[str] = attr.ib(default=attr.Factory(list))

    # Aggregation name -> aggregation body template. `aggregate` deep-copies an entry
    # before filling in precision / interval, so these templates are never mutated.
    aggregation_mapping: Dict[str, Dict[str, Any]] = {
        "total_count": {"value_count": {"field": "id"}},
        "collection_frequency": {"terms": {"field": "collection", "size": 100}},
        "platform_frequency": {"terms": {"field": "properties.platform", "size": 100}},
        "cloud_cover_frequency": {
            "range": {
                "field": "properties.eo:cloud_cover",
                "ranges": [
                    {"to": 5},
                    {"from": 5, "to": 15},
                    {"from": 15, "to": 40},
                    {"from": 40},
                ],
            }
        },
        "datetime_frequency": {
            "date_histogram": {
                "field": "properties.datetime",
                "calendar_interval": "month",
            }
        },
        "datetime_min": {"min": {"field": "properties.datetime"}},
        "datetime_max": {"max": {"field": "properties.datetime"}},
        "grid_code_frequency": {
            "terms": {
                "field": "properties.grid:code",
                "missing": "none",
                "size": 10000,
            }
        },
        "sun_elevation_frequency": {
            "histogram": {"field": "properties.view:sun_elevation", "interval": 5}
        },
        "sun_azimuth_frequency": {
            "histogram": {"field": "properties.view:sun_azimuth", "interval": 5}
        },
        "off_nadir_frequency": {
            "histogram": {"field": "properties.view:off_nadir", "interval": 5}
        },
        "centroid_geohash_grid_frequency": {
            "geohash_grid": {
                "field": "properties.proj:centroid",
                "precision": 1,
            }
        },
        "centroid_geohex_grid_frequency": {
            "geohex_grid": {
                "field": "properties.proj:centroid",
                "precision": 0,
            }
        },
        "centroid_geotile_grid_frequency": {
            "geotile_grid": {
                "field": "properties.proj:centroid",
                "precision": 0,
            }
        },
        "geometry_geohash_grid_frequency": {
            "geohash_grid": {
                "field": "geometry",
                "precision": 1,
            }
        },
        "geometry_geotile_grid_frequency": {
            "geotile_grid": {
                "field": "geometry",
                "precision": 0,
            }
        },
    }

    """CORE LOGIC"""

    async def get_all_collections(
        self, token: Optional[str], limit: int, request: Request
    ) -> Tuple[List[Dict[str, Any]], Optional[str]]:
        """
        Retrieve a list of all collections from Opensearch, supporting pagination.

        Args:
            token (Optional[str]): The pagination token.
            limit (int): The number of results to return.
            request (Request): The incoming request, passed to the collection serializer.

        Returns:
            A tuple of (collections, next pagination token if any).
        """
        search_body = {
            "sort": [{"id": {"order": "asc"}}],
            "size": limit,
        }

        # Only add search_after to the query if token is not None and not empty
        if token:
            search_after = [token]
            search_body["search_after"] = search_after

        response = await self.client.search(
            index=COLLECTIONS_INDEX,
            body=search_body,
        )

        hits = response["hits"]["hits"]
        collections = [
            self.collection_serializer.db_to_stac(
                collection=hit["_source"], request=request, extensions=self.extensions
            )
            for hit in hits
        ]

        next_token = None
        # A full page means there may be more results; a short page is the last one.
        if len(hits) == limit:
            # Ensure we have a valid sort value for next_token
            next_token_values = hits[-1].get("sort")
            if next_token_values:
                next_token = next_token_values[0]

        return collections, next_token

    async def get_one_item(self, collection_id: str, item_id: str) -> Dict:
        """Retrieve a single item from the database.

        Args:
            collection_id (str): The id of the Collection that the Item belongs to.
            item_id (str): The id of the Item.

        Returns:
            item (Dict): A dictionary containing the source data for the Item.

        Raises:
            NotFoundError: If the specified Item does not exist in the Collection.

        Notes:
            The Item is retrieved from the OpenSearch database using the `client.get` method,
            with the index alias for the Collection as the target index and the combined
            `mk_item_id` as the document id.
        """
        try:
            item = await self.client.get(
                index=index_alias_by_collection_id(collection_id),
                id=mk_item_id(item_id, collection_id),
            )
        except exceptions.NotFoundError:
            raise NotFoundError(
                f"Item {item_id} does not exist inside Collection {collection_id}"
            )
        return item["_source"]

    @staticmethod
    def make_search():
        """Database logic to create a Search instance."""
        return Search().sort(*DEFAULT_SORT)

    @staticmethod
    def apply_ids_filter(search: Search, item_ids: List[str]):
        """Database logic to search a list of STAC item ids."""
        return search.filter("terms", id=item_ids)

    @staticmethod
    def apply_collections_filter(search: Search, collection_ids: List[str]):
        """Database logic to search a list of STAC collection ids."""
        return search.filter("terms", collection=collection_ids)

    @staticmethod
    def apply_free_text_filter(search: Search, free_text_queries: Optional[List[str]]):
        """Database logic to perform query for search endpoint.

        Each entry of `free_text_queries` becomes a quoted phrase matched against
        ``properties.*``; the phrases are OR-ed together in one `query_string` query.
        `None` leaves the search unchanged.
        """
        if free_text_queries is not None:
            free_text_query_string = '" OR properties.\\*:"'.join(free_text_queries)
            search = search.query(
                "query_string", query=f'properties.\\*:"{free_text_query_string}"'
            )

        return search

    @staticmethod
    def apply_datetime_filter(search: Search, datetime_search):
        """Apply a filter to search based on datetime field.

        Args:
            search (Search): The search object to filter.
            datetime_search (dict): The datetime filter criteria. Either contains an
                "eq" key (exact match) or both "lte" and "gte" keys (range).

        Returns:
            Search: The filtered search object.
        """
        if "eq" in datetime_search:
            search = search.filter(
                "term", **{"properties__datetime": datetime_search["eq"]}
            )
        else:
            search = search.filter(
                "range", properties__datetime={"lte": datetime_search["lte"]}
            )
            search = search.filter(
                "range", properties__datetime={"gte": datetime_search["gte"]}
            )
        return search

    @staticmethod
    def apply_bbox_filter(search: Search, bbox: List):
        """Filter search results based on bounding box.

        Args:
            search (Search): The search object to apply the filter to.
            bbox (List): The bounding box coordinates, represented as a list of four values [minx, miny, maxx, maxy].

        Returns:
            search (Search): The search object with the bounding box filter applied.

        Notes:
            The bounding box is transformed into a polygon using the `bbox2polygon` function and
            a geo_shape filter is added to the search object, set to intersect with the specified polygon.
        """
        return search.filter(
            Q(
                {
                    "geo_shape": {
                        "geometry": {
                            "shape": {
                                "type": "polygon",
                                "coordinates": bbox2polygon(*bbox),
                            },
                            "relation": "intersects",
                        }
                    }
                }
            )
        )

    @staticmethod
    def apply_intersects_filter(
        search: Search,
        intersects: Geometry,
    ):
        """Filter search results based on intersecting geometry.

        Args:
            search (Search): The search object to apply the filter to.
            intersects (Geometry): The intersecting geometry, represented as a GeoJSON-like object.

        Returns:
            search (Search): The search object with the intersecting geometry filter applied.

        Notes:
            A geo_shape filter is added to the search object, set to intersect with the specified geometry.
        """
        return search.filter(
            Q(
                {
                    "geo_shape": {
                        "geometry": {
                            "shape": {
                                "type": intersects.type.lower(),
                                "coordinates": intersects.coordinates,
                            },
                            "relation": "intersects",
                        }
                    }
                }
            )
        )

    @staticmethod
    def apply_stacql_filter(search: Search, op: str, field: str, value: float):
        """Filter search results based on a comparison between a field and a value.

        Args:
            search (Search): The search object to apply the filter to.
            op (str): The comparison operator to use. Can be 'eq' (equal), 'gt' (greater than), 'gte' (greater than or equal),
                'lt' (less than), or 'lte' (less than or equal).
            field (str): The field to perform the comparison on.
            value (float): The value to compare the field against.

        Returns:
            search (Search): The search object with the specified filter applied.
        """
        if op != "eq":
            # Every non-"eq" operator is passed through as the key of a range query.
            key_filter = {field: {f"{op}": value}}
            search = search.filter(Q("range", **key_filter))
        else:
            search = search.filter("term", **{field: value})

        return search

    @staticmethod
    def apply_cql2_filter(search: Search, _filter: Optional[Dict[str, Any]]):
        """
        Apply a CQL2 filter to an Opensearch Search object.

        This method transforms a dictionary representing a CQL2 filter into an Opensearch query
        and applies it to the provided Search object. If the filter is None, the original Search
        object is returned unmodified.

        Args:
            search (Search): The Opensearch Search object to which the filter will be applied.
            _filter (Optional[Dict[str, Any]]): The filter in dictionary form that needs to be applied
                                                to the search. The dictionary should follow the structure
                                                required by the `to_es` function which converts it
                                                to an Opensearch query.

        Returns:
            Search: The modified Search object with the filter applied if a filter is provided,
                    otherwise the original Search object.
        """
        if _filter is not None:
            es_query = filter.to_es(_filter)
            search = search.filter(es_query)

        return search

    @staticmethod
    def populate_sort(sortby: List) -> Optional[Dict[str, Dict[str, str]]]:
        """Database logic to sort search instance.

        Returns a mapping of field name to ``{"order": direction}`` built from the
        `field` / `direction` attributes of each entry, or None when `sortby` is empty.
        """
        if sortby:
            return {s.field: {"order": s.direction} for s in sortby}
        else:
            return None

    async def execute_search(
        self,
        search: Search,
        limit: int,
        token: Optional[str],
        sort: Optional[Dict[str, Dict[str, str]]],
        collection_ids: Optional[List[str]],
        ignore_unavailable: bool = True,
    ) -> Tuple[Iterable[Dict[str, Any]], Optional[int], Optional[str]]:
        """Execute a search query with limit and other optional parameters.

        Args:
            search (Search): The search query to be executed.
            limit (int): The maximum number of results to be returned.
            token (Optional[str]): The token used to return the next set of results.
            sort (Optional[Dict[str, Dict[str, str]]]): Specifies how the results should be sorted.
            collection_ids (Optional[List[str]]): The collection ids to search.
            ignore_unavailable (bool, optional): Whether to ignore unavailable collections. Defaults to True.

        Returns:
            Tuple[Iterable[Dict[str, Any]], Optional[int], Optional[str]]: A tuple containing:
                - An iterable of search results, where each result is a dictionary with keys and values representing the
                fields and values of each document.
                - The total number of results (if the count could be computed), or None if the count could not be
                computed.
                - The token to be used to retrieve the next set of results, or None if there are no more results.

        Raises:
            NotFoundError: If the collections specified in `collection_ids` do not exist.
        """
        search_body: Dict[str, Any] = {}
        query = search.query.to_dict() if search.query else None
        if query:
            search_body["query"] = query

        search_after = None

        if token:
            # The token is the url-safe base64 encoding of the JSON `sort` array of the
            # last hit of the previous page (see where next_token is built below).
            search_after = json.loads(urlsafe_b64decode(token).decode())
        if search_after:
            search_body["search_after"] = search_after

        search_body["sort"] = sort if sort else DEFAULT_SORT

        index_param = indices(collection_ids)

        max_result_window = MAX_LIMIT

        # Request one hit more than asked for so the presence of a next page can be detected.
        size_limit = min(limit + 1, max_result_window)

        search_task = asyncio.create_task(
            self.client.search(
                index=index_param,
                ignore_unavailable=ignore_unavailable,
                body=search_body,
                size=size_limit,
            )
        )

        count_task = asyncio.create_task(
            self.client.count(
                index=index_param,
                ignore_unavailable=ignore_unavailable,
                body=search.to_dict(count=True),
            )
        )

        try:
            es_response = await search_task
        except exceptions.NotFoundError:
            raise NotFoundError(f"Collections '{collection_ids}' do not exist")

        hits = es_response["hits"]["hits"]
        items = (hit["_source"] for hit in hits[:limit])

        next_token = None
        if len(hits) > limit and limit < max_result_window:
            if hits and (sort_array := hits[limit - 1].get("sort")):
                next_token = urlsafe_b64encode(json.dumps(sort_array).encode()).decode()

        matched = (
            es_response["hits"]["total"]["value"]
            if es_response["hits"]["total"]["relation"] == "eq"
            else None
        )
        # The count is best-effort: it is only used when the count request has
        # already finished by the time the search response is available.
        if count_task.done():
            try:
                matched = count_task.result().get("count")
            except Exception as e:
                logger.error(f"Count task failed: {e}")

        return items, matched, next_token

    """ AGGREGATE LOGIC """

    async def aggregate(
        self,
        collection_ids: Optional[List[str]],
        aggregations: List[str],
        search: Search,
        centroid_geohash_grid_precision: int,
        centroid_geohex_grid_precision: int,
        centroid_geotile_grid_precision: int,
        geometry_geohash_grid_precision: int,
        geometry_geotile_grid_precision: int,
        datetime_frequency_interval: str,
        ignore_unavailable: Optional[bool] = True,
    ):
        """Return aggregations of STAC Items.

        Args:
            collection_ids (Optional[List[str]]): The collection ids to aggregate over.
            aggregations (List[str]): Names of the aggregations to run; names that are
                not keys of `aggregation_mapping` are ignored.
            search (Search): The search whose query restricts the aggregated items.
            centroid_geohash_grid_precision (int): Precision for `centroid_geohash_grid_frequency`.
            centroid_geohex_grid_precision (int): Precision for `centroid_geohex_grid_frequency`.
            centroid_geotile_grid_precision (int): Precision for `centroid_geotile_grid_frequency`.
            geometry_geohash_grid_precision (int): Precision for `geometry_geohash_grid_frequency`.
            geometry_geotile_grid_precision (int): Precision for `geometry_geotile_grid_frequency`.
            datetime_frequency_interval (str): Calendar interval for date histogram aggregations.
            ignore_unavailable (Optional[bool]): Whether to ignore unavailable collections. Defaults to True.

        Returns:
            The raw search response returned by the client.

        Raises:
            NotFoundError: If the collections specified in `collection_ids` do not exist.
        """
        search_body: Dict[str, Any] = {}
        query = search.query.to_dict() if search.query else None
        if query:
            search_body["query"] = query

        def _fill_aggregation_parameters(name: str, agg: dict) -> dict:
            # Each template has exactly one top-level key: the aggregation type.
            [key] = agg.keys()
            agg_precision = {
                "centroid_geohash_grid_frequency": centroid_geohash_grid_precision,
                "centroid_geohex_grid_frequency": centroid_geohex_grid_precision,
                "centroid_geotile_grid_frequency": centroid_geotile_grid_precision,
                "geometry_geohash_grid_frequency": geometry_geohash_grid_precision,
                "geometry_geotile_grid_frequency": geometry_geotile_grid_precision,
            }
            if name in agg_precision:
                agg[key]["precision"] = agg_precision[name]

            if key == "date_histogram":
                agg[key]["calendar_interval"] = datetime_frequency_interval

            return agg

        # include all aggregations specified
        # this will ignore aggregations with the wrong names
        search_body["aggregations"] = {
            k: _fill_aggregation_parameters(k, deepcopy(v))
            for k, v in self.aggregation_mapping.items()
            if k in aggregations
        }

        index_param = indices(collection_ids)
        search_task = asyncio.create_task(
            self.client.search(
                index=index_param,
                ignore_unavailable=ignore_unavailable,
                body=search_body,
            )
        )

        try:
            db_response = await search_task
        except exceptions.NotFoundError:
            raise NotFoundError(f"Collections '{collection_ids}' do not exist")

        return db_response

    """ TRANSACTION LOGIC """

    async def check_collection_exists(self, collection_id: str):
        """Database logic to check if a collection exists.

        Raises:
            NotFoundError: If no collection document with `collection_id` exists.
        """
        if not await self.client.exists(index=COLLECTIONS_INDEX, id=collection_id):
            raise NotFoundError(f"Collection {collection_id} does not exist")

    async def prep_create_item(
        self, item: Item, base_url: str, exist_ok: bool = False
    ) -> Item:
        """
        Preps an item for insertion into the database.

        Args:
            item (Item): The item to be prepped for insertion.
            base_url (str): The base URL used to create the item's self URL.
            exist_ok (bool): Indicates whether the item can exist already.

        Returns:
            Item: The prepped item.

        Raises:
            NotFoundError: If the collection that the item belongs to does not exist.
            ConflictError: If the item already exists in the database.

        """
        await self.check_collection_exists(collection_id=item["collection"])

        if not exist_ok and await self.client.exists(
            index=index_alias_by_collection_id(item["collection"]),
            id=mk_item_id(item["id"], item["collection"]),
        ):
            raise ConflictError(
                f"Item {item['id']} in collection {item['collection']} already exists"
            )

        return self.item_serializer.stac_to_db(item, base_url)

    def sync_prep_create_item(
        self, item: Item, base_url: str, exist_ok: bool = False
    ) -> Item:
        """
        Prepare an item for insertion into the database.

        This method performs pre-insertion preparation on the given `item`,
        such as checking if the collection the item belongs to exists,
        and optionally verifying that an item with the same ID does not already exist in the database.

        Args:
            item (Item): The item to be inserted into the database.
            base_url (str): The base URL used for constructing URLs for the item.
            exist_ok (bool): Indicates whether the item can exist already.

        Returns:
            Item: The item after preparation is done.

        Raises:
            NotFoundError: If the collection that the item belongs to does not exist in the database.
            ConflictError: If an item with the same ID already exists in the collection.
        """
        item_id = item["id"]
        collection_id = item["collection"]
        if not self.sync_client.exists(index=COLLECTIONS_INDEX, id=collection_id):
            raise NotFoundError(f"Collection {collection_id} does not exist")

        if not exist_ok and self.sync_client.exists(
            index=index_alias_by_collection_id(collection_id),
            id=mk_item_id(item_id, collection_id),
        ):
            raise ConflictError(
                f"Item {item_id} in collection {collection_id} already exists"
            )

        return self.item_serializer.stac_to_db(item, base_url)

    async def create_item(self, item: Item, refresh: bool = False):
        """Database logic for creating one item.

        Args:
            item (Item): The item to be created.
            refresh (bool, optional): Refresh the index after performing the operation. Defaults to False.

        Raises:
            ConflictError: If the item already exists in the database.

        Returns:
            None
        """
        # todo: check if collection exists, but cache
        item_id = item["id"]
        collection_id = item["collection"]
        es_resp = await self.client.index(
            index=index_alias_by_collection_id(collection_id),
            id=mk_item_id(item_id, collection_id),
            body=item,
            refresh=refresh,
        )

        if (meta := es_resp.get("meta")) and meta.get("status") == 409:
            raise ConflictError(
                f"Item {item_id} in collection {collection_id} already exists"
            )

    async def delete_item(
        self, item_id: str, collection_id: str, refresh: bool = False
    ):
        """Delete a single item from the database.

        Args:
            item_id (str): The id of the Item to be deleted.
            collection_id (str): The id of the Collection that the Item belongs to.
            refresh (bool, optional): Whether to refresh the index after the deletion. Default is False.

        Raises:
            NotFoundError: If the Item does not exist in the database.
        """
        try:
            await self.client.delete(
                index=index_alias_by_collection_id(collection_id),
                id=mk_item_id(item_id, collection_id),
                refresh=refresh,
            )
        except exceptions.NotFoundError:
            raise NotFoundError(
                f"Item {item_id} in collection {collection_id} not found"
            )

    async def get_items_mapping(self, collection_id: str) -> Dict[str, Any]:
        """Get the mapping for the specified collection's items index.

        Args:
            collection_id (str): The ID of the collection to get items mapping for.

        Returns:
            Dict[str, Any]: The mapping information.

        Raises:
            NotFoundError: If the client reports that the index does not exist.
        """
        index_name = index_alias_by_collection_id(collection_id)
        try:
            mapping = await self.client.indices.get_mapping(
                index=index_name, params={"allow_no_indices": "false"}
            )
            return mapping
        except exceptions.NotFoundError:
            raise NotFoundError(f"Mapping for index {index_name} not found")

    async def create_collection(self, collection: Collection, refresh: bool = False):
        """Create a single collection in the database.

        Args:
            collection (Collection): The Collection object to be created.
            refresh (bool, optional): Whether to refresh the index after the creation. Default is False.

        Raises:
            ConflictError: If a Collection with the same id already exists in the database.

        Notes:
            A new index is created for the items in the Collection using the `create_item_index` function.
        """
        collection_id = collection["id"]

        if await self.client.exists(index=COLLECTIONS_INDEX, id=collection_id):
            raise ConflictError(f"Collection {collection_id} already exists")

        await self.client.index(
            index=COLLECTIONS_INDEX,
            id=collection_id,
            body=collection,
            refresh=refresh,
        )

        await create_item_index(collection_id)

    async def find_collection(self, collection_id: str) -> Collection:
        """Find and return a collection from the database.

        Args:
            self: The instance of the object calling this function.
            collection_id (str): The ID of the collection to be found.

        Returns:
            Collection: The found collection, represented as a `Collection` object.

        Raises:
            NotFoundError: If the collection with the given `collection_id` is not found in the database.

        Notes:
            This function searches for a collection in the database using the specified `collection_id` and returns the found
            collection as a `Collection` object. If the collection is not found, a `NotFoundError` is raised.
        """
        try:
            collection = await self.client.get(
                index=COLLECTIONS_INDEX, id=collection_id
            )
        except exceptions.NotFoundError:
            raise NotFoundError(f"Collection {collection_id} not found")

        return collection["_source"]

    async def update_collection(
        self, collection_id: str, collection: Collection, refresh: bool = False
    ):
        """Update a collection from the database.

        Args:
            self: The instance of the object calling this function.
            collection_id (str): The ID of the collection to be updated.
            collection (Collection): The Collection object to be used for the update.
            refresh (bool): Whether to refresh the affected indices after writing (default: False).

        Raises:
            NotFoundError: If the collection with the given `collection_id` is not
            found in the database.

        Notes:
            This function updates the collection in the database using the specified
            `collection_id` and with the collection specified in the `Collection` object.
            If the collection is not found, a `NotFoundError` is raised.

            When the id changes, the new collection is created, the items are reindexed
            into the new collection's index with their document ids and `collection`
            field rewritten, and the old collection is deleted.
        """
        await self.find_collection(collection_id=collection_id)

        if collection_id != collection["id"]:
            await self.create_collection(collection, refresh=refresh)

            await self.client.reindex(
                body={
                    "dest": {"index": f"{ITEMS_INDEX_PREFIX}{collection['id']}"},
                    "source": {"index": f"{ITEMS_INDEX_PREFIX}{collection_id}"},
                    "script": {
                        "lang": "painless",
                        # The ids are passed as script parameters rather than being
                        # interpolated into the script source, so an id containing a
                        # quote (or script code) cannot alter the script.
                        "source": (
                            "ctx._id = ctx._id.replace(params.old_id, params.new_id); "
                            "ctx._source.collection = params.new_id;"
                        ),
                        "params": {
                            "old_id": collection_id,
                            "new_id": collection["id"],
                        },
                    },
                },
                wait_for_completion=True,
                refresh=refresh,
            )

            await self.delete_collection(collection_id)

        else:
            await self.client.index(
                index=COLLECTIONS_INDEX,
                id=collection_id,
                body=collection,
                refresh=refresh,
            )

    async def delete_collection(self, collection_id: str, refresh: bool = False):
        """Delete a collection from the database.

        Parameters:
            self: The instance of the object calling this function.
            collection_id (str): The ID of the collection to be deleted.
            refresh (bool): Whether to refresh the index after the deletion (default: False).

        Raises:
            NotFoundError: If the collection with the given `collection_id` is not found in the database.

        Notes:
            This function first verifies that the collection with the specified `collection_id` exists in the database, and then
            deletes the collection. If `refresh` is set to True, the index is refreshed after the deletion. Additionally, this
            function also calls `delete_item_index` to delete the index for the items in the collection.
        """
        await self.find_collection(collection_id=collection_id)
        await self.client.delete(
            index=COLLECTIONS_INDEX, id=collection_id, refresh=refresh
        )
        await delete_item_index(collection_id)

    async def bulk_async(
        self, collection_id: str, processed_items: List[Item], refresh: bool = False
    ) -> None:
        """Perform a bulk insert of items into the database asynchronously.

        Args:
            self: The instance of the object calling this function.
            collection_id (str): The ID of the collection to which the items belong.
            processed_items (List[Item]): A list of `Item` objects to be inserted into the database.
            refresh (bool): Whether to refresh the index after the bulk insert (default: False).

        Notes:
            This function performs a bulk insert of `processed_items` into the database using the specified `collection_id`.
            The insert is awaited on the asynchronous client via `helpers.async_bulk`. The `mk_actions` function is called to
            generate the actions for the bulk insert. If `refresh` is set to True, the index is refreshed after the bulk
            insert. Per-document failures do not raise (`raise_on_error=False`); they are logged instead. The function does
            not return any value.
        """
        success, errors = await helpers.async_bulk(
            self.client,
            mk_actions(collection_id, processed_items),
            refresh=refresh,
            raise_on_error=False,
        )
        # With raise_on_error=False failures are only reported in the return value,
        # so surface them in the log rather than dropping them silently.
        if errors:
            logger.error(
                "Bulk insert into collection %s: %d succeeded, %d failed",
                collection_id,
                success,
                len(errors),
            )
            logger.debug(
                "Bulk insert errors for collection %s: %s", collection_id, errors
            )

    def bulk_sync(
        self, collection_id: str, processed_items: List[Item], refresh: bool = False
    ) -> None:
        """Perform a bulk insert of items into the database synchronously.

        Args:
            self: The instance of the object calling this function.
            collection_id (str): The ID of the collection to which the items belong.
            processed_items (List[Item]): A list of `Item` objects to be inserted into the database.
            refresh (bool): Whether to refresh the index after the bulk insert (default: False).

        Notes:
            This function performs a bulk insert of `processed_items` into the database using the specified `collection_id`. The
            insert is performed synchronously and blocking, meaning that the function does not return until the insert has
            completed. The `mk_actions` function is called to generate a list of actions for the bulk insert. If `refresh` is set to
            True, the index is refreshed after the bulk insert. Per-document failures do not raise (`raise_on_error=False`);
            they are logged instead. The function does not return any value.
        """
        success, errors = helpers.bulk(
            self.sync_client,
            mk_actions(collection_id, processed_items),
            refresh=refresh,
            raise_on_error=False,
        )
        # With raise_on_error=False failures are only reported in the return value,
        # so surface them in the log rather than dropping them silently.
        if errors:
            logger.error(
                "Bulk insert into collection %s: %d succeeded, %d failed",
                collection_id,
                success,
                len(errors),
            )
            logger.debug(
                "Bulk insert errors for collection %s: %s", collection_id, errors
            )

    # DANGER
    async def delete_items(self) -> None:
        """Danger. this is only for tests.

        Deletes every document in all item indices.
        """
        await self.client.delete_by_query(
            index=ITEM_INDICES,
            body={"query": {"match_all": {}}},
            wait_for_completion=True,
        )

    # DANGER
    async def delete_collections(self) -> None:
        """Danger. this is only for tests.

        Deletes every document in the collections index.
        """
        await self.client.delete_by_query(
            index=COLLECTIONS_INDEX,
            body={"query": {"match_all": {}}},
            wait_for_completion=True,
        )