stac-fastapi-opensearch 4.0.0a1__py3-none-any.whl

@@ -0,0 +1,965 @@
+ """Database logic."""
+
+ import asyncio
+ import json
+ import logging
+ from base64 import urlsafe_b64decode, urlsafe_b64encode
+ from copy import deepcopy
+ from typing import Any, Dict, Iterable, List, Optional, Tuple, Type
+
+ import attr
+ from opensearchpy import exceptions, helpers
+ from opensearchpy.exceptions import TransportError
+ from opensearchpy.helpers.query import Q
+ from opensearchpy.helpers.search import Search
+ from starlette.requests import Request
+
+ from stac_fastapi.core import serializers
+ from stac_fastapi.core.base_database_logic import BaseDatabaseLogic
+ from stac_fastapi.core.database_logic import (
+     COLLECTIONS_INDEX,
+     DEFAULT_SORT,
+     ES_COLLECTIONS_MAPPINGS,
+     ES_ITEMS_MAPPINGS,
+     ES_ITEMS_SETTINGS,
+     ITEM_INDICES,
+     ITEMS_INDEX_PREFIX,
+     Geometry,
+     index_alias_by_collection_id,
+     index_by_collection_id,
+     indices,
+     mk_actions,
+     mk_item_id,
+ )
+ from stac_fastapi.core.extensions import filter
+ from stac_fastapi.core.utilities import MAX_LIMIT, bbox2polygon
+ from stac_fastapi.opensearch.config import (
+     AsyncOpensearchSettings as AsyncSearchSettings,
+ )
+ from stac_fastapi.opensearch.config import OpensearchSettings as SyncSearchSettings
+ from stac_fastapi.types.errors import ConflictError, NotFoundError
+ from stac_fastapi.types.stac import Collection, Item
+
+ logger = logging.getLogger(__name__)
+
+
+ async def create_index_templates() -> None:
+     """
+     Create index templates for the Collection and Item indices.
+
+     Returns:
+         None
+
+     """
+     client = AsyncSearchSettings().create_client
+     await client.indices.put_template(
+         name=f"template_{COLLECTIONS_INDEX}",
+         body={
+             "index_patterns": [f"{COLLECTIONS_INDEX}*"],
+             "mappings": ES_COLLECTIONS_MAPPINGS,
+         },
+     )
+     await client.indices.put_template(
+         name=f"template_{ITEMS_INDEX_PREFIX}",
+         body={
+             "index_patterns": [f"{ITEMS_INDEX_PREFIX}*"],
+             "settings": ES_ITEMS_SETTINGS,
+             "mappings": ES_ITEMS_MAPPINGS,
+         },
+     )
+     await client.close()
+
+
+ async def create_collection_index() -> None:
+     """
+     Create the index for a Collection. The settings of the index template will be used implicitly.
+
+     Returns:
+         None
+
+     """
+     client = AsyncSearchSettings().create_client
+
+     search_body: Dict[str, Any] = {
+         "aliases": {COLLECTIONS_INDEX: {}},
+     }
+
+     index = f"{COLLECTIONS_INDEX}-000001"
+
+     try:
+         await client.indices.create(index=index, body=search_body)
+     except TransportError as e:
+         if e.status_code == 400:
+             pass  # Ignore 400 status codes
+         else:
+             raise e
+
+     await client.close()
+
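These template and index helpers are intended to run once, before the first documents are written. A minimal sketch of wiring them into application startup (the `bootstrap` function is illustrative, not part of this module):

```python
# Illustrative startup wiring; the module only provides the two helpers,
# the surrounding bootstrap function is an assumption.
import asyncio

async def bootstrap() -> None:
    # Put the index templates in place first so that indices created
    # afterwards pick up their mappings and settings implicitly.
    await create_index_templates()
    await create_collection_index()

asyncio.run(bootstrap())
```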
+
+ async def create_item_index(collection_id: str):
+     """
+     Create the index for Items. The settings of the index template will be used implicitly.
+
+     Args:
+         collection_id (str): Collection identifier.
+
+     Returns:
+         None
+
+     """
+     client = AsyncSearchSettings().create_client
+     search_body: Dict[str, Any] = {
+         "aliases": {index_alias_by_collection_id(collection_id): {}},
+     }
+
+     try:
+         await client.indices.create(
+             index=f"{index_by_collection_id(collection_id)}-000001", body=search_body
+         )
+     except TransportError as e:
+         if e.status_code == 400:
+             pass  # Ignore 400 status codes
+         else:
+             raise e
+
+     await client.close()
+
+
+ async def delete_item_index(collection_id: str):
+     """Delete the index for items in a collection.
+
+     Args:
+         collection_id (str): The ID of the collection whose items index will be deleted.
+     """
+     client = AsyncSearchSettings().create_client
+
+     name = index_alias_by_collection_id(collection_id)
+     resolved = await client.indices.resolve_index(name=name)
+     if "aliases" in resolved and resolved["aliases"]:
+         [alias] = resolved["aliases"]
+         await client.indices.delete_alias(index=alias["indices"], name=alias["name"])
+         await client.indices.delete(index=alias["indices"])
+     else:
+         await client.indices.delete(index=name)
+     await client.close()
+
+
+ @attr.s
+ class DatabaseLogic(BaseDatabaseLogic):
+     """Database logic."""
+
+     client = AsyncSearchSettings().create_client
+     sync_client = SyncSearchSettings().create_client
+
+     item_serializer: Type[serializers.ItemSerializer] = attr.ib(
+         default=serializers.ItemSerializer
+     )
+     collection_serializer: Type[serializers.CollectionSerializer] = attr.ib(
+         default=serializers.CollectionSerializer
+     )
+
+     extensions: List[str] = attr.ib(default=attr.Factory(list))
+
+     aggregation_mapping: Dict[str, Dict[str, Any]] = {
+         "total_count": {"value_count": {"field": "id"}},
+         "collection_frequency": {"terms": {"field": "collection", "size": 100}},
+         "platform_frequency": {"terms": {"field": "properties.platform", "size": 100}},
+         "cloud_cover_frequency": {
+             "range": {
+                 "field": "properties.eo:cloud_cover",
+                 "ranges": [
+                     {"to": 5},
+                     {"from": 5, "to": 15},
+                     {"from": 15, "to": 40},
+                     {"from": 40},
+                 ],
+             }
+         },
+         "datetime_frequency": {
+             "date_histogram": {
+                 "field": "properties.datetime",
+                 "calendar_interval": "month",
+             }
+         },
+         "datetime_min": {"min": {"field": "properties.datetime"}},
+         "datetime_max": {"max": {"field": "properties.datetime"}},
+         "grid_code_frequency": {
+             "terms": {
+                 "field": "properties.grid:code",
+                 "missing": "none",
+                 "size": 10000,
+             }
+         },
+         "sun_elevation_frequency": {
+             "histogram": {"field": "properties.view:sun_elevation", "interval": 5}
+         },
+         "sun_azimuth_frequency": {
+             "histogram": {"field": "properties.view:sun_azimuth", "interval": 5}
+         },
+         "off_nadir_frequency": {
+             "histogram": {"field": "properties.view:off_nadir", "interval": 5}
+         },
+         "centroid_geohash_grid_frequency": {
+             "geohash_grid": {
+                 "field": "properties.proj:centroid",
+                 "precision": 1,
+             }
+         },
+         "centroid_geohex_grid_frequency": {
+             "geohex_grid": {
+                 "field": "properties.proj:centroid",
+                 "precision": 0,
+             }
+         },
+         "centroid_geotile_grid_frequency": {
+             "geotile_grid": {
+                 "field": "properties.proj:centroid",
+                 "precision": 0,
+             }
+         },
+         "geometry_geohash_grid_frequency": {
+             "geohash_grid": {
+                 "field": "geometry",
+                 "precision": 1,
+             }
+         },
+         "geometry_geotile_grid_frequency": {
+             "geotile_grid": {
+                 "field": "geometry",
+                 "precision": 0,
+             }
+         },
+     }
+
+     """CORE LOGIC"""
+
+     async def get_all_collections(
+         self, token: Optional[str], limit: int, request: Request
+     ) -> Tuple[List[Dict[str, Any]], Optional[str]]:
+         """
+         Retrieve a list of all collections from OpenSearch, supporting pagination.
+
+         Args:
+             token (Optional[str]): The pagination token.
+             limit (int): The number of results to return.
+             request (Request): The incoming request, used when serializing the collections.
+
+         Returns:
+             A tuple of (collections, next pagination token if any).
+         """
+         search_body = {
+             "sort": [{"id": {"order": "asc"}}],
+             "size": limit,
+         }
+
+         # Only add search_after to the query if token is not None and not empty
+         if token:
+             search_after = [token]
+             search_body["search_after"] = search_after
+
+         response = await self.client.search(
+             index=COLLECTIONS_INDEX,
+             body=search_body,
+         )
+
+         hits = response["hits"]["hits"]
+         collections = [
+             self.collection_serializer.db_to_stac(
+                 collection=hit["_source"], request=request, extensions=self.extensions
+             )
+             for hit in hits
+         ]
+
+         next_token = None
+         if len(hits) == limit:
+             # Ensure we have a valid sort value for next_token
+             next_token_values = hits[-1].get("sort")
+             if next_token_values:
+                 next_token = next_token_values[0]
+
+         return collections, next_token
+
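Because the sort is ascending on `id` and the returned token is the last hit's sort value, callers can walk the whole collection list page by page. A minimal sketch (`db` is a hypothetical `DatabaseLogic` instance and `request` the Starlette request from the enclosing handler):

```python
async def all_collections(db, request, page_size: int = 100):
    """Illustrative helper: yield every collection, following pagination tokens."""
    token = None
    while True:
        page, token = await db.get_all_collections(
            token=token, limit=page_size, request=request
        )
        for collection in page:
            yield collection
        if token is None:  # short page or missing sort value: last page reached
            break
```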
+     async def get_one_item(self, collection_id: str, item_id: str) -> Dict:
+         """Retrieve a single item from the database.
+
+         Args:
+             collection_id (str): The id of the Collection that the Item belongs to.
+             item_id (str): The id of the Item.
+
+         Returns:
+             item (Dict): A dictionary containing the source data for the Item.
+
+         Raises:
+             NotFoundError: If the specified Item does not exist in the Collection.
+
+         Notes:
+             The Item is retrieved from the OpenSearch database using the `client.get` method,
+             with the index for the Collection as the target index and the combined `mk_item_id` as the document id.
+         """
+         try:
+             item = await self.client.get(
+                 index=index_alias_by_collection_id(collection_id),
+                 id=mk_item_id(item_id, collection_id),
+             )
+         except exceptions.NotFoundError:
+             raise NotFoundError(
+                 f"Item {item_id} does not exist inside Collection {collection_id}"
+             )
+         return item["_source"]
+
+     @staticmethod
+     def make_search():
+         """Database logic to create a Search instance."""
+         return Search().sort(*DEFAULT_SORT)
+
+     @staticmethod
+     def apply_ids_filter(search: Search, item_ids: List[str]):
+         """Database logic to search a list of STAC item ids."""
+         return search.filter("terms", id=item_ids)
+
+     @staticmethod
+     def apply_collections_filter(search: Search, collection_ids: List[str]):
+         """Database logic to search a list of STAC collection ids."""
+         return search.filter("terms", collection=collection_ids)
+
+     @staticmethod
+     def apply_free_text_filter(search: Search, free_text_queries: Optional[List[str]]):
+         """Database logic to perform query for search endpoint."""
+         if free_text_queries is not None:
+             free_text_query_string = '" OR properties.\\*:"'.join(free_text_queries)
+             search = search.query(
+                 "query_string", query=f'properties.\\*:"{free_text_query_string}"'
+             )
+
+         return search
+
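The join plus the surrounding f-string produce a single `query_string` clause matched against all `properties.*` fields. Worked by hand for two queries:

```python
queries = ["sentinel", "cloud"]
joined = '" OR properties.\\*:"'.join(queries)
# The resulting query string is:
#   properties.\*:"sentinel" OR properties.\*:"cloud"
print(f'properties.\\*:"{joined}"')
```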
+     @staticmethod
+     def apply_datetime_filter(search: Search, datetime_search):
+         """Apply a filter to search based on datetime field.
+
+         Args:
+             search (Search): The search object to filter.
+             datetime_search (dict): The datetime filter criteria.
+
+         Returns:
+             Search: The filtered search object.
+         """
+         if "eq" in datetime_search:
+             search = search.filter(
+                 "term", **{"properties__datetime": datetime_search["eq"]}
+             )
+         else:
+             search = search.filter(
+                 "range", properties__datetime={"lte": datetime_search["lte"]}
+             )
+             search = search.filter(
+                 "range", properties__datetime={"gte": datetime_search["gte"]}
+             )
+         return search
+
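Note that when `eq` is absent the code reads both bounds unconditionally, so callers are expected to supply a closed interval. The two accepted shapes of `datetime_search`, with illustrative timestamps:

```python
# Exact timestamp match
search = DatabaseLogic.apply_datetime_filter(
    DatabaseLogic.make_search(), {"eq": "2023-06-01T00:00:00Z"}
)

# Closed interval: both "gte" and "lte" keys must be present
search = DatabaseLogic.apply_datetime_filter(
    DatabaseLogic.make_search(),
    {"gte": "2023-01-01T00:00:00Z", "lte": "2023-12-31T23:59:59Z"},
)
```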
+     @staticmethod
+     def apply_bbox_filter(search: Search, bbox: List):
+         """Filter search results based on bounding box.
+
+         Args:
+             search (Search): The search object to apply the filter to.
+             bbox (List): The bounding box coordinates, represented as a list of four values [minx, miny, maxx, maxy].
+
+         Returns:
+             search (Search): The search object with the bounding box filter applied.
+
+         Notes:
+             The bounding box is transformed into a polygon using the `bbox2polygon` function and
+             a geo_shape filter is added to the search object, set to intersect with the specified polygon.
+         """
+         return search.filter(
+             Q(
+                 {
+                     "geo_shape": {
+                         "geometry": {
+                             "shape": {
+                                 "type": "polygon",
+                                 "coordinates": bbox2polygon(*bbox),
+                             },
+                             "relation": "intersects",
+                         }
+                     }
+                 }
+             )
+         )
+
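For orientation, `bbox2polygon` (from stac_fastapi.core.utilities) expands the four box values into polygon coordinates for the geo_shape query. The ring shape sketched in the comment below is an assumption about its output, not something this module defines:

```python
search = DatabaseLogic.apply_bbox_filter(
    DatabaseLogic.make_search(), bbox=[10.0, 40.0, 12.0, 42.0]
)
# Assuming the usual closed ring, the geo_shape "coordinates" resemble:
# [[[10.0, 40.0], [12.0, 40.0], [12.0, 42.0], [10.0, 42.0], [10.0, 40.0]]]
```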
+     @staticmethod
+     def apply_intersects_filter(
+         search: Search,
+         intersects: Geometry,
+     ):
+         """Filter search results based on intersecting geometry.
+
+         Args:
+             search (Search): The search object to apply the filter to.
+             intersects (Geometry): The intersecting geometry, represented as a GeoJSON-like object.
+
+         Returns:
+             search (Search): The search object with the intersecting geometry filter applied.
+
+         Notes:
+             A geo_shape filter is added to the search object, set to intersect with the specified geometry.
+         """
+         return search.filter(
+             Q(
+                 {
+                     "geo_shape": {
+                         "geometry": {
+                             "shape": {
+                                 "type": intersects.type.lower(),
+                                 "coordinates": intersects.coordinates,
+                             },
+                             "relation": "intersects",
+                         }
+                     }
+                 }
+             )
+         )
+
+     @staticmethod
+     def apply_stacql_filter(search: Search, op: str, field: str, value: float):
+         """Filter search results based on a comparison between a field and a value.
+
+         Args:
+             search (Search): The search object to apply the filter to.
+             op (str): The comparison operator to use. Can be 'eq' (equal), 'gt' (greater than), 'gte' (greater than or equal),
+                 'lt' (less than), or 'lte' (less than or equal).
+             field (str): The field to perform the comparison on.
+             value (float): The value to compare the field against.
+
+         Returns:
+             search (Search): The search object with the specified filter applied.
+         """
+         if op != "eq":
+             key_filter = {field: {f"{op}": value}}
+             search = search.filter(Q("range", **key_filter))
+         else:
+             search = search.filter("term", **{field: value})
+
+         return search
+
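A non-`eq` operator lands in the `range` branch, keyed by the operator name; `eq` becomes a `term` query. Illustrative call (field and value are examples only):

```python
search = DatabaseLogic.make_search()
# Produces a query equivalent to:
#   {"range": {"properties.eo:cloud_cover": {"lte": 10}}}
search = DatabaseLogic.apply_stacql_filter(
    search, op="lte", field="properties.eo:cloud_cover", value=10
)
```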
+     @staticmethod
+     def apply_cql2_filter(search: Search, _filter: Optional[Dict[str, Any]]):
+         """
+         Apply a CQL2 filter to an OpenSearch Search object.
+
+         This method transforms a dictionary representing a CQL2 filter into an OpenSearch query
+         and applies it to the provided Search object. If the filter is None, the original Search
+         object is returned unmodified.
+
+         Args:
+             search (Search): The OpenSearch Search object to which the filter will be applied.
+             _filter (Optional[Dict[str, Any]]): The filter in dictionary form that needs to be applied
+                 to the search. The dictionary should follow the structure
+                 required by the `to_es` function which converts it
+                 to an OpenSearch query.
+
+         Returns:
+             Search: The modified Search object with the filter applied if a filter is provided,
+                 otherwise the original Search object.
+         """
+         if _filter is not None:
+             es_query = filter.to_es(_filter)
+             search = search.filter(es_query)
+
+         return search
+
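`_filter` is CQL2 in its JSON encoding, as handed over by the filter extension; exactly which property paths are accepted depends on `filter.to_es` and the deployment's queryables. An illustrative filter body:

```python
# Illustrative CQL2 JSON; the property names here are assumptions about
# the deployment's queryables, not something this module defines.
cql2 = {
    "op": "and",
    "args": [
        {"op": "=", "args": [{"property": "collection"}, "sentinel-2-l2a"]},
        {"op": "<=", "args": [{"property": "properties.eo:cloud_cover"}, 10]},
    ],
}
search = DatabaseLogic.apply_cql2_filter(DatabaseLogic.make_search(), cql2)
```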
+     @staticmethod
+     def populate_sort(sortby: List) -> Optional[Dict[str, Dict[str, str]]]:
+         """Database logic to sort search instance."""
+         if sortby:
+             return {s.field: {"order": s.direction} for s in sortby}
+         else:
+             return None
+
+     async def execute_search(
+         self,
+         search: Search,
+         limit: int,
+         token: Optional[str],
+         sort: Optional[Dict[str, Dict[str, str]]],
+         collection_ids: Optional[List[str]],
+         ignore_unavailable: bool = True,
+     ) -> Tuple[Iterable[Dict[str, Any]], Optional[int], Optional[str]]:
+         """Execute a search query with limit and other optional parameters.
+
+         Args:
+             search (Search): The search query to be executed.
+             limit (int): The maximum number of results to be returned.
+             token (Optional[str]): The token used to return the next set of results.
+             sort (Optional[Dict[str, Dict[str, str]]]): Specifies how the results should be sorted.
+             collection_ids (Optional[List[str]]): The collection ids to search.
+             ignore_unavailable (bool, optional): Whether to ignore unavailable collections. Defaults to True.
+
+         Returns:
+             Tuple[Iterable[Dict[str, Any]], Optional[int], Optional[str]]: A tuple containing:
+                 - An iterable of search results, where each result is a dictionary with keys and values representing the
+                   fields and values of each document.
+                 - The total number of results (if the count could be computed), or None if the count could not be
+                   computed.
+                 - The token to be used to retrieve the next set of results, or None if there are no more results.
+
+         Raises:
+             NotFoundError: If the collections specified in `collection_ids` do not exist.
+         """
+         search_body: Dict[str, Any] = {}
+         query = search.query.to_dict() if search.query else None
+         if query:
+             search_body["query"] = query
+
+         search_after = None
+
+         if token:
+             search_after = json.loads(urlsafe_b64decode(token).decode())
+         if search_after:
+             search_body["search_after"] = search_after
+
+         search_body["sort"] = sort if sort else DEFAULT_SORT
+
+         index_param = indices(collection_ids)
+
+         max_result_window = MAX_LIMIT
+
+         size_limit = min(limit + 1, max_result_window)
+
+         search_task = asyncio.create_task(
+             self.client.search(
+                 index=index_param,
+                 ignore_unavailable=ignore_unavailable,
+                 body=search_body,
+                 size=size_limit,
+             )
+         )
+
+         count_task = asyncio.create_task(
+             self.client.count(
+                 index=index_param,
+                 ignore_unavailable=ignore_unavailable,
+                 body=search.to_dict(count=True),
+             )
+         )
+
+         try:
+             es_response = await search_task
+         except exceptions.NotFoundError:
+             raise NotFoundError(f"Collections '{collection_ids}' do not exist")
+
+         hits = es_response["hits"]["hits"]
+         items = (hit["_source"] for hit in hits[:limit])
+
+         next_token = None
+         if len(hits) > limit and limit < max_result_window:
+             if hits and (sort_array := hits[limit - 1].get("sort")):
+                 next_token = urlsafe_b64encode(json.dumps(sort_array).encode()).decode()
+
+         matched = (
+             es_response["hits"]["total"]["value"]
+             if es_response["hits"]["total"]["relation"] == "eq"
+             else None
+         )
+         if count_task.done():
+             try:
+                 matched = count_task.result().get("count")
+             except Exception as e:
+                 logger.error(f"Count task failed: {e}")
+
+         return items, matched, next_token
+
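The continuation token is nothing more than the last consumed hit's `sort` array, JSON-serialized and URL-safe base64-encoded so it survives a query string. Round trip, worked with illustrative sort values:

```python
import json
from base64 import urlsafe_b64decode, urlsafe_b64encode

sort_array = ["2023-06-01T00:00:00Z", "item-42_my-collection"]  # illustrative values
token = urlsafe_b64encode(json.dumps(sort_array).encode()).decode()
assert json.loads(urlsafe_b64decode(token).decode()) == sort_array
```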
+     """ AGGREGATE LOGIC """
+
+     async def aggregate(
+         self,
+         collection_ids: Optional[List[str]],
+         aggregations: List[str],
+         search: Search,
+         centroid_geohash_grid_precision: int,
+         centroid_geohex_grid_precision: int,
+         centroid_geotile_grid_precision: int,
+         geometry_geohash_grid_precision: int,
+         geometry_geotile_grid_precision: int,
+         datetime_frequency_interval: str,
+         ignore_unavailable: Optional[bool] = True,
+     ):
+         """Return aggregations of STAC Items."""
+         search_body: Dict[str, Any] = {}
+         query = search.query.to_dict() if search.query else None
+         if query:
+             search_body["query"] = query
+
+         def _fill_aggregation_parameters(name: str, agg: dict) -> dict:
+             [key] = agg.keys()
+             agg_precision = {
+                 "centroid_geohash_grid_frequency": centroid_geohash_grid_precision,
+                 "centroid_geohex_grid_frequency": centroid_geohex_grid_precision,
+                 "centroid_geotile_grid_frequency": centroid_geotile_grid_precision,
+                 "geometry_geohash_grid_frequency": geometry_geohash_grid_precision,
+                 "geometry_geotile_grid_frequency": geometry_geotile_grid_precision,
+             }
+             if name in agg_precision:
+                 agg[key]["precision"] = agg_precision[name]
+
+             if key == "date_histogram":
+                 agg[key]["calendar_interval"] = datetime_frequency_interval
+
+             return agg
+
+         # Include all requested aggregations; names not present in
+         # aggregation_mapping are silently ignored.
+         search_body["aggregations"] = {
+             k: _fill_aggregation_parameters(k, deepcopy(v))
+             for k, v in self.aggregation_mapping.items()
+             if k in aggregations
+         }
+
+         index_param = indices(collection_ids)
+         search_task = asyncio.create_task(
+             self.client.search(
+                 index=index_param,
+                 ignore_unavailable=ignore_unavailable,
+                 body=search_body,
+             )
+         )
+
+         try:
+             db_response = await search_task
+         except exceptions.NotFoundError:
+             raise NotFoundError(f"Collections '{collection_ids}' do not exist")
+
+         return db_response
+
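Only names present in `aggregation_mapping` are honored; anything else in `aggregations` is dropped silently. A minimal call sketch (`db` is a hypothetical `DatabaseLogic` instance; the collection id and precisions are illustrative):

```python
async def sample_aggregations(db):
    response = await db.aggregate(
        collection_ids=["sentinel-2-l2a"],  # hypothetical collection id
        aggregations=["total_count", "datetime_frequency", "cloud_cover_frequency"],
        search=DatabaseLogic.make_search(),
        centroid_geohash_grid_precision=1,
        centroid_geohex_grid_precision=0,
        centroid_geotile_grid_precision=0,
        geometry_geohash_grid_precision=1,
        geometry_geotile_grid_precision=0,
        datetime_frequency_interval="month",
    )
    return response["aggregations"]["datetime_frequency"]["buckets"]
```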
+     """ TRANSACTION LOGIC """
+
+     async def check_collection_exists(self, collection_id: str):
+         """Database logic to check if a collection exists."""
+         if not await self.client.exists(index=COLLECTIONS_INDEX, id=collection_id):
+             raise NotFoundError(f"Collection {collection_id} does not exist")
+
+     async def prep_create_item(
+         self, item: Item, base_url: str, exist_ok: bool = False
+     ) -> Item:
+         """
+         Prepare an item for insertion into the database.
+
+         Args:
+             item (Item): The item to be prepared for insertion.
+             base_url (str): The base URL used to create the item's self URL.
+             exist_ok (bool): Indicates whether the item can exist already.
+
+         Returns:
+             Item: The prepared item.
+
+         Raises:
+             ConflictError: If the item already exists in the database.
+
+         """
+         await self.check_collection_exists(collection_id=item["collection"])
+
+         if not exist_ok and await self.client.exists(
+             index=index_alias_by_collection_id(item["collection"]),
+             id=mk_item_id(item["id"], item["collection"]),
+         ):
+             raise ConflictError(
+                 f"Item {item['id']} in collection {item['collection']} already exists"
+             )
+
+         return self.item_serializer.stac_to_db(item, base_url)
+
+     def sync_prep_create_item(
+         self, item: Item, base_url: str, exist_ok: bool = False
+     ) -> Item:
+         """
+         Prepare an item for insertion into the database.
+
+         This method performs pre-insertion preparation on the given `item`,
+         such as checking if the collection the item belongs to exists,
+         and optionally verifying that an item with the same ID does not already exist in the database.
+
+         Args:
+             item (Item): The item to be inserted into the database.
+             base_url (str): The base URL used for constructing URLs for the item.
+             exist_ok (bool): Indicates whether the item can exist already.
+
+         Returns:
+             Item: The item after preparation is done.
+
+         Raises:
+             NotFoundError: If the collection that the item belongs to does not exist in the database.
+             ConflictError: If an item with the same ID already exists in the collection.
+         """
+         item_id = item["id"]
+         collection_id = item["collection"]
+         if not self.sync_client.exists(index=COLLECTIONS_INDEX, id=collection_id):
+             raise NotFoundError(f"Collection {collection_id} does not exist")
+
+         if not exist_ok and self.sync_client.exists(
+             index=index_alias_by_collection_id(collection_id),
+             id=mk_item_id(item_id, collection_id),
+         ):
+             raise ConflictError(
+                 f"Item {item_id} in collection {collection_id} already exists"
+             )
+
+         return self.item_serializer.stac_to_db(item, base_url)
+
+     async def create_item(self, item: Item, refresh: bool = False):
+         """Database logic for creating one item.
+
+         Args:
+             item (Item): The item to be created.
+             refresh (bool, optional): Refresh the index after performing the operation. Defaults to False.
+
+         Raises:
+             ConflictError: If the item already exists in the database.
+
+         Returns:
+             None
+         """
+         # TODO: check if collection exists, but cache
+         item_id = item["id"]
+         collection_id = item["collection"]
+         es_resp = await self.client.index(
+             index=index_alias_by_collection_id(collection_id),
+             id=mk_item_id(item_id, collection_id),
+             body=item,
+             refresh=refresh,
+         )
+
+         if (meta := es_resp.get("meta")) and meta.get("status") == 409:
+             raise ConflictError(
+                 f"Item {item_id} in collection {collection_id} already exists"
+             )
+
+     async def delete_item(
+         self, item_id: str, collection_id: str, refresh: bool = False
+     ):
+         """Delete a single item from the database.
+
+         Args:
+             item_id (str): The id of the Item to be deleted.
+             collection_id (str): The id of the Collection that the Item belongs to.
+             refresh (bool, optional): Whether to refresh the index after the deletion. Default is False.
+
+         Raises:
+             NotFoundError: If the Item does not exist in the database.
+         """
+         try:
+             await self.client.delete(
+                 index=index_alias_by_collection_id(collection_id),
+                 id=mk_item_id(item_id, collection_id),
+                 refresh=refresh,
+             )
+         except exceptions.NotFoundError:
+             raise NotFoundError(
+                 f"Item {item_id} in collection {collection_id} not found"
+             )
+
+     async def get_items_mapping(self, collection_id: str) -> Dict[str, Any]:
+         """Get the mapping for the specified collection's items index.
+
+         Args:
+             collection_id (str): The ID of the collection to get items mapping for.
+
+         Returns:
+             Dict[str, Any]: The mapping information.
+         """
+         index_name = index_alias_by_collection_id(collection_id)
+         try:
+             mapping = await self.client.indices.get_mapping(
+                 index=index_name, params={"allow_no_indices": "false"}
+             )
+             return mapping
+         except exceptions.NotFoundError:
+             raise NotFoundError(f"Mapping for index {index_name} not found")
+
+     async def create_collection(self, collection: Collection, refresh: bool = False):
+         """Create a single collection in the database.
+
+         Args:
+             collection (Collection): The Collection object to be created.
+             refresh (bool, optional): Whether to refresh the index after the creation. Default is False.
+
+         Raises:
+             ConflictError: If a Collection with the same id already exists in the database.
+
+         Notes:
+             A new index is created for the items in the Collection using the `create_item_index` function.
+         """
+         collection_id = collection["id"]
+
+         if await self.client.exists(index=COLLECTIONS_INDEX, id=collection_id):
+             raise ConflictError(f"Collection {collection_id} already exists")
+
+         await self.client.index(
+             index=COLLECTIONS_INDEX,
+             id=collection_id,
+             body=collection,
+             refresh=refresh,
+         )
+
+         await create_item_index(collection_id)
+
+     async def find_collection(self, collection_id: str) -> Collection:
+         """Find and return a collection from the database.
+
+         Args:
+             self: The instance of the object calling this function.
+             collection_id (str): The ID of the collection to be found.
+
+         Returns:
+             Collection: The found collection, represented as a `Collection` object.
+
+         Raises:
+             NotFoundError: If the collection with the given `collection_id` is not found in the database.
+
+         Notes:
+             This function searches for a collection in the database using the specified `collection_id` and returns the found
+             collection as a `Collection` object. If the collection is not found, a `NotFoundError` is raised.
+         """
+         try:
+             collection = await self.client.get(
+                 index=COLLECTIONS_INDEX, id=collection_id
+             )
+         except exceptions.NotFoundError:
+             raise NotFoundError(f"Collection {collection_id} not found")
+
+         return collection["_source"]
+
+     async def update_collection(
+         self, collection_id: str, collection: Collection, refresh: bool = False
+     ):
+         """Update a collection in the database.
+
+         Args:
+             self: The instance of the object calling this function.
+             collection_id (str): The ID of the collection to be updated.
+             collection (Collection): The Collection object to be used for the update.
+
+         Raises:
+             NotFoundError: If the collection with the given `collection_id` is not
+                 found in the database.
+
+         Notes:
+             This function updates the collection in the database using the specified
+             `collection_id` and with the collection specified in the `Collection` object.
+             If the collection is not found, a `NotFoundError` is raised.
+         """
+         await self.find_collection(collection_id=collection_id)
+
+         if collection_id != collection["id"]:
+             await self.create_collection(collection, refresh=refresh)
+
+             await self.client.reindex(
+                 body={
+                     "dest": {"index": f"{ITEMS_INDEX_PREFIX}{collection['id']}"},
+                     "source": {"index": f"{ITEMS_INDEX_PREFIX}{collection_id}"},
+                     "script": {
+                         "lang": "painless",
+                         "source": f"""ctx._id = ctx._id.replace('{collection_id}', '{collection["id"]}'); ctx._source.collection = '{collection["id"]}' ;""",  # noqa: E702
+                     },
+                 },
+                 wait_for_completion=True,
+                 refresh=refresh,
+             )
+
+             await self.delete_collection(collection_id)
+
+         else:
+             await self.client.index(
+                 index=COLLECTIONS_INDEX,
+                 id=collection_id,
+                 body=collection,
+                 refresh=refresh,
+             )
+
+     async def delete_collection(self, collection_id: str, refresh: bool = False):
+         """Delete a collection from the database.
+
+         Parameters:
+             self: The instance of the object calling this function.
+             collection_id (str): The ID of the collection to be deleted.
+             refresh (bool): Whether to refresh the index after the deletion (default: False).
+
+         Raises:
+             NotFoundError: If the collection with the given `collection_id` is not found in the database.
+
+         Notes:
+             This function first verifies that the collection with the specified `collection_id` exists in the database, and then
+             deletes the collection. If `refresh` is set to True, the index is refreshed after the deletion. Additionally, this
+             function also calls `delete_item_index` to delete the index for the items in the collection.
+         """
+         await self.find_collection(collection_id=collection_id)
+         await self.client.delete(
+             index=COLLECTIONS_INDEX, id=collection_id, refresh=refresh
+         )
+         await delete_item_index(collection_id)
+
+     async def bulk_async(
+         self, collection_id: str, processed_items: List[Item], refresh: bool = False
+     ) -> None:
+         """Perform a bulk insert of items into the database asynchronously.
+
+         Args:
+             self: The instance of the object calling this function.
+             collection_id (str): The ID of the collection to which the items belong.
+             processed_items (List[Item]): A list of `Item` objects to be inserted into the database.
+             refresh (bool): Whether to refresh the index after the bulk insert (default: False).
+
+         Notes:
+             This function performs a bulk insert of `processed_items` into the database using the specified `collection_id`. The
+             insert is performed asynchronously via the `async_bulk` helper. The `mk_actions` function is called to generate a
+             list of actions for the bulk insert. If `refresh` is set to True, the index is refreshed after the bulk insert.
+             The function does not return any value.
+         """
+         await helpers.async_bulk(
+             self.client,
+             mk_actions(collection_id, processed_items),
+             refresh=refresh,
+             raise_on_error=False,
+         )
+
+     def bulk_sync(
+         self, collection_id: str, processed_items: List[Item], refresh: bool = False
+     ) -> None:
+         """Perform a bulk insert of items into the database synchronously.
+
+         Args:
+             self: The instance of the object calling this function.
+             collection_id (str): The ID of the collection to which the items belong.
+             processed_items (List[Item]): A list of `Item` objects to be inserted into the database.
+             refresh (bool): Whether to refresh the index after the bulk insert (default: False).
+
+         Notes:
+             This function performs a bulk insert of `processed_items` into the database using the specified `collection_id`. The
+             insert is performed synchronously and blocking, meaning that the function does not return until the insert has
+             completed. The `mk_actions` function is called to generate a list of actions for the bulk insert. If `refresh` is set to
+             True, the index is refreshed after the bulk insert. The function does not return any value.
+         """
+         helpers.bulk(
+             self.sync_client,
+             mk_actions(collection_id, processed_items),
+             refresh=refresh,
+             raise_on_error=False,
+         )
+
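A typical ingest pairs the prep step with the bulk call, so existence and conflict checks happen before the batch is written. Minimal sketch (`db`, `stac_items`, and `base_url` are assumed to come from the caller; the collection id is hypothetical):

```python
def ingest(db, stac_items, base_url):
    processed = [
        db.sync_prep_create_item(item, base_url=base_url)  # raises ConflictError on duplicates
        for item in stac_items
    ]
    db.bulk_sync("my-collection", processed, refresh=True)  # hypothetical collection id
```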
+     # DANGER
+     async def delete_items(self) -> None:
+         """Danger: this is only for tests."""
+         await self.client.delete_by_query(
+             index=ITEM_INDICES,
+             body={"query": {"match_all": {}}},
+             wait_for_completion=True,
+         )
+
+     # DANGER
+     async def delete_collections(self) -> None:
+         """Danger: this is only for tests."""
+         await self.client.delete_by_query(
+             index=COLLECTIONS_INDEX,
+             body={"query": {"match_all": {}}},
+             wait_for_completion=True,
+         )