stac-fastapi-elasticsearch 4.1.0__py3-none-any.whl → 5.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,42 +1,51 @@
 """Database logic."""
 
 import asyncio
-import json
 import logging
 from base64 import urlsafe_b64decode, urlsafe_b64encode
 from copy import deepcopy
-from typing import Any, Dict, Iterable, List, Optional, Tuple, Type
+from typing import Any, Dict, Iterable, List, Optional, Tuple, Type, Union
 
 import attr
 import elasticsearch.helpers as helpers
+import orjson
 from elasticsearch.dsl import Q, Search
 from elasticsearch.exceptions import NotFoundError as ESNotFoundError
 from starlette.requests import Request
 
 from stac_fastapi.core.base_database_logic import BaseDatabaseLogic
-from stac_fastapi.core.database_logic import (
-    COLLECTIONS_INDEX,
-    DEFAULT_SORT,
-    ES_COLLECTIONS_MAPPINGS,
-    ES_ITEMS_MAPPINGS,
-    ES_ITEMS_SETTINGS,
-    ITEM_INDICES,
-    ITEMS_INDEX_PREFIX,
-    Geometry,
+from stac_fastapi.core.serializers import CollectionSerializer, ItemSerializer
+from stac_fastapi.core.utilities import MAX_LIMIT, bbox2polygon
+from stac_fastapi.elasticsearch.config import AsyncElasticsearchSettings
+from stac_fastapi.elasticsearch.config import (
+    ElasticsearchSettings as SyncElasticsearchSettings,
+)
+from stac_fastapi.sfeos_helpers import filter
+from stac_fastapi.sfeos_helpers.database import (
+    apply_free_text_filter_shared,
+    apply_intersects_filter_shared,
+    create_index_templates_shared,
+    delete_item_index_shared,
+    get_queryables_mapping_shared,
     index_alias_by_collection_id,
    index_by_collection_id,
    indices,
    mk_actions,
    mk_item_id,
+    populate_sort_shared,
+    return_date,
+    validate_refresh,
 )
-from stac_fastapi.core.extensions import filter
-from stac_fastapi.core.serializers import CollectionSerializer, ItemSerializer
-from stac_fastapi.core.utilities import MAX_LIMIT, bbox2polygon
-from stac_fastapi.elasticsearch.config import AsyncElasticsearchSettings
-from stac_fastapi.elasticsearch.config import (
-    ElasticsearchSettings as SyncElasticsearchSettings,
+from stac_fastapi.sfeos_helpers.mappings import (
+    AGGREGATION_MAPPING,
+    COLLECTIONS_INDEX,
+    DEFAULT_SORT,
+    ITEM_INDICES,
+    ITEMS_INDEX_PREFIX,
+    Geometry,
 )
 from stac_fastapi.types.errors import ConflictError, NotFoundError
+from stac_fastapi.types.rfc3339 import DateTimeType
 from stac_fastapi.types.stac import Collection, Item
 
 logger = logging.getLogger(__name__)
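Most of the symbols that 4.1.0 imported from `stac_fastapi.core.database_logic` and `stac_fastapi.core.extensions` now come from the new `stac_fastapi.sfeos_helpers` package. A minimal migration sketch for downstream code that imported those names directly, assuming only the import paths changed (the names shown are the ones visible in this diff):

```python
# 4.1.0
# from stac_fastapi.core.database_logic import COLLECTIONS_INDEX, ITEMS_INDEX_PREFIX
# from stac_fastapi.core.extensions import filter

# 5.0.0 -- constants move to sfeos_helpers.mappings, the CQL2 translator to sfeos_helpers
from stac_fastapi.sfeos_helpers import filter
from stac_fastapi.sfeos_helpers.mappings import COLLECTIONS_INDEX, ITEMS_INDEX_PREFIX
```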
@@ -50,22 +59,7 @@ async def create_index_templates() -> None:
         None
 
     """
-    client = AsyncElasticsearchSettings().create_client
-    await client.indices.put_index_template(
-        name=f"template_{COLLECTIONS_INDEX}",
-        body={
-            "index_patterns": [f"{COLLECTIONS_INDEX}*"],
-            "template": {"mappings": ES_COLLECTIONS_MAPPINGS},
-        },
-    )
-    await client.indices.put_index_template(
-        name=f"template_{ITEMS_INDEX_PREFIX}",
-        body={
-            "index_patterns": [f"{ITEMS_INDEX_PREFIX}*"],
-            "template": {"settings": ES_ITEMS_SETTINGS, "mappings": ES_ITEMS_MAPPINGS},
-        },
-    )
-    await client.close()
+    await create_index_templates_shared(settings=AsyncElasticsearchSettings())
 
 
 async def create_collection_index() -> None:
@@ -110,18 +104,13 @@ async def delete_item_index(collection_id: str):
 
     Args:
         collection_id (str): The ID of the collection whose items index will be deleted.
-    """
-    client = AsyncElasticsearchSettings().create_client
 
-    name = index_alias_by_collection_id(collection_id)
-    resolved = await client.indices.resolve_index(name=name)
-    if "aliases" in resolved and resolved["aliases"]:
-        [alias] = resolved["aliases"]
-        await client.indices.delete_alias(index=alias["indices"], name=alias["name"])
-        await client.indices.delete(index=alias["indices"])
-    else:
-        await client.indices.delete(index=name)
-    await client.close()
+    Notes:
+        This function delegates to the shared implementation in delete_item_index_shared.
+    """
+    await delete_item_index_shared(
+        settings=AsyncElasticsearchSettings(), collection_id=collection_id
+    )
 
 
 @attr.s
@@ -150,76 +139,7 @@ class DatabaseLogic(BaseDatabaseLogic):
 
     extensions: List[str] = attr.ib(default=attr.Factory(list))
 
-    aggregation_mapping: Dict[str, Dict[str, Any]] = {
-        "total_count": {"value_count": {"field": "id"}},
-        "collection_frequency": {"terms": {"field": "collection", "size": 100}},
-        "platform_frequency": {"terms": {"field": "properties.platform", "size": 100}},
-        "cloud_cover_frequency": {
-            "range": {
-                "field": "properties.eo:cloud_cover",
-                "ranges": [
-                    {"to": 5},
-                    {"from": 5, "to": 15},
-                    {"from": 15, "to": 40},
-                    {"from": 40},
-                ],
-            }
-        },
-        "datetime_frequency": {
-            "date_histogram": {
-                "field": "properties.datetime",
-                "calendar_interval": "month",
-            }
-        },
-        "datetime_min": {"min": {"field": "properties.datetime"}},
-        "datetime_max": {"max": {"field": "properties.datetime"}},
-        "grid_code_frequency": {
-            "terms": {
-                "field": "properties.grid:code",
-                "missing": "none",
-                "size": 10000,
-            }
-        },
-        "sun_elevation_frequency": {
-            "histogram": {"field": "properties.view:sun_elevation", "interval": 5}
-        },
-        "sun_azimuth_frequency": {
-            "histogram": {"field": "properties.view:sun_azimuth", "interval": 5}
-        },
-        "off_nadir_frequency": {
-            "histogram": {"field": "properties.view:off_nadir", "interval": 5}
-        },
-        "centroid_geohash_grid_frequency": {
-            "geohash_grid": {
-                "field": "properties.proj:centroid",
-                "precision": 1,
-            }
-        },
-        "centroid_geohex_grid_frequency": {
-            "geohex_grid": {
-                "field": "properties.proj:centroid",
-                "precision": 0,
-            }
-        },
-        "centroid_geotile_grid_frequency": {
-            "geotile_grid": {
-                "field": "properties.proj:centroid",
-                "precision": 0,
-            }
-        },
-        "geometry_geohash_grid_frequency": {
-            "geohash_grid": {
-                "field": "geometry",
-                "precision": 1,
-            }
-        },
-        "geometry_geotile_grid_frequency": {
-            "geotile_grid": {
-                "field": "geometry",
-                "precision": 0,
-            }
-        },
-    }
+    aggregation_mapping: Dict[str, Dict[str, Any]] = AGGREGATION_MAPPING
 
     """CORE LOGIC"""
 
@@ -290,6 +210,23 @@ class DatabaseLogic(BaseDatabaseLogic):
         )
         return item["_source"]
 
+    async def get_queryables_mapping(self, collection_id: str = "*") -> dict:
+        """Retrieve mapping of Queryables for search.
+
+        Args:
+            collection_id (str, optional): The id of the Collection the Queryables
+                belongs to. Defaults to "*".
+
+        Returns:
+            dict: A dictionary containing the Queryables mappings.
+        """
+        mappings = await self.client.indices.get_mapping(
+            index=f"{ITEMS_INDEX_PREFIX}{collection_id}",
+        )
+        return await get_queryables_mapping_shared(
+            collection_id=collection_id, mappings=mappings
+        )
+
     @staticmethod
     def make_search():
         """Database logic to create a Search instance."""
@@ -306,120 +243,99 @@ class DatabaseLogic(BaseDatabaseLogic):
         return search.filter("terms", collection=collection_ids)
 
     @staticmethod
-    def apply_datetime_filter(search: Search, datetime_search: dict):
+    def apply_datetime_filter(
+        search: Search, interval: Optional[Union[DateTimeType, str]]
+    ) -> Search:
         """Apply a filter to search on datetime, start_datetime, and end_datetime fields.
 
         Args:
-            search (Search): The search object to filter.
-            datetime_search (dict): The datetime filter criteria.
+            search: The search object to filter.
+            interval: Optional datetime interval to filter by. Can be:
+                - A single datetime string (e.g., "2023-01-01T12:00:00")
+                - A datetime range string (e.g., "2023-01-01/2023-12-31")
+                - A datetime object
+                - A tuple of (start_datetime, end_datetime)
 
         Returns:
-            Search: The filtered search object.
+            The filtered search object.
         """
+        if not interval:
+            return search
+
         should = []
+        try:
+            datetime_search = return_date(interval)
+        except (ValueError, TypeError) as e:
+            # Handle invalid interval formats if return_date fails
+            logger.error(f"Invalid interval format: {interval}, error: {e}")
+            return search
 
-        # If the request is a single datetime return
-        # items with datetimes equal to the requested datetime OR
-        # the requested datetime is between their start and end datetimes
         if "eq" in datetime_search:
-            should.extend(
-                [
-                    Q(
-                        "bool",
-                        filter=[
-                            Q(
-                                "term",
-                                properties__datetime=datetime_search["eq"],
-                            ),
-                        ],
-                    ),
-                    Q(
-                        "bool",
-                        filter=[
-                            Q(
-                                "range",
-                                properties__start_datetime={
-                                    "lte": datetime_search["eq"],
-                                },
-                            ),
-                            Q(
-                                "range",
-                                properties__end_datetime={
-                                    "gte": datetime_search["eq"],
-                                },
-                            ),
-                        ],
-                    ),
-                ]
-            )
-
-        # If the request is a date range return
-        # items with datetimes within the requested date range OR
-        # their startdatetime ithin the requested date range OR
-        # their enddatetime ithin the requested date range OR
-        # the requested daterange within their start and end datetimes
+            # For exact matches, include:
+            # 1. Items with matching exact datetime
+            # 2. Items with datetime:null where the time falls within their range
+            should = [
+                Q(
+                    "bool",
+                    filter=[
+                        Q("exists", field="properties.datetime"),
+                        Q("term", **{"properties__datetime": datetime_search["eq"]}),
+                    ],
+                ),
+                Q(
+                    "bool",
+                    must_not=[Q("exists", field="properties.datetime")],
+                    filter=[
+                        Q("exists", field="properties.start_datetime"),
+                        Q("exists", field="properties.end_datetime"),
+                        Q(
+                            "range",
+                            properties__start_datetime={"lte": datetime_search["eq"]},
+                        ),
+                        Q(
+                            "range",
+                            properties__end_datetime={"gte": datetime_search["eq"]},
+                        ),
+                    ],
+                ),
+            ]
         else:
-            should.extend(
-                [
-                    Q(
-                        "bool",
-                        filter=[
-                            Q(
-                                "range",
-                                properties__datetime={
-                                    "gte": datetime_search["gte"],
-                                    "lte": datetime_search["lte"],
-                                },
-                            ),
-                        ],
-                    ),
-                    Q(
-                        "bool",
-                        filter=[
-                            Q(
-                                "range",
-                                properties__start_datetime={
-                                    "gte": datetime_search["gte"],
-                                    "lte": datetime_search["lte"],
-                                },
-                            ),
-                        ],
-                    ),
-                    Q(
-                        "bool",
-                        filter=[
-                            Q(
-                                "range",
-                                properties__end_datetime={
-                                    "gte": datetime_search["gte"],
-                                    "lte": datetime_search["lte"],
-                                },
-                            ),
-                        ],
-                    ),
-                    Q(
-                        "bool",
-                        filter=[
-                            Q(
-                                "range",
-                                properties__start_datetime={
-                                    "lte": datetime_search["gte"]
-                                },
-                            ),
-                            Q(
-                                "range",
-                                properties__end_datetime={
-                                    "gte": datetime_search["lte"]
-                                },
-                            ),
-                        ],
-                    ),
-                ]
-            )
-
-        search = search.query(Q("bool", filter=[Q("bool", should=should)]))
-
-        return search
+            # For date ranges, include:
+            # 1. Items with datetime in the range
+            # 2. Items with datetime:null that overlap the search range
+            should = [
+                Q(
+                    "bool",
+                    filter=[
+                        Q("exists", field="properties.datetime"),
+                        Q(
+                            "range",
+                            properties__datetime={
+                                "gte": datetime_search["gte"],
+                                "lte": datetime_search["lte"],
+                            },
+                        ),
+                    ],
+                ),
+                Q(
+                    "bool",
+                    must_not=[Q("exists", field="properties.datetime")],
+                    filter=[
+                        Q("exists", field="properties.start_datetime"),
+                        Q("exists", field="properties.end_datetime"),
+                        Q(
+                            "range",
+                            properties__start_datetime={"lte": datetime_search["lte"]},
+                        ),
+                        Q(
+                            "range",
+                            properties__end_datetime={"gte": datetime_search["gte"]},
+                        ),
+                    ],
+                ),
+            ]
+
+        return search.query(Q("bool", should=should, minimum_should_match=1))
 
     @staticmethod
     def apply_bbox_filter(search: Search, bbox: List):
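The rewritten filter normalizes the interval with `return_date`, then builds a two-clause `should`: items whose `properties.datetime` matches the request, or items without one whose `start_datetime`/`end_datetime` bracket the requested time, with `minimum_should_match=1`. For reference, the range branch above serializes to approximately the following Elasticsearch query (placeholder dates):

```python
datetime_range_query = {
    "bool": {
        "should": [
            {   # items with a concrete properties.datetime inside the range
                "bool": {
                    "filter": [
                        {"exists": {"field": "properties.datetime"}},
                        {"range": {"properties.datetime": {
                            "gte": "2023-01-01T00:00:00Z", "lte": "2023-12-31T23:59:59Z"}}},
                    ]
                }
            },
            {   # items with datetime:null whose start/end window overlaps the range
                "bool": {
                    "must_not": [{"exists": {"field": "properties.datetime"}}],
                    "filter": [
                        {"exists": {"field": "properties.start_datetime"}},
                        {"exists": {"field": "properties.end_datetime"}},
                        {"range": {"properties.start_datetime": {"lte": "2023-12-31T23:59:59Z"}}},
                        {"range": {"properties.end_datetime": {"gte": "2023-01-01T00:00:00Z"}}},
                    ]
                }
            },
        ],
        "minimum_should_match": 1,
    }
}
```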
@@ -469,21 +385,8 @@ class DatabaseLogic(BaseDatabaseLogic):
         Notes:
             A geo_shape filter is added to the search object, set to intersect with the specified geometry.
         """
-        return search.filter(
-            Q(
-                {
-                    "geo_shape": {
-                        "geometry": {
-                            "shape": {
-                                "type": intersects.type.lower(),
-                                "coordinates": intersects.coordinates,
-                            },
-                            "relation": "intersects",
-                        }
-                    }
-                }
-            )
-        )
+        filter = apply_intersects_filter_shared(intersects=intersects)
+        return search.filter(Q(filter))
 
     @staticmethod
     def apply_stacql_filter(search: Search, op: str, field: str, value: float):
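The inline `geo_shape` construction is gone; `apply_intersects_filter_shared` is expected to produce the same clause, which the method then wraps in `Q(...)`. Based on the removed code, the filter passed to `search.filter` looks roughly like this (placeholder triangle geometry):

```python
geo_shape_filter = {
    "geo_shape": {
        "geometry": {
            "shape": {
                "type": "polygon",
                "coordinates": [[[0.0, 0.0], [1.0, 0.0], [1.0, 1.0], [0.0, 0.0]]],
            },
            "relation": "intersects",
        }
    }
}
```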
@@ -509,17 +412,25 @@ class DatabaseLogic(BaseDatabaseLogic):
 
     @staticmethod
     def apply_free_text_filter(search: Search, free_text_queries: Optional[List[str]]):
-        """Database logic to perform query for search endpoint."""
-        if free_text_queries is not None:
-            free_text_query_string = '" OR properties.\\*:"'.join(free_text_queries)
-            search = search.query(
-                "query_string", query=f'properties.\\*:"{free_text_query_string}"'
-            )
+        """Create a free text query for Elasticsearch queries.
 
-        return search
+        This method delegates to the shared implementation in apply_free_text_filter_shared.
 
-    @staticmethod
-    def apply_cql2_filter(search: Search, _filter: Optional[Dict[str, Any]]):
+        Args:
+            search (Search): The search object to apply the query to.
+            free_text_queries (Optional[List[str]]): A list of text strings to search for in the properties.
+
+        Returns:
+            Search: The search object with the free text query applied, or the original search
+                object if no free_text_queries were provided.
+        """
+        return apply_free_text_filter_shared(
+            search=search, free_text_queries=free_text_queries
+        )
+
+    async def apply_cql2_filter(
+        self, search: Search, _filter: Optional[Dict[str, Any]]
+    ):
         """
         Apply a CQL2 filter to an Elasticsearch Search object.
 
@@ -539,18 +450,25 @@ class DatabaseLogic(BaseDatabaseLogic):
            otherwise the original Search object.
         """
         if _filter is not None:
-            es_query = filter.to_es(_filter)
+            es_query = filter.to_es(await self.get_queryables_mapping(), _filter)
             search = search.query(es_query)
 
         return search
 
     @staticmethod
     def populate_sort(sortby: List) -> Optional[Dict[str, Dict[str, str]]]:
-        """Database logic to sort search instance."""
-        if sortby:
-            return {s.field: {"order": s.direction} for s in sortby}
-        else:
-            return None
+        """Create a sort configuration for Elasticsearch queries.
+
+        This method delegates to the shared implementation in populate_sort_shared.
+
+        Args:
+            sortby (List): A list of sort specifications, each containing a field and direction.
+
+        Returns:
+            Optional[Dict[str, Dict[str, str]]]: A dictionary mapping field names to sort direction
+                configurations, or None if no sort was specified.
+        """
+        return populate_sort_shared(sortby=sortby)
 
     async def execute_search(
         self,
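`apply_cql2_filter` is no longer a `@staticmethod`: it is now a coroutine that awaits `get_queryables_mapping()` so `filter.to_es` can resolve queryable names to index fields, and callers therefore have to await it. A hedged calling sketch, where the CQL2-JSON body is a generic equality filter and `db`/`search` are assumed to already exist:

```python
async def apply_platform_filter(db, search):
    cql2 = {"op": "=", "args": [{"property": "platform"}, "sentinel-2a"]}
    return await db.apply_cql2_filter(search, cql2)
```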
@@ -585,7 +503,7 @@ class DatabaseLogic(BaseDatabaseLogic):
         search_after = None
 
         if token:
-            search_after = json.loads(urlsafe_b64decode(token).decode())
+            search_after = orjson.loads(urlsafe_b64decode(token))
 
         query = search.query.to_dict() if search.query else None
 
@@ -625,7 +543,7 @@ class DatabaseLogic(BaseDatabaseLogic):
         next_token = None
         if len(hits) > limit and limit < max_result_window:
             if hits and (sort_array := hits[limit - 1].get("sort")):
-                next_token = urlsafe_b64encode(json.dumps(sort_array).encode()).decode()
+                next_token = urlsafe_b64encode(orjson.dumps(sort_array)).decode()
 
         matched = (
             es_response["hits"]["total"]["value"]
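Pagination tokens keep the same URL-safe base64 wrapping, but the payload is now serialized with `orjson`, which works in `bytes`, so the extra `.encode()`/`.decode()` around the JSON step disappears. A quick round trip of the encoding used above:

```python
from base64 import urlsafe_b64decode, urlsafe_b64encode

import orjson

sort_array = [1672531200000, "collection-a_item-001"]  # example search_after values
token = urlsafe_b64encode(orjson.dumps(sort_array)).decode()
assert orjson.loads(urlsafe_b64decode(token)) == sort_array
```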
@@ -845,15 +763,19 @@ class DatabaseLogic(BaseDatabaseLogic):
     async def create_item(
         self,
         item: Item,
-        refresh: bool = False,
         base_url: str = "",
         exist_ok: bool = False,
+        **kwargs: Any,
     ):
         """Database logic for creating one item.
 
         Args:
             item (Item): The item to be created.
-            refresh (bool, optional): Refresh the index after performing the operation. Defaults to False.
+            base_url (str, optional): The base URL for the item. Defaults to an empty string.
+            exist_ok (bool, optional): Whether to allow the item to exist already. Defaults to False.
+            **kwargs: Additional keyword arguments.
+                - refresh (str): Whether to refresh the index after the operation. Can be "true", "false", or "wait_for".
+                - refresh (bool): Whether to refresh the index after the operation. Defaults to the value in `self.async_settings.database_refresh`.
 
         Raises:
             ConflictError: If the item already exists in the database.
@@ -861,12 +783,28 @@ class DatabaseLogic(BaseDatabaseLogic):
         Returns:
             None
         """
-        # todo: check if collection exists, but cache
+        # Extract item and collection IDs
         item_id = item["id"]
         collection_id = item["collection"]
+
+        # Ensure kwargs is a dictionary
+        kwargs = kwargs or {}
+
+        # Resolve the `refresh` parameter
+        refresh = kwargs.get("refresh", self.async_settings.database_refresh)
+        refresh = validate_refresh(refresh)
+
+        # Log the creation attempt
+        logger.info(
+            f"Creating item {item_id} in collection {collection_id} with refresh={refresh}"
+        )
+
+        # Prepare the item for insertion
         item = await self.async_prep_create_item(
             item=item, base_url=base_url, exist_ok=exist_ok
         )
+
+        # Index the item in the database
         await self.client.index(
             index=index_alias_by_collection_id(collection_id),
             id=mk_item_id(item_id, collection_id),
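With the explicit `refresh: bool = False` parameter removed, `create_item` (like the other write paths below) reads `refresh` from `**kwargs`, falls back to `self.async_settings.database_refresh`, and normalizes it with `validate_refresh`. A hedged usage sketch — `db` is a `DatabaseLogic` instance and `item` a STAC item dict; the accepted spellings are assumed to be the bool/"true"/"false"/"wait_for" values named in the docstrings, since `validate_refresh` itself lives in `stac_fastapi.sfeos_helpers`:

```python
async def ingest(db, item):
    # "wait_for" blocks until the indexed item is visible to searches
    await db.create_item(item, base_url="http://localhost:8080/", refresh="wait_for")

async def ingest_with_default(db, item):
    # no refresh kwarg: the value from async_settings.database_refresh is used
    await db.create_item(item, base_url="http://localhost:8080/", exist_ok=True)
```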
@@ -874,26 +812,43 @@ class DatabaseLogic(BaseDatabaseLogic):
             refresh=refresh,
         )
 
-    async def delete_item(
-        self, item_id: str, collection_id: str, refresh: bool = False
-    ):
+    async def delete_item(self, item_id: str, collection_id: str, **kwargs: Any):
         """Delete a single item from the database.
 
         Args:
             item_id (str): The id of the Item to be deleted.
             collection_id (str): The id of the Collection that the Item belongs to.
-            refresh (bool, optional): Whether to refresh the index after the deletion. Default is False.
+            **kwargs: Additional keyword arguments.
+                - refresh (str): Whether to refresh the index after the operation. Can be "true", "false", or "wait_for".
+                - refresh (bool): Whether to refresh the index after the operation. Defaults to the value in `self.async_settings.database_refresh`.
 
         Raises:
             NotFoundError: If the Item does not exist in the database.
+
+        Returns:
+            None
         """
+        # Ensure kwargs is a dictionary
+        kwargs = kwargs or {}
+
+        # Resolve the `refresh` parameter
+        refresh = kwargs.get("refresh", self.async_settings.database_refresh)
+        refresh = validate_refresh(refresh)
+
+        # Log the deletion attempt
+        logger.info(
+            f"Deleting item {item_id} from collection {collection_id} with refresh={refresh}"
+        )
+
         try:
+            # Perform the delete operation
             await self.client.delete(
                 index=index_alias_by_collection_id(collection_id),
                 id=mk_item_id(item_id, collection_id),
                 refresh=refresh,
             )
         except ESNotFoundError:
+            # Raise a custom NotFoundError if the item does not exist
             raise NotFoundError(
                 f"Item {item_id} in collection {collection_id} not found"
             )
@@ -916,24 +871,72 @@ class DatabaseLogic(BaseDatabaseLogic):
         except ESNotFoundError:
             raise NotFoundError(f"Mapping for index {index_name} not found")
 
-    async def create_collection(self, collection: Collection, refresh: bool = False):
+    async def get_items_unique_values(
+        self, collection_id: str, field_names: Iterable[str], *, limit: int = 100
+    ) -> Dict[str, List[str]]:
+        """Get the unique values for the given fields in the collection."""
+        limit_plus_one = limit + 1
+        index_name = index_alias_by_collection_id(collection_id)
+
+        query = await self.client.search(
+            index=index_name,
+            body={
+                "size": 0,
+                "aggs": {
+                    field: {"terms": {"field": field, "size": limit_plus_one}}
+                    for field in field_names
+                },
+            },
+        )
+
+        result: Dict[str, List[str]] = {}
+        for field, agg in query["aggregations"].items():
+            if len(agg["buckets"]) > limit:
+                logger.warning(
+                    "Skipping enum field %s: exceeds limit of %d unique values. "
+                    "Consider excluding this field from enumeration or increase the limit.",
+                    field,
+                    limit,
+                )
+                continue
+            result[field] = [bucket["key"] for bucket in agg["buckets"]]
+        return result
+
+    async def create_collection(self, collection: Collection, **kwargs: Any):
         """Create a single collection in the database.
 
         Args:
             collection (Collection): The Collection object to be created.
-            refresh (bool, optional): Whether to refresh the index after the creation. Default is False.
+            **kwargs: Additional keyword arguments.
+                - refresh (str): Whether to refresh the index after the operation. Can be "true", "false", or "wait_for".
+                - refresh (bool): Whether to refresh the index after the operation. Defaults to the value in `self.async_settings.database_refresh`.
 
         Raises:
             ConflictError: If a Collection with the same id already exists in the database.
 
+        Returns:
+            None
+
         Notes:
             A new index is created for the items in the Collection using the `create_item_index` function.
         """
         collection_id = collection["id"]
 
+        # Ensure kwargs is a dictionary
+        kwargs = kwargs or {}
+
+        # Resolve the `refresh` parameter
+        refresh = kwargs.get("refresh", self.async_settings.database_refresh)
+        refresh = validate_refresh(refresh)
+
+        # Log the creation attempt
+        logger.info(f"Creating collection {collection_id} with refresh={refresh}")
+
+        # Check if the collection already exists
         if await self.client.exists(index=COLLECTIONS_INDEX, id=collection_id):
             raise ConflictError(f"Collection {collection_id} already exists")
 
+        # Index the collection in the database
         await self.client.index(
             index=COLLECTIONS_INDEX,
             id=collection_id,
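`get_items_unique_values` issues a single size-0 search with one `terms` aggregation per requested field, asking for `limit + 1` buckets so it can detect (and skip, with a warning) fields that exceed the limit. A hedged usage sketch; the field names and collection id are examples, and the result shape mirrors the code above, e.g. `{"properties.platform": ["sentinel-2a", "sentinel-2b"]}`:

```python
async def collection_enums(db):
    return await db.get_items_unique_values(
        "sentinel-2-l2a",  # hypothetical collection id
        ["properties.platform", "properties.constellation"],
        limit=50,
    )
```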
@@ -941,6 +944,7 @@ class DatabaseLogic(BaseDatabaseLogic):
             refresh=refresh,
         )
 
+        # Create the item index for the collection
         await create_item_index(collection_id)
 
     async def find_collection(self, collection_id: str) -> Collection:
@@ -970,29 +974,52 @@ class DatabaseLogic(BaseDatabaseLogic):
         return collection["_source"]
 
     async def update_collection(
-        self, collection_id: str, collection: Collection, refresh: bool = False
+        self, collection_id: str, collection: Collection, **kwargs: Any
     ):
-        """Update a collection from the database.
+        """Update a collection in the database.
 
         Args:
-            self: The instance of the object calling this function.
             collection_id (str): The ID of the collection to be updated.
             collection (Collection): The Collection object to be used for the update.
+            **kwargs: Additional keyword arguments.
+                - refresh (str): Whether to refresh the index after the operation. Can be "true", "false", or "wait_for".
+                - refresh (bool): Whether to refresh the index after the operation. Defaults to the value in `self.async_settings.database_refresh`.
+        Returns:
+            None
 
         Raises:
-            NotFoundError: If the collection with the given `collection_id` is not
-            found in the database.
+            NotFoundError: If the collection with the given `collection_id` is not found in the database.
+            ConflictError: If a conflict occurs during the update.
 
         Notes:
             This function updates the collection in the database using the specified
-            `collection_id` and with the collection specified in the `Collection` object.
-            If the collection is not found, a `NotFoundError` is raised.
+            `collection_id` and the provided `Collection` object. If the collection ID
+            changes, the function creates a new collection, reindexes the items, and deletes
+            the old collection.
         """
+        # Ensure kwargs is a dictionary
+        kwargs = kwargs or {}
+
+        # Resolve the `refresh` parameter
+        refresh = kwargs.get("refresh", self.async_settings.database_refresh)
+        refresh = validate_refresh(refresh)
+
+        # Log the update attempt
+        logger.info(f"Updating collection {collection_id} with refresh={refresh}")
+
+        # Ensure the collection exists
         await self.find_collection(collection_id=collection_id)
 
+        # Handle collection ID change
         if collection_id != collection["id"]:
+            logger.info(
+                f"Collection ID change detected: {collection_id} -> {collection['id']}"
+            )
+
+            # Create the new collection
             await self.create_collection(collection, refresh=refresh)
 
+            # Reindex items from the old collection to the new collection
             await self.client.reindex(
                 body={
                     "dest": {"index": f"{ITEMS_INDEX_PREFIX}{collection['id']}"},
@@ -1006,9 +1033,11 @@ class DatabaseLogic(BaseDatabaseLogic):
                 refresh=refresh,
             )
 
+            # Delete the old collection
             await self.delete_collection(collection_id)
 
         else:
+            # Update the existing collection
             await self.client.index(
                 index=COLLECTIONS_INDEX,
                 id=collection_id,
@@ -1016,33 +1045,57 @@ class DatabaseLogic(BaseDatabaseLogic):
                 refresh=refresh,
             )
 
-    async def delete_collection(self, collection_id: str, refresh: bool = False):
+    async def delete_collection(self, collection_id: str, **kwargs: Any):
         """Delete a collection from the database.
 
         Parameters:
-            self: The instance of the object calling this function.
             collection_id (str): The ID of the collection to be deleted.
-            refresh (bool): Whether to refresh the index after the deletion (default: False).
+            kwargs (Any, optional): Additional keyword arguments, including `refresh`.
+                - refresh (str): Whether to refresh the index after the operation. Can be "true", "false", or "wait_for".
+                - refresh (bool): Whether to refresh the index after the operation. Defaults to the value in `self.async_settings.database_refresh`.
 
         Raises:
             NotFoundError: If the collection with the given `collection_id` is not found in the database.
 
+        Returns:
+            None
+
         Notes:
             This function first verifies that the collection with the specified `collection_id` exists in the database, and then
-            deletes the collection. If `refresh` is set to True, the index is refreshed after the deletion. Additionally, this
-            function also calls `delete_item_index` to delete the index for the items in the collection.
+            deletes the collection. If `refresh` is set to "true", "false", or "wait_for", the index is refreshed accordingly after
+            the deletion. Additionally, this function also calls `delete_item_index` to delete the index for the items in the collection.
         """
+        # Ensure kwargs is a dictionary
+        kwargs = kwargs or {}
+
+        # Verify that the collection exists
         await self.find_collection(collection_id=collection_id)
+
+        # Resolve the `refresh` parameter
+        refresh = kwargs.get("refresh", self.async_settings.database_refresh)
+        refresh = validate_refresh(refresh)
+
+        # Log the deletion attempt
+        logger.info(f"Deleting collection {collection_id} with refresh={refresh}")
+
+        # Delete the collection from the database
         await self.client.delete(
             index=COLLECTIONS_INDEX, id=collection_id, refresh=refresh
         )
-        await delete_item_index(collection_id)
+
+        # Delete the item index for the collection
+        try:
+            await delete_item_index(collection_id)
+        except Exception as e:
+            logger.error(
+                f"Failed to delete item index for collection {collection_id}: {e}"
+            )
 
     async def bulk_async(
         self,
         collection_id: str,
         processed_items: List[Item],
-        refresh: bool = False,
+        **kwargs: Any,
     ) -> Tuple[int, List[Dict[str, Any]]]:
         """
         Perform a bulk insert of items into the database asynchronously.
@@ -1050,7 +1103,12 @@ class DatabaseLogic(BaseDatabaseLogic):
         Args:
             collection_id (str): The ID of the collection to which the items belong.
             processed_items (List[Item]): A list of `Item` objects to be inserted into the database.
-            refresh (bool): Whether to refresh the index after the bulk insert (default: False).
+            **kwargs (Any): Additional keyword arguments, including:
+                - refresh (str, optional): Whether to refresh the index after the bulk insert.
+                    Can be "true", "false", or "wait_for". Defaults to the value of `self.sync_settings.database_refresh`.
+                - refresh (bool, optional): Whether to refresh the index after the bulk insert.
+                - raise_on_error (bool, optional): Whether to raise an error if any of the bulk operations fail.
+                    Defaults to the value of `self.async_settings.raise_on_bulk_error`.
 
         Returns:
             Tuple[int, List[Dict[str, Any]]]: A tuple containing:
@@ -1059,10 +1117,31 @@ class DatabaseLogic(BaseDatabaseLogic):
 
         Notes:
             This function performs a bulk insert of `processed_items` into the database using the specified `collection_id`.
-            The insert is performed asynchronously, and the event loop is used to run the operation in a separate executor.
-            The `mk_actions` function is called to generate a list of actions for the bulk insert. If `refresh` is set to True,
-            the index is refreshed after the bulk insert.
+            The insert is performed synchronously and blocking, meaning that the function does not return until the insert has
+            completed. The `mk_actions` function is called to generate a list of actions for the bulk insert. The `refresh`
+            parameter determines whether the index is refreshed after the bulk insert:
+            - "true": Forces an immediate refresh of the index.
+            - "false": Does not refresh the index immediately (default behavior).
+            - "wait_for": Waits for the next refresh cycle to make the changes visible.
         """
+        # Ensure kwargs is a dictionary
+        kwargs = kwargs or {}
+
+        # Resolve the `refresh` parameter
+        refresh = kwargs.get("refresh", self.async_settings.database_refresh)
+        refresh = validate_refresh(refresh)
+
+        # Log the bulk insert attempt
+        logger.info(
+            f"Performing bulk insert for collection {collection_id} with refresh={refresh}"
+        )
+
+        # Handle empty processed_items
+        if not processed_items:
+            logger.warning(f"No items to insert for collection {collection_id}")
+            return 0, []
+
+        # Perform the bulk insert
         raise_on_error = self.async_settings.raise_on_bulk_error
         success, errors = await helpers.async_bulk(
             self.client,
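`bulk_async` (and its synchronous twin below) now resolves `refresh` the same way as the single-item writes, returns early with `(0, [])` for an empty batch, and logs success/error counts. A hedged calling sketch, assuming `processed_items` have already been through the usual prep/serialization step:

```python
async def load_items(db, collection_id, processed_items):
    success, errors = await db.bulk_async(
        collection_id, processed_items, refresh="wait_for"
    )
    if errors:
        raise RuntimeError(f"{len(errors)} items failed to index")
    return success
```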
@@ -1070,13 +1149,19 @@ class DatabaseLogic(BaseDatabaseLogic):
             refresh=refresh,
             raise_on_error=raise_on_error,
         )
+
+        # Log the result
+        logger.info(
+            f"Bulk insert completed for collection {collection_id}: {success} successes, {len(errors)} errors"
+        )
+
         return success, errors
 
     def bulk_sync(
         self,
         collection_id: str,
         processed_items: List[Item],
-        refresh: bool = False,
+        **kwargs: Any,
     ) -> Tuple[int, List[Dict[str, Any]]]:
         """
         Perform a bulk insert of items into the database synchronously.
@@ -1084,7 +1169,12 @@ class DatabaseLogic(BaseDatabaseLogic):
         Args:
             collection_id (str): The ID of the collection to which the items belong.
             processed_items (List[Item]): A list of `Item` objects to be inserted into the database.
-            refresh (bool): Whether to refresh the index after the bulk insert (default: False).
+            **kwargs (Any): Additional keyword arguments, including:
+                - refresh (str, optional): Whether to refresh the index after the bulk insert.
+                    Can be "true", "false", or "wait_for". Defaults to the value of `self.sync_settings.database_refresh`.
+                - refresh (bool, optional): Whether to refresh the index after the bulk insert.
+                - raise_on_error (bool, optional): Whether to raise an error if any of the bulk operations fail.
+                    Defaults to the value of `self.async_settings.raise_on_bulk_error`.
 
         Returns:
             Tuple[int, List[Dict[str, Any]]]: A tuple containing:
@@ -1094,9 +1184,30 @@ class DatabaseLogic(BaseDatabaseLogic):
         Notes:
             This function performs a bulk insert of `processed_items` into the database using the specified `collection_id`.
             The insert is performed synchronously and blocking, meaning that the function does not return until the insert has
-            completed. The `mk_actions` function is called to generate a list of actions for the bulk insert. If `refresh` is set to
-            True, the index is refreshed after the bulk insert.
+            completed. The `mk_actions` function is called to generate a list of actions for the bulk insert. The `refresh`
+            parameter determines whether the index is refreshed after the bulk insert:
+            - "true": Forces an immediate refresh of the index.
+            - "false": Does not refresh the index immediately (default behavior).
+            - "wait_for": Waits for the next refresh cycle to make the changes visible.
         """
+        # Ensure kwargs is a dictionary
+        kwargs = kwargs or {}
+
+        # Resolve the `refresh` parameter
+        refresh = kwargs.get("refresh", self.async_settings.database_refresh)
+        refresh = validate_refresh(refresh)
+
+        # Log the bulk insert attempt
+        logger.info(
+            f"Performing bulk insert for collection {collection_id} with refresh={refresh}"
+        )
+
+        # Handle empty processed_items
+        if not processed_items:
+            logger.warning(f"No items to insert for collection {collection_id}")
+            return 0, []
+
+        # Perform the bulk insert
         raise_on_error = self.sync_settings.raise_on_bulk_error
         success, errors = helpers.bulk(
             self.sync_client,
@@ -1104,6 +1215,12 @@ class DatabaseLogic(BaseDatabaseLogic):
             refresh=refresh,
             raise_on_error=raise_on_error,
         )
+
+        # Log the result
+        logger.info(
+            f"Bulk insert completed for collection {collection_id}: {success} successes, {len(errors)} errors"
+        )
+
         return success, errors
 
     # DANGER