stac-fastapi-opensearch 4.1.0__py3-none-any.whl → 5.0.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only.
@@ -1,20 +1,43 @@
 """Database logic."""
 
 import asyncio
-import json
 import logging
 from base64 import urlsafe_b64decode, urlsafe_b64encode
 from copy import deepcopy
-from typing import Any, Dict, Iterable, List, Optional, Tuple, Type
+from typing import Any, Dict, Iterable, List, Optional, Tuple, Type, Union
 
 import attr
+import orjson
 from opensearchpy import exceptions, helpers
 from opensearchpy.helpers.query import Q
 from opensearchpy.helpers.search import Search
 from starlette.requests import Request
 
 from stac_fastapi.core.base_database_logic import BaseDatabaseLogic
-from stac_fastapi.core.database_logic import (
+from stac_fastapi.core.serializers import CollectionSerializer, ItemSerializer
+from stac_fastapi.core.utilities import MAX_LIMIT, bbox2polygon
+from stac_fastapi.opensearch.config import (
+    AsyncOpensearchSettings as AsyncSearchSettings,
+)
+from stac_fastapi.opensearch.config import OpensearchSettings as SyncSearchSettings
+from stac_fastapi.sfeos_helpers import filter
+from stac_fastapi.sfeos_helpers.database import (
+    apply_free_text_filter_shared,
+    apply_intersects_filter_shared,
+    create_index_templates_shared,
+    delete_item_index_shared,
+    get_queryables_mapping_shared,
+    index_alias_by_collection_id,
+    index_by_collection_id,
+    indices,
+    mk_actions,
+    mk_item_id,
+    populate_sort_shared,
+    return_date,
+    validate_refresh,
+)
+from stac_fastapi.sfeos_helpers.mappings import (
+    AGGREGATION_MAPPING,
     COLLECTIONS_INDEX,
     DEFAULT_SORT,
     ES_COLLECTIONS_MAPPINGS,
@@ -23,20 +46,9 @@ from stac_fastapi.core.database_logic import (
     ITEM_INDICES,
     ITEMS_INDEX_PREFIX,
     Geometry,
-    index_alias_by_collection_id,
-    index_by_collection_id,
-    indices,
-    mk_actions,
-    mk_item_id,
 )
-from stac_fastapi.core.extensions import filter
-from stac_fastapi.core.serializers import CollectionSerializer, ItemSerializer
-from stac_fastapi.core.utilities import MAX_LIMIT, bbox2polygon
-from stac_fastapi.opensearch.config import (
-    AsyncOpensearchSettings as AsyncSearchSettings,
-)
-from stac_fastapi.opensearch.config import OpensearchSettings as SyncSearchSettings
 from stac_fastapi.types.errors import ConflictError, NotFoundError
+from stac_fastapi.types.rfc3339 import DateTimeType
 from stac_fastapi.types.stac import Collection, Item
 
 logger = logging.getLogger(__name__)
@@ -50,23 +62,7 @@ async def create_index_templates() -> None:
         None
 
    """
-    client = AsyncSearchSettings().create_client
-    await client.indices.put_template(
-        name=f"template_{COLLECTIONS_INDEX}",
-        body={
-            "index_patterns": [f"{COLLECTIONS_INDEX}*"],
-            "mappings": ES_COLLECTIONS_MAPPINGS,
-        },
-    )
-    await client.indices.put_template(
-        name=f"template_{ITEMS_INDEX_PREFIX}",
-        body={
-            "index_patterns": [f"{ITEMS_INDEX_PREFIX}*"],
-            "settings": ES_ITEMS_SETTINGS,
-            "mappings": ES_ITEMS_MAPPINGS,
-        },
-    )
-    await client.close()
+    await create_index_templates_shared(settings=AsyncSearchSettings())
 
 
 async def create_collection_index() -> None:
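
The two put_template calls deleted above are now centralized in create_index_templates_shared, which the sfeos helper layer presumably shares across backends. A minimal standalone sketch of the equivalent direct calls, assuming an opensearch-py async client; the template names, patterns, and mapping bodies here are illustrative, not the package's actual constants:

    from opensearchpy import AsyncOpenSearch

    async def put_index_templates(client: AsyncOpenSearch) -> None:
        # One template for the collections index, one matching every
        # per-collection items index.
        await client.indices.put_template(
            name="template_collections",
            body={"index_patterns": ["collections*"], "mappings": {"dynamic": True}},
        )
        await client.indices.put_template(
            name="template_items",
            body={"index_patterns": ["items_*"], "mappings": {"dynamic": True}},
        )
        await client.close()
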
@@ -125,18 +121,13 @@ async def delete_item_index(collection_id: str) -> None:
 
     Args:
         collection_id (str): The ID of the collection whose items index will be deleted.
-    """
-    client = AsyncSearchSettings().create_client
 
-    name = index_alias_by_collection_id(collection_id)
-    resolved = await client.indices.resolve_index(name=name)
-    if "aliases" in resolved and resolved["aliases"]:
-        [alias] = resolved["aliases"]
-        await client.indices.delete_alias(index=alias["indices"], name=alias["name"])
-        await client.indices.delete(index=alias["indices"])
-    else:
-        await client.indices.delete(index=name)
-    await client.close()
+    Notes:
+        This function delegates to the shared implementation in delete_item_index_shared.
+    """
+    await delete_item_index_shared(
+        settings=AsyncSearchSettings(), collection_id=collection_id
+    )
 
 
 @attr.s
@@ -161,76 +152,7 @@ class DatabaseLogic(BaseDatabaseLogic):
 
     extensions: List[str] = attr.ib(default=attr.Factory(list))
 
-    aggregation_mapping: Dict[str, Dict[str, Any]] = {
-        "total_count": {"value_count": {"field": "id"}},
-        "collection_frequency": {"terms": {"field": "collection", "size": 100}},
-        "platform_frequency": {"terms": {"field": "properties.platform", "size": 100}},
-        "cloud_cover_frequency": {
-            "range": {
-                "field": "properties.eo:cloud_cover",
-                "ranges": [
-                    {"to": 5},
-                    {"from": 5, "to": 15},
-                    {"from": 15, "to": 40},
-                    {"from": 40},
-                ],
-            }
-        },
-        "datetime_frequency": {
-            "date_histogram": {
-                "field": "properties.datetime",
-                "calendar_interval": "month",
-            }
-        },
-        "datetime_min": {"min": {"field": "properties.datetime"}},
-        "datetime_max": {"max": {"field": "properties.datetime"}},
-        "grid_code_frequency": {
-            "terms": {
-                "field": "properties.grid:code",
-                "missing": "none",
-                "size": 10000,
-            }
-        },
-        "sun_elevation_frequency": {
-            "histogram": {"field": "properties.view:sun_elevation", "interval": 5}
-        },
-        "sun_azimuth_frequency": {
-            "histogram": {"field": "properties.view:sun_azimuth", "interval": 5}
-        },
-        "off_nadir_frequency": {
-            "histogram": {"field": "properties.view:off_nadir", "interval": 5}
-        },
-        "centroid_geohash_grid_frequency": {
-            "geohash_grid": {
-                "field": "properties.proj:centroid",
-                "precision": 1,
-            }
-        },
-        "centroid_geohex_grid_frequency": {
-            "geohex_grid": {
-                "field": "properties.proj:centroid",
-                "precision": 0,
-            }
-        },
-        "centroid_geotile_grid_frequency": {
-            "geotile_grid": {
-                "field": "properties.proj:centroid",
-                "precision": 0,
-            }
-        },
-        "geometry_geohash_grid_frequency": {
-            "geohash_grid": {
-                "field": "geometry",
-                "precision": 1,
-            }
-        },
-        "geometry_geotile_grid_frequency": {
-            "geotile_grid": {
-                "field": "geometry",
-                "precision": 0,
-            }
-        },
-    }
+    aggregation_mapping: Dict[str, Dict[str, Any]] = AGGREGATION_MAPPING
 
     """CORE LOGIC"""
 
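Since aggregation_mapping is now just a reference to the shared AGGREGATION_MAPPING constant, a deployment that needs extra aggregations could presumably layer them on with a plain dict merge; the gsd_frequency key below is hypothetical:

    from typing import Any, Dict

    from stac_fastapi.sfeos_helpers.mappings import AGGREGATION_MAPPING

    # Hypothetical extra aggregation merged alongside the shared defaults.
    custom_aggregations: Dict[str, Dict[str, Any]] = {
        **AGGREGATION_MAPPING,
        "gsd_frequency": {"histogram": {"field": "properties.gsd", "interval": 1}},
    }
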
@@ -307,6 +229,23 @@ class DatabaseLogic(BaseDatabaseLogic):
         )
         return item["_source"]
 
+    async def get_queryables_mapping(self, collection_id: str = "*") -> dict:
+        """Retrieve mapping of Queryables for search.
+
+        Args:
+            collection_id (str, optional): The id of the Collection the Queryables
+                belongs to. Defaults to "*".
+
+        Returns:
+            dict: A dictionary containing the Queryables mappings.
+        """
+        mappings = await self.client.indices.get_mapping(
+            index=f"{ITEMS_INDEX_PREFIX}{collection_id}",
+        )
+        return await get_queryables_mapping_shared(
+            collection_id=collection_id, mappings=mappings
+        )
+
     @staticmethod
     def make_search():
         """Database logic to create a Search instance."""
@@ -324,130 +263,116 @@ class DatabaseLogic(BaseDatabaseLogic):
 
     @staticmethod
     def apply_free_text_filter(search: Search, free_text_queries: Optional[List[str]]):
-        """Database logic to perform query for search endpoint."""
-        if free_text_queries is not None:
-            free_text_query_string = '" OR properties.\\*:"'.join(free_text_queries)
-            search = search.query(
-                "query_string", query=f'properties.\\*:"{free_text_query_string}"'
-            )
+        """Create a free text query for OpenSearch queries.
 
-        return search
+        This method delegates to the shared implementation in apply_free_text_filter_shared.
+
+        Args:
+            search (Search): The search object to apply the query to.
+            free_text_queries (Optional[List[str]]): A list of text strings to search for in the properties.
+
+        Returns:
+            Search: The search object with the free text query applied, or the original search
+                object if no free_text_queries were provided.
+        """
+        return apply_free_text_filter_shared(
+            search=search, free_text_queries=free_text_queries
+        )
 
     @staticmethod
-    def apply_datetime_filter(search: Search, datetime_search):
-        """Apply a filter to search based on datetime field, start_datetime, and end_datetime fields.
+    def apply_datetime_filter(
+        search: Search, interval: Optional[Union[DateTimeType, str]]
+    ) -> Search:
+        """Apply a filter to search on datetime, start_datetime, and end_datetime fields.
 
         Args:
-            search (Search): The search object to filter.
-            datetime_search (dict): The datetime filter criteria.
+            search: The search object to filter.
+            interval: Optional datetime interval to filter by. Can be:
+                - A single datetime string (e.g., "2023-01-01T12:00:00")
+                - A datetime range string (e.g., "2023-01-01/2023-12-31")
+                - A datetime object
+                - A tuple of (start_datetime, end_datetime)
 
         Returns:
-            Search: The filtered search object.
+            The filtered search object.
         """
+        if not interval:
+            return search
+
         should = []
+        try:
+            datetime_search = return_date(interval)
+        except (ValueError, TypeError) as e:
+            # Handle invalid interval formats if return_date fails
+            logger.error(f"Invalid interval format: {interval}, error: {e}")
+            return search
 
-        # If the request is a single datetime return
-        # items with datetimes equal to the requested datetime OR
-        # the requested datetime is between their start and end datetimes
         if "eq" in datetime_search:
-            should.extend(
-                [
-                    Q(
-                        "bool",
-                        filter=[
-                            Q(
-                                "term",
-                                properties__datetime=datetime_search["eq"],
-                            ),
-                        ],
-                    ),
-                    Q(
-                        "bool",
-                        filter=[
-                            Q(
-                                "range",
-                                properties__start_datetime={
-                                    "lte": datetime_search["eq"],
-                                },
-                            ),
-                            Q(
-                                "range",
-                                properties__end_datetime={
-                                    "gte": datetime_search["eq"],
-                                },
-                            ),
-                        ],
-                    ),
-                ]
-            )
-
-        # If the request is a date range return
-        # items with datetimes within the requested date range OR
-        # their startdatetime ithin the requested date range OR
-        # their enddatetime ithin the requested date range OR
-        # the requested daterange within their start and end datetimes
+            # For exact matches, include:
+            # 1. Items with matching exact datetime
+            # 2. Items with datetime:null where the time falls within their range
+            should = [
+                Q(
+                    "bool",
+                    filter=[
+                        Q("exists", field="properties.datetime"),
+                        Q("term", **{"properties__datetime": datetime_search["eq"]}),
+                    ],
+                ),
+                Q(
+                    "bool",
+                    must_not=[Q("exists", field="properties.datetime")],
+                    filter=[
+                        Q("exists", field="properties.start_datetime"),
+                        Q("exists", field="properties.end_datetime"),
+                        Q(
+                            "range",
+                            properties__start_datetime={"lte": datetime_search["eq"]},
+                        ),
+                        Q(
+                            "range",
+                            properties__end_datetime={"gte": datetime_search["eq"]},
+                        ),
+                    ],
+                ),
+            ]
         else:
-            should.extend(
-                [
-                    Q(
-                        "bool",
-                        filter=[
-                            Q(
-                                "range",
-                                properties__datetime={
-                                    "gte": datetime_search["gte"],
-                                    "lte": datetime_search["lte"],
-                                },
-                            ),
-                        ],
-                    ),
-                    Q(
-                        "bool",
-                        filter=[
-                            Q(
-                                "range",
-                                properties__start_datetime={
-                                    "gte": datetime_search["gte"],
-                                    "lte": datetime_search["lte"],
-                                },
-                            ),
-                        ],
-                    ),
-                    Q(
-                        "bool",
-                        filter=[
-                            Q(
-                                "range",
-                                properties__end_datetime={
-                                    "gte": datetime_search["gte"],
-                                    "lte": datetime_search["lte"],
-                                },
-                            ),
-                        ],
-                    ),
-                    Q(
-                        "bool",
-                        filter=[
-                            Q(
-                                "range",
-                                properties__start_datetime={
-                                    "lte": datetime_search["gte"]
-                                },
-                            ),
-                            Q(
-                                "range",
-                                properties__end_datetime={
-                                    "gte": datetime_search["lte"]
-                                },
-                            ),
-                        ],
-                    ),
-                ]
-            )
-
-        search = search.query(Q("bool", filter=[Q("bool", should=should)]))
-
-        return search
+            # For date ranges, include:
+            # 1. Items with datetime in the range
+            # 2. Items with datetime:null that overlap the search range
+            should = [
+                Q(
+                    "bool",
+                    filter=[
+                        Q("exists", field="properties.datetime"),
+                        Q(
+                            "range",
+                            properties__datetime={
+                                "gte": datetime_search["gte"],
+                                "lte": datetime_search["lte"],
+                            },
+                        ),
+                    ],
+                ),
+                Q(
+                    "bool",
+                    must_not=[Q("exists", field="properties.datetime")],
+                    filter=[
+                        Q("exists", field="properties.start_datetime"),
+                        Q("exists", field="properties.end_datetime"),
+                        Q(
+                            "range",
+                            properties__start_datetime={"lte": datetime_search["lte"]},
+                        ),
+                        Q(
+                            "range",
+                            properties__end_datetime={"gte": datetime_search["gte"]},
+                        ),
+                    ],
+                ),
+            ]
+
+        return search.query(Q("bool", should=should, minimum_should_match=1))
 
     @staticmethod
     def apply_bbox_filter(search: Search, bbox: List):
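
The rewrite also changes the null-datetime semantics: a range request now matches either items with a concrete properties.datetime inside the interval, or items without one whose [start_datetime, end_datetime] window overlaps it, instead of the four overlap clauses deleted above. A standalone sketch of the resulting query, with a hypothetical parsed interval in the {"gte": ..., "lte": ...} shape that return_date produces:

    from opensearchpy.helpers.query import Q

    # Hypothetical parsed interval.
    datetime_search = {"gte": "2023-01-01T00:00:00Z", "lte": "2023-12-31T23:59:59Z"}

    should = [
        # Items with a concrete datetime inside the range...
        Q(
            "bool",
            filter=[
                Q("exists", field="properties.datetime"),
                Q("range", properties__datetime=datetime_search),
            ],
        ),
        # ...or datetime:null items whose start/end window overlaps the range.
        Q(
            "bool",
            must_not=[Q("exists", field="properties.datetime")],
            filter=[
                Q("exists", field="properties.start_datetime"),
                Q("exists", field="properties.end_datetime"),
                Q("range", properties__start_datetime={"lte": datetime_search["lte"]}),
                Q("range", properties__end_datetime={"gte": datetime_search["gte"]}),
            ],
        ),
    ]
    query = Q("bool", should=should, minimum_should_match=1)
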
@@ -497,21 +422,8 @@ class DatabaseLogic(BaseDatabaseLogic):
         Notes:
             A geo_shape filter is added to the search object, set to intersect with the specified geometry.
         """
-        return search.filter(
-            Q(
-                {
-                    "geo_shape": {
-                        "geometry": {
-                            "shape": {
-                                "type": intersects.type.lower(),
-                                "coordinates": intersects.coordinates,
-                            },
-                            "relation": "intersects",
-                        }
-                    }
-                }
-            )
-        )
+        filter = apply_intersects_filter_shared(intersects=intersects)
+        return search.filter(Q(filter))
 
     @staticmethod
     def apply_stacql_filter(search: Search, op: str, field: str, value: float):
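
Judging from the deleted block, apply_intersects_filter_shared presumably returns the same plain geo_shape query body, which is then wrapped in Q(). A sketch of that dict for a hypothetical polygon:

    from opensearchpy.helpers.query import Q

    # Hypothetical GeoJSON-style geometry; the dict mirrors the deleted inline query.
    geo_shape_filter = {
        "geo_shape": {
            "geometry": {
                "shape": {
                    "type": "polygon",
                    "coordinates": [[[0.0, 0.0], [0.0, 1.0], [1.0, 1.0], [0.0, 0.0]]],
                },
                "relation": "intersects",
            }
        }
    }
    query = Q(geo_shape_filter)
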
@@ -535,8 +447,9 @@
 
         return search
 
-    @staticmethod
-    def apply_cql2_filter(search: Search, _filter: Optional[Dict[str, Any]]):
+    async def apply_cql2_filter(
+        self, search: Search, _filter: Optional[Dict[str, Any]]
+    ):
         """
         Apply a CQL2 filter to an Opensearch Search object.
 
@@ -556,18 +469,25 @@
             otherwise the original Search object.
         """
         if _filter is not None:
-            es_query = filter.to_es(_filter)
+            es_query = filter.to_es(await self.get_queryables_mapping(), _filter)
             search = search.filter(es_query)
 
         return search
 
     @staticmethod
     def populate_sort(sortby: List) -> Optional[Dict[str, Dict[str, str]]]:
-        """Database logic to sort search instance."""
-        if sortby:
-            return {s.field: {"order": s.direction} for s in sortby}
-        else:
-            return None
+        """Create a sort configuration for OpenSearch queries.
+
+        This method delegates to the shared implementation in populate_sort_shared.
+
+        Args:
+            sortby (List): A list of sort specifications, each containing a field and direction.
+
+        Returns:
+            Optional[Dict[str, Dict[str, str]]]: A dictionary mapping field names to sort direction
+                configurations, or None if no sort was specified.
+        """
+        return populate_sort_shared(sortby=sortby)
 
     async def execute_search(
         self,
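
filter.to_es now takes the queryables mapping as its first argument, so CQL2 property names can be resolved to concrete index fields. A hypothetical end-to-end call with a CQL2-JSON body (mapping and filter values are illustrative):

    from opensearchpy.helpers.search import Search

    from stac_fastapi.sfeos_helpers import filter

    queryables_mapping = {"eo:cloud_cover": "properties.eo:cloud_cover"}
    cql2 = {"op": "<=", "args": [{"property": "eo:cloud_cover"}, 10]}

    search = Search().filter(filter.to_es(queryables_mapping, cql2))
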
@@ -607,7 +527,7 @@
         search_after = None
 
         if token:
-            search_after = json.loads(urlsafe_b64decode(token).decode())
+            search_after = orjson.loads(urlsafe_b64decode(token))
         if search_after:
             search_body["search_after"] = search_after
 
@@ -647,7 +567,7 @@
         next_token = None
         if len(hits) > limit and limit < max_result_window:
             if hits and (sort_array := hits[limit - 1].get("sort")):
-                next_token = urlsafe_b64encode(json.dumps(sort_array).encode()).decode()
+                next_token = urlsafe_b64encode(orjson.dumps(sort_array)).decode()
 
         matched = (
             es_response["hits"]["total"]["value"]
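
orjson.dumps returns bytes directly, which is why the .encode()/.decode() pair around the old json calls drops out on both the encode and decode sides. The pagination-token round trip in isolation:

    from base64 import urlsafe_b64decode, urlsafe_b64encode

    import orjson

    # Hypothetical `sort` values taken from the last hit of a page.
    sort_array = ["2023-06-01T00:00:00Z", "item-42"]
    token = urlsafe_b64encode(orjson.dumps(sort_array)).decode()

    assert orjson.loads(urlsafe_b64decode(token)) == sort_array
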
@@ -864,15 +784,17 @@
     async def create_item(
         self,
         item: Item,
-        refresh: bool = False,
         base_url: str = "",
         exist_ok: bool = False,
+        **kwargs: Any,
     ):
         """Database logic for creating one item.
 
         Args:
             item (Item): The item to be created.
-            refresh (bool, optional): Refresh the index after performing the operation. Defaults to False.
+            base_url (str, optional): The base URL for the item. Defaults to an empty string.
+            exist_ok (bool, optional): Whether to allow the item to exist already. Defaults to False.
+            **kwargs: Additional keyword arguments like refresh.
 
         Raises:
             ConflictError: If the item already exists in the database.
@@ -883,6 +805,19 @@
         # todo: check if collection exists, but cache
         item_id = item["id"]
         collection_id = item["collection"]
+
+        # Ensure kwargs is a dictionary
+        kwargs = kwargs or {}
+
+        # Resolve the `refresh` parameter
+        refresh = kwargs.get("refresh", self.async_settings.database_refresh)
+        refresh = validate_refresh(refresh)
+
+        # Log the creation attempt
+        logger.info(
+            f"Creating item {item_id} in collection {collection_id} with refresh={refresh}"
+        )
+
         item = await self.async_prep_create_item(
             item=item, base_url=base_url, exist_ok=exist_ok
         )
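
validate_refresh itself is imported from stac_fastapi.sfeos_helpers.database and is not shown in this diff; going by the "true"/"false"/"wait_for" values documented in the bulk methods below, a plausible re-implementation, for illustration only:

    from typing import Union

    def validate_refresh_sketch(value: Union[bool, str]) -> str:
        # Illustrative only: normalize a bool-or-string refresh setting to
        # the three string values OpenSearch's refresh parameter accepts.
        if isinstance(value, bool):
            return "true" if value else "false"
        normalized = str(value).strip().lower()
        if normalized in {"true", "false", "wait_for"}:
            return normalized
        raise ValueError(f"Invalid refresh value: {value!r}")
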
@@ -893,19 +828,29 @@
             refresh=refresh,
         )
 
-    async def delete_item(
-        self, item_id: str, collection_id: str, refresh: bool = False
-    ):
+    async def delete_item(self, item_id: str, collection_id: str, **kwargs: Any):
         """Delete a single item from the database.
 
         Args:
             item_id (str): The id of the Item to be deleted.
             collection_id (str): The id of the Collection that the Item belongs to.
-            refresh (bool, optional): Whether to refresh the index after the deletion. Default is False.
+            **kwargs: Additional keyword arguments like refresh.
 
         Raises:
             NotFoundError: If the Item does not exist in the database.
         """
+        # Ensure kwargs is a dictionary
+        kwargs = kwargs or {}
+
+        # Resolve the `refresh` parameter
+        refresh = kwargs.get("refresh", self.async_settings.database_refresh)
+        refresh = validate_refresh(refresh)
+
+        # Log the deletion attempt
+        logger.info(
+            f"Deleting item {item_id} from collection {collection_id} with refresh={refresh}"
+        )
+
         try:
             await self.client.delete(
                 index=index_alias_by_collection_id(collection_id),
@@ -935,12 +880,43 @@
         except exceptions.NotFoundError:
             raise NotFoundError(f"Mapping for index {index_name} not found")
 
-    async def create_collection(self, collection: Collection, refresh: bool = False):
+    async def get_items_unique_values(
+        self, collection_id: str, field_names: Iterable[str], *, limit: int = 100
+    ) -> Dict[str, List[str]]:
+        """Get the unique values for the given fields in the collection."""
+        limit_plus_one = limit + 1
+        index_name = index_alias_by_collection_id(collection_id)
+
+        query = await self.client.search(
+            index=index_name,
+            body={
+                "size": 0,
+                "aggs": {
+                    field: {"terms": {"field": field, "size": limit_plus_one}}
+                    for field in field_names
+                },
+            },
+        )
+
+        result: Dict[str, List[str]] = {}
+        for field, agg in query["aggregations"].items():
+            if len(agg["buckets"]) > limit:
+                logger.warning(
+                    "Skipping enum field %s: exceeds limit of %d unique values. "
+                    "Consider excluding this field from enumeration or increase the limit.",
+                    field,
+                    limit,
+                )
+                continue
+            result[field] = [bucket["key"] for bucket in agg["buckets"]]
+        return result
+
+    async def create_collection(self, collection: Collection, **kwargs: Any):
         """Create a single collection in the database.
 
         Args:
             collection (Collection): The Collection object to be created.
-            refresh (bool, optional): Whether to refresh the index after the creation. Default is False.
+            **kwargs: Additional keyword arguments like refresh.
 
         Raises:
             ConflictError: If a Collection with the same id already exists in the database.
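
Note the limit_plus_one over-fetch in get_items_unique_values: requesting one extra bucket is what distinguishes "exactly limit distinct values" from "more than limit", so oversized fields are skipped outright instead of being silently truncated. A hypothetical call (collection id and field names are illustrative):

    async def enumerate_fields(db) -> None:
        unique = await db.get_items_unique_values(
            "sentinel-2-l2a",
            ["properties.platform", "properties.constellation"],
            limit=100,
        )
        # Fields with more than 100 distinct values are omitted entirely.
        print(unique)
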
@@ -950,6 +926,16 @@
         """
         collection_id = collection["id"]
 
+        # Ensure kwargs is a dictionary
+        kwargs = kwargs or {}
+
+        # Resolve the `refresh` parameter
+        refresh = kwargs.get("refresh", self.async_settings.database_refresh)
+        refresh = validate_refresh(refresh)
+
+        # Log the creation attempt
+        logger.info(f"Creating collection {collection_id} with refresh={refresh}")
+
         if await self.client.exists(index=COLLECTIONS_INDEX, id=collection_id):
             raise ConflictError(f"Collection {collection_id} already exists")
 
989
975
  return collection["_source"]
990
976
 
991
977
  async def update_collection(
992
- self, collection_id: str, collection: Collection, refresh: bool = False
978
+ self, collection_id: str, collection: Collection, **kwargs: Any
993
979
  ):
994
980
  """Update a collection from the database.
995
981
 
996
982
  Args:
997
- self: The instance of the object calling this function.
998
983
  collection_id (str): The ID of the collection to be updated.
999
984
  collection (Collection): The Collection object to be used for the update.
985
+ **kwargs: Additional keyword arguments like refresh.
1000
986
 
1001
987
  Raises:
1002
988
  NotFoundError: If the collection with the given `collection_id` is not
@@ -1007,9 +993,23 @@
             `collection_id` and with the collection specified in the `Collection` object.
         If the collection is not found, a `NotFoundError` is raised.
         """
+        # Ensure kwargs is a dictionary
+        kwargs = kwargs or {}
+
+        # Resolve the `refresh` parameter
+        refresh = kwargs.get("refresh", self.async_settings.database_refresh)
+        refresh = validate_refresh(refresh)
+
+        # Log the update attempt
+        logger.info(f"Updating collection {collection_id} with refresh={refresh}")
+
         await self.find_collection(collection_id=collection_id)
 
         if collection_id != collection["id"]:
+            logger.info(
+                f"Collection ID change detected: {collection_id} -> {collection['id']}"
+            )
+
             await self.create_collection(collection, refresh=refresh)
 
             await self.client.reindex(
@@ -1025,7 +1025,7 @@
                 refresh=refresh,
             )
 
-            await self.delete_collection(collection_id)
+            await self.delete_collection(collection_id=collection_id, **kwargs)
 
         else:
             await self.client.index(
@@ -1035,23 +1035,34 @@
                 refresh=refresh,
             )
 
-    async def delete_collection(self, collection_id: str, refresh: bool = False):
+    async def delete_collection(self, collection_id: str, **kwargs: Any):
         """Delete a collection from the database.
 
         Parameters:
             self: The instance of the object calling this function.
             collection_id (str): The ID of the collection to be deleted.
-            refresh (bool): Whether to refresh the index after the deletion (default: False).
+            **kwargs: Additional keyword arguments like refresh.
 
         Raises:
             NotFoundError: If the collection with the given `collection_id` is not found in the database.
 
         Notes:
             This function first verifies that the collection with the specified `collection_id` exists in the database, and then
-            deletes the collection. If `refresh` is set to True, the index is refreshed after the deletion. Additionally, this
-            function also calls `delete_item_index` to delete the index for the items in the collection.
+            deletes the collection. If `refresh` is set to "true", "false", or "wait_for", the index is refreshed accordingly after
+            the deletion. Additionally, this function also calls `delete_item_index` to delete the index for the items in the collection.
         """
+        # Ensure kwargs is a dictionary
+        kwargs = kwargs or {}
+
         await self.find_collection(collection_id=collection_id)
+
+        # Resolve the `refresh` parameter
+        refresh = kwargs.get("refresh", self.async_settings.database_refresh)
+        refresh = validate_refresh(refresh)
+
+        # Log the deletion attempt
+        logger.info(f"Deleting collection {collection_id} with refresh={refresh}")
+
         await self.client.delete(
             index=COLLECTIONS_INDEX, id=collection_id, refresh=refresh
         )
@@ -1061,7 +1072,7 @@
         self,
         collection_id: str,
         processed_items: List[Item],
-        refresh: bool = False,
+        **kwargs: Any,
     ) -> Tuple[int, List[Dict[str, Any]]]:
         """
         Perform a bulk insert of items into the database asynchronously.
@@ -1069,7 +1080,12 @@
         Args:
             collection_id (str): The ID of the collection to which the items belong.
             processed_items (List[Item]): A list of `Item` objects to be inserted into the database.
-            refresh (bool): Whether to refresh the index after the bulk insert (default: False).
+            **kwargs (Any): Additional keyword arguments, including:
+                - refresh (str, optional): Whether to refresh the index after the bulk insert.
+                    Can be "true", "false", or "wait_for". Defaults to the value of `self.sync_settings.database_refresh`.
+                - refresh (bool, optional): Whether to refresh the index after the bulk insert.
+                - raise_on_error (bool, optional): Whether to raise an error if any of the bulk operations fail.
+                    Defaults to the value of `self.async_settings.raise_on_bulk_error`.
 
         Returns:
             Tuple[int, List[Dict[str, Any]]]: A tuple containing:
@@ -1078,10 +1094,30 @@
 
         Notes:
             This function performs a bulk insert of `processed_items` into the database using the specified `collection_id`.
-            The insert is performed asynchronously, and the event loop is used to run the operation in a separate executor.
-            The `mk_actions` function is called to generate a list of actions for the bulk insert. If `refresh` is set to True,
-            the index is refreshed after the bulk insert.
+            The insert is performed synchronously and blocking, meaning that the function does not return until the insert has
+            completed. The `mk_actions` function is called to generate a list of actions for the bulk insert. The `refresh`
+            parameter determines whether the index is refreshed after the bulk insert:
+            - "true": Forces an immediate refresh of the index.
+            - "false": Does not refresh the index immediately (default behavior).
+            - "wait_for": Waits for the next refresh cycle to make the changes visible.
         """
+        # Ensure kwargs is a dictionary
+        kwargs = kwargs or {}
+
+        # Resolve the `refresh` parameter
+        refresh = kwargs.get("refresh", self.async_settings.database_refresh)
+        refresh = validate_refresh(refresh)
+
+        # Log the bulk insert attempt
+        logger.info(
+            f"Performing bulk insert for collection {collection_id} with refresh={refresh}"
+        )
+
+        # Handle empty processed_items
+        if not processed_items:
+            logger.warning(f"No items to insert for collection {collection_id}")
+            return 0, []
+
         raise_on_error = self.async_settings.raise_on_bulk_error
         success, errors = await helpers.async_bulk(
             self.client,
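
Call sites can now choose the refresh behavior per request rather than per method signature. Assuming the async method keeps its bulk_async name (the def line falls outside this hunk), a hypothetical ingest call:

    async def ingest(db, collection_id, processed_items) -> None:
        # "wait_for" defers visibility to the next scheduled refresh
        # instead of forcing an immediate one.
        success, errors = await db.bulk_async(
            collection_id, processed_items, refresh="wait_for"
        )
        if errors:
            raise RuntimeError(f"{len(errors)} items failed to index")
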
@@ -1089,21 +1125,30 @@
             refresh=refresh,
             raise_on_error=raise_on_error,
         )
+        # Log the result
+        logger.info(
+            f"Bulk insert completed for collection {collection_id}: {success} successes, {len(errors)} errors"
+        )
         return success, errors
 
     def bulk_sync(
         self,
         collection_id: str,
         processed_items: List[Item],
-        refresh: bool = False,
+        **kwargs: Any,
     ) -> Tuple[int, List[Dict[str, Any]]]:
         """
-        Perform a bulk insert of items into the database synchronously.
+        Perform a bulk insert of items into the database asynchronously.
 
         Args:
             collection_id (str): The ID of the collection to which the items belong.
             processed_items (List[Item]): A list of `Item` objects to be inserted into the database.
-            refresh (bool): Whether to refresh the index after the bulk insert (default: False).
+            **kwargs (Any): Additional keyword arguments, including:
+                - refresh (str, optional): Whether to refresh the index after the bulk insert.
+                    Can be "true", "false", or "wait_for". Defaults to the value of `self.sync_settings.database_refresh`.
+                - refresh (bool, optional): Whether to refresh the index after the bulk insert.
+                - raise_on_error (bool, optional): Whether to raise an error if any of the bulk operations fail.
+                    Defaults to the value of `self.async_settings.raise_on_bulk_error`.
 
         Returns:
             Tuple[int, List[Dict[str, Any]]]: A tuple containing:
@@ -1113,9 +1158,29 @@
         Notes:
             This function performs a bulk insert of `processed_items` into the database using the specified `collection_id`.
             The insert is performed synchronously and blocking, meaning that the function does not return until the insert has
-            completed. The `mk_actions` function is called to generate a list of actions for the bulk insert. If `refresh` is set to
-            True, the index is refreshed after the bulk insert.
+            completed. The `mk_actions` function is called to generate a list of actions for the bulk insert. The `refresh`
+            parameter determines whether the index is refreshed after the bulk insert:
+            - "true": Forces an immediate refresh of the index.
+            - "false": Does not refresh the index immediately (default behavior).
+            - "wait_for": Waits for the next refresh cycle to make the changes visible.
         """
+        # Ensure kwargs is a dictionary
+        kwargs = kwargs or {}
+
+        # Resolve the `refresh` parameter
+        refresh = kwargs.get("refresh", self.async_settings.database_refresh)
+        refresh = validate_refresh(refresh)
+
+        # Log the bulk insert attempt
+        logger.info(
+            f"Performing bulk insert for collection {collection_id} with refresh={refresh}"
+        )
+
+        # Handle empty processed_items
+        if not processed_items:
+            logger.warning(f"No items to insert for collection {collection_id}")
+            return 0, []
+
         raise_on_error = self.sync_settings.raise_on_bulk_error
         success, errors = helpers.bulk(
             self.sync_client,