stac-fastapi-elasticsearch 4.1.0__py3-none-any.whl → 5.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- stac_fastapi/elasticsearch/app.py +51 -29
- stac_fastapi/elasticsearch/config.py +28 -5
- stac_fastapi/elasticsearch/database_logic.py +398 -281
- stac_fastapi/elasticsearch/version.py +1 -1
- stac_fastapi_elasticsearch-5.0.0.dist-info/METADATA +575 -0
- stac_fastapi_elasticsearch-5.0.0.dist-info/RECORD +10 -0
- stac_fastapi_elasticsearch-4.1.0.dist-info/METADATA +0 -379
- stac_fastapi_elasticsearch-4.1.0.dist-info/RECORD +0 -10
- {stac_fastapi_elasticsearch-4.1.0.dist-info → stac_fastapi_elasticsearch-5.0.0.dist-info}/WHEEL +0 -0
- {stac_fastapi_elasticsearch-4.1.0.dist-info → stac_fastapi_elasticsearch-5.0.0.dist-info}/entry_points.txt +0 -0
- {stac_fastapi_elasticsearch-4.1.0.dist-info → stac_fastapi_elasticsearch-5.0.0.dist-info}/top_level.txt +0 -0
stac_fastapi/elasticsearch/database_logic.py

@@ -1,42 +1,51 @@
 """Database logic."""

 import asyncio
-import json
 import logging
 from base64 import urlsafe_b64decode, urlsafe_b64encode
 from copy import deepcopy
-from typing import Any, Dict, Iterable, List, Optional, Tuple, Type
+from typing import Any, Dict, Iterable, List, Optional, Tuple, Type, Union

 import attr
 import elasticsearch.helpers as helpers
+import orjson
 from elasticsearch.dsl import Q, Search
 from elasticsearch.exceptions import NotFoundError as ESNotFoundError
 from starlette.requests import Request

 from stac_fastapi.core.base_database_logic import BaseDatabaseLogic
-from stac_fastapi.core.
-
-
-
-
-
-
-
-
+from stac_fastapi.core.serializers import CollectionSerializer, ItemSerializer
+from stac_fastapi.core.utilities import MAX_LIMIT, bbox2polygon
+from stac_fastapi.elasticsearch.config import AsyncElasticsearchSettings
+from stac_fastapi.elasticsearch.config import (
+    ElasticsearchSettings as SyncElasticsearchSettings,
+)
+from stac_fastapi.sfeos_helpers import filter
+from stac_fastapi.sfeos_helpers.database import (
+    apply_free_text_filter_shared,
+    apply_intersects_filter_shared,
+    create_index_templates_shared,
+    delete_item_index_shared,
+    get_queryables_mapping_shared,
     index_alias_by_collection_id,
     index_by_collection_id,
     indices,
     mk_actions,
     mk_item_id,
+    populate_sort_shared,
+    return_date,
+    validate_refresh,
 )
-from stac_fastapi.
-
-
-
-
-
+from stac_fastapi.sfeos_helpers.mappings import (
+    AGGREGATION_MAPPING,
+    COLLECTIONS_INDEX,
+    DEFAULT_SORT,
+    ITEM_INDICES,
+    ITEMS_INDEX_PREFIX,
+    Geometry,
 )
 from stac_fastapi.types.errors import ConflictError, NotFoundError
+from stac_fastapi.types.rfc3339 import DateTimeType
 from stac_fastapi.types.stac import Collection, Item

 logger = logging.getLogger(__name__)
@@ -50,22 +59,7 @@ async def create_index_templates() -> None:
         None

     """
-
-    await client.indices.put_index_template(
-        name=f"template_{COLLECTIONS_INDEX}",
-        body={
-            "index_patterns": [f"{COLLECTIONS_INDEX}*"],
-            "template": {"mappings": ES_COLLECTIONS_MAPPINGS},
-        },
-    )
-    await client.indices.put_index_template(
-        name=f"template_{ITEMS_INDEX_PREFIX}",
-        body={
-            "index_patterns": [f"{ITEMS_INDEX_PREFIX}*"],
-            "template": {"settings": ES_ITEMS_SETTINGS, "mappings": ES_ITEMS_MAPPINGS},
-        },
-    )
-    await client.close()
+    await create_index_templates_shared(settings=AsyncElasticsearchSettings())


 async def create_collection_index() -> None:
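Illustrative sketch (not taken from the diff): how the two module-level index helpers shown above can be awaited together at application startup. The `init_indices` wrapper and the call order are assumptions.

import asyncio

from stac_fastapi.elasticsearch.database_logic import (
    create_collection_index,
    create_index_templates,
)


async def init_indices() -> None:
    # create_index_templates now delegates to create_index_templates_shared
    await create_index_templates()
    # create the collections index so collection documents can be written
    await create_collection_index()


if __name__ == "__main__":
    asyncio.run(init_indices())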
@@ -110,18 +104,13 @@ async def delete_item_index(collection_id: str):

     Args:
         collection_id (str): The ID of the collection whose items index will be deleted.
-    """
-    client = AsyncElasticsearchSettings().create_client

-
-
-
-
-
-
-    else:
-        await client.indices.delete(index=name)
-    await client.close()
+    Notes:
+        This function delegates to the shared implementation in delete_item_index_shared.
+    """
+    await delete_item_index_shared(
+        settings=AsyncElasticsearchSettings(), collection_id=collection_id
+    )


 @attr.s
@@ -150,76 +139,7 @@ class DatabaseLogic(BaseDatabaseLogic):

     extensions: List[str] = attr.ib(default=attr.Factory(list))

-    aggregation_mapping: Dict[str, Dict[str, Any]] =
-        "total_count": {"value_count": {"field": "id"}},
-        "collection_frequency": {"terms": {"field": "collection", "size": 100}},
-        "platform_frequency": {"terms": {"field": "properties.platform", "size": 100}},
-        "cloud_cover_frequency": {
-            "range": {
-                "field": "properties.eo:cloud_cover",
-                "ranges": [
-                    {"to": 5},
-                    {"from": 5, "to": 15},
-                    {"from": 15, "to": 40},
-                    {"from": 40},
-                ],
-            }
-        },
-        "datetime_frequency": {
-            "date_histogram": {
-                "field": "properties.datetime",
-                "calendar_interval": "month",
-            }
-        },
-        "datetime_min": {"min": {"field": "properties.datetime"}},
-        "datetime_max": {"max": {"field": "properties.datetime"}},
-        "grid_code_frequency": {
-            "terms": {
-                "field": "properties.grid:code",
-                "missing": "none",
-                "size": 10000,
-            }
-        },
-        "sun_elevation_frequency": {
-            "histogram": {"field": "properties.view:sun_elevation", "interval": 5}
-        },
-        "sun_azimuth_frequency": {
-            "histogram": {"field": "properties.view:sun_azimuth", "interval": 5}
-        },
-        "off_nadir_frequency": {
-            "histogram": {"field": "properties.view:off_nadir", "interval": 5}
-        },
-        "centroid_geohash_grid_frequency": {
-            "geohash_grid": {
-                "field": "properties.proj:centroid",
-                "precision": 1,
-            }
-        },
-        "centroid_geohex_grid_frequency": {
-            "geohex_grid": {
-                "field": "properties.proj:centroid",
-                "precision": 0,
-            }
-        },
-        "centroid_geotile_grid_frequency": {
-            "geotile_grid": {
-                "field": "properties.proj:centroid",
-                "precision": 0,
-            }
-        },
-        "geometry_geohash_grid_frequency": {
-            "geohash_grid": {
-                "field": "geometry",
-                "precision": 1,
-            }
-        },
-        "geometry_geotile_grid_frequency": {
-            "geotile_grid": {
-                "field": "geometry",
-                "precision": 0,
-            }
-        },
-    }
+    aggregation_mapping: Dict[str, Dict[str, Any]] = AGGREGATION_MAPPING

     """CORE LOGIC"""

@@ -290,6 +210,23 @@ class DatabaseLogic(BaseDatabaseLogic):
         )
         return item["_source"]

+    async def get_queryables_mapping(self, collection_id: str = "*") -> dict:
+        """Retrieve mapping of Queryables for search.
+
+        Args:
+            collection_id (str, optional): The id of the Collection the Queryables
+                belongs to. Defaults to "*".
+
+        Returns:
+            dict: A dictionary containing the Queryables mappings.
+        """
+        mappings = await self.client.indices.get_mapping(
+            index=f"{ITEMS_INDEX_PREFIX}{collection_id}",
+        )
+        return await get_queryables_mapping_shared(
+            collection_id=collection_id, mappings=mappings
+        )
+
     @staticmethod
     def make_search():
         """Database logic to create a Search instance."""
@@ -306,120 +243,99 @@ class DatabaseLogic(BaseDatabaseLogic):
         return search.filter("terms", collection=collection_ids)

     @staticmethod
-    def apply_datetime_filter(
+    def apply_datetime_filter(
+        search: Search, interval: Optional[Union[DateTimeType, str]]
+    ) -> Search:
         """Apply a filter to search on datetime, start_datetime, and end_datetime fields.

         Args:
-            search
-
+            search: The search object to filter.
+            interval: Optional datetime interval to filter by. Can be:
+                - A single datetime string (e.g., "2023-01-01T12:00:00")
+                - A datetime range string (e.g., "2023-01-01/2023-12-31")
+                - A datetime object
+                - A tuple of (start_datetime, end_datetime)

         Returns:
-
+            The filtered search object.
         """
+        if not interval:
+            return search
+
         should = []
+        try:
+            datetime_search = return_date(interval)
+        except (ValueError, TypeError) as e:
+            # Handle invalid interval formats if return_date fails
+            logger.error(f"Invalid interval format: {interval}, error: {e}")
+            return search

-        # If the request is a single datetime return
-        # items with datetimes equal to the requested datetime OR
-        # the requested datetime is between their start and end datetimes
         if "eq" in datetime_search:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                ]
-            )
-
-            # If the request is a date range return
-            # items with datetimes within the requested date range OR
-            # their startdatetime ithin the requested date range OR
-            # their enddatetime ithin the requested date range OR
-            # the requested daterange within their start and end datetimes
+            # For exact matches, include:
+            # 1. Items with matching exact datetime
+            # 2. Items with datetime:null where the time falls within their range
+            should = [
+                Q(
+                    "bool",
+                    filter=[
+                        Q("exists", field="properties.datetime"),
+                        Q("term", **{"properties__datetime": datetime_search["eq"]}),
+                    ],
+                ),
+                Q(
+                    "bool",
+                    must_not=[Q("exists", field="properties.datetime")],
+                    filter=[
+                        Q("exists", field="properties.start_datetime"),
+                        Q("exists", field="properties.end_datetime"),
+                        Q(
+                            "range",
+                            properties__start_datetime={"lte": datetime_search["eq"]},
+                        ),
+                        Q(
+                            "range",
+                            properties__end_datetime={"gte": datetime_search["eq"]},
+                        ),
+                    ],
+                ),
+            ]
         else:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                        ],
-                    ),
-                    Q(
-                        "bool",
-                        filter=[
-                            Q(
-                                "range",
-                                properties__start_datetime={
-                                    "lte": datetime_search["gte"]
-                                },
-                            ),
-                            Q(
-                                "range",
-                                properties__end_datetime={
-                                    "gte": datetime_search["lte"]
-                                },
-                            ),
-                        ],
-                    ),
-                ]
-            )
-
-        search = search.query(Q("bool", filter=[Q("bool", should=should)]))
-
-        return search
+            # For date ranges, include:
+            # 1. Items with datetime in the range
+            # 2. Items with datetime:null that overlap the search range
+            should = [
+                Q(
+                    "bool",
+                    filter=[
+                        Q("exists", field="properties.datetime"),
+                        Q(
+                            "range",
+                            properties__datetime={
+                                "gte": datetime_search["gte"],
+                                "lte": datetime_search["lte"],
+                            },
+                        ),
+                    ],
+                ),
+                Q(
+                    "bool",
+                    must_not=[Q("exists", field="properties.datetime")],
+                    filter=[
+                        Q("exists", field="properties.start_datetime"),
+                        Q("exists", field="properties.end_datetime"),
+                        Q(
+                            "range",
+                            properties__start_datetime={"lte": datetime_search["lte"]},
+                        ),
+                        Q(
+                            "range",
+                            properties__end_datetime={"gte": datetime_search["gte"]},
+                        ),
+                    ],
+                ),
+            ]
+
+        return search.query(Q("bool", should=should, minimum_should_match=1))

     @staticmethod
     def apply_bbox_filter(search: Search, bbox: List):
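A short offline sketch of the reworked method above: it only builds the query body, so no cluster is needed, and it assumes `return_date` accepts the range-string syntax listed in the new docstring.

from elasticsearch.dsl import Search

from stac_fastapi.elasticsearch.database_logic import DatabaseLogic

# Build (but do not execute) a search filtered to a 2023 date range.
search = DatabaseLogic.apply_datetime_filter(
    Search(), "2023-01-01T00:00:00Z/2023-12-31T23:59:59Z"
)

# The resulting bool query carries the two "should" branches shown in the diff:
# a range match on properties.datetime, or a start/end datetime overlap for
# items whose datetime is null.
print(search.to_dict())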
@@ -469,21 +385,8 @@ class DatabaseLogic(BaseDatabaseLogic):
         Notes:
             A geo_shape filter is added to the search object, set to intersect with the specified geometry.
         """
-
-
-                {
-                    "geo_shape": {
-                        "geometry": {
-                            "shape": {
-                                "type": intersects.type.lower(),
-                                "coordinates": intersects.coordinates,
-                            },
-                            "relation": "intersects",
-                        }
-                    }
-                }
-            )
-        )
+        filter = apply_intersects_filter_shared(intersects=intersects)
+        return search.filter(Q(filter))

     @staticmethod
     def apply_stacql_filter(search: Search, op: str, field: str, value: float):
@@ -509,17 +412,25 @@ class DatabaseLogic(BaseDatabaseLogic):

     @staticmethod
     def apply_free_text_filter(search: Search, free_text_queries: Optional[List[str]]):
-        """
-        if free_text_queries is not None:
-            free_text_query_string = '" OR properties.\\*:"'.join(free_text_queries)
-            search = search.query(
-                "query_string", query=f'properties.\\*:"{free_text_query_string}"'
-            )
+        """Create a free text query for Elasticsearch queries.

-
+        This method delegates to the shared implementation in apply_free_text_filter_shared.

-
-
+        Args:
+            search (Search): The search object to apply the query to.
+            free_text_queries (Optional[List[str]]): A list of text strings to search for in the properties.
+
+        Returns:
+            Search: The search object with the free text query applied, or the original search
+                object if no free_text_queries were provided.
+        """
+        return apply_free_text_filter_shared(
+            search=search, free_text_queries=free_text_queries
+        )
+
+    async def apply_cql2_filter(
+        self, search: Search, _filter: Optional[Dict[str, Any]]
+    ):
         """
         Apply a CQL2 filter to an Elasticsearch Search object.

@@ -539,18 +450,25 @@ class DatabaseLogic(BaseDatabaseLogic):
             otherwise the original Search object.
         """
         if _filter is not None:
-            es_query = filter.to_es(_filter)
+            es_query = filter.to_es(await self.get_queryables_mapping(), _filter)
             search = search.query(es_query)

         return search

     @staticmethod
     def populate_sort(sortby: List) -> Optional[Dict[str, Dict[str, str]]]:
-        """
-
-
-
-
+        """Create a sort configuration for Elasticsearch queries.
+
+        This method delegates to the shared implementation in populate_sort_shared.
+
+        Args:
+            sortby (List): A list of sort specifications, each containing a field and direction.
+
+        Returns:
+            Optional[Dict[str, Dict[str, str]]]: A dictionary mapping field names to sort direction
+                configurations, or None if no sort was specified.
+        """
+        return populate_sort_shared(sortby=sortby)

     async def execute_search(
         self,
@@ -585,7 +503,7 @@ class DatabaseLogic(BaseDatabaseLogic):
         search_after = None

         if token:
-            search_after =
+            search_after = orjson.loads(urlsafe_b64decode(token))

         query = search.query.to_dict() if search.query else None

@@ -625,7 +543,7 @@ class DatabaseLogic(BaseDatabaseLogic):
         next_token = None
         if len(hits) > limit and limit < max_result_window:
             if hits and (sort_array := hits[limit - 1].get("sort")):
-                next_token = urlsafe_b64encode(
+                next_token = urlsafe_b64encode(orjson.dumps(sort_array)).decode()

         matched = (
             es_response["hits"]["total"]["value"]
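Together with the previous hunk, pagination tokens are now serialized with orjson instead of the standard json module. A small round-trip sketch (the sort values are hypothetical):

from base64 import urlsafe_b64decode, urlsafe_b64encode

import orjson

# Hypothetical "sort" value from the last hit of a page
sort_array = [1672574400000, "test-collection_item-001"]

# Encoding, as in execute_search when building next_token
token = urlsafe_b64encode(orjson.dumps(sort_array)).decode()

# Decoding, as when the token comes back on the next request
assert orjson.loads(urlsafe_b64decode(token)) == sort_array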
@@ -845,15 +763,19 @@ class DatabaseLogic(BaseDatabaseLogic):
     async def create_item(
         self,
         item: Item,
-        refresh: bool = False,
         base_url: str = "",
         exist_ok: bool = False,
+        **kwargs: Any,
     ):
         """Database logic for creating one item.

         Args:
             item (Item): The item to be created.
-
+            base_url (str, optional): The base URL for the item. Defaults to an empty string.
+            exist_ok (bool, optional): Whether to allow the item to exist already. Defaults to False.
+            **kwargs: Additional keyword arguments.
+                - refresh (str): Whether to refresh the index after the operation. Can be "true", "false", or "wait_for".
+                - refresh (bool): Whether to refresh the index after the operation. Defaults to the value in `self.async_settings.database_refresh`.

         Raises:
             ConflictError: If the item already exists in the database.
@@ -861,12 +783,28 @@ class DatabaseLogic(BaseDatabaseLogic):
         Returns:
             None
         """
-        #
+        # Extract item and collection IDs
         item_id = item["id"]
         collection_id = item["collection"]
+
+        # Ensure kwargs is a dictionary
+        kwargs = kwargs or {}
+
+        # Resolve the `refresh` parameter
+        refresh = kwargs.get("refresh", self.async_settings.database_refresh)
+        refresh = validate_refresh(refresh)
+
+        # Log the creation attempt
+        logger.info(
+            f"Creating item {item_id} in collection {collection_id} with refresh={refresh}"
+        )
+
+        # Prepare the item for insertion
         item = await self.async_prep_create_item(
             item=item, base_url=base_url, exist_ok=exist_ok
         )
+
+        # Index the item in the database
         await self.client.index(
             index=index_alias_by_collection_id(collection_id),
             id=mk_item_id(item_id, collection_id),
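Illustrative usage sketch of the new keyword-argument style; `db` and `item` stand for an initialized DatabaseLogic instance and a valid STAC item dict, and are assumptions rather than part of the diff.

import asyncio


async def write_item(db, item):
    # "wait_for" blocks until the next refresh makes the item searchable;
    # booleans and "true"/"false" are also accepted, and omitting refresh
    # falls back to self.async_settings.database_refresh.
    await db.create_item(item, base_url="http://localhost:8080", refresh="wait_for")


# asyncio.run(write_item(db, item))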
@@ -874,26 +812,43 @@ class DatabaseLogic(BaseDatabaseLogic):
             refresh=refresh,
         )

-    async def delete_item(
-        self, item_id: str, collection_id: str, refresh: bool = False
-    ):
+    async def delete_item(self, item_id: str, collection_id: str, **kwargs: Any):
         """Delete a single item from the database.

         Args:
             item_id (str): The id of the Item to be deleted.
             collection_id (str): The id of the Collection that the Item belongs to.
-
+            **kwargs: Additional keyword arguments.
+                - refresh (str): Whether to refresh the index after the operation. Can be "true", "false", or "wait_for".
+                - refresh (bool): Whether to refresh the index after the operation. Defaults to the value in `self.async_settings.database_refresh`.

         Raises:
             NotFoundError: If the Item does not exist in the database.
+
+        Returns:
+            None
         """
+        # Ensure kwargs is a dictionary
+        kwargs = kwargs or {}
+
+        # Resolve the `refresh` parameter
+        refresh = kwargs.get("refresh", self.async_settings.database_refresh)
+        refresh = validate_refresh(refresh)
+
+        # Log the deletion attempt
+        logger.info(
+            f"Deleting item {item_id} from collection {collection_id} with refresh={refresh}"
+        )
+
         try:
+            # Perform the delete operation
             await self.client.delete(
                 index=index_alias_by_collection_id(collection_id),
                 id=mk_item_id(item_id, collection_id),
                 refresh=refresh,
             )
         except ESNotFoundError:
+            # Raise a custom NotFoundError if the item does not exist
             raise NotFoundError(
                 f"Item {item_id} in collection {collection_id} not found"
             )
@@ -916,24 +871,72 @@ class DatabaseLogic(BaseDatabaseLogic):
         except ESNotFoundError:
             raise NotFoundError(f"Mapping for index {index_name} not found")

-    async def
+    async def get_items_unique_values(
+        self, collection_id: str, field_names: Iterable[str], *, limit: int = 100
+    ) -> Dict[str, List[str]]:
+        """Get the unique values for the given fields in the collection."""
+        limit_plus_one = limit + 1
+        index_name = index_alias_by_collection_id(collection_id)
+
+        query = await self.client.search(
+            index=index_name,
+            body={
+                "size": 0,
+                "aggs": {
+                    field: {"terms": {"field": field, "size": limit_plus_one}}
+                    for field in field_names
+                },
+            },
+        )
+
+        result: Dict[str, List[str]] = {}
+        for field, agg in query["aggregations"].items():
+            if len(agg["buckets"]) > limit:
+                logger.warning(
+                    "Skipping enum field %s: exceeds limit of %d unique values. "
+                    "Consider excluding this field from enumeration or increase the limit.",
+                    field,
+                    limit,
+                )
+                continue
+            result[field] = [bucket["key"] for bucket in agg["buckets"]]
+        return result
+
+    async def create_collection(self, collection: Collection, **kwargs: Any):
         """Create a single collection in the database.

         Args:
             collection (Collection): The Collection object to be created.
-
+            **kwargs: Additional keyword arguments.
+                - refresh (str): Whether to refresh the index after the operation. Can be "true", "false", or "wait_for".
+                - refresh (bool): Whether to refresh the index after the operation. Defaults to the value in `self.async_settings.database_refresh`.

         Raises:
             ConflictError: If a Collection with the same id already exists in the database.

+        Returns:
+            None
+
         Notes:
             A new index is created for the items in the Collection using the `create_item_index` function.
         """
         collection_id = collection["id"]

+        # Ensure kwargs is a dictionary
+        kwargs = kwargs or {}
+
+        # Resolve the `refresh` parameter
+        refresh = kwargs.get("refresh", self.async_settings.database_refresh)
+        refresh = validate_refresh(refresh)
+
+        # Log the creation attempt
+        logger.info(f"Creating collection {collection_id} with refresh={refresh}")
+
+        # Check if the collection already exists
         if await self.client.exists(index=COLLECTIONS_INDEX, id=collection_id):
             raise ConflictError(f"Collection {collection_id} already exists")

+        # Index the collection in the database
         await self.client.index(
             index=COLLECTIONS_INDEX,
             id=collection_id,
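The bucket handling in get_items_unique_values can be exercised offline with a hypothetical aggregations payload shaped like an Elasticsearch terms response:

from typing import Any, Dict, List

# Hypothetical response fragment; a real call goes through self.client.search
mock_aggregations: Dict[str, Any] = {
    "properties.platform": {
        "buckets": [
            {"key": "landsat-8", "doc_count": 10},
            {"key": "sentinel-2a", "doc_count": 7},
        ]
    }
}

limit = 100
result: Dict[str, List[str]] = {}
for field, agg in mock_aggregations.items():
    if len(agg["buckets"]) > limit:
        continue  # mirrors the warning-and-skip branch above
    result[field] = [bucket["key"] for bucket in agg["buckets"]]

print(result)  # {'properties.platform': ['landsat-8', 'sentinel-2a']}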
@@ -970,29 +974,52 @@ class DatabaseLogic(BaseDatabaseLogic):
         return collection["_source"]

     async def update_collection(
-        self, collection_id: str, collection: Collection,
+        self, collection_id: str, collection: Collection, **kwargs: Any
     ):
-        """Update a collection
+        """Update a collection in the database.

         Args:
-            self: The instance of the object calling this function.
             collection_id (str): The ID of the collection to be updated.
             collection (Collection): The Collection object to be used for the update.
+            **kwargs: Additional keyword arguments.
+                - refresh (str): Whether to refresh the index after the operation. Can be "true", "false", or "wait_for".
+                - refresh (bool): Whether to refresh the index after the operation. Defaults to the value in `self.async_settings.database_refresh`.
+        Returns:
+            None

         Raises:
-            NotFoundError: If the collection with the given `collection_id` is not
-
+            NotFoundError: If the collection with the given `collection_id` is not found in the database.
+            ConflictError: If a conflict occurs during the update.

         Notes:
             This function updates the collection in the database using the specified
-            `collection_id` and
-
+            `collection_id` and the provided `Collection` object. If the collection ID
+            changes, the function creates a new collection, reindexes the items, and deletes
+            the old collection.
         """
+        # Ensure kwargs is a dictionary
+        kwargs = kwargs or {}
+
+        # Resolve the `refresh` parameter
+        refresh = kwargs.get("refresh", self.async_settings.database_refresh)
+        refresh = validate_refresh(refresh)
+
+        # Log the update attempt
+        logger.info(f"Updating collection {collection_id} with refresh={refresh}")
+
+        # Ensure the collection exists
         await self.find_collection(collection_id=collection_id)

+        # Handle collection ID change
         if collection_id != collection["id"]:
+            logger.info(
+                f"Collection ID change detected: {collection_id} -> {collection['id']}"
+            )
+
+            # Create the new collection
             await self.create_collection(collection, refresh=refresh)

+            # Reindex items from the old collection to the new collection
             await self.client.reindex(
                 body={
                     "dest": {"index": f"{ITEMS_INDEX_PREFIX}{collection['id']}"},
@@ -1006,9 +1033,11 @@ class DatabaseLogic(BaseDatabaseLogic):
                 refresh=refresh,
             )

+            # Delete the old collection
             await self.delete_collection(collection_id)

         else:
+            # Update the existing collection
             await self.client.index(
                 index=COLLECTIONS_INDEX,
                 id=collection_id,
@@ -1016,33 +1045,57 @@ class DatabaseLogic(BaseDatabaseLogic):
                 refresh=refresh,
             )

-    async def delete_collection(self, collection_id: str,
+    async def delete_collection(self, collection_id: str, **kwargs: Any):
         """Delete a collection from the database.

         Parameters:
-            self: The instance of the object calling this function.
             collection_id (str): The ID of the collection to be deleted.
-
+            kwargs (Any, optional): Additional keyword arguments, including `refresh`.
+                - refresh (str): Whether to refresh the index after the operation. Can be "true", "false", or "wait_for".
+                - refresh (bool): Whether to refresh the index after the operation. Defaults to the value in `self.async_settings.database_refresh`.

         Raises:
             NotFoundError: If the collection with the given `collection_id` is not found in the database.

+        Returns:
+            None
+
         Notes:
             This function first verifies that the collection with the specified `collection_id` exists in the database, and then
-            deletes the collection. If `refresh` is set to
-            function also calls `delete_item_index` to delete the index for the items in the collection.
+            deletes the collection. If `refresh` is set to "true", "false", or "wait_for", the index is refreshed accordingly after
+            the deletion. Additionally, this function also calls `delete_item_index` to delete the index for the items in the collection.
         """
+        # Ensure kwargs is a dictionary
+        kwargs = kwargs or {}
+
+        # Verify that the collection exists
         await self.find_collection(collection_id=collection_id)
+
+        # Resolve the `refresh` parameter
+        refresh = kwargs.get("refresh", self.async_settings.database_refresh)
+        refresh = validate_refresh(refresh)
+
+        # Log the deletion attempt
+        logger.info(f"Deleting collection {collection_id} with refresh={refresh}")
+
+        # Delete the collection from the database
         await self.client.delete(
             index=COLLECTIONS_INDEX, id=collection_id, refresh=refresh
         )
-
+
+        # Delete the item index for the collection
+        try:
+            await delete_item_index(collection_id)
+        except Exception as e:
+            logger.error(
+                f"Failed to delete item index for collection {collection_id}: {e}"
+            )

     async def bulk_async(
         self,
         collection_id: str,
         processed_items: List[Item],
-
+        **kwargs: Any,
     ) -> Tuple[int, List[Dict[str, Any]]]:
         """
         Perform a bulk insert of items into the database asynchronously.
@@ -1050,7 +1103,12 @@ class DatabaseLogic(BaseDatabaseLogic):
         Args:
             collection_id (str): The ID of the collection to which the items belong.
             processed_items (List[Item]): A list of `Item` objects to be inserted into the database.
-
+            **kwargs (Any): Additional keyword arguments, including:
+                - refresh (str, optional): Whether to refresh the index after the bulk insert.
+                    Can be "true", "false", or "wait_for". Defaults to the value of `self.sync_settings.database_refresh`.
+                - refresh (bool, optional): Whether to refresh the index after the bulk insert.
+                - raise_on_error (bool, optional): Whether to raise an error if any of the bulk operations fail.
+                    Defaults to the value of `self.async_settings.raise_on_bulk_error`.

         Returns:
             Tuple[int, List[Dict[str, Any]]]: A tuple containing:
@@ -1059,10 +1117,31 @@ class DatabaseLogic(BaseDatabaseLogic):

         Notes:
             This function performs a bulk insert of `processed_items` into the database using the specified `collection_id`.
-            The insert is performed
-            The `mk_actions` function is called to generate a list of actions for the bulk insert.
-            the index is refreshed after the bulk insert
+            The insert is performed synchronously and blocking, meaning that the function does not return until the insert has
+            completed. The `mk_actions` function is called to generate a list of actions for the bulk insert. The `refresh`
+            parameter determines whether the index is refreshed after the bulk insert:
+            - "true": Forces an immediate refresh of the index.
+            - "false": Does not refresh the index immediately (default behavior).
+            - "wait_for": Waits for the next refresh cycle to make the changes visible.
         """
+        # Ensure kwargs is a dictionary
+        kwargs = kwargs or {}
+
+        # Resolve the `refresh` parameter
+        refresh = kwargs.get("refresh", self.async_settings.database_refresh)
+        refresh = validate_refresh(refresh)
+
+        # Log the bulk insert attempt
+        logger.info(
+            f"Performing bulk insert for collection {collection_id} with refresh={refresh}"
+        )
+
+        # Handle empty processed_items
+        if not processed_items:
+            logger.warning(f"No items to insert for collection {collection_id}")
+            return 0, []
+
+        # Perform the bulk insert
         raise_on_error = self.async_settings.raise_on_bulk_error
         success, errors = await helpers.async_bulk(
             self.client,
@@ -1070,13 +1149,19 @@ class DatabaseLogic(BaseDatabaseLogic):
             refresh=refresh,
             raise_on_error=raise_on_error,
         )
+
+        # Log the result
+        logger.info(
+            f"Bulk insert completed for collection {collection_id}: {success} successes, {len(errors)} errors"
+        )
+
         return success, errors

     def bulk_sync(
         self,
         collection_id: str,
         processed_items: List[Item],
-
+        **kwargs: Any,
     ) -> Tuple[int, List[Dict[str, Any]]]:
         """
         Perform a bulk insert of items into the database synchronously.
@@ -1084,7 +1169,12 @@ class DatabaseLogic(BaseDatabaseLogic):
         Args:
             collection_id (str): The ID of the collection to which the items belong.
             processed_items (List[Item]): A list of `Item` objects to be inserted into the database.
-
+            **kwargs (Any): Additional keyword arguments, including:
+                - refresh (str, optional): Whether to refresh the index after the bulk insert.
+                    Can be "true", "false", or "wait_for". Defaults to the value of `self.sync_settings.database_refresh`.
+                - refresh (bool, optional): Whether to refresh the index after the bulk insert.
+                - raise_on_error (bool, optional): Whether to raise an error if any of the bulk operations fail.
+                    Defaults to the value of `self.async_settings.raise_on_bulk_error`.

         Returns:
             Tuple[int, List[Dict[str, Any]]]: A tuple containing:
@@ -1094,9 +1184,30 @@ class DatabaseLogic(BaseDatabaseLogic):
         Notes:
             This function performs a bulk insert of `processed_items` into the database using the specified `collection_id`.
             The insert is performed synchronously and blocking, meaning that the function does not return until the insert has
-            completed. The `mk_actions` function is called to generate a list of actions for the bulk insert.
-
+            completed. The `mk_actions` function is called to generate a list of actions for the bulk insert. The `refresh`
+            parameter determines whether the index is refreshed after the bulk insert:
+            - "true": Forces an immediate refresh of the index.
+            - "false": Does not refresh the index immediately (default behavior).
+            - "wait_for": Waits for the next refresh cycle to make the changes visible.
         """
+        # Ensure kwargs is a dictionary
+        kwargs = kwargs or {}
+
+        # Resolve the `refresh` parameter
+        refresh = kwargs.get("refresh", self.async_settings.database_refresh)
+        refresh = validate_refresh(refresh)
+
+        # Log the bulk insert attempt
+        logger.info(
+            f"Performing bulk insert for collection {collection_id} with refresh={refresh}"
+        )
+
+        # Handle empty processed_items
+        if not processed_items:
+            logger.warning(f"No items to insert for collection {collection_id}")
+            return 0, []
+
+        # Perform the bulk insert
         raise_on_error = self.sync_settings.raise_on_bulk_error
         success, errors = helpers.bulk(
             self.sync_client,
@@ -1104,6 +1215,12 @@ class DatabaseLogic(BaseDatabaseLogic):
             refresh=refresh,
             raise_on_error=raise_on_error,
         )
+
+        # Log the result
+        logger.info(
+            f"Bulk insert completed for collection {collection_id}: {success} successes, {len(errors)} errors"
+        )
+
         return success, errors

     # DANGER