stac-fastapi-opensearch 4.1.0__py3-none-any.whl → 5.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- stac_fastapi/opensearch/app.py +52 -29
- stac_fastapi/opensearch/config.py +36 -13
- stac_fastapi/opensearch/database_logic.py +340 -275
- stac_fastapi/opensearch/version.py +1 -1
- stac_fastapi_opensearch-5.0.0.dist-info/METADATA +576 -0
- stac_fastapi_opensearch-5.0.0.dist-info/RECORD +10 -0
- stac_fastapi_opensearch-4.1.0.dist-info/METADATA +0 -380
- stac_fastapi_opensearch-4.1.0.dist-info/RECORD +0 -10
- {stac_fastapi_opensearch-4.1.0.dist-info → stac_fastapi_opensearch-5.0.0.dist-info}/WHEEL +0 -0
- {stac_fastapi_opensearch-4.1.0.dist-info → stac_fastapi_opensearch-5.0.0.dist-info}/entry_points.txt +0 -0
- {stac_fastapi_opensearch-4.1.0.dist-info → stac_fastapi_opensearch-5.0.0.dist-info}/top_level.txt +0 -0
@@ -1,20 +1,43 @@
 """Database logic."""
 
 import asyncio
-import json
 import logging
 from base64 import urlsafe_b64decode, urlsafe_b64encode
 from copy import deepcopy
-from typing import Any, Dict, Iterable, List, Optional, Tuple, Type
+from typing import Any, Dict, Iterable, List, Optional, Tuple, Type, Union
 
 import attr
+import orjson
 from opensearchpy import exceptions, helpers
 from opensearchpy.helpers.query import Q
 from opensearchpy.helpers.search import Search
 from starlette.requests import Request
 
 from stac_fastapi.core.base_database_logic import BaseDatabaseLogic
-from stac_fastapi.core.database_logic import (
+from stac_fastapi.core.serializers import CollectionSerializer, ItemSerializer
+from stac_fastapi.core.utilities import MAX_LIMIT, bbox2polygon
+from stac_fastapi.opensearch.config import (
+    AsyncOpensearchSettings as AsyncSearchSettings,
+)
+from stac_fastapi.opensearch.config import OpensearchSettings as SyncSearchSettings
+from stac_fastapi.sfeos_helpers import filter
+from stac_fastapi.sfeos_helpers.database import (
+    apply_free_text_filter_shared,
+    apply_intersects_filter_shared,
+    create_index_templates_shared,
+    delete_item_index_shared,
+    get_queryables_mapping_shared,
+    index_alias_by_collection_id,
+    index_by_collection_id,
+    indices,
+    mk_actions,
+    mk_item_id,
+    populate_sort_shared,
+    return_date,
+    validate_refresh,
+)
+from stac_fastapi.sfeos_helpers.mappings import (
+    AGGREGATION_MAPPING,
     COLLECTIONS_INDEX,
     DEFAULT_SORT,
     ES_COLLECTIONS_MAPPINGS,
@@ -23,20 +46,9 @@ from stac_fastapi.core.database_logic import (
     ITEM_INDICES,
     ITEMS_INDEX_PREFIX,
     Geometry,
-    index_alias_by_collection_id,
-    index_by_collection_id,
-    indices,
-    mk_actions,
-    mk_item_id,
 )
-from stac_fastapi.core.extensions import filter
-from stac_fastapi.core.serializers import CollectionSerializer, ItemSerializer
-from stac_fastapi.core.utilities import MAX_LIMIT, bbox2polygon
-from stac_fastapi.opensearch.config import (
-    AsyncOpensearchSettings as AsyncSearchSettings,
-)
-from stac_fastapi.opensearch.config import OpensearchSettings as SyncSearchSettings
 from stac_fastapi.types.errors import ConflictError, NotFoundError
+from stac_fastapi.types.rfc3339 import DateTimeType
 from stac_fastapi.types.stac import Collection, Item
 
 logger = logging.getLogger(__name__)
@@ -50,23 +62,7 @@ async def create_index_templates() -> None:
         None
 
     """
-
-    await client.indices.put_template(
-        name=f"template_{COLLECTIONS_INDEX}",
-        body={
-            "index_patterns": [f"{COLLECTIONS_INDEX}*"],
-            "mappings": ES_COLLECTIONS_MAPPINGS,
-        },
-    )
-    await client.indices.put_template(
-        name=f"template_{ITEMS_INDEX_PREFIX}",
-        body={
-            "index_patterns": [f"{ITEMS_INDEX_PREFIX}*"],
-            "settings": ES_ITEMS_SETTINGS,
-            "mappings": ES_ITEMS_MAPPINGS,
-        },
-    )
-    await client.close()
+    await create_index_templates_shared(settings=AsyncSearchSettings())
 
 
 async def create_collection_index() -> None:
@@ -125,18 +121,13 @@ async def delete_item_index(collection_id: str) -> None:
 
     Args:
         collection_id (str): The ID of the collection whose items index will be deleted.
-    """
-    client = AsyncSearchSettings().create_client
 
-
-
-
-
-
-
-    else:
-        await client.indices.delete(index=name)
-    await client.close()
+    Notes:
+        This function delegates to the shared implementation in delete_item_index_shared.
+    """
+    await delete_item_index_shared(
+        settings=AsyncSearchSettings(), collection_id=collection_id
+    )
 
 
 @attr.s
@@ -161,76 +152,7 @@ class DatabaseLogic(BaseDatabaseLogic):
 
     extensions: List[str] = attr.ib(default=attr.Factory(list))
 
-    aggregation_mapping: Dict[str, Dict[str, Any]] = {
-        "total_count": {"value_count": {"field": "id"}},
-        "collection_frequency": {"terms": {"field": "collection", "size": 100}},
-        "platform_frequency": {"terms": {"field": "properties.platform", "size": 100}},
-        "cloud_cover_frequency": {
-            "range": {
-                "field": "properties.eo:cloud_cover",
-                "ranges": [
-                    {"to": 5},
-                    {"from": 5, "to": 15},
-                    {"from": 15, "to": 40},
-                    {"from": 40},
-                ],
-            }
-        },
-        "datetime_frequency": {
-            "date_histogram": {
-                "field": "properties.datetime",
-                "calendar_interval": "month",
-            }
-        },
-        "datetime_min": {"min": {"field": "properties.datetime"}},
-        "datetime_max": {"max": {"field": "properties.datetime"}},
-        "grid_code_frequency": {
-            "terms": {
-                "field": "properties.grid:code",
-                "missing": "none",
-                "size": 10000,
-            }
-        },
-        "sun_elevation_frequency": {
-            "histogram": {"field": "properties.view:sun_elevation", "interval": 5}
-        },
-        "sun_azimuth_frequency": {
-            "histogram": {"field": "properties.view:sun_azimuth", "interval": 5}
-        },
-        "off_nadir_frequency": {
-            "histogram": {"field": "properties.view:off_nadir", "interval": 5}
-        },
-        "centroid_geohash_grid_frequency": {
-            "geohash_grid": {
-                "field": "properties.proj:centroid",
-                "precision": 1,
-            }
-        },
-        "centroid_geohex_grid_frequency": {
-            "geohex_grid": {
-                "field": "properties.proj:centroid",
-                "precision": 0,
-            }
-        },
-        "centroid_geotile_grid_frequency": {
-            "geotile_grid": {
-                "field": "properties.proj:centroid",
-                "precision": 0,
-            }
-        },
-        "geometry_geohash_grid_frequency": {
-            "geohash_grid": {
-                "field": "geometry",
-                "precision": 1,
-            }
-        },
-        "geometry_geotile_grid_frequency": {
-            "geotile_grid": {
-                "field": "geometry",
-                "precision": 0,
-            }
-        },
-    }
+    aggregation_mapping: Dict[str, Dict[str, Any]] = AGGREGATION_MAPPING
 
     """CORE LOGIC"""
 
@@ -307,6 +229,23 @@ class DatabaseLogic(BaseDatabaseLogic):
         )
         return item["_source"]
 
+    async def get_queryables_mapping(self, collection_id: str = "*") -> dict:
+        """Retrieve mapping of Queryables for search.
+
+        Args:
+            collection_id (str, optional): The id of the Collection the Queryables
+                belongs to. Defaults to "*".
+
+        Returns:
+            dict: A dictionary containing the Queryables mappings.
+        """
+        mappings = await self.client.indices.get_mapping(
+            index=f"{ITEMS_INDEX_PREFIX}{collection_id}",
+        )
+        return await get_queryables_mapping_shared(
+            collection_id=collection_id, mappings=mappings
+        )
+
     @staticmethod
     def make_search():
         """Database logic to create a Search instance."""
@@ -324,130 +263,116 @@ class DatabaseLogic(BaseDatabaseLogic):
 
     @staticmethod
     def apply_free_text_filter(search: Search, free_text_queries: Optional[List[str]]):
-        """
-        if free_text_queries is not None:
-            free_text_query_string = '" OR properties.\\*:"'.join(free_text_queries)
-            search = search.query(
-                "query_string", query=f'properties.\\*:"{free_text_query_string}"'
-            )
+        """Create a free text query for OpenSearch queries.
 
-
+        This method delegates to the shared implementation in apply_free_text_filter_shared.
+
+        Args:
+            search (Search): The search object to apply the query to.
+            free_text_queries (Optional[List[str]]): A list of text strings to search for in the properties.
+
+        Returns:
+            Search: The search object with the free text query applied, or the original search
+                object if no free_text_queries were provided.
+        """
+        return apply_free_text_filter_shared(
+            search=search, free_text_queries=free_text_queries
+        )
 
     @staticmethod
-    def apply_datetime_filter(
-
+    def apply_datetime_filter(
+        search: Search, interval: Optional[Union[DateTimeType, str]]
+    ) -> Search:
+        """Apply a filter to search on datetime, start_datetime, and end_datetime fields.
 
         Args:
-            search
-
+            search: The search object to filter.
+            interval: Optional datetime interval to filter by. Can be:
+                - A single datetime string (e.g., "2023-01-01T12:00:00")
+                - A datetime range string (e.g., "2023-01-01/2023-12-31")
+                - A datetime object
+                - A tuple of (start_datetime, end_datetime)
 
         Returns:
-
+            The filtered search object.
         """
+        if not interval:
+            return search
+
         should = []
+        try:
+            datetime_search = return_date(interval)
+        except (ValueError, TypeError) as e:
+            # Handle invalid interval formats if return_date fails
+            logger.error(f"Invalid interval format: {interval}, error: {e}")
+            return search
 
-        # If the request is a single datetime return
-        # items with datetimes equal to the requested datetime OR
-        # the requested datetime is between their start and end datetimes
         if "eq" in datetime_search:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                ]
-            )
-
-        # If the request is a date range return
-        # items with datetimes within the requested date range OR
-        # their startdatetime ithin the requested date range OR
-        # their enddatetime ithin the requested date range OR
-        # the requested daterange within their start and end datetimes
+            # For exact matches, include:
+            # 1. Items with matching exact datetime
+            # 2. Items with datetime:null where the time falls within their range
+            should = [
+                Q(
+                    "bool",
+                    filter=[
+                        Q("exists", field="properties.datetime"),
+                        Q("term", **{"properties__datetime": datetime_search["eq"]}),
+                    ],
+                ),
+                Q(
+                    "bool",
+                    must_not=[Q("exists", field="properties.datetime")],
+                    filter=[
+                        Q("exists", field="properties.start_datetime"),
+                        Q("exists", field="properties.end_datetime"),
+                        Q(
+                            "range",
+                            properties__start_datetime={"lte": datetime_search["eq"]},
+                        ),
+                        Q(
+                            "range",
+                            properties__end_datetime={"gte": datetime_search["eq"]},
+                        ),
+                    ],
+                ),
+            ]
         else:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                    ],
-                ),
-                Q(
-                    "bool",
-                    filter=[
-                        Q(
-                            "range",
-                            properties__start_datetime={
-                                "lte": datetime_search["gte"]
-                            },
-                        ),
-                        Q(
-                            "range",
-                            properties__end_datetime={
-                                "gte": datetime_search["lte"]
-                            },
-                        ),
-                    ],
-                ),
-            ]
-        )
-
-        search = search.query(Q("bool", filter=[Q("bool", should=should)]))
-
-        return search
+            # For date ranges, include:
+            # 1. Items with datetime in the range
+            # 2. Items with datetime:null that overlap the search range
+            should = [
+                Q(
+                    "bool",
+                    filter=[
+                        Q("exists", field="properties.datetime"),
+                        Q(
+                            "range",
+                            properties__datetime={
+                                "gte": datetime_search["gte"],
+                                "lte": datetime_search["lte"],
+                            },
+                        ),
+                    ],
+                ),
+                Q(
+                    "bool",
+                    must_not=[Q("exists", field="properties.datetime")],
+                    filter=[
+                        Q("exists", field="properties.start_datetime"),
+                        Q("exists", field="properties.end_datetime"),
+                        Q(
+                            "range",
+                            properties__start_datetime={"lte": datetime_search["lte"]},
+                        ),
+                        Q(
+                            "range",
+                            properties__end_datetime={"gte": datetime_search["gte"]},
+                        ),
+                    ],
+                ),
+            ]
+
+        return search.query(Q("bool", should=should, minimum_should_match=1))
 
     @staticmethod
     def apply_bbox_filter(search: Search, bbox: List):
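The rewritten apply_datetime_filter builds a should clause with two branches: items whose properties.datetime matches the requested value or range, and items with a null datetime whose start_datetime/end_datetime window overlaps the request. A minimal sketch of the resulting query shape for a range search, written as a plain OpenSearch query body rather than the opensearchpy Q objects used in the diff (field names mirror the diff; everything else is illustrative):

```python
# Sketch: the query shape produced for a datetime *range* search, as a raw
# OpenSearch body. Assumes the same field names as the diffed code.
def datetime_range_query(gte: str, lte: str) -> dict:
    return {
        "query": {
            "bool": {
                "should": [
                    {  # items with a concrete datetime inside the range
                        "bool": {
                            "filter": [
                                {"exists": {"field": "properties.datetime"}},
                                {"range": {"properties.datetime": {"gte": gte, "lte": lte}}},
                            ]
                        }
                    },
                    {  # open-ended items (datetime: null) whose window overlaps the range
                        "bool": {
                            "must_not": [{"exists": {"field": "properties.datetime"}}],
                            "filter": [
                                {"exists": {"field": "properties.start_datetime"}},
                                {"exists": {"field": "properties.end_datetime"}},
                                {"range": {"properties.start_datetime": {"lte": lte}}},
                                {"range": {"properties.end_datetime": {"gte": gte}}},
                            ],
                        }
                    },
                ],
                "minimum_should_match": 1,
            }
        }
    }


# Example: find items overlapping calendar year 2023.
body = datetime_range_query("2023-01-01T00:00:00Z", "2023-12-31T23:59:59Z")
```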
@@ -497,21 +422,8 @@ class DatabaseLogic(BaseDatabaseLogic):
         Notes:
             A geo_shape filter is added to the search object, set to intersect with the specified geometry.
         """
-
-
-                {
-                    "geo_shape": {
-                        "geometry": {
-                            "shape": {
-                                "type": intersects.type.lower(),
-                                "coordinates": intersects.coordinates,
-                            },
-                            "relation": "intersects",
-                        }
-                    }
-                }
-            )
-        )
+        filter = apply_intersects_filter_shared(intersects=intersects)
+        return search.filter(Q(filter))
 
     @staticmethod
     def apply_stacql_filter(search: Search, op: str, field: str, value: float):
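The inline geo_shape construction is replaced by apply_intersects_filter_shared; judging from the deleted lines, that helper produces an equivalent geo_shape filter with an intersects relation around the GeoJSON geometry. A plain-dict sketch of that filter body, assuming a GeoJSON-like geometry with type and coordinates attributes as in the STAC search request model:

```python
# Sketch: the geo_shape filter the deleted inline code built, as a raw dict.
# The shared helper is assumed to produce an equivalent body.
def intersects_filter(geom_type: str, coordinates) -> dict:
    return {
        "geo_shape": {
            "geometry": {
                "shape": {"type": geom_type.lower(), "coordinates": coordinates},
                "relation": "intersects",
            }
        }
    }


# Example: a small triangle polygon.
f = intersects_filter("Polygon", [[[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 0.0]]])
```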
@@ -535,8 +447,9 @@ class DatabaseLogic(BaseDatabaseLogic):
 
         return search
 
-
-
+    async def apply_cql2_filter(
+        self, search: Search, _filter: Optional[Dict[str, Any]]
+    ):
         """
         Apply a CQL2 filter to an Opensearch Search object.
 
@@ -556,18 +469,25 @@ class DatabaseLogic(BaseDatabaseLogic):
             otherwise the original Search object.
         """
         if _filter is not None:
-            es_query = filter.to_es(_filter)
+            es_query = filter.to_es(await self.get_queryables_mapping(), _filter)
             search = search.filter(es_query)
 
         return search
 
     @staticmethod
     def populate_sort(sortby: List) -> Optional[Dict[str, Dict[str, str]]]:
-        """
-
-
-
-
+        """Create a sort configuration for OpenSearch queries.
+
+        This method delegates to the shared implementation in populate_sort_shared.
+
+        Args:
+            sortby (List): A list of sort specifications, each containing a field and direction.
+
+        Returns:
+            Optional[Dict[str, Dict[str, str]]]: A dictionary mapping field names to sort direction
+                configurations, or None if no sort was specified.
+        """
+        return populate_sort_shared(sortby=sortby)
 
     async def execute_search(
         self,
@@ -607,7 +527,7 @@ class DatabaseLogic(BaseDatabaseLogic):
         search_after = None
 
         if token:
-            search_after =
+            search_after = orjson.loads(urlsafe_b64decode(token))
         if search_after:
             search_body["search_after"] = search_after
 
@@ -647,7 +567,7 @@ class DatabaseLogic(BaseDatabaseLogic):
         next_token = None
         if len(hits) > limit and limit < max_result_window:
             if hits and (sort_array := hits[limit - 1].get("sort")):
-                next_token = urlsafe_b64encode(
+                next_token = urlsafe_b64encode(orjson.dumps(sort_array)).decode()
 
         matched = (
             es_response["hits"]["total"]["value"]
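Both pagination changes swap the standard json module for orjson: the search_after cursor is decoded from the incoming token with orjson.loads, and the next token is produced by dumping the last hit's sort array and URL-safe base64-encoding it. A minimal, self-contained round-trip sketch (the variable values are illustrative, not the module's own):

```python
from base64 import urlsafe_b64decode, urlsafe_b64encode

import orjson

# Encode: the sort values of the last hit on the current page become the token.
sort_array = ["2023-06-01T00:00:00Z", "item-0042"]
token = urlsafe_b64encode(orjson.dumps(sort_array)).decode()

# Decode: the token comes back on the next request and feeds search_after.
search_after = orjson.loads(urlsafe_b64decode(token))
assert search_after == sort_array
```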
@@ -864,15 +784,17 @@ class DatabaseLogic(BaseDatabaseLogic):
     async def create_item(
         self,
         item: Item,
-        refresh: bool = False,
         base_url: str = "",
         exist_ok: bool = False,
+        **kwargs: Any,
     ):
         """Database logic for creating one item.
 
         Args:
             item (Item): The item to be created.
-
+            base_url (str, optional): The base URL for the item. Defaults to an empty string.
+            exist_ok (bool, optional): Whether to allow the item to exist already. Defaults to False.
+            **kwargs: Additional keyword arguments like refresh.
 
         Raises:
             ConflictError: If the item already exists in the database.
@@ -883,6 +805,19 @@ class DatabaseLogic(BaseDatabaseLogic):
         # todo: check if collection exists, but cache
         item_id = item["id"]
         collection_id = item["collection"]
+
+        # Ensure kwargs is a dictionary
+        kwargs = kwargs or {}
+
+        # Resolve the `refresh` parameter
+        refresh = kwargs.get("refresh", self.async_settings.database_refresh)
+        refresh = validate_refresh(refresh)
+
+        # Log the creation attempt
+        logger.info(
+            f"Creating item {item_id} in collection {collection_id} with refresh={refresh}"
+        )
+
         item = await self.async_prep_create_item(
             item=item, base_url=base_url, exist_ok=exist_ok
         )
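create_item, and the other mutating methods further down, now read refresh from **kwargs, fall back to the database_refresh setting, and normalize the value through validate_refresh before logging and writing. A standalone sketch of that resolution pattern; the _validate_refresh helper below is a stand-in for the imported validate_refresh, whose real implementation lives in stac_fastapi.sfeos_helpers.database and may differ:

```python
from typing import Any, Union


# Stand-in for stac_fastapi.sfeos_helpers.database.validate_refresh (assumed
# behavior: normalize bools/strings to the values OpenSearch accepts).
def _validate_refresh(value: Union[str, bool]) -> str:
    if isinstance(value, bool):
        return "true" if value else "false"
    value = str(value).lower()
    if value in ("true", "false", "wait_for"):
        return value
    raise ValueError(f"Invalid refresh value: {value!r}")


def resolve_refresh(default_refresh: Union[str, bool], **kwargs: Any) -> str:
    """Resolve the `refresh` kwarg the way the diffed methods do."""
    refresh = kwargs.get("refresh", default_refresh)
    return _validate_refresh(refresh)


print(resolve_refresh("false"))                       # "false" (setting default)
print(resolve_refresh("false", refresh=True))         # "true"  (caller override)
print(resolve_refresh("false", refresh="wait_for"))   # "wait_for"
```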
@@ -893,19 +828,29 @@ class DatabaseLogic(BaseDatabaseLogic):
             refresh=refresh,
         )
 
-    async def delete_item(
-        self, item_id: str, collection_id: str, refresh: bool = False
-    ):
+    async def delete_item(self, item_id: str, collection_id: str, **kwargs: Any):
         """Delete a single item from the database.
 
         Args:
             item_id (str): The id of the Item to be deleted.
             collection_id (str): The id of the Collection that the Item belongs to.
-
+            **kwargs: Additional keyword arguments like refresh.
 
         Raises:
             NotFoundError: If the Item does not exist in the database.
         """
+        # Ensure kwargs is a dictionary
+        kwargs = kwargs or {}
+
+        # Resolve the `refresh` parameter
+        refresh = kwargs.get("refresh", self.async_settings.database_refresh)
+        refresh = validate_refresh(refresh)
+
+        # Log the deletion attempt
+        logger.info(
+            f"Deleting item {item_id} from collection {collection_id} with refresh={refresh}"
+        )
+
         try:
             await self.client.delete(
                 index=index_alias_by_collection_id(collection_id),
@@ -935,12 +880,43 @@ class DatabaseLogic(BaseDatabaseLogic):
         except exceptions.NotFoundError:
             raise NotFoundError(f"Mapping for index {index_name} not found")
 
-    async def
+    async def get_items_unique_values(
+        self, collection_id: str, field_names: Iterable[str], *, limit: int = 100
+    ) -> Dict[str, List[str]]:
+        """Get the unique values for the given fields in the collection."""
+        limit_plus_one = limit + 1
+        index_name = index_alias_by_collection_id(collection_id)
+
+        query = await self.client.search(
+            index=index_name,
+            body={
+                "size": 0,
+                "aggs": {
+                    field: {"terms": {"field": field, "size": limit_plus_one}}
+                    for field in field_names
+                },
+            },
+        )
+
+        result: Dict[str, List[str]] = {}
+        for field, agg in query["aggregations"].items():
+            if len(agg["buckets"]) > limit:
+                logger.warning(
+                    "Skipping enum field %s: exceeds limit of %d unique values. "
+                    "Consider excluding this field from enumeration or increase the limit.",
+                    field,
+                    limit,
+                )
+                continue
+            result[field] = [bucket["key"] for bucket in agg["buckets"]]
+        return result
+
+    async def create_collection(self, collection: Collection, **kwargs: Any):
         """Create a single collection in the database.
 
         Args:
             collection (Collection): The Collection object to be created.
-
+            **kwargs: Additional keyword arguments like refresh.
 
         Raises:
             ConflictError: If a Collection with the same id already exists in the database.
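The new get_items_unique_values helper issues one zero-hit search with a terms aggregation per requested field, asking for limit + 1 buckets so that fields exceeding the limit can be detected and skipped. A sketch of the request body and bucket handling using plain dicts, with no OpenSearch client involved (the mocked response at the end is illustrative):

```python
from typing import Dict, Iterable, List


def unique_values_request(field_names: Iterable[str], limit: int = 100) -> dict:
    """Build the aggregation-only search body used to enumerate unique values."""
    return {
        "size": 0,  # no hits, aggregations only
        "aggs": {
            field: {"terms": {"field": field, "size": limit + 1}}
            for field in field_names
        },
    }


def collect_unique_values(aggregations: dict, limit: int = 100) -> Dict[str, List[str]]:
    """Keep only fields whose distinct-value count stays within the limit."""
    result: Dict[str, List[str]] = {}
    for field, agg in aggregations.items():
        if len(agg["buckets"]) > limit:
            continue  # too many distinct values; skip, as the diffed code does
        result[field] = [bucket["key"] for bucket in agg["buckets"]]
    return result


# Example with a mocked aggregation response:
aggs = {"properties.platform": {"buckets": [{"key": "landsat-8"}, {"key": "sentinel-2a"}]}}
print(collect_unique_values(aggs))  # {'properties.platform': ['landsat-8', 'sentinel-2a']}
```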
@@ -950,6 +926,16 @@ class DatabaseLogic(BaseDatabaseLogic):
         """
         collection_id = collection["id"]
 
+        # Ensure kwargs is a dictionary
+        kwargs = kwargs or {}
+
+        # Resolve the `refresh` parameter
+        refresh = kwargs.get("refresh", self.async_settings.database_refresh)
+        refresh = validate_refresh(refresh)
+
+        # Log the creation attempt
+        logger.info(f"Creating collection {collection_id} with refresh={refresh}")
+
         if await self.client.exists(index=COLLECTIONS_INDEX, id=collection_id):
             raise ConflictError(f"Collection {collection_id} already exists")
 
@@ -989,14 +975,14 @@ class DatabaseLogic(BaseDatabaseLogic):
         return collection["_source"]
 
     async def update_collection(
-        self, collection_id: str, collection: Collection,
+        self, collection_id: str, collection: Collection, **kwargs: Any
     ):
         """Update a collection from the database.
 
         Args:
-            self: The instance of the object calling this function.
             collection_id (str): The ID of the collection to be updated.
             collection (Collection): The Collection object to be used for the update.
+            **kwargs: Additional keyword arguments like refresh.
 
         Raises:
             NotFoundError: If the collection with the given `collection_id` is not
@@ -1007,9 +993,23 @@ class DatabaseLogic(BaseDatabaseLogic):
         `collection_id` and with the collection specified in the `Collection` object.
         If the collection is not found, a `NotFoundError` is raised.
         """
+        # Ensure kwargs is a dictionary
+        kwargs = kwargs or {}
+
+        # Resolve the `refresh` parameter
+        refresh = kwargs.get("refresh", self.async_settings.database_refresh)
+        refresh = validate_refresh(refresh)
+
+        # Log the update attempt
+        logger.info(f"Updating collection {collection_id} with refresh={refresh}")
+
         await self.find_collection(collection_id=collection_id)
 
         if collection_id != collection["id"]:
+            logger.info(
+                f"Collection ID change detected: {collection_id} -> {collection['id']}"
+            )
+
             await self.create_collection(collection, refresh=refresh)
 
             await self.client.reindex(
@@ -1025,7 +1025,7 @@ class DatabaseLogic(BaseDatabaseLogic):
                 refresh=refresh,
             )
 
-            await self.delete_collection(collection_id)
+            await self.delete_collection(collection_id=collection_id, **kwargs)
 
         else:
             await self.client.index(
@@ -1035,23 +1035,34 @@ class DatabaseLogic(BaseDatabaseLogic):
                 refresh=refresh,
             )
 
-    async def delete_collection(self, collection_id: str,
+    async def delete_collection(self, collection_id: str, **kwargs: Any):
         """Delete a collection from the database.
 
         Parameters:
             self: The instance of the object calling this function.
             collection_id (str): The ID of the collection to be deleted.
-
+            **kwargs: Additional keyword arguments like refresh.
 
         Raises:
             NotFoundError: If the collection with the given `collection_id` is not found in the database.
 
         Notes:
             This function first verifies that the collection with the specified `collection_id` exists in the database, and then
-            deletes the collection. If `refresh` is set to
-            function also calls `delete_item_index` to delete the index for the items in the collection.
+            deletes the collection. If `refresh` is set to "true", "false", or "wait_for", the index is refreshed accordingly after
+            the deletion. Additionally, this function also calls `delete_item_index` to delete the index for the items in the collection.
         """
+        # Ensure kwargs is a dictionary
+        kwargs = kwargs or {}
+
         await self.find_collection(collection_id=collection_id)
+
+        # Resolve the `refresh` parameter
+        refresh = kwargs.get("refresh", self.async_settings.database_refresh)
+        refresh = validate_refresh(refresh)
+
+        # Log the deletion attempt
+        logger.info(f"Deleting collection {collection_id} with refresh={refresh}")
+
         await self.client.delete(
             index=COLLECTIONS_INDEX, id=collection_id, refresh=refresh
         )
@@ -1061,7 +1072,7 @@ class DatabaseLogic(BaseDatabaseLogic):
         self,
         collection_id: str,
         processed_items: List[Item],
-
+        **kwargs: Any,
     ) -> Tuple[int, List[Dict[str, Any]]]:
         """
         Perform a bulk insert of items into the database asynchronously.
@@ -1069,7 +1080,12 @@ class DatabaseLogic(BaseDatabaseLogic):
         Args:
             collection_id (str): The ID of the collection to which the items belong.
             processed_items (List[Item]): A list of `Item` objects to be inserted into the database.
-
+            **kwargs (Any): Additional keyword arguments, including:
+                - refresh (str, optional): Whether to refresh the index after the bulk insert.
+                    Can be "true", "false", or "wait_for". Defaults to the value of `self.sync_settings.database_refresh`.
+                - refresh (bool, optional): Whether to refresh the index after the bulk insert.
+                - raise_on_error (bool, optional): Whether to raise an error if any of the bulk operations fail.
+                    Defaults to the value of `self.async_settings.raise_on_bulk_error`.
 
         Returns:
             Tuple[int, List[Dict[str, Any]]]: A tuple containing:
@@ -1078,10 +1094,30 @@ class DatabaseLogic(BaseDatabaseLogic):
 
         Notes:
             This function performs a bulk insert of `processed_items` into the database using the specified `collection_id`.
-            The insert is performed
-            The `mk_actions` function is called to generate a list of actions for the bulk insert.
-            the index is refreshed after the bulk insert
+            The insert is performed synchronously and blocking, meaning that the function does not return until the insert has
+            completed. The `mk_actions` function is called to generate a list of actions for the bulk insert. The `refresh`
+            parameter determines whether the index is refreshed after the bulk insert:
+            - "true": Forces an immediate refresh of the index.
+            - "false": Does not refresh the index immediately (default behavior).
+            - "wait_for": Waits for the next refresh cycle to make the changes visible.
         """
+        # Ensure kwargs is a dictionary
+        kwargs = kwargs or {}
+
+        # Resolve the `refresh` parameter
+        refresh = kwargs.get("refresh", self.async_settings.database_refresh)
+        refresh = validate_refresh(refresh)
+
+        # Log the bulk insert attempt
+        logger.info(
+            f"Performing bulk insert for collection {collection_id} with refresh={refresh}"
+        )
+
+        # Handle empty processed_items
+        if not processed_items:
+            logger.warning(f"No items to insert for collection {collection_id}")
+            return 0, []
+
         raise_on_error = self.async_settings.raise_on_bulk_error
         success, errors = await helpers.async_bulk(
             self.client,
@@ -1089,21 +1125,30 @@ class DatabaseLogic(BaseDatabaseLogic):
             refresh=refresh,
             raise_on_error=raise_on_error,
         )
+        # Log the result
+        logger.info(
+            f"Bulk insert completed for collection {collection_id}: {success} successes, {len(errors)} errors"
+        )
         return success, errors
 
     def bulk_sync(
         self,
         collection_id: str,
         processed_items: List[Item],
-
+        **kwargs: Any,
     ) -> Tuple[int, List[Dict[str, Any]]]:
         """
-        Perform a bulk insert of items into the database
+        Perform a bulk insert of items into the database asynchronously.
 
         Args:
             collection_id (str): The ID of the collection to which the items belong.
             processed_items (List[Item]): A list of `Item` objects to be inserted into the database.
-
+            **kwargs (Any): Additional keyword arguments, including:
+                - refresh (str, optional): Whether to refresh the index after the bulk insert.
+                    Can be "true", "false", or "wait_for". Defaults to the value of `self.sync_settings.database_refresh`.
+                - refresh (bool, optional): Whether to refresh the index after the bulk insert.
+                - raise_on_error (bool, optional): Whether to raise an error if any of the bulk operations fail.
+                    Defaults to the value of `self.async_settings.raise_on_bulk_error`.
 
         Returns:
             Tuple[int, List[Dict[str, Any]]]: A tuple containing:
@@ -1113,9 +1158,29 @@ class DatabaseLogic(BaseDatabaseLogic):
         Notes:
             This function performs a bulk insert of `processed_items` into the database using the specified `collection_id`.
             The insert is performed synchronously and blocking, meaning that the function does not return until the insert has
-            completed. The `mk_actions` function is called to generate a list of actions for the bulk insert.
-
+            completed. The `mk_actions` function is called to generate a list of actions for the bulk insert. The `refresh`
+            parameter determines whether the index is refreshed after the bulk insert:
+            - "true": Forces an immediate refresh of the index.
+            - "false": Does not refresh the index immediately (default behavior).
+            - "wait_for": Waits for the next refresh cycle to make the changes visible.
         """
+        # Ensure kwargs is a dictionary
+        kwargs = kwargs or {}
+
+        # Resolve the `refresh` parameter
+        refresh = kwargs.get("refresh", self.async_settings.database_refresh)
+        refresh = validate_refresh(refresh)
+
+        # Log the bulk insert attempt
+        logger.info(
+            f"Performing bulk insert for collection {collection_id} with refresh={refresh}"
+        )
+
+        # Handle empty processed_items
+        if not processed_items:
+            logger.warning(f"No items to insert for collection {collection_id}")
+            return 0, []
+
         raise_on_error = self.sync_settings.raise_on_bulk_error
         success, errors = helpers.bulk(
             self.sync_client,