stac-fastapi-opensearch 4.2.0__py3-none-any.whl → 5.0.0__py3-none-any.whl

This diff reflects the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and shows the changes between the two versions as published.
@@ -1,20 +1,43 @@
 """Database logic."""
 
 import asyncio
-import json
 import logging
 from base64 import urlsafe_b64decode, urlsafe_b64encode
 from copy import deepcopy
-from typing import Any, Dict, Iterable, List, Optional, Tuple, Type
+from typing import Any, Dict, Iterable, List, Optional, Tuple, Type, Union
 
 import attr
+import orjson
 from opensearchpy import exceptions, helpers
 from opensearchpy.helpers.query import Q
 from opensearchpy.helpers.search import Search
 from starlette.requests import Request
 
 from stac_fastapi.core.base_database_logic import BaseDatabaseLogic
-from stac_fastapi.core.database_logic import (
+from stac_fastapi.core.serializers import CollectionSerializer, ItemSerializer
+from stac_fastapi.core.utilities import MAX_LIMIT, bbox2polygon
+from stac_fastapi.opensearch.config import (
+    AsyncOpensearchSettings as AsyncSearchSettings,
+)
+from stac_fastapi.opensearch.config import OpensearchSettings as SyncSearchSettings
+from stac_fastapi.sfeos_helpers import filter
+from stac_fastapi.sfeos_helpers.database import (
+    apply_free_text_filter_shared,
+    apply_intersects_filter_shared,
+    create_index_templates_shared,
+    delete_item_index_shared,
+    get_queryables_mapping_shared,
+    index_alias_by_collection_id,
+    index_by_collection_id,
+    indices,
+    mk_actions,
+    mk_item_id,
+    populate_sort_shared,
+    return_date,
+    validate_refresh,
+)
+from stac_fastapi.sfeos_helpers.mappings import (
+    AGGREGATION_MAPPING,
     COLLECTIONS_INDEX,
     DEFAULT_SORT,
     ES_COLLECTIONS_MAPPINGS,
@@ -23,20 +46,9 @@ from stac_fastapi.core.database_logic import (
     ITEM_INDICES,
     ITEMS_INDEX_PREFIX,
     Geometry,
-    index_alias_by_collection_id,
-    index_by_collection_id,
-    indices,
-    mk_actions,
-    mk_item_id,
 )
-from stac_fastapi.core.extensions import filter
-from stac_fastapi.core.serializers import CollectionSerializer, ItemSerializer
-from stac_fastapi.core.utilities import MAX_LIMIT, bbox2polygon, validate_refresh
-from stac_fastapi.opensearch.config import (
-    AsyncOpensearchSettings as AsyncSearchSettings,
-)
-from stac_fastapi.opensearch.config import OpensearchSettings as SyncSearchSettings
 from stac_fastapi.types.errors import ConflictError, NotFoundError
+from stac_fastapi.types.rfc3339 import DateTimeType
 from stac_fastapi.types.stac import Collection, Item
 
 logger = logging.getLogger(__name__)
@@ -50,23 +62,7 @@ async def create_index_templates() -> None:
        None

    """
-    client = AsyncSearchSettings().create_client
-    await client.indices.put_template(
-        name=f"template_{COLLECTIONS_INDEX}",
-        body={
-            "index_patterns": [f"{COLLECTIONS_INDEX}*"],
-            "mappings": ES_COLLECTIONS_MAPPINGS,
-        },
-    )
-    await client.indices.put_template(
-        name=f"template_{ITEMS_INDEX_PREFIX}",
-        body={
-            "index_patterns": [f"{ITEMS_INDEX_PREFIX}*"],
-            "settings": ES_ITEMS_SETTINGS,
-            "mappings": ES_ITEMS_MAPPINGS,
-        },
-    )
-    await client.close()
+    await create_index_templates_shared(settings=AsyncSearchSettings())
 
 
 async def create_collection_index() -> None:
@@ -125,18 +121,13 @@ async def delete_item_index(collection_id: str) -> None:
 
     Args:
         collection_id (str): The ID of the collection whose items index will be deleted.
-    """
-    client = AsyncSearchSettings().create_client
 
-    name = index_alias_by_collection_id(collection_id)
-    resolved = await client.indices.resolve_index(name=name)
-    if "aliases" in resolved and resolved["aliases"]:
-        [alias] = resolved["aliases"]
-        await client.indices.delete_alias(index=alias["indices"], name=alias["name"])
-        await client.indices.delete(index=alias["indices"])
-    else:
-        await client.indices.delete(index=name)
-    await client.close()
+    Notes:
+        This function delegates to the shared implementation in delete_item_index_shared.
+    """
+    await delete_item_index_shared(
+        settings=AsyncSearchSettings(), collection_id=collection_id
+    )
 
 
 @attr.s
@@ -161,76 +152,7 @@ class DatabaseLogic(BaseDatabaseLogic):
 
     extensions: List[str] = attr.ib(default=attr.Factory(list))
 
-    aggregation_mapping: Dict[str, Dict[str, Any]] = {
-        "total_count": {"value_count": {"field": "id"}},
-        "collection_frequency": {"terms": {"field": "collection", "size": 100}},
-        "platform_frequency": {"terms": {"field": "properties.platform", "size": 100}},
-        "cloud_cover_frequency": {
-            "range": {
-                "field": "properties.eo:cloud_cover",
-                "ranges": [
-                    {"to": 5},
-                    {"from": 5, "to": 15},
-                    {"from": 15, "to": 40},
-                    {"from": 40},
-                ],
-            }
-        },
-        "datetime_frequency": {
-            "date_histogram": {
-                "field": "properties.datetime",
-                "calendar_interval": "month",
-            }
-        },
-        "datetime_min": {"min": {"field": "properties.datetime"}},
-        "datetime_max": {"max": {"field": "properties.datetime"}},
-        "grid_code_frequency": {
-            "terms": {
-                "field": "properties.grid:code",
-                "missing": "none",
-                "size": 10000,
-            }
-        },
-        "sun_elevation_frequency": {
-            "histogram": {"field": "properties.view:sun_elevation", "interval": 5}
-        },
-        "sun_azimuth_frequency": {
-            "histogram": {"field": "properties.view:sun_azimuth", "interval": 5}
-        },
-        "off_nadir_frequency": {
-            "histogram": {"field": "properties.view:off_nadir", "interval": 5}
-        },
-        "centroid_geohash_grid_frequency": {
-            "geohash_grid": {
-                "field": "properties.proj:centroid",
-                "precision": 1,
-            }
-        },
-        "centroid_geohex_grid_frequency": {
-            "geohex_grid": {
-                "field": "properties.proj:centroid",
-                "precision": 0,
-            }
-        },
-        "centroid_geotile_grid_frequency": {
-            "geotile_grid": {
-                "field": "properties.proj:centroid",
-                "precision": 0,
-            }
-        },
-        "geometry_geohash_grid_frequency": {
-            "geohash_grid": {
-                "field": "geometry",
-                "precision": 1,
-            }
-        },
-        "geometry_geotile_grid_frequency": {
-            "geotile_grid": {
-                "field": "geometry",
-                "precision": 0,
-            }
-        },
-    }
+    aggregation_mapping: Dict[str, Dict[str, Any]] = AGGREGATION_MAPPING
 
     """CORE LOGIC"""
 
@@ -317,23 +239,12 @@ class DatabaseLogic(BaseDatabaseLogic):
         Returns:
             dict: A dictionary containing the Queryables mappings.
         """
-        queryables_mapping = {}
-
         mappings = await self.client.indices.get_mapping(
             index=f"{ITEMS_INDEX_PREFIX}{collection_id}",
         )
-
-        for mapping in mappings.values():
-            fields = mapping["mappings"].get("properties", {})
-            properties = fields.pop("properties", {}).get("properties", {}).keys()
-
-            for field_key in fields:
-                queryables_mapping[field_key] = field_key
-
-            for property_key in properties:
-                queryables_mapping[property_key] = f"properties.{property_key}"
-
-        return queryables_mapping
+        return await get_queryables_mapping_shared(
+            collection_id=collection_id, mappings=mappings
+        )
 
     @staticmethod
     def make_search():
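
For context, the removed inline loop shows the flattening that get_queryables_mapping_shared now centralizes. A minimal standalone sketch of that logic, run against a hypothetical get_mapping response (index name and fields invented for illustration):

    # Hypothetical shape of an OpenSearch get_mapping response for an items index.
    mappings = {
        "items_my-collection-000001": {
            "mappings": {
                "properties": {
                    "id": {"type": "keyword"},
                    "collection": {"type": "keyword"},
                    "properties": {
                        "properties": {
                            "datetime": {"type": "date"},
                            "eo:cloud_cover": {"type": "float"},
                        }
                    },
                }
            }
        }
    }

    queryables_mapping = {}
    for mapping in mappings.values():
        fields = mapping["mappings"].get("properties", {})
        # Nested item properties sit one level down, under properties.properties.
        properties = fields.pop("properties", {}).get("properties", {}).keys()
        for field_key in fields:
            queryables_mapping[field_key] = field_key
        for property_key in properties:
            queryables_mapping[property_key] = f"properties.{property_key}"

    # {'id': 'id', 'collection': 'collection', 'datetime': 'properties.datetime',
    #  'eo:cloud_cover': 'properties.eo:cloud_cover'}
    print(queryables_mapping)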
@@ -352,130 +263,116 @@ class DatabaseLogic(BaseDatabaseLogic):
 
     @staticmethod
     def apply_free_text_filter(search: Search, free_text_queries: Optional[List[str]]):
-        """Database logic to perform query for search endpoint."""
-        if free_text_queries is not None:
-            free_text_query_string = '" OR properties.\\*:"'.join(free_text_queries)
-            search = search.query(
-                "query_string", query=f'properties.\\*:"{free_text_query_string}"'
-            )
+        """Create a free text query for OpenSearch queries.
 
-        return search
+        This method delegates to the shared implementation in apply_free_text_filter_shared.
+
+        Args:
+            search (Search): The search object to apply the query to.
+            free_text_queries (Optional[List[str]]): A list of text strings to search for in the properties.
+
+        Returns:
+            Search: The search object with the free text query applied, or the original search
+            object if no free_text_queries were provided.
+        """
+        return apply_free_text_filter_shared(
+            search=search, free_text_queries=free_text_queries
+        )
 
     @staticmethod
-    def apply_datetime_filter(search: Search, datetime_search):
-        """Apply a filter to search based on datetime field, start_datetime, and end_datetime fields.
+    def apply_datetime_filter(
+        search: Search, interval: Optional[Union[DateTimeType, str]]
+    ) -> Search:
+        """Apply a filter to search on datetime, start_datetime, and end_datetime fields.
 
         Args:
-            search (Search): The search object to filter.
-            datetime_search (dict): The datetime filter criteria.
+            search: The search object to filter.
+            interval: Optional datetime interval to filter by. Can be:
+                - A single datetime string (e.g., "2023-01-01T12:00:00")
+                - A datetime range string (e.g., "2023-01-01/2023-12-31")
+                - A datetime object
+                - A tuple of (start_datetime, end_datetime)
 
         Returns:
-            Search: The filtered search object.
+            The filtered search object.
         """
+        if not interval:
+            return search
+
         should = []
+        try:
+            datetime_search = return_date(interval)
+        except (ValueError, TypeError) as e:
+            # Handle invalid interval formats if return_date fails
+            logger.error(f"Invalid interval format: {interval}, error: {e}")
+            return search
 
-        # If the request is a single datetime return
-        # items with datetimes equal to the requested datetime OR
-        # the requested datetime is between their start and end datetimes
         if "eq" in datetime_search:
-            should.extend(
-                [
-                    Q(
-                        "bool",
-                        filter=[
-                            Q(
-                                "term",
-                                properties__datetime=datetime_search["eq"],
-                            ),
-                        ],
-                    ),
-                    Q(
-                        "bool",
-                        filter=[
-                            Q(
-                                "range",
-                                properties__start_datetime={
-                                    "lte": datetime_search["eq"],
-                                },
-                            ),
-                            Q(
-                                "range",
-                                properties__end_datetime={
-                                    "gte": datetime_search["eq"],
-                                },
-                            ),
-                        ],
-                    ),
-                ]
-            )
-
-        # If the request is a date range return
-        # items with datetimes within the requested date range OR
-        # their startdatetime ithin the requested date range OR
-        # their enddatetime ithin the requested date range OR
-        # the requested daterange within their start and end datetimes
+            # For exact matches, include:
+            # 1. Items with matching exact datetime
+            # 2. Items with datetime:null where the time falls within their range
+            should = [
+                Q(
+                    "bool",
+                    filter=[
+                        Q("exists", field="properties.datetime"),
+                        Q("term", **{"properties__datetime": datetime_search["eq"]}),
+                    ],
+                ),
+                Q(
+                    "bool",
+                    must_not=[Q("exists", field="properties.datetime")],
+                    filter=[
+                        Q("exists", field="properties.start_datetime"),
+                        Q("exists", field="properties.end_datetime"),
+                        Q(
+                            "range",
+                            properties__start_datetime={"lte": datetime_search["eq"]},
+                        ),
+                        Q(
+                            "range",
+                            properties__end_datetime={"gte": datetime_search["eq"]},
+                        ),
+                    ],
+                ),
+            ]
         else:
-            should.extend(
-                [
-                    Q(
-                        "bool",
-                        filter=[
-                            Q(
-                                "range",
-                                properties__datetime={
-                                    "gte": datetime_search["gte"],
-                                    "lte": datetime_search["lte"],
-                                },
-                            ),
-                        ],
-                    ),
-                    Q(
-                        "bool",
-                        filter=[
-                            Q(
-                                "range",
-                                properties__start_datetime={
-                                    "gte": datetime_search["gte"],
-                                    "lte": datetime_search["lte"],
-                                },
-                            ),
-                        ],
-                    ),
-                    Q(
-                        "bool",
-                        filter=[
-                            Q(
-                                "range",
-                                properties__end_datetime={
-                                    "gte": datetime_search["gte"],
-                                    "lte": datetime_search["lte"],
-                                },
-                            ),
-                        ],
-                    ),
-                    Q(
-                        "bool",
-                        filter=[
-                            Q(
-                                "range",
-                                properties__start_datetime={
-                                    "lte": datetime_search["gte"]
-                                },
-                            ),
-                            Q(
-                                "range",
-                                properties__end_datetime={
-                                    "gte": datetime_search["lte"]
-                                },
-                            ),
-                        ],
-                    ),
-                ]
-            )
-
-        search = search.query(Q("bool", filter=[Q("bool", should=should)]))
-
-        return search
+            # For date ranges, include:
+            # 1. Items with datetime in the range
+            # 2. Items with datetime:null that overlap the search range
+            should = [
+                Q(
+                    "bool",
+                    filter=[
+                        Q("exists", field="properties.datetime"),
+                        Q(
+                            "range",
+                            properties__datetime={
+                                "gte": datetime_search["gte"],
+                                "lte": datetime_search["lte"],
+                            },
+                        ),
+                    ],
+                ),
+                Q(
+                    "bool",
+                    must_not=[Q("exists", field="properties.datetime")],
+                    filter=[
+                        Q("exists", field="properties.start_datetime"),
+                        Q("exists", field="properties.end_datetime"),
+                        Q(
+                            "range",
+                            properties__start_datetime={"lte": datetime_search["lte"]},
+                        ),
+                        Q(
+                            "range",
+                            properties__end_datetime={"gte": datetime_search["gte"]},
+                        ),
+                    ],
+                ),
+            ]
+
+        return search.query(Q("bool", should=should, minimum_should_match=1))
 
     @staticmethod
     def apply_bbox_filter(search: Search, bbox: List):
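
To make the new query shape concrete, here is a small self-contained sketch that builds the range-branch query by hand and prints its body. It assumes, as the branches above imply, that return_date normalizes an interval into either an {"eq": ...} dict or a {"gte": ..., "lte": ...} dict:

    from opensearchpy.helpers.query import Q

    datetime_search = {"gte": "2023-01-01T00:00:00Z", "lte": "2023-12-31T23:59:59Z"}

    should = [
        # Items with a concrete datetime inside the range.
        Q(
            "bool",
            filter=[
                Q("exists", field="properties.datetime"),
                Q("range", properties__datetime=datetime_search),
            ],
        ),
        # Items with datetime:null whose start/end interval overlaps the range.
        Q(
            "bool",
            must_not=[Q("exists", field="properties.datetime")],
            filter=[
                Q("exists", field="properties.start_datetime"),
                Q("exists", field="properties.end_datetime"),
                Q("range", properties__start_datetime={"lte": datetime_search["lte"]}),
                Q("range", properties__end_datetime={"gte": datetime_search["gte"]}),
            ],
        ),
    ]

    # minimum_should_match=1 turns the should clauses into an OR: a hit must
    # satisfy at least one of the two bool branches.
    print(Q("bool", should=should, minimum_should_match=1).to_dict())

Compared with the 4.2.0 logic, the exists/must_not guards are what let items with a null datetime but a valid start/end range keep matching.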
@@ -525,21 +422,8 @@ class DatabaseLogic(BaseDatabaseLogic):
         Notes:
             A geo_shape filter is added to the search object, set to intersect with the specified geometry.
         """
-        return search.filter(
-            Q(
-                {
-                    "geo_shape": {
-                        "geometry": {
-                            "shape": {
-                                "type": intersects.type.lower(),
-                                "coordinates": intersects.coordinates,
-                            },
-                            "relation": "intersects",
-                        }
-                    }
-                }
-            )
-        )
+        filter = apply_intersects_filter_shared(intersects=intersects)
+        return search.filter(Q(filter))
 
     @staticmethod
     def apply_stacql_filter(search: Search, op: str, field: str, value: float):
@@ -592,11 +476,18 @@ class DatabaseLogic(BaseDatabaseLogic):
 
     @staticmethod
     def populate_sort(sortby: List) -> Optional[Dict[str, Dict[str, str]]]:
-        """Database logic to sort search instance."""
-        if sortby:
-            return {s.field: {"order": s.direction} for s in sortby}
-        else:
-            return None
+        """Create a sort configuration for OpenSearch queries.
+
+        This method delegates to the shared implementation in populate_sort_shared.
+
+        Args:
+            sortby (List): A list of sort specifications, each containing a field and direction.
+
+        Returns:
+            Optional[Dict[str, Dict[str, str]]]: A dictionary mapping field names to sort direction
+            configurations, or None if no sort was specified.
+        """
+        return populate_sort_shared(sortby=sortby)
 
     async def execute_search(
         self,
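
The delegation keeps the old mapping behavior. A quick sketch of the expected output, using SimpleNamespace stand-ins for the sort-spec objects (field names hypothetical):

    from types import SimpleNamespace

    sortby = [
        SimpleNamespace(field="properties.datetime", direction="desc"),
        SimpleNamespace(field="id", direction="asc"),
    ]

    # Equivalent of the removed inline logic (and presumably of populate_sort_shared).
    sort = {s.field: {"order": s.direction} for s in sortby} if sortby else None

    print(sort)  # {'properties.datetime': {'order': 'desc'}, 'id': {'order': 'asc'}}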
@@ -636,7 +527,7 @@ class DatabaseLogic(BaseDatabaseLogic):
         search_after = None
 
         if token:
-            search_after = json.loads(urlsafe_b64decode(token).decode())
+            search_after = orjson.loads(urlsafe_b64decode(token))
         if search_after:
             search_body["search_after"] = search_after
 
@@ -676,7 +567,7 @@ class DatabaseLogic(BaseDatabaseLogic):
         next_token = None
         if len(hits) > limit and limit < max_result_window:
             if hits and (sort_array := hits[limit - 1].get("sort")):
-                next_token = urlsafe_b64encode(json.dumps(sort_array).encode()).decode()
+                next_token = urlsafe_b64encode(orjson.dumps(sort_array)).decode()
 
         matched = (
             es_response["hits"]["total"]["value"]
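
Both token changes rely on orjson.dumps returning bytes (so the json.dumps(...).encode() step disappears) and orjson.loads accepting bytes directly. A round-trip sketch of the pagination token, with an invented cursor value:

    from base64 import urlsafe_b64decode, urlsafe_b64encode

    import orjson

    # The "sort" array of the last hit on a page becomes the search_after cursor.
    sort_array = ["2023-06-01T00:00:00Z", "my-collection_item-123"]

    # Encode: orjson.dumps already returns bytes, unlike json.dumps.
    token = urlsafe_b64encode(orjson.dumps(sort_array)).decode()

    # Decode: orjson.loads accepts bytes, so no intermediate .decode() is needed.
    assert orjson.loads(urlsafe_b64decode(token)) == sort_array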
@@ -989,6 +880,37 @@ class DatabaseLogic(BaseDatabaseLogic):
         except exceptions.NotFoundError:
             raise NotFoundError(f"Mapping for index {index_name} not found")
 
+    async def get_items_unique_values(
+        self, collection_id: str, field_names: Iterable[str], *, limit: int = 100
+    ) -> Dict[str, List[str]]:
+        """Get the unique values for the given fields in the collection."""
+        limit_plus_one = limit + 1
+        index_name = index_alias_by_collection_id(collection_id)
+
+        query = await self.client.search(
+            index=index_name,
+            body={
+                "size": 0,
+                "aggs": {
+                    field: {"terms": {"field": field, "size": limit_plus_one}}
+                    for field in field_names
+                },
+            },
+        )
+
+        result: Dict[str, List[str]] = {}
+        for field, agg in query["aggregations"].items():
+            if len(agg["buckets"]) > limit:
+                logger.warning(
+                    "Skipping enum field %s: exceeds limit of %d unique values. "
+                    "Consider excluding this field from enumeration or increase the limit.",
+                    field,
+                    limit,
+                )
+                continue
+            result[field] = [bucket["key"] for bucket in agg["buckets"]]
+        return result
+
     async def create_collection(self, collection: Collection, **kwargs: Any):
         """Create a single collection in the database.
 
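The new method requests limit + 1 buckets per terms aggregation so that "more than limit distinct values" is detectable rather than silently truncated. A sketch of that check against a hypothetical aggregations payload:

    limit = 2  # Kept tiny for illustration; the method defaults to 100.

    # Hypothetical response: asking for size=limit + 1 lets an overflowing field
    # surface an extra bucket instead of being cut off at exactly `limit`.
    aggregations = {
        "platform": {"buckets": [{"key": "sentinel-2a"}, {"key": "sentinel-2b"}]},
        "gsd": {"buckets": [{"key": 10}, {"key": 20}, {"key": 60}]},  # 3 > limit
    }

    result = {}
    for field, agg in aggregations.items():
        if len(agg["buckets"]) > limit:
            continue  # The real method logs a warning before skipping the field.
        result[field] = [bucket["key"] for bucket in agg["buckets"]]

    print(result)  # {'platform': ['sentinel-2a', 'sentinel-2b']}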
@@ -1,2 +1,2 @@
 """library version."""
-__version__ = "4.2.0"
+__version__ = "5.0.0"