stac-fastapi-opensearch 6.3.0__py3-none-any.whl → 6.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23,6 +23,9 @@ from stac_fastapi.core.extensions.aggregation import (
23
23
  EsAggregationExtensionGetRequest,
24
24
  EsAggregationExtensionPostRequest,
25
25
  )
26
+ from stac_fastapi.core.extensions.collections_search import (
27
+ CollectionsSearchEndpointExtension,
28
+ )
26
29
  from stac_fastapi.core.extensions.fields import FieldsExtension
27
30
  from stac_fastapi.core.rate_limit import setup_rate_limit
28
31
  from stac_fastapi.core.route_dependencies import get_route_dependencies
@@ -31,6 +34,8 @@ from stac_fastapi.core.utilities import get_bool_env
31
34
  from stac_fastapi.extensions.core import (
32
35
  AggregationExtension,
33
36
  CollectionSearchExtension,
37
+ CollectionSearchFilterExtension,
38
+ CollectionSearchPostExtension,
34
39
  FilterExtension,
35
40
  FreeTextExtension,
36
41
  SortExtension,
@@ -39,6 +44,7 @@ from stac_fastapi.extensions.core import (
39
44
  )
40
45
  from stac_fastapi.extensions.core.fields import FieldsConformanceClasses
41
46
  from stac_fastapi.extensions.core.filter import FilterConformanceClasses
47
+ from stac_fastapi.extensions.core.free_text import FreeTextConformanceClasses
42
48
  from stac_fastapi.extensions.core.query import QueryConformanceClasses
43
49
  from stac_fastapi.extensions.core.sort import SortConformanceClasses
44
50
  from stac_fastapi.extensions.third_party import BulkTransactionExtension
@@ -55,7 +61,15 @@ logging.basicConfig(level=logging.INFO)
55
61
  logger = logging.getLogger(__name__)
56
62
 
57
63
  TRANSACTIONS_EXTENSIONS = get_bool_env("ENABLE_TRANSACTIONS_EXTENSIONS", default=True)
64
+ ENABLE_COLLECTIONS_SEARCH = get_bool_env("ENABLE_COLLECTIONS_SEARCH", default=True)
65
+ ENABLE_COLLECTIONS_SEARCH_ROUTE = get_bool_env(
66
+ "ENABLE_COLLECTIONS_SEARCH_ROUTE", default=False
67
+ )
58
68
  logger.info("TRANSACTIONS_EXTENSIONS is set to %s", TRANSACTIONS_EXTENSIONS)
69
+ logger.info("ENABLE_COLLECTIONS_SEARCH is set to %s", ENABLE_COLLECTIONS_SEARCH)
70
+ logger.info(
71
+ "ENABLE_COLLECTIONS_SEARCH_ROUTE is set to %s", ENABLE_COLLECTIONS_SEARCH_ROUTE
72
+ )
59
73
 
60
74
  settings = OpensearchSettings()
61
75
  session = Session.create_from_settings(settings)
@@ -69,14 +83,6 @@ filter_extension.conformance_classes.append(
69
83
  FilterConformanceClasses.ADVANCED_COMPARISON_OPERATORS
70
84
  )
71
85
 
72
- # Adding collection search extension for compatibility with stac-auth-proxy
73
- # (https://github.com/developmentseed/stac-auth-proxy)
74
- # The extension is not fully implemented yet but is required for collection filtering support
75
- collection_search_extension = CollectionSearchExtension()
76
- collection_search_extension.conformance_classes.append(
77
- "https://api.stacspec.org/v1.0.0-rc.1/collection-search#filter"
78
- )
79
-
80
86
  aggregation_extension = AggregationExtension(
81
87
  client=EsAsyncBaseAggregationClient(
82
88
  database=database_logic, session=session, settings=settings
@@ -95,7 +101,6 @@ search_extensions = [
95
101
  TokenPaginationExtension(),
96
102
  filter_extension,
97
103
  FreeTextExtension(),
98
- collection_search_extension,
99
104
  ]
100
105
 
101
106
 
@@ -122,6 +127,80 @@ if TRANSACTIONS_EXTENSIONS:
122
127
 
123
128
  extensions = [aggregation_extension] + search_extensions
124
129
 
130
+ # Collection search related variables
131
+ collections_get_request_model = None
132
+
133
+ if ENABLE_COLLECTIONS_SEARCH or ENABLE_COLLECTIONS_SEARCH_ROUTE:
134
+ # Create collection search extensions
135
+ collection_search_extensions = [
136
+ QueryExtension(conformance_classes=[QueryConformanceClasses.COLLECTIONS]),
137
+ SortExtension(conformance_classes=[SortConformanceClasses.COLLECTIONS]),
138
+ FieldsExtension(conformance_classes=[FieldsConformanceClasses.COLLECTIONS]),
139
+ CollectionSearchFilterExtension(
140
+ conformance_classes=[FilterConformanceClasses.COLLECTIONS]
141
+ ),
142
+ FreeTextExtension(conformance_classes=[FreeTextConformanceClasses.COLLECTIONS]),
143
+ ]
144
+
145
+ # Initialize collection search with its extensions
146
+ collection_search_ext = CollectionSearchExtension.from_extensions(
147
+ collection_search_extensions
148
+ )
149
+ collections_get_request_model = collection_search_ext.GET
150
+
151
+ # Create a post request model for collection search
152
+ collection_search_post_request_model = create_post_request_model(
153
+ collection_search_extensions
154
+ )
155
+
156
+ # Create collection search extensions if enabled
157
+ if ENABLE_COLLECTIONS_SEARCH:
158
+ # Initialize collection search POST extension
159
+ collection_search_post_ext = CollectionSearchPostExtension(
160
+ client=CoreClient(
161
+ database=database_logic,
162
+ session=session,
163
+ post_request_model=collection_search_post_request_model,
164
+ landing_page_id=os.getenv("STAC_FASTAPI_LANDING_PAGE_ID", "stac-fastapi"),
165
+ ),
166
+ settings=settings,
167
+ POST=collection_search_post_request_model,
168
+ conformance_classes=[
169
+ "https://api.stacspec.org/v1.0.0-rc.1/collection-search",
170
+ QueryConformanceClasses.COLLECTIONS,
171
+ FilterConformanceClasses.COLLECTIONS,
172
+ FreeTextConformanceClasses.COLLECTIONS,
173
+ SortConformanceClasses.COLLECTIONS,
174
+ FieldsConformanceClasses.COLLECTIONS,
175
+ ],
176
+ )
177
+ extensions.append(collection_search_ext)
178
+ extensions.append(collection_search_post_ext)
179
+
180
+ if ENABLE_COLLECTIONS_SEARCH_ROUTE:
181
+ # Initialize collections-search endpoint extension
182
+ collections_search_endpoint_ext = CollectionsSearchEndpointExtension(
183
+ client=CoreClient(
184
+ database=database_logic,
185
+ session=session,
186
+ post_request_model=collection_search_post_request_model,
187
+ landing_page_id=os.getenv("STAC_FASTAPI_LANDING_PAGE_ID", "stac-fastapi"),
188
+ ),
189
+ settings=settings,
190
+ GET=collections_get_request_model,
191
+ POST=collection_search_post_request_model,
192
+ conformance_classes=[
193
+ "https://api.stacspec.org/v1.0.0-rc.1/collection-search",
194
+ QueryConformanceClasses.COLLECTIONS,
195
+ FilterConformanceClasses.COLLECTIONS,
196
+ FreeTextConformanceClasses.COLLECTIONS,
197
+ SortConformanceClasses.COLLECTIONS,
198
+ FieldsConformanceClasses.COLLECTIONS,
199
+ ],
200
+ )
201
+ extensions.append(collections_search_endpoint_ext)
202
+
203
+
125
204
  database_logic.extensions = [type(ext).__name__ for ext in extensions]
126
205
 
127
206
  post_request_model = create_post_request_model(search_extensions)
@@ -154,12 +233,17 @@ app_config = {
154
233
  post_request_model=post_request_model,
155
234
  landing_page_id=os.getenv("STAC_FASTAPI_LANDING_PAGE_ID", "stac-fastapi"),
156
235
  ),
236
+ "collections_get_request_model": collections_get_request_model,
157
237
  "search_get_request_model": create_get_request_model(search_extensions),
158
238
  "search_post_request_model": post_request_model,
159
239
  "items_get_request_model": items_get_request_model,
160
240
  "route_dependencies": get_route_dependencies(),
161
241
  }
162
242
 
243
+ # Add collections_get_request_model if it was created
244
+ if collections_get_request_model:
245
+ app_config["collections_get_request_model"] = collections_get_request_model
246
+
163
247
  api = StacApi(**app_config)
164
248
 
165
249
 
@@ -3,8 +3,9 @@
3
3
  import asyncio
4
4
  import logging
5
5
  from base64 import urlsafe_b64decode, urlsafe_b64encode
6
+ from collections.abc import Iterable
6
7
  from copy import deepcopy
7
- from typing import Any, Dict, Iterable, List, Optional, Tuple, Type
8
+ from typing import Any, Dict, List, Optional, Tuple, Type
8
9
 
9
10
  import attr
10
11
  import orjson
@@ -16,7 +17,7 @@ from starlette.requests import Request
16
17
 
17
18
  from stac_fastapi.core.base_database_logic import BaseDatabaseLogic
18
19
  from stac_fastapi.core.serializers import CollectionSerializer, ItemSerializer
19
- from stac_fastapi.core.utilities import bbox2polygon, get_max_limit
20
+ from stac_fastapi.core.utilities import bbox2polygon, get_bool_env, get_max_limit
20
21
  from stac_fastapi.extensions.core.transaction.request import (
21
22
  PartialCollection,
22
23
  PartialItem,
@@ -153,33 +154,190 @@ class DatabaseLogic(BaseDatabaseLogic):
153
154
  """CORE LOGIC"""
154
155
 
155
156
  async def get_all_collections(
156
- self, token: Optional[str], limit: int, request: Request
157
- ) -> Tuple[List[Dict[str, Any]], Optional[str]]:
158
- """
159
- Retrieve a list of all collections from Opensearch, supporting pagination.
157
+ self,
158
+ token: Optional[str],
159
+ limit: int,
160
+ request: Request,
161
+ sort: Optional[List[Dict[str, Any]]] = None,
162
+ q: Optional[List[str]] = None,
163
+ filter: Optional[Dict[str, Any]] = None,
164
+ query: Optional[Dict[str, Dict[str, Any]]] = None,
165
+ datetime: Optional[str] = None,
166
+ ) -> Tuple[List[Dict[str, Any]], Optional[str], Optional[int]]:
167
+ """Retrieve a list of collections from OpenSearch, supporting pagination.
160
168
 
161
169
  Args:
162
170
  token (Optional[str]): The pagination token.
163
171
  limit (int): The number of results to return.
172
+ request (Request): The FastAPI request object.
173
+ sort (Optional[List[Dict[str, Any]]]): Optional sort parameter from the request.
174
+ q (Optional[List[str]]): Free text search terms.
175
+ query (Optional[Dict[str, Dict[str, Any]]]): Query extension parameters.
176
+ filter (Optional[Dict[str, Any]]): Structured query in CQL2 format.
177
+ datetime (Optional[str]): Temporal filter.
164
178
 
165
179
  Returns:
166
180
  A tuple of (collections, next pagination token if any).
181
+
182
+ Raises:
183
+ HTTPException: If sorting is requested on a field that is not sortable.
167
184
  """
168
- search_body = {
169
- "sort": [{"id": {"order": "asc"}}],
185
+ # Define sortable fields based on the ES_COLLECTIONS_MAPPINGS
186
+ sortable_fields = ["id", "extent.temporal.interval", "temporal"]
187
+
188
+ # Format the sort parameter
189
+ formatted_sort = []
190
+ if sort:
191
+ for item in sort:
192
+ field = item.get("field")
193
+ direction = item.get("direction", "asc")
194
+ if field:
195
+ # Validate that the field is sortable
196
+ if field not in sortable_fields:
197
+ raise HTTPException(
198
+ status_code=400,
199
+ detail=f"Field '{field}' is not sortable. Sortable fields are: {', '.join(sortable_fields)}. "
200
+ + "Text fields are not sortable by default in OpenSearch. "
201
+ + "To make a field sortable, update the mapping to use 'keyword' type or add a '.keyword' subfield. ",
202
+ )
203
+ formatted_sort.append({field: {"order": direction}})
204
+ # Always include id as a secondary sort to ensure consistent pagination
205
+ if not any("id" in item for item in formatted_sort):
206
+ formatted_sort.append({"id": {"order": "asc"}})
207
+ else:
208
+ formatted_sort = [{"id": {"order": "asc"}}]
209
+
210
+ body = {
211
+ "sort": formatted_sort,
170
212
  "size": limit,
171
213
  }
172
214
 
173
- # Only add search_after to the query if token is not None and not empty
215
+ # Handle search_after token - split by '|' to get all sort values
216
+ search_after = None
174
217
  if token:
175
- search_after = [token]
176
- search_body["search_after"] = search_after
218
+ try:
219
+ # The token should be a pipe-separated string of sort values
220
+ # e.g., "2023-01-01T00:00:00Z|collection-1"
221
+ search_after = token.split("|")
222
+ # If the number of sort fields doesn't match token parts, ignore the token
223
+ if len(search_after) != len(formatted_sort):
224
+ search_after = None
225
+ except Exception:
226
+ search_after = None
227
+
228
+ if search_after is not None:
229
+ body["search_after"] = search_after
230
+
231
+ # Build the query part of the body
232
+ query_parts = []
233
+
234
+ # Apply free text query if provided
235
+ if q:
236
+ # For collections, we want to search across all relevant fields
237
+ should_clauses = []
238
+
239
+ # For each search term
240
+ for term in q:
241
+ # Create a multi_match query for each term
242
+ for field in [
243
+ "id",
244
+ "title",
245
+ "description",
246
+ "keywords",
247
+ "summaries.platform",
248
+ "summaries.constellation",
249
+ "providers.name",
250
+ "providers.url",
251
+ ]:
252
+ should_clauses.append(
253
+ {
254
+ "wildcard": {
255
+ field: {"value": f"*{term}*", "case_insensitive": True}
256
+ }
257
+ }
258
+ )
177
259
 
178
- response = await self.client.search(
179
- index=COLLECTIONS_INDEX,
180
- body=search_body,
260
+ # Add the free text query to the query parts
261
+ query_parts.append(
262
+ {"bool": {"should": should_clauses, "minimum_should_match": 1}}
263
+ )
264
+
265
+ # Apply structured filter if provided
266
+ if filter:
267
+ # Convert string filter to dict if needed
268
+ if isinstance(filter, str):
269
+ filter = orjson.loads(filter)
270
+ # Convert the filter to an OpenSearch query using the filter module
271
+ es_query = filter_module.to_es(await self.get_queryables_mapping(), filter)
272
+ query_parts.append(es_query)
273
+
274
+ # Apply query extension if provided
275
+ if query:
276
+ try:
277
+ # First create a search object to apply filters
278
+ search = Search(index=COLLECTIONS_INDEX)
279
+
280
+ # Process each field and operator in the query
281
+ for field_name, expr in query.items():
282
+ for op, value in expr.items():
283
+ # For collections, we don't need to prefix with 'properties__'
284
+ field = field_name
285
+ # Apply the filter using apply_stacql_filter
286
+ search = self.apply_stacql_filter(
287
+ search=search, op=op, field=field, value=value
288
+ )
289
+
290
+ # Convert the search object to a query dict and add it to query_parts
291
+ search_dict = search.to_dict()
292
+ if "query" in search_dict:
293
+ query_parts.append(search_dict["query"])
294
+
295
+ except Exception as e:
296
+ logger = logging.getLogger(__name__)
297
+ logger.error(f"Error converting query to OpenSearch: {e}")
298
+ # If there's an error, add a query that matches nothing
299
+ query_parts.append({"bool": {"must_not": {"match_all": {}}}})
300
+ raise
301
+
302
+ # Combine all query parts with AND logic if there are multiple
303
+ datetime_filter = None
304
+ if datetime:
305
+ datetime_filter = self._apply_collection_datetime_filter(datetime)
306
+ if datetime_filter:
307
+ query_parts.append(datetime_filter)
308
+
309
+ # Combine all query parts with AND logic
310
+ if query_parts:
311
+ body["query"] = (
312
+ query_parts[0]
313
+ if len(query_parts) == 1
314
+ else {"bool": {"must": query_parts}}
315
+ )
316
+
317
+ # Create a copy of the body for count query (without pagination and sorting)
318
+ count_body = body.copy()
319
+ if "search_after" in count_body:
320
+ del count_body["search_after"]
321
+ count_body["size"] = 0
322
+
323
+ # Create async tasks for both search and count
324
+ search_task = asyncio.create_task(
325
+ self.client.search(
326
+ index=COLLECTIONS_INDEX,
327
+ body=body,
328
+ )
329
+ )
330
+
331
+ count_task = asyncio.create_task(
332
+ self.client.count(
333
+ index=COLLECTIONS_INDEX,
334
+ body={"query": body.get("query", {"match_all": {}})},
335
+ )
181
336
  )
182
337
 
338
+ # Wait for search task to complete
339
+ response = await search_task
340
+
183
341
  hits = response["hits"]["hits"]
184
342
  collections = [
185
343
  self.collection_serializer.db_to_stac(
@@ -190,12 +348,27 @@ class DatabaseLogic(BaseDatabaseLogic):
190
348
 
191
349
  next_token = None
192
350
  if len(hits) == limit:
193
- # Ensure we have a valid sort value for next_token
194
351
  next_token_values = hits[-1].get("sort")
195
352
  if next_token_values:
196
- next_token = next_token_values[0]
353
+ # Join all sort values with '|' to create the token
354
+ next_token = "|".join(str(val) for val in next_token_values)
197
355
 
198
- return collections, next_token
356
+ # Get the total count of collections
357
+ matched = (
358
+ response["hits"]["total"]["value"]
359
+ if response["hits"]["total"]["relation"] == "eq"
360
+ else None
361
+ )
362
+
363
+ # If count task is done, use its result
364
+ if count_task.done():
365
+ try:
366
+ matched = count_task.result().get("count")
367
+ except Exception as e:
368
+ logger = logging.getLogger(__name__)
369
+ logger.error(f"Count task failed: {e}")
370
+
371
+ return collections, next_token, matched
199
372
 
200
373
  async def get_one_item(self, collection_id: str, item_id: str) -> Dict:
201
374
  """Retrieve a single item from the database.
@@ -211,7 +384,7 @@ class DatabaseLogic(BaseDatabaseLogic):
211
384
  NotFoundError: If the specified Item does not exist in the Collection.
212
385
 
213
386
  Notes:
214
- The Item is retrieved from the Elasticsearch database using the `client.get` method,
387
+ The Item is retrieved from the Opensearch database using the `client.get` method,
215
388
  with the index for the Collection as the target index and the combined `mk_item_id` as the document id.
216
389
  """
217
390
  try:
@@ -283,6 +456,41 @@ class DatabaseLogic(BaseDatabaseLogic):
283
456
  search=search, free_text_queries=free_text_queries
284
457
  )
285
458
 
459
+ @staticmethod
460
+ def _apply_collection_datetime_filter(
461
+ datetime_str: Optional[str],
462
+ ) -> Optional[Dict[str, Any]]:
463
+ """Create a temporal filter for collections based on their extent."""
464
+ if not datetime_str:
465
+ return None
466
+
467
+ # Parse the datetime string into start and end
468
+ if "/" in datetime_str:
469
+ start, end = datetime_str.split("/")
470
+ # Replace open-ended ranges with concrete dates
471
+ if start == "..":
472
+ # For open-ended start, use a very early date
473
+ start = "1800-01-01T00:00:00Z"
474
+ if end == "..":
475
+ # For open-ended end, use a far future date
476
+ end = "2999-12-31T23:59:59Z"
477
+ else:
478
+ # If it's just a single date, use it for both start and end
479
+ start = end = datetime_str
480
+
481
+ return {
482
+ "bool": {
483
+ "must": [
484
+ # Check if any date in the array is less than or equal to the query end date
485
+ # This will match if the collection's start date is before or equal to the query end date
486
+ {"range": {"extent.temporal.interval": {"lte": end}}},
487
+ # Check if any date in the array is greater than or equal to the query start date
488
+ # This will match if the collection's end date is after or equal to the query start date
489
+ {"range": {"extent.temporal.interval": {"gte": start}}},
490
+ ]
491
+ }
492
+ }
493
+
286
494
  @staticmethod
287
495
  def apply_datetime_filter(
288
496
  search: Search, datetime: Optional[str]
@@ -301,21 +509,94 @@ class DatabaseLogic(BaseDatabaseLogic):
301
509
  if not datetime_search:
302
510
  return search, datetime_search
303
511
 
304
- if "eq" in datetime_search:
305
- # For exact matches, include:
306
- # 1. Items with matching exact datetime
307
- # 2. Items with datetime:null where the time falls within their range
308
- should = [
309
- Q(
310
- "bool",
311
- filter=[
312
- Q("exists", field="properties.datetime"),
313
- Q("term", **{"properties__datetime": datetime_search["eq"]}),
314
- ],
315
- ),
316
- Q(
512
+ # USE_DATETIME env var
513
+ # True: Search by datetime, if null search by start/end datetime
514
+ # False: Always search only by start/end datetime
515
+ USE_DATETIME = get_bool_env("USE_DATETIME", default=True)
516
+
517
+ if USE_DATETIME:
518
+ if "eq" in datetime_search:
519
+ # For exact matches, include:
520
+ # 1. Items with matching exact datetime
521
+ # 2. Items with datetime:null where the time falls within their range
522
+ should = [
523
+ Q(
524
+ "bool",
525
+ filter=[
526
+ Q("exists", field="properties.datetime"),
527
+ Q(
528
+ "term",
529
+ **{"properties__datetime": datetime_search["eq"]},
530
+ ),
531
+ ],
532
+ ),
533
+ Q(
534
+ "bool",
535
+ must_not=[Q("exists", field="properties.datetime")],
536
+ filter=[
537
+ Q("exists", field="properties.start_datetime"),
538
+ Q("exists", field="properties.end_datetime"),
539
+ Q(
540
+ "range",
541
+ properties__start_datetime={
542
+ "lte": datetime_search["eq"]
543
+ },
544
+ ),
545
+ Q(
546
+ "range",
547
+ properties__end_datetime={"gte": datetime_search["eq"]},
548
+ ),
549
+ ],
550
+ ),
551
+ ]
552
+ else:
553
+ # For date ranges, include:
554
+ # 1. Items with datetime in the range
555
+ # 2. Items with datetime:null that overlap the search range
556
+ should = [
557
+ Q(
558
+ "bool",
559
+ filter=[
560
+ Q("exists", field="properties.datetime"),
561
+ Q(
562
+ "range",
563
+ properties__datetime={
564
+ "gte": datetime_search["gte"],
565
+ "lte": datetime_search["lte"],
566
+ },
567
+ ),
568
+ ],
569
+ ),
570
+ Q(
571
+ "bool",
572
+ must_not=[Q("exists", field="properties.datetime")],
573
+ filter=[
574
+ Q("exists", field="properties.start_datetime"),
575
+ Q("exists", field="properties.end_datetime"),
576
+ Q(
577
+ "range",
578
+ properties__start_datetime={
579
+ "lte": datetime_search["lte"]
580
+ },
581
+ ),
582
+ Q(
583
+ "range",
584
+ properties__end_datetime={
585
+ "gte": datetime_search["gte"]
586
+ },
587
+ ),
588
+ ],
589
+ ),
590
+ ]
591
+
592
+ return (
593
+ search.query(Q("bool", should=should, minimum_should_match=1)),
594
+ datetime_search,
595
+ )
596
+ else:
597
+ if "eq" in datetime_search:
598
+ filter_query = Q(
317
599
  "bool",
318
- must_not=[Q("exists", field="properties.datetime")],
319
600
  filter=[
320
601
  Q("exists", field="properties.start_datetime"),
321
602
  Q("exists", field="properties.end_datetime"),
@@ -328,29 +609,10 @@ class DatabaseLogic(BaseDatabaseLogic):
328
609
  properties__end_datetime={"gte": datetime_search["eq"]},
329
610
  ),
330
611
  ],
331
- ),
332
- ]
333
- else:
334
- # For date ranges, include:
335
- # 1. Items with datetime in the range
336
- # 2. Items with datetime:null that overlap the search range
337
- should = [
338
- Q(
339
- "bool",
340
- filter=[
341
- Q("exists", field="properties.datetime"),
342
- Q(
343
- "range",
344
- properties__datetime={
345
- "gte": datetime_search["gte"],
346
- "lte": datetime_search["lte"],
347
- },
348
- ),
349
- ],
350
- ),
351
- Q(
612
+ )
613
+ else:
614
+ filter_query = Q(
352
615
  "bool",
353
- must_not=[Q("exists", field="properties.datetime")],
354
616
  filter=[
355
617
  Q("exists", field="properties.start_datetime"),
356
618
  Q("exists", field="properties.end_datetime"),
@@ -363,13 +625,8 @@ class DatabaseLogic(BaseDatabaseLogic):
363
625
  properties__end_datetime={"gte": datetime_search["gte"]},
364
626
  ),
365
627
  ],
366
- ),
367
- ]
368
-
369
- return (
370
- search.query(Q("bool", should=should, minimum_should_match=1)),
371
- datetime_search,
372
- )
628
+ )
629
+ return search.query(filter_query), datetime_search
373
630
 
374
631
  @staticmethod
375
632
  def apply_bbox_filter(search: Search, bbox: List):
@@ -428,18 +685,31 @@ class DatabaseLogic(BaseDatabaseLogic):
428
685
 
429
686
  Args:
430
687
  search (Search): The search object to apply the filter to.
431
- op (str): The comparison operator to use. Can be 'eq' (equal), 'gt' (greater than), 'gte' (greater than or equal),
432
- 'lt' (less than), or 'lte' (less than or equal).
688
+ op (str): The comparison operator to use. Can be 'eq' (equal), 'ne'/'neq' (not equal), 'gt' (greater than),
689
+ 'gte' (greater than or equal), 'lt' (less than), or 'lte' (less than or equal).
433
690
  field (str): The field to perform the comparison on.
434
691
  value (float): The value to compare the field against.
435
692
 
436
693
  Returns:
437
694
  search (Search): The search object with the specified filter applied.
438
695
  """
439
- if op != "eq":
440
- key_filter = {field: {f"{op}": value}}
696
+ if op == "eq":
697
+ search = search.filter("term", **{field: value})
698
+ elif op == "ne" or op == "neq":
699
+ # For not equal, use a bool query with must_not
700
+ search = search.exclude("term", **{field: value})
701
+ elif op in ["gt", "gte", "lt", "lte"]:
702
+ # For range operators
703
+ key_filter = {field: {op: value}}
441
704
  search = search.filter(Q("range", **key_filter))
442
- else:
705
+ elif op == "in":
706
+ # For in operator (value should be a list)
707
+ if isinstance(value, list):
708
+ search = search.filter("terms", **{field: value})
709
+ else:
710
+ search = search.filter("term", **{field: value})
711
+ elif op == "contains":
712
+ # For contains operator (for arrays)
443
713
  search = search.filter("term", **{field: value})
444
714
 
445
715
  return search
@@ -679,14 +949,21 @@ class DatabaseLogic(BaseDatabaseLogic):
679
949
 
680
950
  """
681
951
  await self.check_collection_exists(collection_id=item["collection"])
952
+ alias = index_alias_by_collection_id(item["collection"])
953
+ doc_id = mk_item_id(item["id"], item["collection"])
682
954
 
683
- if not exist_ok and await self.client.exists(
684
- index=index_alias_by_collection_id(item["collection"]),
685
- id=mk_item_id(item["id"], item["collection"]),
686
- ):
687
- raise ConflictError(
688
- f"Item {item['id']} in collection {item['collection']} already exists"
689
- )
955
+ if not exist_ok:
956
+ alias_exists = await self.client.indices.exists_alias(name=alias)
957
+
958
+ if alias_exists:
959
+ alias_info = await self.client.indices.get_alias(name=alias)
960
+ indices = list(alias_info.keys())
961
+
962
+ for index in indices:
963
+ if await self.client.exists(index=index, id=doc_id):
964
+ raise ConflictError(
965
+ f"Item {item['id']} in collection {item['collection']} already exists"
966
+ )
690
967
 
691
968
  return self.item_serializer.stac_to_db(item, base_url)
692
969
 
@@ -903,7 +1180,6 @@ class DatabaseLogic(BaseDatabaseLogic):
903
1180
  "add",
904
1181
  "replace",
905
1182
  ]:
906
-
907
1183
  if operation.path == "collection" and collection_id != operation.value:
908
1184
  await self.check_collection_exists(collection_id=operation.value)
909
1185
  new_collection_id = operation.value
@@ -957,8 +1233,8 @@ class DatabaseLogic(BaseDatabaseLogic):
957
1233
  "script": {
958
1234
  "lang": "painless",
959
1235
  "source": (
960
- f"""ctx._id = ctx._id.replace('{collection_id}', '{new_collection_id}');""" # noqa: E702
961
- f"""ctx._source.collection = '{new_collection_id}';""" # noqa: E702
1236
+ f"""ctx._id = ctx._id.replace('{collection_id}', '{new_collection_id}');"""
1237
+ f"""ctx._source.collection = '{new_collection_id}';"""
962
1238
  ),
963
1239
  },
964
1240
  },
@@ -1180,7 +1456,7 @@ class DatabaseLogic(BaseDatabaseLogic):
1180
1456
  "source": {"index": f"{ITEMS_INDEX_PREFIX}{collection_id}"},
1181
1457
  "script": {
1182
1458
  "lang": "painless",
1183
- "source": f"""ctx._id = ctx._id.replace('{collection_id}', '{collection["id"]}'); ctx._source.collection = '{collection["id"]}' ;""", # noqa: E702
1459
+ "source": f"""ctx._id = ctx._id.replace('{collection_id}', '{collection["id"]}'); ctx._source.collection = '{collection["id"]}' ;""",
1184
1460
  },
1185
1461
  },
1186
1462
  wait_for_completion=True,
@@ -1,2 +1,2 @@
1
1
  """library version."""
2
- __version__ = "6.3.0"
2
+ __version__ = "6.5.0"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: stac-fastapi-opensearch
3
- Version: 6.3.0
3
+ Version: 6.5.0
4
4
  Summary: Opensearch stac-fastapi backend.
5
5
  Home-page: https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch
6
6
  License: MIT
@@ -15,8 +15,8 @@ Classifier: Programming Language :: Python :: 3.13
15
15
  Classifier: License :: OSI Approved :: MIT License
16
16
  Requires-Python: >=3.9
17
17
  Description-Content-Type: text/markdown
18
- Requires-Dist: stac-fastapi-core==6.3.0
19
- Requires-Dist: sfeos-helpers==6.3.0
18
+ Requires-Dist: stac-fastapi-core==6.5.0
19
+ Requires-Dist: sfeos-helpers==6.5.0
20
20
  Requires-Dist: opensearch-py~=2.8.0
21
21
  Requires-Dist: opensearch-py[async]~=2.8.0
22
22
  Requires-Dist: uvicorn~=0.23.0
@@ -73,11 +73,10 @@ SFEOS (stac-fastapi-elasticsearch-opensearch) is a high-performance, scalable AP
73
73
  - **Scale to millions of geospatial assets** with fast search performance through optimized spatial indexing and query capabilities
74
74
  - **Support OGC-compliant filtering** including spatial operations (intersects, contains, etc.) and temporal queries
75
75
  - **Perform geospatial aggregations** to analyze data distribution across space and time
76
+ - **Search collections with enhanced capabilities**, including sorting, field selection, free-text search, and structured filtering
76
77
 
77
78
  This implementation builds on the STAC-FastAPI framework, providing a production-ready solution specifically optimized for Elasticsearch and OpenSearch databases. It's ideal for organizations managing large geospatial data catalogs who need efficient discovery and access capabilities through standardized APIs.
78
79
 
79
-
80
-
81
80
  ## Common Deployment Patterns
82
81
 
83
82
  stac-fastapi-elasticsearch-opensearch can be deployed in several ways depending on your needs:
@@ -104,12 +103,13 @@ This project is built on the following technologies: STAC, stac-fastapi, FastAPI
104
103
  ## Table of Contents
105
104
 
106
105
  - [stac-fastapi-elasticsearch-opensearch](#stac-fastapi-elasticsearch-opensearch)
107
- - [Sponsors \& Supporters](#sponsors--supporters)
106
+ - [Sponsors & Supporters](#sponsors--supporters)
108
107
  - [Project Introduction - What is SFEOS?](#project-introduction---what-is-sfeos)
109
108
  - [Common Deployment Patterns](#common-deployment-patterns)
110
109
  - [Technologies](#technologies)
111
110
  - [Table of Contents](#table-of-contents)
112
- - [Documentation \& Resources](#documentation--resources)
111
+ - [Collection Search Extensions](#collection-search-extensions)
112
+ - [Documentation & Resources](#documentation--resources)
113
113
  - [Package Structure](#package-structure)
114
114
  - [Examples](#examples)
115
115
  - [Performance](#performance)
@@ -150,6 +150,59 @@ This project is built on the following technologies: STAC, stac-fastapi, FastAPI
150
150
  - [Gitter Chat](https://app.gitter.im/#/room/#stac-fastapi-elasticsearch_community:gitter.im) - For real-time discussions
151
151
  - [GitHub Discussions](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/discussions) - For longer-form questions and answers
152
152
 
153
+ ## Collection Search Extensions
154
+
155
+ SFEOS provides enhanced collection search capabilities through two primary routes:
156
+ - **GET/POST `/collections`**: The standard STAC endpoint with extended query parameters
157
+ - **GET/POST `/collections-search`**: A custom endpoint that supports the same parameters, created to avoid conflicts with the STAC Transactions extension if enabled (which uses POST `/collections` for collection creation)
158
+
159
+ These endpoints support advanced collection discovery features including:
160
+
161
+ - **Sorting**: Sort collections by sortable fields using the `sortby` parameter
162
+ - Example: `/collections?sortby=+id` (ascending sort by ID)
163
+ - Example: `/collections?sortby=-id` (descending sort by ID)
164
+ - Example: `/collections?sortby=-temporal` (descending sort by temporal extent)
165
+
166
+ - **Field Selection**: Request only specific fields to be returned using the `fields` parameter
167
+ - Example: `/collections?fields=id,title,description`
168
+ - This helps reduce payload size when only certain fields are needed
169
+
170
+ - **Free Text Search**: Search across collection text fields using the `q` parameter
171
+ - Example: `/collections?q=landsat`
172
+ - Searches across multiple text fields including title, description, and keywords
173
+ - Supports partial word matching and relevance-based sorting
174
+
175
+ - **Structured Filtering**: Filter collections using CQL2 expressions
176
+ - JSON format: `/collections?filter={"op":"=","args":[{"property":"id"},"sentinel-2"]}&filter-lang=cql2-json`
177
+ - Text format: `/collections?filter=id='sentinel-2'&filter-lang=cql2-text` (note: string values must be quoted)
178
+ - Advanced text format: `/collections?filter=id LIKE '%sentinel%'&filter-lang=cql2-text` (supports LIKE, BETWEEN, etc.)
179
+ - Supports both CQL2 JSON and CQL2 text formats with various operators
180
+ - Enables precise filtering on any collection property
181
+
182
+ - **Datetime Filtering**: Filter collections by their temporal extent using the `datetime` parameter
183
+ - Example: `/collections?datetime=2020-01-01T00:00:00Z/2020-12-31T23:59:59Z` (finds collections with temporal extents that overlap this range)
184
+ - Example: `/collections?datetime=2020-06-15T12:00:00Z` (finds collections whose temporal extent includes this specific time)
185
+ - Example: `/collections?datetime=2020-01-01T00:00:00Z/..` (finds collections with temporal extents that extend to or beyond January 1, 2020)
186
+ - Example: `/collections?datetime=../2020-12-31T23:59:59Z` (finds collections with temporal extents that begin on or before December 31, 2020)
187
+ - Collections are matched if their temporal extent overlaps with the provided datetime parameter
188
+ - This allows for efficient discovery of collections based on time periods
189
+
190
+ These extensions make it easier to build user interfaces that display and navigate through collections efficiently.
191
+
192
+ > **Configuration**: Collection search extensions (sorting, field selection, free text search, structured filtering, and datetime filtering) for the `/collections` endpoint can be disabled by setting the `ENABLE_COLLECTIONS_SEARCH` environment variable to `false`. By default, these extensions are enabled.
193
+ >
194
+ > **Configuration**: The custom `/collections-search` endpoint can be enabled by setting the `ENABLE_COLLECTIONS_SEARCH_ROUTE` environment variable to `true`. By default, this endpoint is **disabled**.
195
+
196
+ > **Note**: Sorting is only available on fields that are indexed for sorting in Elasticsearch/OpenSearch. With the default mappings, you can sort on:
197
+ > - `id` (keyword field)
198
+ > - `extent.temporal.interval` (date field)
199
+ > - `temporal` (alias to extent.temporal.interval)
200
+ >
201
+ > Text fields like `title` and `description` are not sortable by default as they use text analysis for better search capabilities. Attempting to sort on these fields will result in a user-friendly error message explaining which fields are sortable and how to make additional fields sortable by updating the mappings.
202
+ >
203
+ > **Important**: Adding keyword fields to make text fields sortable can significantly increase the index size, especially for large text fields. Consider the storage implications when deciding which fields to make sortable.
204
+
205
+
153
206
  ## Package Structure
154
207
 
155
208
  This project is organized into several packages, each with a specific purpose:
@@ -162,7 +215,7 @@ This project is organized into several packages, each with a specific purpose:
162
215
  - Shared logic and utilities that improve code reuse between backends
163
216
 
164
217
  - **stac_fastapi_elasticsearch**: Complete implementation of the STAC API using Elasticsearch as the backend database. This package depends on both `stac_fastapi_core` and `sfeos_helpers`.
165
- -
218
+
166
219
  - **stac_fastapi_opensearch**: Complete implementation of the STAC API using OpenSearch as the backend database. This package depends on both `stac_fastapi_core` and `sfeos_helpers`.
167
220
 
168
221
  ## Examples
@@ -280,10 +333,13 @@ You can customize additional settings in your `.env` file:
280
333
  | `ENABLE_DIRECT_RESPONSE` | Enable direct response for maximum performance (disables all FastAPI dependencies, including authentication, custom status codes, and validation) | `false` | Optional |
281
334
  | `RAISE_ON_BULK_ERROR` | Controls whether bulk insert operations raise exceptions on errors. If set to `true`, the operation will stop and raise an exception when an error occurs. If set to `false`, errors will be logged, and the operation will continue. **Note:** STAC Item and ItemCollection validation errors will always raise, regardless of this flag. | `false` | Optional |
282
335
  | `DATABASE_REFRESH` | Controls whether database operations refresh the index immediately after changes. If set to `true`, changes will be immediately searchable. If set to `false`, changes may not be immediately visible but can improve performance for bulk operations. If set to `wait_for`, changes will wait for the next refresh cycle to become visible. | `false` | Optional |
283
- | `ENABLE_TRANSACTIONS_EXTENSIONS` | Enables or disables the Transactions and Bulk Transactions API extensions. If set to `false`, the POST `/collections` route and related transaction endpoints (including bulk transaction operations) will be unavailable in the API. This is useful for deployments where mutating the catalog via the API should be prevented. | `true` | Optional |
336
+ | `ENABLE_COLLECTIONS_SEARCH` | Enable collection search extensions (sort, fields, free text search, structured filtering, and datetime filtering) on the core `/collections` endpoint. | `true` | Optional |
337
+ | `ENABLE_COLLECTIONS_SEARCH_ROUTE` | Enable the custom `/collections-search` endpoint (both GET and POST methods). When disabled, the custom endpoint will not be available, but collection search extensions will still be available on the core `/collections` endpoint if `ENABLE_COLLECTIONS_SEARCH` is true. | `false` | Optional |
338
+ | `ENABLE_TRANSACTIONS_EXTENSIONS` | Enables or disables the Transactions and Bulk Transactions API extensions. Disabling them is useful for deployments where mutating the catalog via the API should be prevented. If set to `true`, POST `/collections` is used for collection creation, so collection search via POST `/collections` will be unavailable in the API. | `true` | Optional |
284
339
  | `STAC_ITEM_LIMIT` | Sets the result limit in SFEOS for the number of returned items and STAC collections. | `10` | Optional |
285
340
  | `STAC_INDEX_ASSETS` | Controls whether assets are indexed when added to Elasticsearch/OpenSearch. This allows asset fields to be included in search queries. | `false` | Optional |
286
341
  | `ENV_MAX_LIMIT` | Overrides the default `MAX_LIMIT` in SFEOS, which controls the `limit` parameter for returned items and STAC collections. | `10,000` | Optional |
342
+ | `USE_DATETIME` | Configures the datetime search behavior in SFEOS. When enabled, searches the `datetime` field and falls back to the `start_datetime`/`end_datetime` range for items with a null `datetime`. When disabled, searches only by the `start_datetime`/`end_datetime` range. | `true` | Optional |
287
343
 
288
344
  > [!NOTE]
289
345
  > The variables `ES_HOST`, `ES_PORT`, `ES_USE_SSL`, `ES_VERIFY_CERTS` and `ES_TIMEOUT` apply to both Elasticsearch and OpenSearch backends, so there is no need to rename the key names to `OS_` even if you're using OpenSearch.
@@ -425,6 +481,9 @@ The system uses a precise naming convention:
425
481
  - **Root Path Configuration**: The application root path is the base URL by default.
426
482
  - For AWS Lambda with Gateway API: Set `STAC_FASTAPI_ROOT_PATH` to match the Gateway API stage name (e.g., `/v1`)
427
483
 
484
+ - **Feature Configuration**: Control which features are enabled:
485
+ - `ENABLE_COLLECTIONS_SEARCH`: Set to `true` (default) to enable collection search extensions (sort, fields, free text search, structured filtering, and datetime filtering) on the `/collections` endpoint. Set to `false` to disable.
486
+ - `ENABLE_TRANSACTIONS_EXTENSIONS`: Set to `true` (default) to enable transaction extensions. Set to `false` to disable.
428
487
 
429
488
  ## Collection Pagination
430
489
 
@@ -0,0 +1,10 @@
1
+ stac_fastapi/opensearch/__init__.py,sha256=iJWMUgn7mUvmuPQSO_FlyhJ5eDdbbfmGv1qnFOX5-qk,28
2
+ stac_fastapi/opensearch/app.py,sha256=paRSzdkT3yxkegC3KEg98CA7PpNQ0C2LW0Mozb_LcP0,10025
3
+ stac_fastapi/opensearch/config.py,sha256=zGx4-4c5zEnu_Bh8Ra3SkIC83tluDiz-wKYQclRRDJA,5179
4
+ stac_fastapi/opensearch/database_logic.py,sha256=8R40AdapytllTsc9xxtoDuTYa5jubc388SxevSZnfy0,67923
5
+ stac_fastapi/opensearch/version.py,sha256=KQjuGSR03-CXgF6wsaZ8qsni161S2BjhOn3wTX8JAMw,45
6
+ stac_fastapi_opensearch-6.5.0.dist-info/METADATA,sha256=SM_fV6pr5zUPeK9u4GOhbjBkD4dmi08PWz3mZnAK3zk,42048
7
+ stac_fastapi_opensearch-6.5.0.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
8
+ stac_fastapi_opensearch-6.5.0.dist-info/entry_points.txt,sha256=zjZ0Xsr9BUNJqMkdPpl6zEIUykv1uFdJtNELFRChp0w,76
9
+ stac_fastapi_opensearch-6.5.0.dist-info/top_level.txt,sha256=vqn-D9-HsRPTTxy0Vk_KkDmTiMES4owwBQ3ydSZYb2s,13
10
+ stac_fastapi_opensearch-6.5.0.dist-info/RECORD,,
@@ -1,10 +0,0 @@
1
- stac_fastapi/opensearch/__init__.py,sha256=iJWMUgn7mUvmuPQSO_FlyhJ5eDdbbfmGv1qnFOX5-qk,28
2
- stac_fastapi/opensearch/app.py,sha256=KTXD2jdAlcrrXr1o0bRMFBJp-B3fo0F3gyY2Pcc9rMA,6558
3
- stac_fastapi/opensearch/config.py,sha256=zGx4-4c5zEnu_Bh8Ra3SkIC83tluDiz-wKYQclRRDJA,5179
4
- stac_fastapi/opensearch/database_logic.py,sha256=jvFsyQYmFTYkakVg6kcQt1zx-IkqmCC2093gtgNwuPQ,56024
5
- stac_fastapi/opensearch/version.py,sha256=rBLPQyvMDNA0PA0jXfByTouJPJn5p0wXiqmUWJMIfYc,45
6
- stac_fastapi_opensearch-6.3.0.dist-info/METADATA,sha256=B-xB5wIjNHXI9bkk5Oeg6wuK69j8nE-p529wXq8oIcw,36590
7
- stac_fastapi_opensearch-6.3.0.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
8
- stac_fastapi_opensearch-6.3.0.dist-info/entry_points.txt,sha256=zjZ0Xsr9BUNJqMkdPpl6zEIUykv1uFdJtNELFRChp0w,76
9
- stac_fastapi_opensearch-6.3.0.dist-info/top_level.txt,sha256=vqn-D9-HsRPTTxy0Vk_KkDmTiMES4owwBQ3ydSZYb2s,13
10
- stac_fastapi_opensearch-6.3.0.dist-info/RECORD,,