xlr8 0.1.7b3__cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1201 @@
1
+ """Bracket-based query analysis for XLR8.
2
+
3
+ ================================================================================
4
+ DATA FLOW - QUERY TO BRACKETS
5
+ ================================================================================
6
+
7
+ This module transforms a MongoDB query into "Brackets" - the fundamental unit
8
+ of work for parallel execution.
9
+
10
+ WHAT IS A BRACKET?
11
+ --------------------------------------------------------------------------------
12
+
13
+ A Bracket = static_filter + TimeRange
14
+
15
+ It represents ONE chunk of work that can be executed independently:
16
+ - static_filter: Non-time conditions (e.g., {"region_id": "64a..."})
17
+ - timerange: Time bounds (lo, hi) that can be further chunked
18
+
19
+ EXAMPLE TRANSFORMATION:
20
+ --------------------------------------------------------------------------------
21
+
22
+ INPUT QUERY:
23
+ {
24
+ "$or": [
25
+ {"region_id": ObjectId("64a...")},
26
+ {"region_id": ObjectId("64b...")},
27
+ {"region_id": ObjectId("64c...")},
28
+ ],
29
+ "account_id": ObjectId("123..."), # Global AND condition
30
+ "timestamp": {"$gte": datetime(2024,1,1), "$lt": datetime(2024,7,1)}
31
+ }
32
+
33
+ STEP 1: split_global_and() extracts:
34
+ global_and = {"account_id": ObjectId("123..."),
35
+ "timestamp": {"$gte": ..., "$lt": ...}}
36
+ or_list = [{"region_id": "64a..."},
37
+ {"region_id": "64b..."}, ...]
38
+
39
+ STEP 2: For each $or branch, merge with global_and:
40
+ Branch 1: {"account_id": "123...", "region_id": "64a...", "timestamp": {...}}
41
+ Branch 2: {"account_id": "123...", "region_id": "64b...", "timestamp": {...}}
42
+ ...
43
+
44
+ STEP 3: Extract time bounds and create Brackets:
45
+
46
+ OUTPUT: List[Bracket]
47
+
48
+ Bracket(
49
+ static_filter={"account_id": "123...", "region_id": "64a..."},
50
+ timerange=TimeRange(lo=2024-01-01, hi=2024-07-01, is_full=True)
51
+ )
52
+
53
+ Bracket(
54
+ static_filter={"account_id": "123...", "region_id": "64b..."},
55
+ timerange=TimeRange(lo=2024-01-01, hi=2024-07-01, is_full=True)
56
+ )
57
+ ...
58
+
59
+ NEXT STEP: Each bracket's timerange is chunked (14-day chunks) and queued
60
+ for parallel execution.
61
+
62
+ WHY BRACKETS?
63
+ --------------------------------------------------------------------------------
64
+ 1. Parallelization: Each bracket can be fetched independently
65
+ 2. Caching: Same static_filter can reuse cached data
66
+ 3. Time chunking: TimeRange can be split into smaller chunks for workers
67
+
68
+ ================================================================================
69
+ """
70
+
71
+ import json
72
+ from copy import deepcopy
73
+ from dataclasses import dataclass
74
+ from datetime import datetime
75
+ from typing import Any, Dict, List, Optional, Set, Tuple
76
+
77
+ from xlr8.analysis.inspector import (
78
+ ChunkabilityMode,
79
+ extract_time_bounds_recursive,
80
+ has_forbidden_ops,
81
+ is_chunkable_query,
82
+ normalize_query,
83
+ or_depth,
84
+ split_global_and,
85
+ )
86
+
87
# Public API: the two data structures plus the single bracket-building entry point.
__all__ = ["Bracket", "TimeRange", "build_brackets_for_find"]
94
+
95
+
96
+ # =============================================================================
97
+ # OVERLAP DETECTION HELPERS
98
+ # =============================================================================
99
+ # These helpers detect when $or branches may have overlapping result sets,
100
+ # which would cause duplicates when executing brackets independently.
101
+ #
102
+ # NEGATION OPERATORS: $nin, $ne, $not, $nor in an $or branch can overlap with
103
+ # other branches that use positive filters on the same field.
104
+ #
105
+ # $in OVERLAP: Two branches with $in on the same field may share values.
106
+ # Example: {"field": {"$in": [1,2,3]}} and {"field": {"$in": [3,4,5]}}
107
+ #
108
+ # INHERENTLY OVERLAPPING OPERATORS: Some operators can match the same document
109
+ # across different branches even with different values:
110
+ # - $all: {"tags": {"$all": ["a","b"]}} and {"tags": {"$all": ["b","c"]}}
111
+ # both match a document with tags: ["a","b","c"]
112
+ # - $elemMatch: array element matching can overlap
113
+ # - $regex: pattern matching can overlap
114
+ # - $mod: modulo conditions can overlap
115
+ # - Comparison operators ($gt, $lt, etc.): ranges can overlap
116
+ # =============================================================================
117
+
118
# Operators that create negation/exclusion filters. A branch that uses one of
# these can match documents that another branch selects with a positive filter.
NEGATION_OPERATORS: Set[str] = {"$nin", "$ne", "$not", "$nor"}

# Operators that can cause overlap between branches even with different values.
# These should trigger single-bracket execution when used on differentiating fields.
OVERLAP_PRONE_OPERATORS: Set[str] = {
    "$all",  # Array superset matching
    "$elemMatch",  # Array element matching
    "$regex",  # Pattern matching
    "$mod",  # Modulo matching
    "$gt",  # Greater than - ranges can overlap
    "$gte",  # Greater than or equal
    "$lt",  # Less than - ranges can overlap
    "$lte",  # Less than or equal
    "$bitsAllSet",  # Bitwise operations can overlap
    "$bitsAnySet",
    "$bitsAllClear",
    "$bitsAnyClear",
}

# NOTE: $in lists on the same field can overlap as well, e.g.
# {"field": {"$in": [1, 2, 3]}} and {"field": {"$in": [3, 4, 5]}}
# both match documents where field=3.
142
+
143
+
144
@dataclass
class TimeRange:
    """
    Time window attached to a bracket.

    Attributes:
        lo: Lower bound datetime (None when the lower side is open)
        hi: Upper bound datetime (None when the upper side is open)
        is_full: Whether both lo and hi are specified
        hi_inclusive: True renders the upper bound as $lte, False as $lt
        lo_inclusive: True renders the lower bound as $gte, False as $gt

    Example:
        TimeRange(
            lo=datetime(2024, 1, 1, tzinfo=UTC),
            hi=datetime(2024, 7, 1, tzinfo=UTC),
            is_full=True,
            hi_inclusive=False,  # $lt
            lo_inclusive=True,   # $gte
        )
    """

    lo: Optional[datetime]
    hi: Optional[datetime]
    is_full: bool
    # Defaults mirror the half-open [lo, hi) convention ($gte / $lt), kept for
    # backward compatibility with callers that pass only three arguments.
    hi_inclusive: bool = False
    lo_inclusive: bool = True
171
+
172
+
173
@dataclass
class Bracket:
    """
    One independently executable chunk of work: a static filter plus a time range.

    Example:
        Bracket(
            static_filter={"account_id": ObjectId("123..."),
                           "region_id": ObjectId("64a...")},
            timerange=TimeRange(lo=2024-01-01, hi=2024-07-01, is_full=True)
        )

    The MongoDB query corresponding to that bracket is:
        {
            "account_id": ObjectId("123..."),
            "region_id": ObjectId("64a..."),
            "timestamp": {"$gte": 2024-01-01, "$lt": 2024-07-01}
        }
    """

    # Non-time conditions shared by every chunk produced from this bracket.
    static_filter: Dict[str, Any]
    # Time bounds of the bracket.
    timerange: TimeRange
195
+
196
+
197
+ # =============================================================================
198
+ # OVERLAP DETECTION HELPERS (implementation)
199
+ # =============================================================================
200
+
201
+
202
def _has_negation_operators(query: Dict[str, Any]) -> bool:
    """
    Report whether a negation operator appears anywhere inside ``query``.

    A branch of an $or that uses $nin, $ne, $not or $nor may select documents
    that a sibling branch also selects, which would produce duplicates if the
    branches were executed as separate brackets.

    Args:
        query: A query dict (typically an $or branch)

    Returns:
        True if any key at any nesting depth (inside dicts or lists) is a
        member of NEGATION_OPERATORS, False otherwise

    Examples:
        >>> _has_negation_operators({"field": {"$in": [1,2,3]}})
        False
        >>> _has_negation_operators({"field": {"$nin": [1,2,3]}})
        True
        >>> _has_negation_operators({"$and": [{"field": {"$ne": 5}}]})
        True
    """

    def _contains_negation(node: Any) -> bool:
        # Dicts: a key may itself be a negation operator, or hide one deeper.
        if isinstance(node, dict):
            return any(
                name in NEGATION_OPERATORS or _contains_negation(child)
                for name, child in node.items()
            )
        # Lists (e.g. the operands of $and / $or): inspect every element.
        if isinstance(node, list):
            return any(_contains_negation(element) for element in node)
        # Scalars cannot contain operators.
        return False

    return _contains_negation(query)
238
+
239
+
240
+ def _has_overlap_prone_operators(
241
+ query: Dict[str, Any], time_field: str
242
+ ) -> Tuple[bool, Optional[str]]:
243
+ """
244
+ Check if query contains operators that can cause overlap between branches.
245
+
246
+ These operators can match the same document even with different values:
247
+ - $all: array superset matching
248
+ - $elemMatch: array element matching
249
+ - $regex: pattern matching
250
+ - $mod: modulo matching
251
+ - Comparison operators ($gt, $lt, etc.): ranges can overlap
252
+
253
+ NOTE: Comparison operators on the TIME FIELD are allowed (that's how we chunk).
254
+ Only comparison operators on OTHER fields trigger this check.
255
+
256
+ Args:
257
+ query: A query dict (typically an $or branch)
258
+ time_field: The time field name (excluded from comparison operator check)
259
+
260
+ Returns:
261
+ Tuple of (has_overlap_prone, operator_name)
262
+
263
+ Examples:
264
+ >>> _has_overlap_prone_operators({"tags": {"$all": ["a", "b"]}}, "ts")
265
+ (True, '$all')
266
+ >>> _has_overlap_prone_operators({"name": {"$regex": "^John"}}, "ts")
267
+ (True, '$regex')
268
+ >>> _has_overlap_prone_operators({"ts": {"$gte": t1, "$lt": t2}}, "ts")
269
+ (False, None) # Time field comparison is OK
270
+ >>> _has_overlap_prone_operators({"value": {"$gt": 10}}, "ts")
271
+ (True, '$gt') # Non-time field comparison is problematic
272
+ """
273
+ # Operators that are always problematic (not context-dependent)
274
+ always_problematic = {
275
+ "$all",
276
+ "$elemMatch",
277
+ "$regex",
278
+ "$mod",
279
+ "$bitsAllSet",
280
+ "$bitsAnySet",
281
+ "$bitsAllClear",
282
+ "$bitsAnyClear",
283
+ }
284
+
285
+ # Comparison operators - only problematic on non-time fields
286
+ comparison_ops = {"$gt", "$gte", "$lt", "$lte"}
287
+
288
+ def _check(obj: Any, current_field: Optional[str] = None) -> Optional[str]:
289
+ if isinstance(obj, dict):
290
+ for key, value in obj.items():
291
+ # Track current field for comparison operator check
292
+ field = key if not key.startswith("$") else current_field
293
+
294
+ if key in always_problematic:
295
+ return key
296
+
297
+ # Comparison operators are only problematic on non-time fields
298
+ if key in comparison_ops and current_field != time_field:
299
+ return key
300
+
301
+ result = _check(value, field)
302
+ if result:
303
+ return result
304
+ elif isinstance(obj, list):
305
+ for item in obj:
306
+ result = _check(item, current_field)
307
+ if result:
308
+ return result
309
+ return None
310
+
311
+ op = _check(query)
312
+ return (True, op) if op else (False, None)
313
+
314
+
315
+ def _extract_in_values(query: Dict[str, Any], field: str) -> Optional[Set[Any]]:
316
+ """
317
+ Extract $in values for a specific field from query.
318
+
319
+ Args:
320
+ query: Query dict to search
321
+ field: Field name to look for $in on
322
+
323
+ Returns:
324
+ Set of values if $in found, None if field uses different operator or not present
325
+
326
+ Examples:
327
+ >>> _extract_in_values({"field": {"$in": [1, 2, 3]}}, "field")
328
+ {1, 2, 3}
329
+ >>> _extract_in_values({"field": 5}, "field") # Equality, not $in
330
+ None
331
+ >>> _extract_in_values({"other": {"$in": [1]}}, "field") # Different field
332
+ None
333
+ """
334
+ if field not in query:
335
+ return None
336
+
337
+ val = query[field]
338
+ if isinstance(val, dict) and "$in" in val:
339
+ in_vals = val["$in"]
340
+ if isinstance(in_vals, list):
341
+ # Convert to set of hashable representations
342
+ result = set()
343
+ for v in in_vals:
344
+ try:
345
+ result.add(v)
346
+ except TypeError:
347
+ # Unhashable value - convert to string
348
+ result.add(str(v))
349
+ return result
350
+
351
+ return None
352
+
353
+
354
+ def _find_in_fields(query: Dict[str, Any]) -> Dict[str, Set[Any]]:
355
+ """
356
+ Find all fields that use $in operator and their values.
357
+
358
+ Only looks at top-level fields (not nested in $and, etc.)
359
+
360
+ Args:
361
+ query: Query dict (typically an $or branch)
362
+
363
+ Returns:
364
+ Dict mapping field name to set of $in values
365
+
366
+ Examples:
367
+ >>> _find_in_fields({"a": {"$in": [1,2]}, "b": {"$in": [3,4]}})
368
+ {"a": {1, 2}, "b": {3, 4}}
369
+ >>> _find_in_fields({"a": 5, "b": {"$gt": 10}})
370
+ {}
371
+ """
372
+ result: Dict[str, Set[Any]] = {}
373
+
374
+ for field, value in query.items():
375
+ if field.startswith("$"):
376
+ continue # Skip operators
377
+ if isinstance(value, dict) and "$in" in value:
378
+ in_vals = value["$in"]
379
+ if isinstance(in_vals, list):
380
+ try:
381
+ result[field] = set(in_vals)
382
+ except TypeError:
383
+ # Contains unhashable - convert to strings
384
+ result[field] = {str(v) for v in in_vals}
385
+
386
+ return result
387
+
388
+
389
+ def _get_non_time_fields(branch: Dict[str, Any], time_field: str) -> Set[str]:
390
+ """Get all top-level field names except the time field and operators."""
391
+ return {k for k in branch.keys() if not k.startswith("$") and k != time_field}
392
+
393
+
394
def _check_or_branch_safety(
    branches: List[Dict[str, Any]], global_and: Dict[str, Any], time_field: str
) -> Tuple[bool, str, Optional[List[Dict[str, Any]]]]:
    """
    Analyze $or branches for safety (no overlapping result sets).

    Decides whether the branches can be executed independently as brackets or
    must be executed as a single query to avoid duplicates.

    SAFETY RULES (checked in this order):
    1a. Any branch has a negation operator -> UNSAFE
    1b. Any branch has an overlap-prone operator -> UNSAFE
    2.  Branches (merged with global_and, time field removed) have different
        top-level field sets -> UNSAFE
    3.  No field uses $in anywhere -> SAFE
    4.  A field uses $in in one branch and a non-$in operator dict in
        another -> UNSAFE (overlap cannot be determined)
    5.  No value shared between a $in branch and any other branch -> SAFE
        (plain equality on the same field counts as a one-element set;
        two equality branches are not compared with each other)
    6.  Shared values but time bounds (including inclusivity) differ -> UNSAFE
    7.  Shared values on more than one field -> UNSAFE
    8.  Shared values on one field but the branches differ on any other
        condition -> UNSAFE. Subtracting values from a later branch is only
        lossless when the earlier branch matches everything the later one
        would have matched for those values.
    9.  Otherwise -> TRANSFORM: values already covered by an earlier branch
        are removed from later branches; branches left with nothing are
        dropped.

    Args:
        branches: List of $or branch dicts (not mutated; copies are returned)
        global_and: Global conditions applied to all branches
        time_field: Time field name (excluded from field comparison)

    Returns:
        Tuple of (is_safe, reason, transformed_branches)
        - is_safe: True if brackets can be executed independently
        - reason: Description of why unsafe (empty if safe)
        - transformed_branches: Modified branches if transformation applied,
          None otherwise
    """

    def _hashable(value: Any) -> Any:
        # Set-membership key for a query value; unhashable values use str().
        try:
            hash(value)
        except TypeError:
            return str(value)
        return value

    if len(branches) <= 1:
        return True, "", None  # Single branch is always safe

    # Rule 1a: negation operators in any branch
    for i, branch in enumerate(branches):
        if _has_negation_operators(branch):
            return (
                False,
                f"branch {i} contains negation operator ($nin/$ne/$not/$nor)",
                None,
            )

    # Rule 1b: operators that can match the same document across branches
    # even with different values
    for i, branch in enumerate(branches):
        has_overlap_op, op = _has_overlap_prone_operators(branch, time_field)
        if has_overlap_op:
            return False, f"branch {i} contains overlap-prone operator ({op})", None

    # Merge each branch with global_and (branch keys win) and drop the time
    # field so only the static conditions are compared.
    effective_branches: List[Dict[str, Any]] = []
    for br in branches:
        eff = {**global_and, **br}
        eff.pop(time_field, None)
        effective_branches.append(eff)

    # Rule 2: all branches must constrain the same set of fields
    field_sets = [_get_non_time_fields(eb, time_field) for eb in effective_branches]
    first_fields = field_sets[0]
    for i, fs in enumerate(field_sets[1:], 1):
        if fs != first_fields:
            return False, f"branch {i} has different field set than branch 0", None

    # Rule 3: collect every field that uses $in in at least one branch
    in_field_names: Set[str] = set()
    for eb in effective_branches:
        in_field_names.update(_find_in_fields(eb))

    if not in_field_names:
        # No $in fields. Branches with identical static_filters will be
        # grouped/merged by the main algorithm; different equality values
        # are disjoint.
        return True, "", None

    # Rules 4 & 5: per field, build one value set per branch and intersect.
    fields_with_overlap: Dict[str, List[Tuple[int, int, Set[Any]]]] = {}

    for field in sorted(in_field_names):  # sorted -> deterministic reason text
        branch_values: List[Set[Any]] = []
        uses_in: List[bool] = []
        for i, eb in enumerate(effective_branches):
            vals = _extract_in_values(eb, field)
            if vals is not None:
                branch_values.append(vals)
                uses_in.append(True)
                continue
            # Rule 2 guarantees the field is present in every branch.
            cond = eb[field]
            if isinstance(cond, dict):
                # Some other operator document: its match set is unknown.
                return (
                    False,
                    f"branch {i} uses a non-$in condition on '{field}' "
                    "while another branch uses $in",
                    None,
                )
            # Plain equality behaves like a single-value $in.
            branch_values.append({_hashable(cond)})
            uses_in.append(False)

        overlaps: List[Tuple[int, int, Set[Any]]] = []
        for i, vals_i in enumerate(branch_values):
            for j in range(i + 1, len(branch_values)):
                if not (uses_in[i] or uses_in[j]):
                    # Two equality branches: identical filters are grouped by
                    # the main algorithm, different values are disjoint.
                    continue
                common = vals_i & branch_values[j]
                if common:
                    overlaps.append((i, j, common))

        if overlaps:
            fields_with_overlap[field] = overlaps

    if not fields_with_overlap:
        # No overlapping values - safe!
        return True, "", None

    # Rule 6: subtraction is only lossless when every branch covers exactly
    # the same time window. Example of the failure otherwise:
    #   - Branch A (IDs 1,2,3) with time [t1, t2]
    #   - Branch B (IDs 2,3,4) with time [t0, t3] (wider)
    # Removing 2,3 from Branch B would lose IDs 2,3 in [t0,t1) and (t2,t3].
    # Inclusivity is part of the window: [t1, t2) and [t1, t2] differ at t2.
    time_bounds = []
    for br in branches:
        combined = {**global_and, **br}
        bounds, _ = extract_time_bounds_recursive(combined, time_field)
        if bounds is None:
            time_bounds.append((None, None, None, None))
        else:
            lo, hi, hi_inclusive, lo_inclusive = bounds
            time_bounds.append(
                (
                    lo,
                    hi,
                    # A flag only matters when its bound exists.
                    hi_inclusive if hi is not None else None,
                    lo_inclusive if lo is not None else None,
                )
            )

    first_bounds = time_bounds[0]
    all_same_time = all(bounds == first_bounds for bounds in time_bounds)

    if not all_same_time:
        # Overlapping $in with different time ranges - CANNOT safely transform
        return (
            False,
            (
                f"overlapping $in on '{list(fields_with_overlap.keys())[0]}' "
                "with different time ranges"
            ),
            None,
        )

    # Rule 7
    if len(fields_with_overlap) > 1:
        # Multiple $in fields have overlap - too complex to transform
        return (
            False,
            f"multiple $in fields have overlap: {list(fields_with_overlap.keys())}",
            None,
        )

    field = next(iter(fields_with_overlap))

    # Rule 8: every other static condition must be identical across branches.
    # Otherwise a value removed from a later branch may belong to documents
    # that the earlier branch rejects on some other field.
    reference = {k: v for k, v in effective_branches[0].items() if k != field}
    for i, eb in enumerate(effective_branches[1:], 1):
        if {k: v for k, v in eb.items() if k != field} != reference:
            return (
                False,
                f"overlapping $in on '{field}' but branch {i} differs from "
                "branch 0 on other conditions",
                None,
            )

    # Rule 9: walk branches in order, keeping a set of values already covered
    # and removing them from later branches.
    transformed: List[Dict[str, Any]] = []
    seen_values: Set[Any] = set()

    for original in branches:
        branch = deepcopy(original)
        cond = {**global_and, **branch}[field]
        in_list = cond.get("$in") if isinstance(cond, dict) else None

        if not isinstance(in_list, list):
            # Equality branch (operator dicts were rejected under Rule 4).
            key = _hashable(cond)
            if key not in seen_values:
                seen_values.add(key)
                transformed.append(branch)
            # else: fully covered by an earlier branch - drop it
            continue

        # Keep the original value objects and their order so the rewritten
        # query is deterministic and never contains stringified stand-ins.
        branch_keys: Set[Any] = set()
        remaining: List[Any] = []
        for value in in_list:
            key = _hashable(value)
            if key in branch_keys:
                continue  # duplicate inside the same $in list
            branch_keys.add(key)
            if key not in seen_values:
                remaining.append(value)

        # All original values are now covered by this or an earlier bracket.
        seen_values.update(branch_keys)

        if not remaining:
            continue  # Fully covered - drop the branch

        if len(remaining) != len(branch_keys):
            # Some values removed - update the $in
            if isinstance(branch.get(field), dict) and "$in" in branch[field]:
                branch[field]["$in"] = remaining
            else:
                # Condition came from global_and - override it in the branch,
                # keeping any sibling operators of the original condition.
                branch[field] = {**cond, "$in": remaining}

        transformed.append(branch)

    if not transformed:
        # Edge case: nothing left (shouldn't happen normally)
        return True, "", None

    return True, "", transformed
610
+
611
+
612
+ # ============================================================================
613
+ # MAIN INTERFACE / ENTRY POINT
614
+ # ============================================================================
615
+
616
+
617
+ def _json_key(d: Dict[str, Any]) -> str:
618
+ """Create a deterministic JSON key for deduplication."""
619
+ return json.dumps(d, sort_keys=True, default=str)
620
+
621
+
622
def _merge_full_ranges(ranges: List[TimeRange]) -> List[TimeRange]:
    """Merge overlapping or adjacent time ranges into consolidated spans.

    Ranges that are not full (``is_full`` False, or a missing bound) are
    ignored. The remaining ranges are sorted by start time and folded into
    new TimeRange objects; the inputs are not modified.

    Inclusivity is respected at the seams:
    - Among ranges with the same start, an inclusive start wins, so the merged
      span does not lose the start instant.
    - Two ranges that merely touch (end of one equals start of next) are
      merged only if at least one of them includes the shared instant.
      ``[a, t)`` followed by ``(t, b]`` stays as two ranges because neither
      matches ``t``.
    - When ends coincide, the merged end is inclusive if either end is.

    Args:
        ranges: TimeRange objects to consolidate

    Returns:
        New list of merged full TimeRange objects ordered by start time
    """

    rs = [r for r in ranges if r.is_full and r.lo is not None and r.hi is not None]
    if not rs:
        return []

    # Secondary key puts inclusive starts first among equal ``lo`` values, so
    # the seed of each merged span carries the most permissive lower bound.
    rs.sort(key=lambda r: (r.lo, not r.lo_inclusive))  # type: ignore[arg-type]
    out: List[TimeRange] = [
        TimeRange(rs[0].lo, rs[0].hi, True, rs[0].hi_inclusive, rs[0].lo_inclusive)
    ]
    for r in rs[1:]:
        last = out[-1]
        # Type assertions: we filtered for r.lo and r.hi being not None above
        assert r.lo is not None and r.hi is not None
        assert last.lo is not None and last.hi is not None

        # Touching ranges are contiguous only if the shared instant is covered.
        touches = r.lo == last.hi and (last.hi_inclusive or r.lo_inclusive)
        if r.lo < last.hi or touches:
            if r.hi > last.hi:
                last.hi = r.hi
                last.hi_inclusive = r.hi_inclusive
            elif r.hi == last.hi:
                last.hi_inclusive = last.hi_inclusive or r.hi_inclusive
        else:
            out.append(TimeRange(r.lo, r.hi, True, r.hi_inclusive, r.lo_inclusive))
    return out
652
+
653
+
654
def _partial_covers_full(partial: TimeRange, full: TimeRange) -> bool:
    """Check if a partial time range completely covers a full time range.

    A partial range covers a full range if:
    - partial has only a lower bound and full starts at or after it
    - partial has only an upper bound and full ends at or before it

    When the two bounds are equal, inclusivity decides: an exclusive partial
    bound ($gt / $lt) does not cover a full range that includes that same
    instant ($gte / $lte).

    Args:
        partial: TimeRange with is_full=False (missing lo or hi)
        full: TimeRange with is_full=True

    Returns:
        True if partial completely covers full, False otherwise (including
        when ``full`` lacks a bound or ``partial`` has both or neither bound)
    """
    if full.lo is None or full.hi is None:
        return False

    # Partial has only lower bound: covers if full starts at or after it
    if partial.lo is not None and partial.hi is None:
        if full.lo == partial.lo:
            # Same instant: covered unless full includes it and partial doesn't
            return partial.lo_inclusive or not full.lo_inclusive
        return full.lo > partial.lo

    # Partial has only upper bound: covers if full ends at or before it
    if partial.lo is None and partial.hi is not None:
        if full.hi == partial.hi:
            return partial.hi_inclusive or not full.hi_inclusive
        return full.hi < partial.hi

    return False
680
+
681
+
682
def _merge_partial_ranges(partials: List[TimeRange]) -> List[TimeRange]:
    """Merge partial ranges where possible.

    Priority:
    - If ANY range is completely unbounded (no lo, no hi), it covers
      everything and a single unbounded range is returned
    - Lower-bound-only ranges collapse to the one with the smallest lo
    - Upper-bound-only ranges collapse to the one with the largest hi

    When several ranges share the winning bound, the merged bound is inclusive
    if any of them is inclusive, so the merged range never covers less than
    the ranges it replaces.

    Args:
        partials: TimeRange objects missing at least one bound

    Returns:
        At most two TimeRange objects (one lower-bounded, one upper-bounded),
        or a single unbounded one; empty list for empty input
    """
    if not partials:
        return []

    # Check for completely unbounded ranges first - they cover everything
    if any(r.lo is None and r.hi is None for r in partials):
        return [TimeRange(None, None, False, False, True)]

    gte_only = [r for r in partials if r.lo is not None and r.hi is None]
    lt_only = [r for r in partials if r.lo is None and r.hi is not None]

    merged: List[TimeRange] = []

    # Invariant from the original implementation: callers pass only partials
    assert gte_only or lt_only, "No partial ranges to merge"

    # For lower-bound-only, keep the smallest lo (covers most data)
    if gte_only:
        min_lo = min(r.lo for r in gte_only if r.lo is not None)
        # Most permissive flag among the ranges sharing min_lo
        lo_inclusive = any(r.lo_inclusive for r in gte_only if r.lo == min_lo)
        merged.append(TimeRange(min_lo, None, False, False, lo_inclusive))

    # For upper-bound-only, keep the largest hi (covers most data)
    if lt_only:
        max_hi = max(r.hi for r in lt_only if r.hi is not None)
        # Most permissive flag among the ranges sharing max_hi
        hi_inclusive = any(r.hi_inclusive for r in lt_only if r.hi == max_hi)
        merged.append(TimeRange(None, max_hi, False, hi_inclusive, True))

    return merged
723
+
724
+
725
+ def build_brackets_for_find(
726
+ query: Dict[str, Any],
727
+ time_field: str,
728
+ sort_spec: Optional[List[Tuple[str, int]]] = None,
729
+ ) -> Tuple[
730
+ bool, str, List[Bracket], Optional[Tuple[Optional[datetime], Optional[datetime]]]
731
+ ]:
732
+ """
733
+ Build bracket list for a find() query based on its chunkability.
734
+
735
+ This is the SINGLE ENTRY POINT for bracket creation. All queries flow through
736
+ here to ensure consistent validation and bracket generation.
737
+
738
+ IMPORTANT: Internally calls is_chunkable_query() to validate the query and
739
+ determine execution mode (PARALLEL/SINGLE/REJECT). Cursor methods should NOT
740
+ call is_chunkable_query() separately - this function handles all validation.
741
+
742
+ Args:
743
+ query: MongoDB find() filter dict
744
+ time_field: Name of the timestamp field used for time-based chunking
745
+ (e.g., "timestamp", "recordedAt", "createdAt")
746
+ sort_spec: Optional MongoDB sort specification as list of
747
+ (field, direction) tuples. Required for detecting $natural
748
+ sort. Format: [("field", 1)] or [("field", -1)]
749
+ Example: [("timestamp", 1)] or [("$natural", -1)]
750
+
751
+ Returns:
752
+ Tuple of (is_chunkable, reason, brackets, bounds):
753
+
754
+ - is_chunkable: bool
755
+ - True: Query is valid and executable (PARALLEL or SINGLE mode)
756
+ - False: Invalid query syntax or contradictory constraints (REJECT mode)
757
+
758
+ - reason: str
759
+ - Empty string "" for PARALLEL mode (successful parallelization)
760
+ - Descriptive message for SINGLE mode
761
+ (e.g., "$natural sort requires insertion order")
762
+ - Error description for REJECT mode
763
+ (e.g., "empty $or array (invalid MongoDB syntax)")
764
+
765
+ - brackets: List[Bracket]
766
+ - PARALLEL mode: Non-empty list of Bracket objects for parallel execution
767
+ - SINGLE mode: Empty list [] (signals to use single worker)
768
+ - REJECT mode: Empty list []
769
+
770
+ - bounds: Tuple[Optional[datetime], Optional[datetime]]
771
+ - Time range extracted from query (lo, hi)
772
+ - (None, None) if no time bounds found or query rejected
773
+
774
+ CRITICAL: Empty brackets list has TWO meanings:
775
+ 1. If is_chunkable=True + brackets=[]: SINGLE mode (valid, use single worker)
776
+ 2. If is_chunkable=False + brackets=[]: REJECT mode (invalid, don't execute)
777
+
778
+ Callers MUST check is_chunkable first, then interpret empty brackets accordingly.
779
+
780
+ Example:
781
+ >>> query = {
782
+ ... "$or": [
783
+ ... {"region_id": ObjectId("64a...")},
784
+ ... {"region_id": ObjectId("64b...")},
785
+ ... ],
786
+ ... "account_id": ObjectId("123..."),
787
+ ... "timestamp": {"$gte": datetime(2024,1,1), "$lt": datetime(2024,7,1)}
788
+ ... }
789
+ >>> ok, reason, brackets, bounds = build_brackets_for_find(query, "timestamp")
790
+ >>> # Returns:
791
+ >>> # (True, "", [
792
+ >>> # Bracket(static_filter={"account_id": "123...",
793
+ >>> # "region_id": "64a..."},
794
+ >>> # timerange=TimeRange(lo=2024-01-01, hi=2024-07-01,
795
+ >>> # is_full=True)),
796
+ >>> # Bracket(static_filter={"account_id": "123...",
797
+ >>> # "region_id": "64b..."},
798
+ >>> # timerange=TimeRange(lo=2024-01-01, hi=2024-07-01,
799
+ >>> # is_full=True)),
800
+ >>> # ], (datetime(2024,1,1), datetime(2024,7,1)))
801
+
802
+ Rejection Cases (returns is_chunkable=False):
803
+ - Empty $or array (invalid MongoDB syntax) -> REJECT
804
+ - Contradictory time bounds (lo >= hi) -> REJECT
805
+
806
+ Single-Worker Cases (returns is_chunkable=True, empty brackets):
807
+ - $natural sort (insertion order) -> SINGLE
808
+ - Forbidden operators ($expr, $text, $near, etc.) -> SINGLE
809
+ - Nested $or (depth > 1) -> SINGLE
810
+ - Time field negation ($ne/$nin/$not/$nor on time field) -> SINGLE
811
+ - Unbounded $or branches -> SINGLE
812
+ - No time field reference -> SINGLE
813
+
814
+ Implementation Note - Multiple Time Bounds Extraction:
815
+ This function calls extract_time_bounds_recursive() multiple times in different
816
+ code paths for different purposes:
817
+
818
+ 1. Via is_chunkable_query() - Validates overall query has time bounds
819
+ Returns: result.bounds = union of all time ranges in query
820
+
821
+ 2. In _check_or_branch_safety() - Checks if $or branches have
822
+ identical time bounds
823
+ Purpose: Overlapping $in values can only be safely transformed
824
+ when all branches have the SAME time range. Different
825
+ ranges would cause data loss.
826
+ Example: Branch A [Jan 1-15] with IDs {1,2,3} vs Branch B
827
+ [Jan 10-31] with IDs {2,3,4}. Cannot remove overlap {2,3}
828
+ because documents in [Jan 1-10) would be lost!
829
+
830
+ 3. In merge attempt (unsafe $or handling) - Extracts bounds from
831
+ each branch
832
+ Purpose: If branches have overlapping results (unsafe), check if
833
+ they can be merged into a single bracket. Only possible if
834
+ time ranges are contiguous with no gaps.
835
+ Example: Branch A [Jan 1-15], Branch B [Jan 10-20]
836
+ -> Merged [Jan 1-20]
837
+ Branch A [Jan 1-15], Branch B [Jan 20-31]
838
+ -> Cannot merge (gap!) ✗
839
+
840
+ 4. In final bracket creation - Sets TimeRange for each output
841
+ bracket
842
+ Purpose: Each bracket needs its specific time range for chunking.
843
+ Example: {"sensor": "A", ts: [Jan 1-15]}
844
+ -> Bracket with TimeRange(Jan 1, Jan 15)
845
+ {"sensor": "B", ts: [Feb 1-28]}
846
+ -> Bracket with TimeRange(Feb 1, Feb 28)
847
+
848
+ Why multiple calls are necessary:
849
+ - is_chunkable_query() returns UNION of time bounds (overall range)
850
+ - Each $or branch may have DIFFERENT time bounds (per-branch ranges)
851
+ - Safety checks need to compare bounds across branches (identical?)
852
+ - Merge logic needs to check contiguity (adjacent/overlapping?)
853
+ - Final brackets need their specific ranges (individual TimeRange objects)
854
+
855
+ This is NOT redundant - each extraction serves a different purpose in the
856
+ validation -> optimization -> construction pipeline.
857
+ """
858
+
859
+ # PHASE 0: Validate query using is_chunkable_query
860
+ # This is now the ONLY validation point - cursor methods don't need to
861
+ # call it separately
862
+ result = is_chunkable_query(query, time_field, sort_spec)
863
+
864
+ bounds = result.bounds
865
+
866
+ # Handle REJECT mode - invalid query syntax or contradictory constraints
867
+ if result.mode == ChunkabilityMode.REJECT:
868
+ return False, result.reason, [], (None, None)
869
+
870
+ # Handle SINGLE mode - valid query, but single-worker fallback needed
871
+ if result.mode == ChunkabilityMode.SINGLE:
872
+ # Return empty brackets as signal to use single worker
873
+ # is_chunkable=True means query is VALID and executable
874
+ # Empty brackets means "don't parallelize, use single worker"
875
+ return True, result.reason, [], bounds
876
+
877
+ # =========================================================================
878
+ # DEFENSE-IN-DEPTH: Redundant safety checks
879
+ # =========================================================================
880
+ # These checks duplicate validation already done in is_chunkable_query().
881
+ # They're kept as a safety net in case:
882
+ # 1. is_chunkable_query() has a bug and returns PARALLEL incorrectly
883
+ # 2. Future code changes bypass is_chunkable_query() validation
884
+ # 3. Query is mutated between validation and bracket building
885
+ #
886
+ # PARANOID but JUSTIFIED: Better to catch issues twice than produce
887
+ # incorrect results. These checks are fast and prevent data corruption.
888
+ # =========================================================================
889
+
890
+ # High-level safety checks (kept for defense-in-depth)
891
+ has_forbidden, forbidden_op = has_forbidden_ops(query)
892
+ if has_forbidden:
893
+ return False, f"forbidden-operator: {forbidden_op}", [], (None, None)
894
+
895
+ # PHASE 1: Normalize query (flatten nested $and, detect complexity)
896
+ normalized, complexity_flags = normalize_query(query)
897
+
898
+ # Use normalized query for all subsequent operations
899
+ global_and, or_list = split_global_and(normalized)
900
+
901
+ # Check for nested $or or multiple $or
902
+ if complexity_flags["nested_or"]:
903
+ return False, "nested-or-depth>1", [], (None, None)
904
+
905
+ if or_depth(normalized) > 1:
906
+ return False, "nested-or-depth>1", [], (None, None)
907
+
908
+ # No $or: treat as single branch represented by global_and
909
+ if not or_list:
910
+ branches: List[Dict[str, Any]] = [global_and]
911
+ else:
912
+ # =====================================================================
913
+ # SAFETY CHECK: Detect overlapping $or branches
914
+ # =====================================================================
915
+ # Before splitting $or into independent brackets, we must verify that
916
+ # branches don't have overlapping result sets. Overlap causes duplicates.
917
+ #
918
+ # Cases that cause overlap:
919
+ # - Negation operators ($nin, $ne, $not, $nor) in any branch
920
+ # - Overlapping $in values across branches
921
+ # - Different field sets (can't determine disjointness)
922
+ #
923
+ # If overlap is detected and cannot be transformed, we return a single
924
+ # bracket covering the entire query (executed as unchunked).
925
+ # =====================================================================
926
+ is_safe, reason, transformed = _check_or_branch_safety(
927
+ or_list, global_and, time_field
928
+ )
929
+
930
+ if not is_safe:
931
+ # Unsafe $or pattern detected - but check if we can MERGE branches
932
+ #
933
+ # OPTIMIZATION: If all branches have IDENTICAL static filters
934
+ # (excluding time), AND their time ranges are contiguous (no gaps),
935
+ # we can MERGE them into a single bracket with the union of time
936
+ # ranges.
937
+ #
938
+ # Example (mergeable - overlapping):
939
+ # $or: [
940
+ # {filter_A, timestamp: {$gte: Jan 1, $lt: Jan 20}},
941
+ # {filter_A, timestamp: {$gte: Jan 15, $lt: Feb 1}},
942
+ # ]
943
+ # -> Merged: {filter_A, timestamp: {$gte: Jan 1, $lt: Feb 1}}
944
+ #
945
+ # Example (NOT mergeable - disjoint with gap):
946
+ # $or: [
947
+ # {filter_A, timestamp: {$gte: Jan 1, $lt: Jan 15}},
948
+ # {filter_A, timestamp: {$gte: Feb 1, $lt: Feb 15}},
949
+ # ]
950
+ # -> Cannot merge! Gap from Jan 15 to Feb 1 would include unwanted data.
951
+ # -> Fall back to single bracket with full $or query.
952
+
953
+ # Extract static filters (without time) from each branch
954
+ static_filters = []
955
+ time_bounds_list = []
956
+ has_unbounded_branch = False
957
+ has_partial_branch = False # Only $gte or only $lt
958
+
959
+ for branch in or_list:
960
+ combined = {**global_and, **branch}
961
+ bounds, _ = extract_time_bounds_recursive(combined, time_field)
962
+ if bounds is None:
963
+ branch_lo, branch_hi, branch_hi_inc, branch_lo_inc = (
964
+ None,
965
+ None,
966
+ False,
967
+ True,
968
+ )
969
+ else:
970
+ branch_lo, branch_hi, branch_hi_inc, branch_lo_inc = bounds
971
+
972
+ # Check if this branch has NO time constraint at all
973
+ if branch_lo is None and branch_hi is None:
974
+ has_unbounded_branch = True
975
+ # Check if partial (only one bound)
976
+ elif branch_lo is None or branch_hi is None:
977
+ has_partial_branch = True
978
+
979
+ time_bounds_list.append(
980
+ (branch_lo, branch_hi, branch_hi_inc, branch_lo_inc)
981
+ )
982
+
983
+ # Extract static filter (without time)
984
+ static_wo_time = dict(combined)
985
+ if time_field in static_wo_time:
986
+ static_wo_time.pop(time_field)
987
+ static_filters.append(static_wo_time)
988
+
989
+ # Check if all static filters are identical
990
+ all_static_identical = all(
991
+ _json_key(sf) == _json_key(static_filters[0])
992
+ for sf in static_filters[1:]
993
+ )
994
+
995
+ # Can only merge if:
996
+ # 1. All static filters identical
997
+ # 2. All time ranges are FULL (both lo and hi)
998
+ # 3. Time ranges are contiguous (no gaps)
999
+ can_merge = False
1000
+ merged_lo, merged_hi = None, None
1001
+ merged_hi_inclusive, merged_lo_inclusive = False, True
1002
+
1003
+ if (
1004
+ all_static_identical
1005
+ and not has_unbounded_branch
1006
+ and not has_partial_branch
1007
+ ):
1008
+ # All branches have identical static filters and full time ranges
1009
+ # Check if time ranges are contiguous (no gaps)
1010
+ # NOTE: gap detection compares endpoint values only, not inclusivity.
1011
+ # Algorithm: Sort by start time, then verify each range starts
1012
+ # at or before the previous range's end (overlap or adjacent)
1013
+ full_ranges = [
1014
+ (lo, hi, hi_inc, lo_inc)
1015
+ for lo, hi, hi_inc, lo_inc in time_bounds_list
1016
+ ]
1017
+ sorted_ranges = sorted(full_ranges, key=lambda r: r[0])
1018
+
1019
+ # Start with first range
1020
+ running_lo = sorted_ranges[0][0]
1021
+ running_hi = sorted_ranges[0][1]
1022
+ running_lo_inclusive = sorted_ranges[0][3]
1023
+ running_hi_inclusive = sorted_ranges[0][2]
1024
+ has_gap = False
1025
+
1026
+ for lo, hi, hi_inc, lo_inc in sorted_ranges[1:]:
1027
+ if lo > running_hi:
1028
+ # Gap detected! This range starts after the previous ends
1029
+ has_gap = True
1030
+ break
1031
+ # Extend running_hi if this range extends further
1032
+ if hi > running_hi:
1033
+ running_hi = hi
1034
+ running_hi_inclusive = hi_inc
1035
+ elif hi == running_hi:
1036
+ running_hi_inclusive = running_hi_inclusive or hi_inc
1037
+
1038
+ if not has_gap:
1039
+ # All ranges are contiguous - we can merge!
1040
+ merged_lo = running_lo
1041
+ merged_hi = running_hi
1042
+ merged_hi_inclusive = running_hi_inclusive
1043
+ merged_lo_inclusive = running_lo_inclusive
1044
+ can_merge = True
1045
+
1046
+ if can_merge:
1047
+ # Merge into single clean bracket
1048
+ return (
1049
+ True,
1050
+ f"merged-branches:{reason}",
1051
+ [
1052
+ Bracket(
1053
+ static_filter=static_filters[0],
1054
+ timerange=TimeRange(
1055
+ merged_lo,
1056
+ merged_hi,
1057
+ True,
1058
+ merged_hi_inclusive,
1059
+ merged_lo_inclusive,
1060
+ ),
1061
+ )
1062
+ ],
1063
+ (merged_lo, merged_hi),
1064
+ )
1065
+
1066
+ # Cannot merge - fall back to single bracket with full $or
1067
+ # This preserves the original $or semantics
1068
+ lo, hi = None, None
1069
+ hi_inclusive, lo_inclusive = False, True
1070
+
1071
+ for branch_lo, branch_hi, branch_hi_inc, branch_lo_inc in time_bounds_list:
1072
+ if branch_lo is not None:
1073
+ if lo is None or branch_lo < lo:
1074
+ lo = branch_lo
1075
+ lo_inclusive = branch_lo_inc
1076
+ elif branch_lo == lo:
1077
+ lo_inclusive = lo_inclusive or branch_lo_inc
1078
+ if branch_hi is not None:
1079
+ if hi is None or branch_hi > hi:
1080
+ hi = branch_hi
1081
+ hi_inclusive = branch_hi_inc
1082
+ elif branch_hi == hi:
1083
+ hi_inclusive = hi_inclusive or branch_hi_inc
1084
+
1085
+ # If any branch is unbounded, the whole query is unbounded
1086
+ if has_unbounded_branch:
1087
+ lo, hi = None, None
1088
+ hi_inclusive, lo_inclusive = False, True
1089
+
1090
+ # Build the single bracket with original query structure
1091
+ single_filter = dict(query)
1092
+ if time_field in single_filter:
1093
+ single_filter.pop(time_field)
1094
+
1095
+ is_full = lo is not None and hi is not None
1096
+ return (
1097
+ True,
1098
+ f"single-bracket:{reason}",
1099
+ [
1100
+ Bracket(
1101
+ static_filter=single_filter,
1102
+ timerange=TimeRange(
1103
+ lo, hi, is_full, hi_inclusive, lo_inclusive
1104
+ ),
1105
+ )
1106
+ ],
1107
+ (lo, hi),
1108
+ )
1109
+
1110
+ # Use transformed branches if available
1111
+ branches = transformed if transformed else or_list
1112
+
1113
+ prelim: List[Bracket] = []
1114
+ for br in branches:
1115
+ if not isinstance(br, Dict):
1116
+ return False, "branch-not-dict", [], (None, None)
1117
+
1118
+ eff: Dict[str, Any] = {}
1119
+ if global_and:
1120
+ eff.update(global_and)
1121
+ eff.update(br)
1122
+
1123
+ br_bounds, _ = extract_time_bounds_recursive(eff, time_field)
1124
+ if br_bounds is None:
1125
+ lo, hi, hi_inclusive, lo_inclusive = None, None, False, True
1126
+ else:
1127
+ lo, hi, hi_inclusive, lo_inclusive = br_bounds
1128
+ is_full = lo is not None and hi is not None
1129
+
1130
+ # Remove time field from static filter
1131
+ static_wo_time = dict(eff)
1132
+ if time_field in static_wo_time:
1133
+ static_wo_time.pop(time_field)
1134
+
1135
+ if "$or" in static_wo_time:
1136
+ return False, "nested-or-in-branch", [], (None, None)
1137
+
1138
+ prelim.append(
1139
+ Bracket(
1140
+ static_filter=static_wo_time,
1141
+ timerange=TimeRange(lo, hi, is_full, hi_inclusive, lo_inclusive),
1142
+ )
1143
+ )
1144
+
1145
+ grouped: Dict[str, Dict[str, Any]] = {}
1146
+ for b in prelim:
1147
+ key = _json_key(b.static_filter)
1148
+ g = grouped.get(key)
1149
+ if g is None:
1150
+ g = {"static": b.static_filter, "full": [], "partial": []}
1151
+ grouped[key] = g
1152
+ (g["full"] if b.timerange.is_full else g["partial"]).append(b.timerange)
1153
+
1154
+ out_brackets: List[Bracket] = []
1155
+ for g in grouped.values():
1156
+ static = g["static"]
1157
+ full_ranges = g["full"]
1158
+ partial_ranges = g["partial"]
1159
+
1160
+ # Merge partial ranges first (keep most inclusive)
1161
+ # NOTE: _merge_partial_ranges handles unbounded (lo=None, hi=None) by
1162
+ # returning just the unbounded range, which covers everything
1163
+ merged_partials = _merge_partial_ranges(partial_ranges)
1164
+
1165
+ # Check if any partial is completely unbounded - if so, it covers ALL
1166
+ # (both other partials AND all full ranges in this group)
1167
+ has_unbounded = any(r.lo is None and r.hi is None for r in merged_partials)
1168
+ if has_unbounded:
1169
+ # Unbounded covers everything - just emit the unbounded bracket
1170
+ out_brackets.append(
1171
+ Bracket(
1172
+ static_filter=static,
1173
+ timerange=TimeRange(None, None, False, False, True),
1174
+ )
1175
+ )
1176
+ continue # Skip all full and other partial for this static_filter
1177
+
1178
+ # Check if any partial covers all full ranges
1179
+ # If so, we only need the partial (it fetches everything the fulls would)
1180
+ remaining_fulls: List[TimeRange] = []
1181
+ for fr in full_ranges:
1182
+ covered = False
1183
+ for pr in merged_partials:
1184
+ if _partial_covers_full(pr, fr):
1185
+ covered = True
1186
+ break
1187
+ if not covered:
1188
+ remaining_fulls.append(fr)
1189
+
1190
+ # Merge remaining full ranges
1191
+ for r in _merge_full_ranges(remaining_fulls):
1192
+ out_brackets.append(Bracket(static_filter=static, timerange=r))
1193
+
1194
+ # Add merged partial ranges (these will be executed as single unchunked queries)
1195
+ for r in merged_partials:
1196
+ out_brackets.append(Bracket(static_filter=static, timerange=r))
1197
+
1198
+ if not out_brackets:
1199
+ return False, "no-complete-time-range", [], (None, None)
1200
+
1201
+ return True, "", out_brackets, bounds