xlr8 0.1.7b3__cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,118 @@
1
+ """
2
+ Time-range chunking utilities for XLR8.
3
+
4
+ This module splits time ranges into day-aligned chunks for parallel processing.
5
+ Each chunk becomes a work item that a worker can fetch independently.
6
+
7
+ WHY CHUNK BY TIME?
8
+ ------------------
9
+
10
+ MongoDB time-series data is typically indexed by time. Chunking allows:
11
+ 1. Parallel fetches - Multiple workers can fetch different time chunks
12
+ 2. Incremental caching - Cache chunks separately, reuse when time range overlaps
13
+ 3. Memory control - Each chunk fits in worker's RAM budget
14
+
15
+ CHUNKING ALGORITHM
16
+ ------------------
17
+
18
+ INPUT:
19
+ start = datetime(2024, 1, 5, 12, 30) # Mid-day start
20
+ end = datetime(2024, 1, 15, 8, 0) # Mid-day end
21
+ chunk_size = timedelta(days=3)
22
+
23
+ OUTPUT (day-aligned chunks):
24
+
25
+ Chunk 1: 2024-01-05 12:30 -> 2024-01-08 00:00 (partial first chunk)
26
+ Chunk 2: 2024-01-08 00:00 -> 2024-01-11 00:00 (full 3-day chunk)
27
+ Chunk 3: 2024-01-11 00:00 -> 2024-01-14 00:00 (full 3-day chunk)
28
+ Chunk 4: 2024-01-14 00:00 -> 2024-01-15 08:00 (partial last chunk)
29
+
30
+ Note: Chunk boundaries fall at midnight of the start day plus whole multiples of the step, so the first and last chunks may be shorter than a full step.
31
+
32
+ TYPICAL USAGE
33
+ -------------
34
+
35
+ 6-month query with 14-day chunks:
36
+ start = 2024-01-01
37
+ end = 2024-07-01
38
+ chunk_size = timedelta(days=14) (note: chunk_time_range itself defaults to 1 day when chunk_size is omitted)
39
+
40
+ Result: ~13 chunks
41
+ Chunk 1: Jan 1-15
42
+ Chunk 2: Jan 15-29
43
+ Chunk 3: Jan 29 - Feb 12
44
+ ...
45
+ Chunk 13: Jun 17 - Jul 1
46
+
47
+ With 10 workers, chunks are processed in parallel:
48
+ Workers 0-9 grab chunks 1-10 immediately
49
+ As workers finish, they grab chunks 11-13
50
+ """
51
+
52
+ from datetime import datetime, timedelta, timezone
53
+ from typing import List, Optional, Tuple
54
+
55
+ __all__ = [
56
+ "chunk_time_range",
57
+ ]
58
+
59
+
60
def chunk_time_range(
    start: datetime,
    end: datetime,
    chunk_size: Optional[timedelta] = None,
) -> List[Tuple[datetime, datetime]]:
    """
    Split the half-open time range ``[start, end)`` into consecutive chunks.

    Chunk boundaries are placed at midnight (UTC) of the start day plus whole
    multiples of ``chunk_size``. The first chunk runs from ``start`` to the
    first boundary strictly after ``start``; the last chunk is truncated at
    ``end``. Consequently the first and last chunks may be shorter than
    ``chunk_size``, every chunk is non-empty, and the chunks are contiguous
    (each chunk starts where the previous one ends).

    Naive datetimes are interpreted as UTC. Interior boundaries are always
    UTC-aware; the first chunk's start and the last chunk's end keep the
    tzinfo of ``start`` and ``end`` respectively.

    Args:
        start: Start datetime (inclusive).
        end: End datetime (exclusive).
        chunk_size: Size of each chunk as a positive timedelta
            (default: 1 day).

    Returns:
        List of (chunk_start, chunk_end) tuples in chronological order.
        Empty if ``start >= end``.

    Raises:
        ValueError: If the range is non-empty and ``chunk_size`` is zero or
            negative (the loop could never make progress).

    Examples:
        Day-level chunking:
        >>> start = datetime(2024, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
        >>> end = datetime(2024, 1, 5, 8, 0, 0, tzinfo=timezone.utc)
        >>> len(chunk_time_range(start, end, chunk_size=timedelta(days=1)))
        5

        Hour-level chunking (boundaries at 00:00, 08:00, 16:00 UTC):
        >>> chunks = chunk_time_range(start, end, chunk_size=timedelta(hours=8))
        >>> chunks[0][1].hour
        16
    """
    # Treat naive datetimes as UTC so every comparison below is aware-vs-aware.
    if start.tzinfo is None:
        start = start.replace(tzinfo=timezone.utc)
    if end.tzinfo is None:
        end = end.replace(tzinfo=timezone.utc)

    if start >= end:
        return []

    step = timedelta(days=1) if chunk_size is None else chunk_size
    if step <= timedelta(0):
        # A non-positive step would never advance past `end`.
        raise ValueError(f"chunk_size must be a positive timedelta, got {step!r}")

    # Anchor the boundary grid at midnight UTC of the day containing `start`.
    # Converting to UTC first keeps the anchor at or before `start` even when
    # the caller passes a non-UTC timezone.
    start_utc = start.astimezone(timezone.utc)
    day_start = start_utc.replace(hour=0, minute=0, second=0, microsecond=0)

    # First grid point strictly AFTER start: skip every whole step that has
    # already elapsed since the anchor, then move one step further.
    steps_elapsed = (start_utc - day_start) // step
    boundary = day_start + (steps_elapsed + 1) * step

    chunks: List[Tuple[datetime, datetime]] = []
    chunk_start = start
    while chunk_start < end:
        # Clamp the final chunk so it never extends past the requested end.
        chunk_end = boundary if boundary < end else end
        chunks.append((chunk_start, chunk_end))
        chunk_start = boundary
        boundary = boundary + step

    return chunks