xlr8-0.1.7b3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xlr8/__init__.py +113 -0
- xlr8/_xlr8_rust.cpython-311-x86_64-linux-gnu.so +0 -0
- xlr8/_xlr8_rust.pyi +71 -0
- xlr8/analysis/__init__.py +58 -0
- xlr8/analysis/brackets.py +1201 -0
- xlr8/analysis/chunker.py +118 -0
- xlr8/analysis/inspector.py +1889 -0
- xlr8/collection/__init__.py +6 -0
- xlr8/collection/cursor.py +2161 -0
- xlr8/collection/cursor.pyi +179 -0
- xlr8/collection/wrapper.py +400 -0
- xlr8/collection/wrapper.pyi +420 -0
- xlr8/constants.py +24 -0
- xlr8/execution/__init__.py +43 -0
- xlr8/execution/callback.py +792 -0
- xlr8/execution/executor.py +500 -0
- xlr8/execution/planner.py +377 -0
- xlr8/py.typed +1 -0
- xlr8/rust_backend.py +40 -0
- xlr8/rust_backend.pyi +71 -0
- xlr8/schema/__init__.py +42 -0
- xlr8/schema/encoder.py +235 -0
- xlr8/schema/schema.py +265 -0
- xlr8/schema/types.py +239 -0
- xlr8/storage/__init__.py +17 -0
- xlr8/storage/cache.py +228 -0
- xlr8/storage/reader.py +1369 -0
- xlr8-0.1.7b3.dist-info/METADATA +176 -0
- xlr8-0.1.7b3.dist-info/RECORD +31 -0
- xlr8-0.1.7b3.dist-info/WHEEL +5 -0
- xlr8-0.1.7b3.dist-info/licenses/LICENSE +201 -0
xlr8/collection/wrapper.pyi
ADDED
@@ -0,0 +1,420 @@
"""Type stubs for XLR8 Collection.

Provides IDE autocomplete for all PyMongo methods plus XLR8 extensions.
"""

from __future__ import annotations

from typing import (
    Any,
    Callable,
    Dict,
    Generic,
    Iterable,
    List,
    Mapping,
    MutableMapping,
    NoReturn,
    Optional,
    Sequence,
    Tuple,
    TypeVar,
    Union,
    overload,
)

import bson
from bson.raw_bson import RawBSONDocument
from bson.timestamp import Timestamp
from pymongo.operations import (
    DeleteMany,
    DeleteOne,
    IndexModel,
    InsertOne,
    ReplaceOne,
    SearchIndexModel,
    UpdateMany,
    UpdateOne,
    _IndexKeyHint,
    _IndexList,
)
from pymongo.read_preferences import _ServerMode
from pymongo.results import (
    BulkWriteResult,
    DeleteResult,
    InsertManyResult,
    InsertOneResult,
    UpdateResult,
)
from pymongo.synchronous.change_stream import CollectionChangeStream
from pymongo.synchronous.client_session import ClientSession
from pymongo.synchronous.collection import Collection as PyMongoCollection
from pymongo.synchronous.command_cursor import CommandCursor, RawBatchCommandCursor
from pymongo.synchronous.cursor import RawBatchCursor
from pymongo.synchronous.database import Database
from pymongo.typings import _CollationIn, _DocumentType, _DocumentTypeArg, _Pipeline
from pymongo.write_concern import WriteConcern

from .cursor import XLR8Cursor

_T = TypeVar("_T", bound=Mapping[str, Any])
_WriteOp = Union[
    InsertOne[_T], DeleteOne, DeleteMany, ReplaceOne[_T], UpdateOne, UpdateMany
]

class XLR8Collection(Generic[_DocumentType]):
    """PyMongo-compatible collection with optional acceleration.

    All PyMongo methods work via delegation. find() returns XLR8Cursor.
    """

    def __init__(
        self,
        pymongo_collection: PyMongoCollection[_DocumentType],
        schema: Optional[Any] = None,
        mongo_uri: Optional[Union[str, Callable[[], str]]] = None,
        approx_document_size_bytes: int = 500,
    ) -> None: ...
    def __getattr__(self, name: str) -> XLR8Collection[_DocumentType]: ...
    def __getitem__(self, name: str) -> XLR8Collection[_DocumentType]: ...
    def __eq__(self, other: Any) -> bool: ...
    def __ne__(self, other: Any) -> bool: ...
    def __hash__(self) -> int: ...
    def __bool__(self) -> NoReturn: ...
    @property
    def full_name(self) -> str: ...
    @property
    def name(self) -> str: ...
    @property
    def database(self) -> Database[_DocumentType]: ...
    @overload
    def with_options(
        self,
        codec_options: None = None,
        read_preference: _ServerMode | None = ...,
        write_concern: WriteConcern | None = ...,
        read_concern: Any | None = ...,
    ) -> XLR8Collection[_DocumentType]: ...
    @overload
    def with_options(
        self,
        codec_options: bson.CodecOptions[_DocumentTypeArg],
        read_preference: _ServerMode | None = ...,
        write_concern: WriteConcern | None = ...,
        read_concern: Any | None = ...,
    ) -> XLR8Collection[_DocumentTypeArg]: ...
    def __next__(self) -> NoReturn: ...
    def __call__(self, *args: Any, **kwargs: Any) -> NoReturn: ...
    def watch(
        self,
        pipeline: _Pipeline | None = None,
        full_document: str | None = None,
        resume_after: Mapping[str, Any] | None = None,
        max_await_time_ms: int | None = None,
        batch_size: int | None = None,
        collation: _CollationIn | None = None,
        start_at_operation_time: Timestamp | None = None,
        session: ClientSession | None = None,
        start_after: Mapping[str, Any] | None = None,
        comment: Any | None = None,
        full_document_before_change: str | None = None,
        show_expanded_events: bool | None = None,
    ) -> CollectionChangeStream[_DocumentType]: ...
    def bulk_write(
        self,
        requests: Sequence[_WriteOp[_DocumentType]],
        ordered: bool = True,
        bypass_document_validation: bool | None = None,
        session: ClientSession | None = None,
        comment: Any | None = None,
        let: Mapping[str, Any] | None = None,
    ) -> BulkWriteResult: ...
    def insert_one(
        self,
        document: _DocumentType | RawBSONDocument,
        bypass_document_validation: bool | None = None,
        session: ClientSession | None = None,
        comment: Any | None = None,
    ) -> InsertOneResult: ...
    def insert_many(
        self,
        documents: Iterable[_DocumentType | RawBSONDocument],
        ordered: bool = True,
        bypass_document_validation: bool | None = None,
        session: ClientSession | None = None,
        comment: Any | None = None,
    ) -> InsertManyResult: ...
    def replace_one(
        self,
        filter: Mapping[str, Any],
        replacement: Mapping[str, Any],
        upsert: bool = False,
        bypass_document_validation: bool | None = None,
        collation: _CollationIn | None = None,
        hint: _IndexKeyHint | None = None,
        session: ClientSession | None = None,
        let: Mapping[str, Any] | None = None,
        sort: Mapping[str, Any] | None = None,
        comment: Any | None = None,
    ) -> UpdateResult: ...
    def update_one(
        self,
        filter: Mapping[str, Any],
        update: Mapping[str, Any] | _Pipeline,
        upsert: bool = False,
        bypass_document_validation: bool | None = None,
        collation: _CollationIn | None = None,
        array_filters: Sequence[Mapping[str, Any]] | None = None,
        hint: _IndexKeyHint | None = None,
        session: ClientSession | None = None,
        let: Mapping[str, Any] | None = None,
        sort: Mapping[str, Any] | None = None,
        comment: Any | None = None,
    ) -> UpdateResult: ...
    def update_many(
        self,
        filter: Mapping[str, Any],
        update: Mapping[str, Any] | _Pipeline,
        upsert: bool = False,
        array_filters: Sequence[Mapping[str, Any]] | None = None,
        bypass_document_validation: bool | None = None,
        collation: _CollationIn | None = None,
        hint: _IndexKeyHint | None = None,
        session: ClientSession | None = None,
        let: Mapping[str, Any] | None = None,
        comment: Any | None = None,
    ) -> UpdateResult: ...
    def drop(
        self,
        session: ClientSession | None = None,
        comment: Any | None = None,
        encrypted_fields: Mapping[str, Any] | None = None,
    ) -> None: ...
    def delete_one(
        self,
        filter: Mapping[str, Any],
        collation: _CollationIn | None = None,
        hint: _IndexKeyHint | None = None,
        session: ClientSession | None = None,
        let: Mapping[str, Any] | None = None,
        comment: Any | None = None,
    ) -> DeleteResult: ...
    def delete_many(
        self,
        filter: Mapping[str, Any],
        collation: _CollationIn | None = None,
        hint: _IndexKeyHint | None = None,
        session: ClientSession | None = None,
        let: Mapping[str, Any] | None = None,
        comment: Any | None = None,
    ) -> DeleteResult: ...
    def find_one(
        self, filter: Any | None = None, *args: Any, **kwargs: Any
    ) -> _DocumentType | None: ...
    def find(
        self,
        filter: Optional[Dict[str, Any]] = None,
        projection: Optional[Dict[str, Any]] = None,
        skip: int = 0,
        limit: int = 0,
        sort: Optional[List[Tuple[str, int]]] = None,
        batch_size: int = 1000,
        # PyMongo compatibility parameters (passed through to cursor)
        no_cursor_timeout: bool = False,
        cursor_type: int = ...,
        allow_partial_results: bool = False,
        oplog_replay: bool = False,
        collation: Optional[Dict[str, Any]] = None,
        hint: Optional[Any] = None,
        max_scan: Optional[int] = None,
        max_time_ms: Optional[int] = None,
        max: Optional[List[Tuple[str, Any]]] = None,
        min: Optional[List[Tuple[str, Any]]] = None,
        return_key: Optional[bool] = None,
        show_record_id: Optional[bool] = None,
        snapshot: Optional[bool] = None,
        comment: Optional[Any] = None,
        session: Optional[Any] = None,
        allow_disk_use: Optional[bool] = None,
        let: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> XLR8Cursor[_DocumentType]: ...
    def find_raw_batches(
        self, *args: Any, **kwargs: Any
    ) -> RawBatchCursor[_DocumentType]: ...
    def estimated_document_count(
        self, comment: Any | None = None, **kwargs: Any
    ) -> int: ...
    def count_documents(
        self,
        filter: Mapping[str, Any],
        session: ClientSession | None = None,
        comment: Any | None = None,
        **kwargs: Any,
    ) -> int: ...
    def create_indexes(
        self,
        indexes: Sequence[IndexModel],
        session: ClientSession | None = None,
        comment: Any | None = None,
        **kwargs: Any,
    ) -> list[str]: ...
    def create_index(
        self,
        keys: _IndexKeyHint,
        session: ClientSession | None = None,
        comment: Any | None = None,
        **kwargs: Any,
    ) -> str: ...
    def drop_indexes(
        self,
        session: ClientSession | None = None,
        comment: Any | None = None,
        **kwargs: Any,
    ) -> None: ...
    def drop_index(
        self,
        index_or_name: _IndexKeyHint,
        session: ClientSession | None = None,
        comment: Any | None = None,
        **kwargs: Any,
    ) -> None: ...
    def list_indexes(
        self, session: ClientSession | None = None, comment: Any | None = None
    ) -> CommandCursor[MutableMapping[str, Any]]: ...
    def index_information(
        self, session: ClientSession | None = None, comment: Any | None = None
    ) -> MutableMapping[str, Any]: ...
    def list_search_indexes(
        self,
        name: str | None = None,
        session: ClientSession | None = None,
        comment: Any | None = None,
        **kwargs: Any,
    ) -> CommandCursor[Mapping[str, Any]]: ...
    def create_search_index(
        self,
        model: Mapping[str, Any] | SearchIndexModel,
        session: ClientSession | None = None,
        comment: Any = None,
        **kwargs: Any,
    ) -> str: ...
    def create_search_indexes(
        self,
        models: list[SearchIndexModel],
        session: ClientSession | None = None,
        comment: Any | None = None,
        **kwargs: Any,
    ) -> list[str]: ...
    def drop_search_index(
        self,
        name: str,
        session: ClientSession | None = None,
        comment: Any | None = None,
        **kwargs: Any,
    ) -> None: ...
    def update_search_index(
        self,
        name: str,
        definition: Mapping[str, Any],
        session: ClientSession | None = None,
        comment: Any | None = None,
        **kwargs: Any,
    ) -> None: ...
    def options(
        self, session: ClientSession | None = None, comment: Any | None = None
    ) -> MutableMapping[str, Any]: ...
    def aggregate(
        self,
        pipeline: _Pipeline,
        session: ClientSession | None = None,
        let: Mapping[str, Any] | None = None,
        comment: Any | None = None,
        **kwargs: Any,
    ) -> CommandCursor[_DocumentType]: ...
    def aggregate_raw_batches(
        self,
        pipeline: _Pipeline,
        session: ClientSession | None = None,
        comment: Any | None = None,
        **kwargs: Any,
    ) -> RawBatchCommandCursor[_DocumentType]: ...
    def rename(
        self,
        new_name: str,
        session: ClientSession | None = None,
        comment: Any | None = None,
        **kwargs: Any,
    ) -> MutableMapping[str, Any]: ...
    def distinct(
        self,
        key: str,
        filter: Mapping[str, Any] | None = None,
        session: ClientSession | None = None,
        comment: Any | None = None,
        hint: _IndexKeyHint | None = None,
        **kwargs: Any,
    ) -> list[Any]: ...
    def find_one_and_delete(
        self,
        filter: Mapping[str, Any],
        projection: Mapping[str, Any] | Iterable[str] | None = None,
        sort: _IndexList | None = None,
        hint: _IndexKeyHint | None = None,
        session: ClientSession | None = None,
        let: Mapping[str, Any] | None = None,
        comment: Any | None = None,
        **kwargs: Any,
    ) -> _DocumentType | None: ...
    def find_one_and_replace(
        self,
        filter: Mapping[str, Any],
        replacement: Mapping[str, Any],
        projection: Mapping[str, Any] | Iterable[str] | None = None,
        sort: _IndexList | None = None,
        upsert: bool = False,
        return_document: bool = ...,
        hint: _IndexKeyHint | None = None,
        session: ClientSession | None = None,
        let: Mapping[str, Any] | None = None,
        comment: Any | None = None,
        **kwargs: Any,
    ) -> _DocumentType | None: ...
    def find_one_and_update(
        self,
        filter: Mapping[str, Any],
        update: Mapping[str, Any] | _Pipeline,
        projection: Mapping[str, Any] | Iterable[str] | None = None,
        sort: _IndexList | None = None,
        upsert: bool = False,
        return_document: bool = ...,
        array_filters: Sequence[Mapping[str, Any]] | None = None,
        hint: _IndexKeyHint | None = None,
        session: ClientSession | None = None,
        let: Mapping[str, Any] | None = None,
        comment: Any | None = None,
        **kwargs: Any,
    ) -> _DocumentType | None: ...

    # XLR8-specific methods
    def raw_collection(self) -> PyMongoCollection[_DocumentType]: ...
    def set_schema(self, schema: Any) -> None: ...
    def get_schema(self) -> Optional[Any]: ...
    def clear_cache(self) -> None: ...

    # XLR8 properties
    @property
    def schema(self) -> Optional[Any]: ...
    @property
    def pymongo_collection(self) -> PyMongoCollection[_DocumentType]: ...
    @property
    def mongo_uri(self) -> Optional[Union[str, Callable[[], str]]]: ...
    @property
    def approx_document_size_bytes(self) -> int: ...

def accelerate(
    pymongo_collection: PyMongoCollection[_DocumentType],
    schema: Any,
    mongo_uri: Union[str, Callable[[], str]],
    approx_document_size_bytes: int = 500,
) -> XLR8Collection[_DocumentType]: ...
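The stub above makes the intended workflow visible: accelerate() wraps an existing PyMongo collection, every PyMongo method delegates unchanged, and find() returns an XLR8Cursor instead of a plain cursor. A minimal usage sketch based only on these stub signatures follows; the import path mirrors this wheel's layout (the top-level package may re-export accelerate), the URI and collection names are placeholders, and since schema is typed as Any here its real construction is not shown in this diff:

# Sketch based on the stub signatures above, not on xlr8's documentation.
from pymongo import MongoClient

from xlr8.collection.wrapper import accelerate  # path taken from this wheel's file list

MONGO_URI = "mongodb://localhost:27017"  # placeholder deployment
events = MongoClient(MONGO_URI)["analytics"]["events"]

fast_events = accelerate(
    pymongo_collection=events,
    schema=None,  # placeholder: a real xlr8 schema object would normally go here
    mongo_uri=MONGO_URI,
    approx_document_size_bytes=500,
)

# Per the class docstring, PyMongo methods delegate unchanged...
active = fast_events.count_documents({"status": "active"})

# ...while find() returns an XLR8Cursor (see cursor.pyi in the file list).
cursor = fast_events.find({"status": "active"}, batch_size=1_000)
for doc in cursor:
    print(doc["_id"])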
xlr8/constants.py
ADDED
@@ -0,0 +1,24 @@
"""
XLR8 constants and configuration values.

Centralized constants to avoid magic numbers scattered throughout the codebase.
All tuneable performance parameters should be defined here.
"""

# =============================================================================
# PARQUET FILE SETTINGS
# =============================================================================

# Default Parquet row group size; can be altered via an argument passed to
# the special cursor methods, e.g. to_dataframe
PARQUET_ROW_GROUP_SIZE = 100_000

# Default compression codec for Parquet files
DEFAULT_COMPRESSION = "zstd"

# =============================================================================
# BATCH PROCESSING
# =============================================================================

# Default batch size for DataFrame operations
DEFAULT_BATCH_SIZE = 10_000
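These defaults indicate that query results are staged as zstd-compressed Parquet in row groups of 100,000 rows. As a hedged illustration of what the two Parquet constants mean in practice, the snippet below applies them with plain pyarrow; xlr8's own Parquet writing happens in its Rust backend (see the execution package next), so this is a stand-in rather than the library's code path:

# Illustration only: applies the defaults above with pyarrow to show their effect.
import pyarrow as pa
import pyarrow.parquet as pq

from xlr8.constants import DEFAULT_COMPRESSION, PARQUET_ROW_GROUP_SIZE

table = pa.table({"ts": [1, 2, 3], "value": [0.1, 0.2, 0.3]})
pq.write_table(
    table,
    "chunk.parquet",
    row_group_size=PARQUET_ROW_GROUP_SIZE,  # 100_000 rows per row group
    compression=DEFAULT_COMPRESSION,        # "zstd"
)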
xlr8/execution/__init__.py
ADDED
@@ -0,0 +1,43 @@
"""
Execution engine for parallel query execution via Rust backend.

All parallel execution now goes through the Rust backend for GIL-free performance.

Components:
- executor: High-level parallel execution (execute_parallel_stream_to_cache)
- callback: Partitioned streaming for data lake population
- planner: Memory-aware execution planning and worker configuration

Python handles:
- Query planning and bracketing
- Memory budget calculations
- Result reading and DataFrame construction

Rust backend handles:
- Parallel MongoDB fetches (GIL-free)
- BSON decoding and Arrow encoding
- Memory-aware buffering
- Parquet writing
"""

from .callback import PartitionWorkItem, execute_partitioned_callback
from .executor import execute_parallel_stream_to_cache
from .planner import (
    Backend,
    BackendConfig,
    ExecutionPlan,
    build_execution_plan,
)

__all__ = [
    # Executor
    "execute_parallel_stream_to_cache",
    # Callback
    "PartitionWorkItem",
    "execute_partitioned_callback",
    # Planner
    "Backend",
    "BackendConfig",
    "ExecutionPlan",
    "build_execution_plan",
]
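The docstring's division of labour — Python decides how to split and budget the work, the Rust backend fetches, decodes, and writes — can be pictured with a small stand-in. Everything below is hypothetical: PlanSketch and plan_work are illustrative names, not xlr8's ExecutionPlan or build_execution_plan, whose actual signatures are not included in this diff:

# Hypothetical sketch of the planning/execution split described in the docstring.
# Names, fields, and heuristics here are illustrative, not the real xlr8 API.
from dataclasses import dataclass
from typing import Any, Dict, List


@dataclass
class PlanSketch:
    brackets: List[Dict[str, Any]]  # query sub-ranges produced by Python-side bracketing
    workers: int                    # parallel fetchers handed to the Rust backend
    memory_budget_bytes: int        # cap the backend uses when sizing its buffers


def plan_work(filter_: Dict[str, Any], time_brackets: List[Dict[str, Any]],
              memory_budget_bytes: int) -> PlanSketch:
    # Python side: pick a worker count that keeps in-flight buffers under budget,
    # then merge each time bracket into the base filter for the Rust side to fetch.
    per_worker_budget = 64 * 1024 * 1024  # assumed 64 MiB of buffered documents per worker
    workers = max(1, min(len(time_brackets), memory_budget_bytes // per_worker_budget))
    brackets = [{**filter_, **bracket} for bracket in time_brackets]
    return PlanSketch(brackets=brackets, workers=workers,
                      memory_budget_bytes=memory_budget_bytes)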