xlr8 0.1.7b3__cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,420 @@
1
+ """Type stubs for XLR8 Collection.
2
+
3
+ Provides IDE autocomplete for all PyMongo methods plus XLR8 extensions.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from typing import (
9
+ Any,
10
+ Callable,
11
+ Dict,
12
+ Generic,
13
+ Iterable,
14
+ List,
15
+ Mapping,
16
+ MutableMapping,
17
+ NoReturn,
18
+ Optional,
19
+ Sequence,
20
+ Tuple,
21
+ TypeVar,
22
+ Union,
23
+ overload,
24
+ )
25
+
26
+ import bson
27
+ from bson.raw_bson import RawBSONDocument
28
+ from bson.timestamp import Timestamp
29
+ from pymongo.operations import (
30
+ DeleteMany,
31
+ DeleteOne,
32
+ IndexModel,
33
+ InsertOne,
34
+ ReplaceOne,
35
+ SearchIndexModel,
36
+ UpdateMany,
37
+ UpdateOne,
38
+ _IndexKeyHint,
39
+ _IndexList,
40
+ )
41
+ from pymongo.read_preferences import _ServerMode
42
+ from pymongo.results import (
43
+ BulkWriteResult,
44
+ DeleteResult,
45
+ InsertManyResult,
46
+ InsertOneResult,
47
+ UpdateResult,
48
+ )
49
+ from pymongo.synchronous.change_stream import CollectionChangeStream
50
+ from pymongo.synchronous.client_session import ClientSession
51
+ from pymongo.synchronous.collection import Collection as PyMongoCollection
52
+ from pymongo.synchronous.command_cursor import CommandCursor, RawBatchCommandCursor
53
+ from pymongo.synchronous.cursor import RawBatchCursor
54
+ from pymongo.synchronous.database import Database
55
+ from pymongo.typings import _CollationIn, _DocumentType, _DocumentTypeArg, _Pipeline
56
+ from pymongo.write_concern import WriteConcern
57
+
58
+ from .cursor import XLR8Cursor
59
+
60
# Type variable for the document type carried by the write-operation alias;
# bounded to string-keyed mappings.
_T = TypeVar("_T", bound=Mapping[str, Any])

# Alias for the operation types accepted in a bulk_write() request sequence.
_WriteOp = Union[
    InsertOne[_T],
    DeleteOne,
    DeleteMany,
    ReplaceOne[_T],
    UpdateOne,
    UpdateMany,
]
64
+
65
class XLR8Collection(Generic[_DocumentType]):
    """Typed surface of the XLR8 collection wrapper.

    The wrapper is PyMongo-compatible with optional acceleration: PyMongo
    methods are available through delegation, while find() is typed to
    return an XLR8Cursor.
    """

    # ------------------------------------------------------------------
    # Construction and special methods
    # ------------------------------------------------------------------
    def __init__(
        self,
        pymongo_collection: PyMongoCollection[_DocumentType],
        schema: Any | None = None,
        mongo_uri: str | Callable[[], str] | None = None,
        approx_document_size_bytes: int = 500,
    ) -> None: ...

    def __getattr__(self, name: str) -> XLR8Collection[_DocumentType]: ...

    def __getitem__(self, name: str) -> XLR8Collection[_DocumentType]: ...

    def __eq__(self, other: Any) -> bool: ...

    def __ne__(self, other: Any) -> bool: ...

    def __hash__(self) -> int: ...

    # Declared as never returning normally.
    def __bool__(self) -> NoReturn: ...

    # ------------------------------------------------------------------
    # Identity properties
    # ------------------------------------------------------------------
    @property
    def full_name(self) -> str: ...

    @property
    def name(self) -> str: ...

    @property
    def database(self) -> Database[_DocumentType]: ...

    # ------------------------------------------------------------------
    # with_options: the result's document type follows codec_options
    # ------------------------------------------------------------------
    @overload
    def with_options(
        self,
        codec_options: None = None,
        read_preference: _ServerMode | None = ...,
        write_concern: WriteConcern | None = ...,
        read_concern: Any | None = ...,
    ) -> XLR8Collection[_DocumentType]: ...

    @overload
    def with_options(
        self,
        codec_options: bson.CodecOptions[_DocumentTypeArg],
        read_preference: _ServerMode | None = ...,
        write_concern: WriteConcern | None = ...,
        read_concern: Any | None = ...,
    ) -> XLR8Collection[_DocumentTypeArg]: ...

    # Both declared as never returning normally.
    def __next__(self) -> NoReturn: ...

    def __call__(self, *args: Any, **kwargs: Any) -> NoReturn: ...

    # ------------------------------------------------------------------
    # Change streams
    # ------------------------------------------------------------------
    def watch(
        self,
        pipeline: _Pipeline | None = None,
        full_document: str | None = None,
        resume_after: Mapping[str, Any] | None = None,
        max_await_time_ms: int | None = None,
        batch_size: int | None = None,
        collation: _CollationIn | None = None,
        start_at_operation_time: Timestamp | None = None,
        session: ClientSession | None = None,
        start_after: Mapping[str, Any] | None = None,
        comment: Any | None = None,
        full_document_before_change: str | None = None,
        show_expanded_events: bool | None = None,
    ) -> CollectionChangeStream[_DocumentType]: ...

    # ------------------------------------------------------------------
    # Write operations
    # ------------------------------------------------------------------
    def bulk_write(
        self,
        requests: Sequence[_WriteOp[_DocumentType]],
        ordered: bool = True,
        bypass_document_validation: bool | None = None,
        session: ClientSession | None = None,
        comment: Any | None = None,
        let: Mapping[str, Any] | None = None,
    ) -> BulkWriteResult: ...

    def insert_one(
        self,
        document: _DocumentType | RawBSONDocument,
        bypass_document_validation: bool | None = None,
        session: ClientSession | None = None,
        comment: Any | None = None,
    ) -> InsertOneResult: ...

    def insert_many(
        self,
        documents: Iterable[_DocumentType | RawBSONDocument],
        ordered: bool = True,
        bypass_document_validation: bool | None = None,
        session: ClientSession | None = None,
        comment: Any | None = None,
    ) -> InsertManyResult: ...

    def replace_one(
        self,
        filter: Mapping[str, Any],
        replacement: Mapping[str, Any],
        upsert: bool = False,
        bypass_document_validation: bool | None = None,
        collation: _CollationIn | None = None,
        hint: _IndexKeyHint | None = None,
        session: ClientSession | None = None,
        let: Mapping[str, Any] | None = None,
        sort: Mapping[str, Any] | None = None,
        comment: Any | None = None,
    ) -> UpdateResult: ...

    def update_one(
        self,
        filter: Mapping[str, Any],
        update: Mapping[str, Any] | _Pipeline,
        upsert: bool = False,
        bypass_document_validation: bool | None = None,
        collation: _CollationIn | None = None,
        array_filters: Sequence[Mapping[str, Any]] | None = None,
        hint: _IndexKeyHint | None = None,
        session: ClientSession | None = None,
        let: Mapping[str, Any] | None = None,
        sort: Mapping[str, Any] | None = None,
        comment: Any | None = None,
    ) -> UpdateResult: ...

    def update_many(
        self,
        filter: Mapping[str, Any],
        update: Mapping[str, Any] | _Pipeline,
        upsert: bool = False,
        array_filters: Sequence[Mapping[str, Any]] | None = None,
        bypass_document_validation: bool | None = None,
        collation: _CollationIn | None = None,
        hint: _IndexKeyHint | None = None,
        session: ClientSession | None = None,
        let: Mapping[str, Any] | None = None,
        comment: Any | None = None,
    ) -> UpdateResult: ...

    def drop(
        self,
        session: ClientSession | None = None,
        comment: Any | None = None,
        encrypted_fields: Mapping[str, Any] | None = None,
    ) -> None: ...

    def delete_one(
        self,
        filter: Mapping[str, Any],
        collation: _CollationIn | None = None,
        hint: _IndexKeyHint | None = None,
        session: ClientSession | None = None,
        let: Mapping[str, Any] | None = None,
        comment: Any | None = None,
    ) -> DeleteResult: ...

    def delete_many(
        self,
        filter: Mapping[str, Any],
        collation: _CollationIn | None = None,
        hint: _IndexKeyHint | None = None,
        session: ClientSession | None = None,
        let: Mapping[str, Any] | None = None,
        comment: Any | None = None,
    ) -> DeleteResult: ...

    # ------------------------------------------------------------------
    # Read operations
    # ------------------------------------------------------------------
    def find_one(
        self,
        filter: Any | None = None,
        *args: Any,
        **kwargs: Any,
    ) -> _DocumentType | None: ...

    def find(
        self,
        filter: dict[str, Any] | None = None,
        projection: dict[str, Any] | None = None,
        skip: int = 0,
        limit: int = 0,
        sort: list[tuple[str, int]] | None = None,
        batch_size: int = 1000,
        # PyMongo compatibility parameters (passed through to cursor)
        no_cursor_timeout: bool = False,
        cursor_type: int = ...,
        allow_partial_results: bool = False,
        oplog_replay: bool = False,
        collation: dict[str, Any] | None = None,
        hint: Any | None = None,
        max_scan: int | None = None,
        max_time_ms: int | None = None,
        max: list[tuple[str, Any]] | None = None,
        min: list[tuple[str, Any]] | None = None,
        return_key: bool | None = None,
        show_record_id: bool | None = None,
        snapshot: bool | None = None,
        comment: Any | None = None,
        session: Any | None = None,
        allow_disk_use: bool | None = None,
        let: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> XLR8Cursor[_DocumentType]: ...

    def find_raw_batches(
        self,
        *args: Any,
        **kwargs: Any,
    ) -> RawBatchCursor[_DocumentType]: ...

    def estimated_document_count(
        self,
        comment: Any | None = None,
        **kwargs: Any,
    ) -> int: ...

    def count_documents(
        self,
        filter: Mapping[str, Any],
        session: ClientSession | None = None,
        comment: Any | None = None,
        **kwargs: Any,
    ) -> int: ...

    # ------------------------------------------------------------------
    # Index management
    # ------------------------------------------------------------------
    def create_indexes(
        self,
        indexes: Sequence[IndexModel],
        session: ClientSession | None = None,
        comment: Any | None = None,
        **kwargs: Any,
    ) -> list[str]: ...

    def create_index(
        self,
        keys: _IndexKeyHint,
        session: ClientSession | None = None,
        comment: Any | None = None,
        **kwargs: Any,
    ) -> str: ...

    def drop_indexes(
        self,
        session: ClientSession | None = None,
        comment: Any | None = None,
        **kwargs: Any,
    ) -> None: ...

    def drop_index(
        self,
        index_or_name: _IndexKeyHint,
        session: ClientSession | None = None,
        comment: Any | None = None,
        **kwargs: Any,
    ) -> None: ...

    def list_indexes(
        self,
        session: ClientSession | None = None,
        comment: Any | None = None,
    ) -> CommandCursor[MutableMapping[str, Any]]: ...

    def index_information(
        self,
        session: ClientSession | None = None,
        comment: Any | None = None,
    ) -> MutableMapping[str, Any]: ...

    # ------------------------------------------------------------------
    # Search index management
    # ------------------------------------------------------------------
    def list_search_indexes(
        self,
        name: str | None = None,
        session: ClientSession | None = None,
        comment: Any | None = None,
        **kwargs: Any,
    ) -> CommandCursor[Mapping[str, Any]]: ...

    def create_search_index(
        self,
        model: Mapping[str, Any] | SearchIndexModel,
        session: ClientSession | None = None,
        comment: Any = None,
        **kwargs: Any,
    ) -> str: ...

    def create_search_indexes(
        self,
        models: list[SearchIndexModel],
        session: ClientSession | None = None,
        comment: Any | None = None,
        **kwargs: Any,
    ) -> list[str]: ...

    def drop_search_index(
        self,
        name: str,
        session: ClientSession | None = None,
        comment: Any | None = None,
        **kwargs: Any,
    ) -> None: ...

    def update_search_index(
        self,
        name: str,
        definition: Mapping[str, Any],
        session: ClientSession | None = None,
        comment: Any | None = None,
        **kwargs: Any,
    ) -> None: ...

    # ------------------------------------------------------------------
    # Collection-level helpers
    # ------------------------------------------------------------------
    def options(
        self,
        session: ClientSession | None = None,
        comment: Any | None = None,
    ) -> MutableMapping[str, Any]: ...

    def aggregate(
        self,
        pipeline: _Pipeline,
        session: ClientSession | None = None,
        let: Mapping[str, Any] | None = None,
        comment: Any | None = None,
        **kwargs: Any,
    ) -> CommandCursor[_DocumentType]: ...

    def aggregate_raw_batches(
        self,
        pipeline: _Pipeline,
        session: ClientSession | None = None,
        comment: Any | None = None,
        **kwargs: Any,
    ) -> RawBatchCommandCursor[_DocumentType]: ...

    def rename(
        self,
        new_name: str,
        session: ClientSession | None = None,
        comment: Any | None = None,
        **kwargs: Any,
    ) -> MutableMapping[str, Any]: ...

    def distinct(
        self,
        key: str,
        filter: Mapping[str, Any] | None = None,
        session: ClientSession | None = None,
        comment: Any | None = None,
        hint: _IndexKeyHint | None = None,
        **kwargs: Any,
    ) -> list[Any]: ...

    # ------------------------------------------------------------------
    # find_one_and_* operations
    # ------------------------------------------------------------------
    def find_one_and_delete(
        self,
        filter: Mapping[str, Any],
        projection: Mapping[str, Any] | Iterable[str] | None = None,
        sort: _IndexList | None = None,
        hint: _IndexKeyHint | None = None,
        session: ClientSession | None = None,
        let: Mapping[str, Any] | None = None,
        comment: Any | None = None,
        **kwargs: Any,
    ) -> _DocumentType | None: ...

    def find_one_and_replace(
        self,
        filter: Mapping[str, Any],
        replacement: Mapping[str, Any],
        projection: Mapping[str, Any] | Iterable[str] | None = None,
        sort: _IndexList | None = None,
        upsert: bool = False,
        return_document: bool = ...,
        hint: _IndexKeyHint | None = None,
        session: ClientSession | None = None,
        let: Mapping[str, Any] | None = None,
        comment: Any | None = None,
        **kwargs: Any,
    ) -> _DocumentType | None: ...

    def find_one_and_update(
        self,
        filter: Mapping[str, Any],
        update: Mapping[str, Any] | _Pipeline,
        projection: Mapping[str, Any] | Iterable[str] | None = None,
        sort: _IndexList | None = None,
        upsert: bool = False,
        return_document: bool = ...,
        array_filters: Sequence[Mapping[str, Any]] | None = None,
        hint: _IndexKeyHint | None = None,
        session: ClientSession | None = None,
        let: Mapping[str, Any] | None = None,
        comment: Any | None = None,
        **kwargs: Any,
    ) -> _DocumentType | None: ...

    # ------------------------------------------------------------------
    # XLR8-specific methods
    # ------------------------------------------------------------------
    def raw_collection(self) -> PyMongoCollection[_DocumentType]: ...

    def set_schema(self, schema: Any) -> None: ...

    def get_schema(self) -> Any | None: ...

    def clear_cache(self) -> None: ...

    # ------------------------------------------------------------------
    # XLR8 properties
    # ------------------------------------------------------------------
    @property
    def schema(self) -> Any | None: ...

    @property
    def pymongo_collection(self) -> PyMongoCollection[_DocumentType]: ...

    @property
    def mongo_uri(self) -> str | Callable[[], str] | None: ...

    @property
    def approx_document_size_bytes(self) -> int: ...
414
+
415
def accelerate(
    pymongo_collection: PyMongoCollection[_DocumentType],
    schema: Any,
    mongo_uri: str | Callable[[], str],
    approx_document_size_bytes: int = 500,
) -> XLR8Collection[_DocumentType]:
    """Typed signature for building an XLR8Collection from a PyMongo collection.

    Args:
        pymongo_collection: The PyMongo collection to wrap.
        schema: Schema object; required here, unlike in the class constructor.
        mongo_uri: Either a URI string or a zero-argument callable returning one.
        approx_document_size_bytes: Approximate per-document size in bytes
            (defaults to 500).

    Returns:
        An XLR8Collection parameterized by the same document type as the
        input collection.
    """
    ...
xlr8/constants.py ADDED
@@ -0,0 +1,24 @@
1
"""
Constants and configuration values for XLR8.

Collected in one module so that magic numbers are not scattered across the
codebase. Every tunable performance parameter should be defined here.
"""

# -----------------------------------------------------------------------------
# Parquet file settings
# -----------------------------------------------------------------------------

# Default Parquet row group size. It can be overridden through an argument
# passed to the special cursor methods, e.g. to_dataframe.
PARQUET_ROW_GROUP_SIZE = 100_000

# Default compression codec for Parquet files.
DEFAULT_COMPRESSION = "zstd"

# -----------------------------------------------------------------------------
# Batch processing
# -----------------------------------------------------------------------------

# Default batch size for DataFrame operations.
DEFAULT_BATCH_SIZE = 10_000
@@ -0,0 +1,43 @@
1
+ """
2
+ Execution engine for parallel query execution via Rust backend.
3
+
4
+ All parallel execution now goes through the Rust backend for GIL-free performance.
5
+
6
+ Components:
7
+ - executor: High-level parallel execution (execute_parallel_stream_to_cache)
8
+ - callback: Partitioned streaming for data lake population
9
+ - planner: Memory-aware execution planning and worker configuration
10
+
11
+ Python handles:
12
+ - Query planning and bracketing
13
+ - Memory budget calculations
14
+ - Result reading and DataFrame construction
15
+
16
+ Rust backend handles:
17
+ - Parallel MongoDB fetches (GIL-free)
18
+ - BSON decoding and Arrow encoding
19
+ - Memory-aware buffering
20
+ - Parquet writing
21
+ """
22
+
23
+ from .callback import PartitionWorkItem, execute_partitioned_callback
24
+ from .executor import execute_parallel_stream_to_cache
25
+ from .planner import (
26
+ Backend,
27
+ BackendConfig,
28
+ ExecutionPlan,
29
+ build_execution_plan,
30
+ )
31
+
32
# Names re-exported by this package, grouped by the submodule they are
# imported from.
__all__ = [
    # .executor
    "execute_parallel_stream_to_cache",
    # .callback
    "PartitionWorkItem",
    "execute_partitioned_callback",
    # .planner
    "Backend",
    "BackendConfig",
    "ExecutionPlan",
    "build_execution_plan",
]