xlr8 0.1.2__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,218 @@
+ """Type stubs for XLR8 collection wrapper - provides IDE autocomplete."""
+
+ from typing import Any, Dict, List, Optional, Tuple, Union
+
+ from pymongo.collection import Collection as PyMongoCollection
+ from pymongo.results import (
+     BulkWriteResult,
+     DeleteResult,
+     InsertManyResult,
+     InsertOneResult,
+     UpdateResult,
+ )
+
+ from .cursor import XLR8Cursor
+
+ class XLR8Collection:
+     """
+     XLR8 accelerated collection - drop-in replacement for PyMongo collection.
+
+     Supports all PyMongo collection methods via delegation, with an accelerated .find()
+     that returns an XLR8Cursor for parallel query execution.
+
+     For direct access to the underlying PyMongo collection, use .raw_collection().
+     """
+
+     def __init__(
+         self,
+         pymongo_collection: PyMongoCollection,
+         schema: Optional[Any] = ...,
+         mongo_uri: Optional[str] = ...,
+         approx_document_size_bytes: int = ...,
+     ) -> None: ...
+     def find(
+         self,
+         filter: Optional[Dict[str, Any]] = ...,
+         projection: Optional[Dict[str, Any]] = ...,
+         skip: int = ...,
+         limit: int = ...,
+         sort: Optional[List[Tuple[str, int]]] = ...,
+         batch_size: int = ...,
+     ) -> XLR8Cursor: ...
+     def raw_collection(self) -> PyMongoCollection: ...
+     @property
+     def name(self) -> str: ...
+     @property
+     def full_name(self) -> str: ...
+     @property
+     def database(self) -> Any: ...
+     def insert_one(
+         self,
+         document: Dict[str, Any],
+         bypass_document_validation: bool = ...,
+         session: Optional[Any] = ...,
+     ) -> InsertOneResult: ...
+     def insert_many(
+         self,
+         documents: List[Dict[str, Any]],
+         ordered: bool = ...,
+         bypass_document_validation: bool = ...,
+         session: Optional[Any] = ...,
+     ) -> InsertManyResult: ...
+     def update_one(
+         self,
+         filter: Dict[str, Any],
+         update: Dict[str, Any],
+         upsert: bool = ...,
+         bypass_document_validation: bool = ...,
+         collation: Optional[Dict[str, Any]] = ...,
+         array_filters: Optional[List[Dict[str, Any]]] = ...,
+         hint: Optional[Union[str, List[Tuple[str, int]]]] = ...,
+         session: Optional[Any] = ...,
+     ) -> UpdateResult: ...
+     def update_many(
+         self,
+         filter: Dict[str, Any],
+         update: Dict[str, Any],
+         upsert: bool = ...,
+         array_filters: Optional[List[Dict[str, Any]]] = ...,
+         bypass_document_validation: bool = ...,
+         collation: Optional[Dict[str, Any]] = ...,
+         hint: Optional[Union[str, List[Tuple[str, int]]]] = ...,
+         session: Optional[Any] = ...,
+     ) -> UpdateResult: ...
+     def replace_one(
+         self,
+         filter: Dict[str, Any],
+         replacement: Dict[str, Any],
+         upsert: bool = ...,
+         bypass_document_validation: bool = ...,
+         collation: Optional[Dict[str, Any]] = ...,
+         hint: Optional[Union[str, List[Tuple[str, int]]]] = ...,
+         session: Optional[Any] = ...,
+     ) -> UpdateResult: ...
+     def delete_one(
+         self,
+         filter: Dict[str, Any],
+         collation: Optional[Dict[str, Any]] = ...,
+         hint: Optional[Union[str, List[Tuple[str, int]]]] = ...,
+         session: Optional[Any] = ...,
+     ) -> DeleteResult: ...
+     def delete_many(
+         self,
+         filter: Dict[str, Any],
+         collation: Optional[Dict[str, Any]] = ...,
+         hint: Optional[Union[str, List[Tuple[str, int]]]] = ...,
+         session: Optional[Any] = ...,
+     ) -> DeleteResult: ...
+     def find_one(
+         self,
+         filter: Optional[Dict[str, Any]] = ...,
+         *args: Any,
+         **kwargs: Any,
+     ) -> Optional[Dict[str, Any]]: ...
+     def find_one_and_delete(
+         self,
+         filter: Dict[str, Any],
+         projection: Optional[Dict[str, Any]] = ...,
+         sort: Optional[List[Tuple[str, int]]] = ...,
+         hint: Optional[Union[str, List[Tuple[str, int]]]] = ...,
+         session: Optional[Any] = ...,
+         **kwargs: Any,
+     ) -> Optional[Dict[str, Any]]: ...
+     def find_one_and_replace(
+         self,
+         filter: Dict[str, Any],
+         replacement: Dict[str, Any],
+         projection: Optional[Dict[str, Any]] = ...,
+         sort: Optional[List[Tuple[str, int]]] = ...,
+         upsert: bool = ...,
+         return_document: bool = ...,
+         hint: Optional[Union[str, List[Tuple[str, int]]]] = ...,
+         session: Optional[Any] = ...,
+         **kwargs: Any,
+     ) -> Optional[Dict[str, Any]]: ...
+     def find_one_and_update(
+         self,
+         filter: Dict[str, Any],
+         update: Dict[str, Any],
+         projection: Optional[Dict[str, Any]] = ...,
+         sort: Optional[List[Tuple[str, int]]] = ...,
+         upsert: bool = ...,
+         return_document: bool = ...,
+         array_filters: Optional[List[Dict[str, Any]]] = ...,
+         hint: Optional[Union[str, List[Tuple[str, int]]]] = ...,
+         session: Optional[Any] = ...,
+         **kwargs: Any,
+     ) -> Optional[Dict[str, Any]]: ...
+     def count_documents(
+         self,
+         filter: Dict[str, Any],
+         session: Optional[Any] = ...,
+         **kwargs: Any,
+     ) -> int: ...
+     def estimated_document_count(self, **kwargs: Any) -> int: ...
+     def distinct(
+         self,
+         key: str,
+         filter: Optional[Dict[str, Any]] = ...,
+         session: Optional[Any] = ...,
+         **kwargs: Any,
+     ) -> List[Any]: ...
+     def aggregate(
+         self,
+         pipeline: List[Dict[str, Any]],
+         session: Optional[Any] = ...,
+         **kwargs: Any,
+     ) -> Any: ...
+     def bulk_write(
+         self,
+         requests: List[Any],
+         ordered: bool = ...,
+         bypass_document_validation: bool = ...,
+         session: Optional[Any] = ...,
+     ) -> BulkWriteResult: ...
+     def create_index(
+         self,
+         keys: Union[str, List[Tuple[str, int]]],
+         session: Optional[Any] = ...,
+         **kwargs: Any,
+     ) -> str: ...
+     def create_indexes(
+         self,
+         indexes: List[Any],
+         session: Optional[Any] = ...,
+         **kwargs: Any,
+     ) -> List[str]: ...
+     def drop_index(
+         self,
+         index_or_name: Union[str, List[Tuple[str, int]]],
+         session: Optional[Any] = ...,
+         **kwargs: Any,
+     ) -> None: ...
+     def drop_indexes(self, session: Optional[Any] = ..., **kwargs: Any) -> None: ...
+     def list_indexes(self, session: Optional[Any] = ..., **kwargs: Any) -> Any: ...
+     def index_information(
+         self, session: Optional[Any] = ..., **kwargs: Any
+     ) -> Dict[str, Any]: ...
+     def drop(self, session: Optional[Any] = ..., **kwargs: Any) -> None: ...
+     def rename(
+         self,
+         new_name: str,
+         session: Optional[Any] = ...,
+         **kwargs: Any,
+     ) -> Dict[str, Any]: ...
+     def options(
+         self, session: Optional[Any] = ..., **kwargs: Any
+     ) -> Dict[str, Any]: ...
+     def __getattr__(self, name: str) -> Any: ...
+
+ def accelerate(
+     pymongo_collection: PyMongoCollection,
+     schema: Any,
+     mongo_uri: Union[str, Any],
+     cache_dir: Optional[str] = ...,
+     enable_cache: bool = ...,
+     metadata_cardinality: int = ...,
+     approx_document_size_bytes: int = ...,
+ ) -> XLR8Collection: ...
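
Taken together with xlr8/constants.py below, the stub above suggests the intended call pattern: wrap a PyMongo collection with accelerate(), then issue find() calls that return an XLR8Cursor while all other methods delegate to PyMongo. The snippet below is a minimal sketch only; the database and collection names, the schema placeholder, and the assumption that accelerate is importable from the package root are not confirmed by this diff.

    from datetime import datetime, timezone

    from pymongo import MongoClient

    # Assumption: accelerate is re-exported at the package root; this diff only
    # shows it defined next to XLR8Collection.
    from xlr8 import accelerate

    client = MongoClient("mongodb://localhost:27017")
    raw = client["analytics"]["events"]  # hypothetical database and collection

    # schema is typed as Any in the stub; None is used here purely as a placeholder.
    coll = accelerate(raw, schema=None, mongo_uri="mongodb://localhost:27017")

    # Accelerated path: find() returns an XLR8Cursor for parallel query execution.
    cursor = coll.find(
        {"ts": {"$gte": datetime(2024, 1, 1, tzinfo=timezone.utc)}},
        batch_size=5_000,
    )

    # Writes and everything else delegate to the wrapped PyMongo collection.
    coll.insert_one({"ts": datetime.now(timezone.utc), "value": 1})

    # Escape hatch back to the underlying PyMongo collection.
    pymongo_coll = coll.raw_collection()
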
xlr8/constants.py ADDED
@@ -0,0 +1,24 @@
+ """
+ XLR8 constants and configuration values.
+
+ Centralized constants to avoid magic numbers scattered throughout the codebase.
+ All tuneable performance parameters should be defined here.
+ """
+
+ # =============================================================================
+ # PARQUET FILE SETTINGS
+ # =============================================================================
+
+ # Default Parquet row group size; can be overridden via an argument to the
+ # special cursor methods, e.g. to_dataframe.
+ PARQUET_ROW_GROUP_SIZE = 100_000
+
+ # Default compression codec for Parquet files
+ DEFAULT_COMPRESSION = "zstd"
+
+ # =============================================================================
+ # BATCH PROCESSING
+ # =============================================================================
+
+ # Default batch size for DataFrame operations
+ DEFAULT_BATCH_SIZE = 10_000
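
As a concrete reading of the comments above: the constants are importable defaults, and the row-group override mentioned for the special cursor methods is shown only as a commented, hypothetical call because the keyword name accepted by to_dataframe is not part of this diff.

    from xlr8.constants import (
        DEFAULT_BATCH_SIZE,
        DEFAULT_COMPRESSION,
        PARQUET_ROW_GROUP_SIZE,
    )

    # Defaults exactly as defined in this module.
    assert PARQUET_ROW_GROUP_SIZE == 100_000
    assert DEFAULT_COMPRESSION == "zstd"
    assert DEFAULT_BATCH_SIZE == 10_000

    # Hypothetical override of the row group size via a special cursor method;
    # the actual keyword accepted by to_dataframe is not shown in this diff.
    # df = cursor.to_dataframe(row_group_size=50_000)
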
@@ -0,0 +1,43 @@
+ """
+ Execution engine for parallel query execution via Rust backend.
+
+ All parallel execution now goes through the Rust backend for GIL-free performance.
+
+ Components:
+ - executor: High-level parallel execution (execute_parallel_stream_to_cache)
+ - callback: Partitioned streaming for data lake population
+ - planner: Memory-aware execution planning and worker configuration
+
+ Python handles:
+ - Query planning and bracketing
+ - Memory budget calculations
+ - Result reading and DataFrame construction
+
+ Rust backend handles:
+ - Parallel MongoDB fetches (GIL-free)
+ - BSON decoding and Arrow encoding
+ - Memory-aware buffering
+ - Parquet writing
+ """
+
+ from .callback import PartitionWorkItem, execute_partitioned_callback
+ from .executor import execute_parallel_stream_to_cache
+ from .planner import (
+     Backend,
+     BackendConfig,
+     ExecutionPlan,
+     build_execution_plan,
+ )
+
+ __all__ = [
+     # Executor
+     "execute_parallel_stream_to_cache",
+     # Callback
+     "PartitionWorkItem",
+     "execute_partitioned_callback",
+     # Planner
+     "Backend",
+     "BackendConfig",
+     "ExecutionPlan",
+     "build_execution_plan",
+ ]
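
Based only on the exported names and the docstring's division of labor, a rough composition sketch follows. The subpackage import path and every argument shown are assumptions (this hunk carries no filename header, and the planner and executor signatures are not included in the diff).

    # Illustrative sketch only; keyword arguments and the module path are invented.
    from xlr8.execution import (  # assumed module path; not confirmed by this diff
        build_execution_plan,
        execute_parallel_stream_to_cache,
    )

    # Python side: build a memory-aware plan (query bracketing, memory budget,
    # worker configuration).
    plan = build_execution_plan(...)  # hypothetical arguments

    # Rust side: execute the plan with GIL-free parallel MongoDB fetches,
    # BSON-to-Arrow decoding, and Parquet writes into the cache.
    execute_parallel_stream_to_cache(plan)  # hypothetical call shape
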