xlr8-0.1.7b2-cp313-cp313-macosx_10_12_x86_64.whl

This diff represents the content of publicly released package versions on one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
xlr8/__init__.py ADDED
@@ -0,0 +1,113 @@
+ # XLR8: High-Performance MongoDB Acceleration Layer
+
+ """
+ XLR8 - Accelerate MongoDB analytical queries with Parquet caching.
+
+ XLR8 is a high-performance wrapper for PyMongo that:
+ - Decomposes queries into brackets for parallel execution
+ - Executes parallel async MongoDB fetches with a Rust backend
+ - Caches results in compressed Parquet files
+ - Reconstructs pandas/Polars DataFrames efficiently via the same Rust backend
+
+ Quick Start:
+ ```python
+ from datetime import datetime, timezone
+
+ from pymongo import MongoClient
+ from xlr8 import accelerate, Schema, Types
+
+ # Define your schema with time field and typed fields
+ schema = Schema(
+     time_field="recordedAt",
+     fields={
+         "recordedAt": Types.Timestamp("ms", tz="UTC"),
+         "metadata.device_id": Types.ObjectId(),
+         "metadata.sensor_id": Types.ObjectId(),
+         "value": Types.Any(),  # Polymorphic - int, float, str, etc.
+     },
+     avg_doc_size_bytes=250,
+ )
+
+ # Connect to MongoDB and get collection
+ client = MongoClient("mongodb://localhost:27017")
+ collection = client["main"]["sensorData"]
+
+ # Wrap collection with schema for acceleration
+ xlr8_collection = accelerate(
+     collection,
+     schema=schema,
+     mongo_uri="mongodb://localhost:27017",
+ )
+
+ # Time window to query (example bounds)
+ start_date = datetime(2024, 1, 1, tzinfo=timezone.utc)
+ end_date = datetime(2024, 2, 1, tzinfo=timezone.utc)
+
+ # Use like normal PyMongo - find() returns XLR8Cursor
+ cursor = xlr8_collection.find({
+     "recordedAt": {"$gte": start_date, "$lt": end_date}
+ }).sort("recordedAt", 1)
+
+ # Accelerated DataFrame construction
+ df = cursor.to_dataframe()
+
+ # Clean up
+ client.close()
+ ```
+ """
+
+ from __future__ import annotations
+
+ from importlib.metadata import version as _get_version
+ from typing import TYPE_CHECKING
+
+ from .schema import types as Types
+ from .schema.schema import Schema
+
+ try:
+     __version__ = _get_version("xlr8")
+ except Exception:
+     __version__ = "0.0.0.dev"  # Fallback for editable installs without metadata
+
+ # Lazy loader for rust_backend to avoid import errors when Rust isn't built yet.
+ # This lets `from xlr8 import rust_backend` work without importing it at module load.
+ _rust_backend_cached = None
+ _collection_exports_cached: dict[str, object] | None = None
+
+
+ if TYPE_CHECKING:
+     from .collection.cursor import XLR8Cursor as XLR8Cursor
+     from .collection.wrapper import XLR8Collection as XLR8Collection
+     from .collection.wrapper import accelerate as accelerate
+
+
+ def __getattr__(name: str):
+     global _rust_backend_cached
+     global _collection_exports_cached
+     if name == "rust_backend":
+         if _rust_backend_cached is None:
+             # Import the module directly to avoid recursion
+             import importlib
+
+             _rust_backend_cached = importlib.import_module(
+                 ".rust_backend", package="xlr8"
+             )
+         return _rust_backend_cached
+
+     if name in {"XLR8Cursor", "XLR8Collection", "accelerate"}:
+         if _collection_exports_cached is None:
+             from .collection.cursor import XLR8Cursor
+             from .collection.wrapper import XLR8Collection, accelerate
+
+             _collection_exports_cached = {
+                 "XLR8Cursor": XLR8Cursor,
+                 "XLR8Collection": XLR8Collection,
+                 "accelerate": accelerate,
+             }
+         return _collection_exports_cached[name]
+
+     raise AttributeError(f"module 'xlr8' has no attribute '{name}'")
+
+
+ __all__ = [
+     "Schema",
+     "Types",
+     "rust_backend",
+     "accelerate",
+     "XLR8Collection",
+     "XLR8Cursor",
+ ]
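The module-level `__getattr__` above is the PEP 562 lazy-export pattern: the compiled Rust extension and the collection wrappers are imported only on first attribute access, so `import xlr8` stays cheap and succeeds even before the Rust backend is built. A minimal sketch of the observable behavior, assuming an installed build with the extension available:

```python
import xlr8

# Only Schema/Types were imported eagerly; no Rust code has loaded yet.
print(xlr8.__version__)

# First access runs importlib.import_module(".rust_backend", package="xlr8")
# inside __getattr__ and stores the result in _rust_backend_cached.
backend = xlr8.rust_backend
assert backend is xlr8.rust_backend  # later lookups return the cached module

# Names listed in __all__ with no module-level binding resolve the same lazy way.
from xlr8 import accelerate, XLR8Collection, XLR8Cursor
```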
Binary file
xlr8/_xlr8_rust.pyi ADDED
@@ -0,0 +1,71 @@
+ """Type stubs for _xlr8_rust native module.
+
+ This is the compiled Rust extension. Type hints enable intellisense.
+ """
+
+ from typing import Any, Dict, List
+
+ def fetch_chunks_bson(
+     mongodb_uri: str,
+     db_name: str,
+     collection_name: str,
+     chunks_bson: bytes,
+     schema_json: str,
+     cache_dir: str,
+     num_workers: int,
+     batch_size: int,
+     flush_trigger_mb: int,
+     avg_doc_size_bytes: int,
+     sort_spec_json: str,
+     time_field: str,
+     projection_json: str,
+     row_group_size: int | None = None,
+ ) -> Dict[str, Any]:
+     """Fetch MongoDB documents in parallel chunks and write to Parquet.
+
+     Args:
+         mongodb_uri: MongoDB connection string
+         db_name: Database name
+         collection_name: Collection name
+         chunks_bson: BSON-encoded chunk definitions
+         schema_json: JSON string describing Arrow schema
+         cache_dir: Directory where Parquet files will be written
+         num_workers: Number of parallel workers
+         batch_size: Documents per MongoDB batch
+         flush_trigger_mb: Memory threshold for flushing to disk (MB)
+         avg_doc_size_bytes: Average document size for memory estimation
+         sort_spec_json: JSON sort specification
+         time_field: Field name containing timestamps
+         projection_json: MongoDB projection as JSON
+         row_group_size: Parquet row group size (None = use Arrow default)
+
+     Returns:
+         Dictionary with total_docs, total_files, duration_secs
+     """
+     ...
+
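Taken together, these parameters describe one parallel fetch job: where to read (URI, database, collection, plus the BSON-encoded chunk list), how to decode (Arrow schema, time field, projection, sort), and how to spill to disk (cache directory, worker count, batch and flush sizes). A sketch of a direct call with illustrative values; in practice xlr8 builds `chunks_bson` and `schema_json` internally, and their exact layouts are not documented in this stub, so the placeholder payloads below would not fetch real data:

```python
import json

from xlr8 import _xlr8_rust  # the compiled extension this stub describes

stats = _xlr8_rust.fetch_chunks_bson(
    mongodb_uri="mongodb://localhost:27017",
    db_name="main",
    collection_name="sensorData",
    chunks_bson=b"",    # placeholder; real chunk definitions are BSON built by xlr8
    schema_json="{}",   # placeholder; real value describes the Arrow schema
    cache_dir="/tmp/xlr8_cache",
    num_workers=4,
    batch_size=1000,
    flush_trigger_mb=256,
    avg_doc_size_bytes=250,
    sort_spec_json=json.dumps({"recordedAt": 1}),
    time_field="recordedAt",
    projection_json="{}",   # empty projection = all fields
    row_group_size=None,    # None = Arrow's default row group size
)
print(stats["total_docs"], stats["total_files"], stats["duration_secs"])
```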
+ def decode_any_struct_arrow(
+     arrow_array: Any,  # pyarrow.StructArray
+ ) -> List[Any]:
+     """Decode PyArrow StructArray (Any type) to Python values.
+
+     Args:
+         arrow_array: PyArrow StructArray with 13-field Any encoding
+
+     Returns:
+         List of decoded Python values
+     """
+     ...
+
+ def encode_any_values_to_arrow(
+     values: List[Any],
+ ) -> Any:  # pyarrow.StructArray
+     """Encode Python values to PyArrow StructArray (Any type).
+
+     Args:
+         values: List of Python values to encode
+
+     Returns:
+         PyArrow StructArray with 13-field Any encoding
+     """
+     ...
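Since `encode_any_values_to_arrow` and `decode_any_struct_arrow` are a codec pair over the same 13-field struct encoding, a round trip is the natural smoke test. A minimal sketch, assuming the extension is built and that the codec round-trips these primitive values losslessly:

```python
from xlr8 import _xlr8_rust

# Mixed-type values such as a polymorphic Types.Any() field might hold.
values = [42, 3.14, "sensor-7", None]

# Encode to the 13-field StructArray representation...
struct_array = _xlr8_rust.encode_any_values_to_arrow(values)

# ...and decode back to plain Python objects.
decoded = _xlr8_rust.decode_any_struct_arrow(struct_array)
assert decoded == values
```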
@@ -0,0 +1,58 @@
+ """
+ Query analysis and execution planning.
+
+ This module provides utilities for analyzing MongoDB queries and
+ creating optimal execution plans for parallel processing.
+ """
+
+ from .brackets import (
+     build_brackets_for_find,
+ )
+ from .chunker import (
+     chunk_time_range,
+ )
+ from .inspector import (
+     # Operator classification sets
+     ALWAYS_ALLOWED,
+     CONDITIONAL,
+     NEVER_ALLOWED,
+     # Validation
+     ValidationResult,
+     check_conditional_operators,
+     extract_time_bounds_recursive,
+     generate_sort_sql,
+     get_sort_field_info,
+     has_forbidden_ops,
+     is_chunkable_query,
+     normalize_datetime,
+     # Query analysis
+     or_depth,
+     split_global_and,
+     validate_query_for_chunking,
+     validate_sort_field,
+ )
+
+ __all__ = [
+     # inspector - operator sets
+     "ALWAYS_ALLOWED",
+     "CONDITIONAL",
+     "NEVER_ALLOWED",
+     # inspector - validation
+     "ValidationResult",
+     "has_forbidden_ops",
+     "validate_query_for_chunking",
+     "validate_sort_field",
+     "get_sort_field_info",
+     "generate_sort_sql",
+     "check_conditional_operators",
+     # inspector - analysis
+     "or_depth",
+     "split_global_and",
+     "normalize_datetime",
+     "extract_time_bounds_recursive",
+     "is_chunkable_query",
+     # brackets
+     "build_brackets_for_find",
+     # chunker
+     "chunk_time_range",
+ ]
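The exported names suggest the planning pipeline: validate that a filter uses only chunk-safe operators, extract its time bounds, then split the range into chunks for parallel fetching. A hypothetical sketch of that flow follows; this diff omits the actual signatures (and the file's path, presumably xlr8's analysis subpackage), so the import path and every argument shape below are assumptions rather than the package's real API:

```python
from datetime import datetime, timezone

from xlr8.analysis import (  # assumed import path for this subpackage
    chunk_time_range,
    extract_time_bounds_recursive,
    validate_query_for_chunking,
)

query = {
    "recordedAt": {
        "$gte": datetime(2024, 1, 1, tzinfo=timezone.utc),
        "$lt": datetime(2024, 2, 1, tzinfo=timezone.utc),
    }
}

# Hypothetical calls: the names are real exports, the argument shapes are guesses.
result = validate_query_for_chunking(query)        # -> ValidationResult
start, end = extract_time_bounds_recursive(query)  # time bounds of the filter
chunks = chunk_time_range(start, end)              # sub-ranges to fetch in parallel
```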