PyPI - xlr8 - Versions diffs - 0.1.2__py3-none-any.whl - Mend

xlr8 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30)

xlr8/__init__.py +109 -0
xlr8/_xlr8_rust.pyi +71 -0
xlr8/analysis/__init__.py +58 -0
xlr8/analysis/brackets.py +1201 -0
xlr8/analysis/chunker.py +118 -0
xlr8/analysis/inspector.py +1889 -0
xlr8/collection/__init__.py +6 -0
xlr8/collection/cursor.py +2145 -0
xlr8/collection/cursor.pyi +173 -0
xlr8/collection/wrapper.py +661 -0
xlr8/collection/wrapper.pyi +218 -0
xlr8/constants.py +24 -0
xlr8/execution/__init__.py +43 -0
xlr8/execution/callback.py +792 -0
xlr8/execution/executor.py +500 -0
xlr8/execution/planner.py +377 -0
xlr8/py.typed +1 -0
xlr8/rust_backend.py +42 -0
xlr8/rust_backend.pyi +71 -0
xlr8/schema/__init__.py +42 -0
xlr8/schema/encoder.py +235 -0
xlr8/schema/schema.py +265 -0
xlr8/schema/types.py +239 -0
xlr8/storage/__init__.py +17 -0
xlr8/storage/cache.py +228 -0
xlr8/storage/reader.py +1276 -0
xlr8-0.1.2.dist-info/METADATA +177 -0
xlr8-0.1.2.dist-info/RECORD +30 -0
xlr8-0.1.2.dist-info/WHEEL +4 -0
xlr8-0.1.2.dist-info/licenses/LICENSE +201 -0

xlr8/collection/wrapper.pyi ADDED Viewed

@@ -0,0 +1,218 @@
+"""Type stubs for XLR8 collection wrapper - provides IDE autocomplete."""
+from typing import Any, Dict, List, Optional, Tuple, Union
+from pymongo.collection import Collection as PyMongoCollection
+from pymongo.results import (
+    BulkWriteResult,
+    DeleteResult,
+    InsertManyResult,
+    InsertOneResult,
+    UpdateResult,
+)
+from .cursor import XLR8Cursor
+class XLR8Collection:
+    """
+    XLR8 accelerated collection - drop-in replacement for PyMongo collection.
+    Supports all PyMongo collection methods via delegation, with an accelerated
+    .find() that returns an XLR8Cursor for parallel query execution.
+    For direct access to the underlying PyMongo collection, use .raw_collection().
+    """
+    def __init__(
+        self,
+        pymongo_collection: PyMongoCollection,
+        schema: Optional[Any] = ...,
+        mongo_uri: Optional[str] = ...,
+        approx_document_size_bytes: int = ...,
+    ) -> None: ...
+    def find(
+        self,
+        filter: Optional[Dict[str, Any]] = ...,
+        projection: Optional[Dict[str, Any]] = ...,
+        skip: int = ...,
+        limit: int = ...,
+        sort: Optional[List[Tuple[str, int]]] = ...,
+        batch_size: int = ...,
+    ) -> XLR8Cursor: ...
+    def raw_collection(self) -> PyMongoCollection: ...
+    @property
+    def name(self) -> str: ...
+    @property
+    def full_name(self) -> str: ...
+    @property
+    def database(self) -> Any: ...
+    def insert_one(
+        self,
+        document: Dict[str, Any],
+        bypass_document_validation: bool = ...,
+        session: Optional[Any] = ...,
+    ) -> InsertOneResult: ...
+    def insert_many(
+        self,
+        documents: List[Dict[str, Any]],
+        ordered: bool = ...,
+        bypass_document_validation: bool = ...,
+        session: Optional[Any] = ...,
+    ) -> InsertManyResult: ...
+    def update_one(
+        self,
+        filter: Dict[str, Any],
+        update: Dict[str, Any],
+        upsert: bool = ...,
+        bypass_document_validation: bool = ...,
+        collation: Optional[Dict[str, Any]] = ...,
+        array_filters: Optional[List[Dict[str, Any]]] = ...,
+        hint: Optional[Union[str, List[Tuple[str, int]]]] = ...,
+        session: Optional[Any] = ...,
+    ) -> UpdateResult: ...
+    def update_many(
+        self,
+        filter: Dict[str, Any],
+        update: Dict[str, Any],
+        upsert: bool = ...,
+        array_filters: Optional[List[Dict[str, Any]]] = ...,
+        bypass_document_validation: bool = ...,
+        collation: Optional[Dict[str, Any]] = ...,
+        hint: Optional[Union[str, List[Tuple[str, int]]]] = ...,
+        session: Optional[Any] = ...,
+    ) -> UpdateResult: ...
+    def replace_one(
+        self,
+        filter: Dict[str, Any],
+        replacement: Dict[str, Any],
+        upsert: bool = ...,
+        bypass_document_validation: bool = ...,
+        collation: Optional[Dict[str, Any]] = ...,
+        hint: Optional[Union[str, List[Tuple[str, int]]]] = ...,
+        session: Optional[Any] = ...,
+    ) -> UpdateResult: ...
+    def delete_one(
+        self,
+        filter: Dict[str, Any],
+        collation: Optional[Dict[str, Any]] = ...,
+        hint: Optional[Union[str, List[Tuple[str, int]]]] = ...,
+        session: Optional[Any] = ...,
+    ) -> DeleteResult: ...
+    def delete_many(
+        self,
+        filter: Dict[str, Any],
+        collation: Optional[Dict[str, Any]] = ...,
+        hint: Optional[Union[str, List[Tuple[str, int]]]] = ...,
+        session: Optional[Any] = ...,
+    ) -> DeleteResult: ...
+    def find_one(
+        self,
+        filter: Optional[Dict[str, Any]] = ...,
+        *args: Any,
+        **kwargs: Any,
+    ) -> Optional[Dict[str, Any]]: ...
+    def find_one_and_delete(
+        self,
+        filter: Dict[str, Any],
+        projection: Optional[Dict[str, Any]] = ...,
+        sort: Optional[List[Tuple[str, int]]] = ...,
+        hint: Optional[Union[str, List[Tuple[str, int]]]] = ...,
+        session: Optional[Any] = ...,
+        **kwargs: Any,
+    ) -> Optional[Dict[str, Any]]: ...
+    def find_one_and_replace(
+        self,
+        filter: Dict[str, Any],
+        replacement: Dict[str, Any],
+        projection: Optional[Dict[str, Any]] = ...,
+        sort: Optional[List[Tuple[str, int]]] = ...,
+        upsert: bool = ...,
+        return_document: bool = ...,
+        hint: Optional[Union[str, List[Tuple[str, int]]]] = ...,
+        session: Optional[Any] = ...,
+        **kwargs: Any,
+    ) -> Optional[Dict[str, Any]]: ...
+    def find_one_and_update(
+        self,
+        filter: Dict[str, Any],
+        update: Dict[str, Any],
+        projection: Optional[Dict[str, Any]] = ...,
+        sort: Optional[List[Tuple[str, int]]] = ...,
+        upsert: bool = ...,
+        return_document: bool = ...,
+        array_filters: Optional[List[Dict[str, Any]]] = ...,
+        hint: Optional[Union[str, List[Tuple[str, int]]]] = ...,
+        session: Optional[Any] = ...,
+        **kwargs: Any,
+    ) -> Optional[Dict[str, Any]]: ...
+    def count_documents(
+        self,
+        filter: Dict[str, Any],
+        session: Optional[Any] = ...,
+        **kwargs: Any,
+    ) -> int: ...
+    def estimated_document_count(self, **kwargs: Any) -> int: ...
+    def distinct(
+        self,
+        key: str,
+        filter: Optional[Dict[str, Any]] = ...,
+        session: Optional[Any] = ...,
+        **kwargs: Any,
+    ) -> List[Any]: ...
+    def aggregate(
+        self,
+        pipeline: List[Dict[str, Any]],
+        session: Optional[Any] = ...,
+        **kwargs: Any,
+    ) -> Any: ...
+    def bulk_write(
+        self,
+        requests: List[Any],
+        ordered: bool = ...,
+        bypass_document_validation: bool = ...,
+        session: Optional[Any] = ...,
+    ) -> BulkWriteResult: ...
+    def create_index(
+        self,
+        keys: Union[str, List[Tuple[str, int]]],
+        session: Optional[Any] = ...,
+        **kwargs: Any,
+    ) -> str: ...
+    def create_indexes(
+        self,
+        indexes: List[Any],
+        session: Optional[Any] = ...,
+        **kwargs: Any,
+    ) -> List[str]: ...
+    def drop_index(
+        self,
+        index_or_name: Union[str, List[Tuple[str, int]]],
+        session: Optional[Any] = ...,
+        **kwargs: Any,
+    ) -> None: ...
+    def drop_indexes(self, session: Optional[Any] = ..., **kwargs: Any) -> None: ...
+    def list_indexes(self, session: Optional[Any] = ..., **kwargs: Any) -> Any: ...
+    def index_information(
+        self, session: Optional[Any] = ..., **kwargs: Any
+    ) -> Dict[str, Any]: ...
+    def drop(self, session: Optional[Any] = ..., **kwargs: Any) -> None: ...
+    def rename(
+        self,
+        new_name: str,
+        session: Optional[Any] = ...,
+        **kwargs: Any,
+    ) -> Dict[str, Any]: ...
+    def options(
+        self, session: Optional[Any] = ..., **kwargs: Any
+    ) -> Dict[str, Any]: ...
+    def __getattr__(self, name: str) -> Any: ...
+def accelerate(
+    pymongo_collection: PyMongoCollection,
+    schema: Any,
+    mongo_uri: Union[str, Any],
+    cache_dir: Optional[str] = ...,
+    enable_cache: bool = ...,
+    metadata_cardinality: int = ...,
+    approx_document_size_bytes: int = ...,
+) -> XLR8Collection: ...

xlr8/constants.py ADDED Viewed

@@ -0,0 +1,24 @@
+"""
+XLR8 constants and configuration values.
+Centralized constants to avoid magic numbers scattered throughout the codebase.
+All tuneable performance parameters should be defined here.
+"""
+# =============================================================================
+# PARQUET FILE SETTINGS
+# =============================================================================
+# Default row group size for compression. It can be overridden via an argument
+# passed to the special cursor methods, e.g. to_dataframe.
+PARQUET_ROW_GROUP_SIZE = 100_000
+# Default compression codec for Parquet files
+DEFAULT_COMPRESSION = "zstd"
+# =============================================================================
+# BATCH PROCESSING
+# =============================================================================
+# Default batch size for DataFrame operations
+DEFAULT_BATCH_SIZE = 10_000

xlr8/execution/__init__.py ADDED Viewed

@@ -0,0 +1,43 @@
+"""
+Execution engine for parallel query execution via Rust backend.
+All parallel execution now goes through the Rust backend for GIL-free performance.
+Components:
+- executor: High-level parallel execution (execute_parallel_stream_to_cache)
+- callback: Partitioned streaming for data lake population
+- planner: Memory-aware execution planning and worker configuration
+Python handles:
+- Query planning and bracketing
+- Memory budget calculations
+- Result reading and DataFrame construction
+Rust backend handles:
+- Parallel MongoDB fetches (GIL-free)
+- BSON decoding and Arrow encoding
+- Memory-aware buffering
+- Parquet writing
+"""
+from .callback import PartitionWorkItem, execute_partitioned_callback
+from .executor import execute_parallel_stream_to_cache
+from .planner import (
+    Backend,
+    BackendConfig,
+    ExecutionPlan,
+    build_execution_plan,
+)
+__all__ = [
+    # Executor
+    "execute_parallel_stream_to_cache",
+    # Callback
+    "PartitionWorkItem",
+    "execute_partitioned_callback",
+    # Planner
+    "Backend",
+    "BackendConfig",
+    "ExecutionPlan",
+    "build_execution_plan",
+]