zoopipe 2026.1.14 (cp310-abi3-win_amd64.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zoopipe/__init__.py +53 -0
- zoopipe/core.py +137 -0
- zoopipe/hooks/__init__.py +3 -0
- zoopipe/hooks/base.py +29 -0
- zoopipe/input_adapter/__init__.py +19 -0
- zoopipe/input_adapter/arrow.py +24 -0
- zoopipe/input_adapter/base.py +8 -0
- zoopipe/input_adapter/csv.py +36 -0
- zoopipe/input_adapter/duckdb.py +38 -0
- zoopipe/input_adapter/json.py +19 -0
- zoopipe/input_adapter/parquet.py +24 -0
- zoopipe/input_adapter/pygen.py +23 -0
- zoopipe/input_adapter/sql.py +35 -0
- zoopipe/output_adapter/__init__.py +19 -0
- zoopipe/output_adapter/arrow.py +20 -0
- zoopipe/output_adapter/base.py +8 -0
- zoopipe/output_adapter/csv.py +31 -0
- zoopipe/output_adapter/duckdb.py +31 -0
- zoopipe/output_adapter/json.py +28 -0
- zoopipe/output_adapter/parquet.py +19 -0
- zoopipe/output_adapter/pygen.py +16 -0
- zoopipe/output_adapter/sql.py +27 -0
- zoopipe/report.py +108 -0
- zoopipe/zoopipe_rust_core.pyd +0 -0
- zoopipe-2026.1.14.dist-info/METADATA +110 -0
- zoopipe-2026.1.14.dist-info/RECORD +28 -0
- zoopipe-2026.1.14.dist-info/WHEEL +4 -0
- zoopipe-2026.1.14.dist-info/licenses/LICENSE +21 -0
zoopipe/__init__.py
ADDED
@@ -0,0 +1,53 @@
+from zoopipe.core import Pipe
+from zoopipe.hooks.base import BaseHook, HookStore
+from zoopipe.input_adapter.arrow import ArrowInputAdapter
+from zoopipe.input_adapter.csv import CSVInputAdapter
+from zoopipe.input_adapter.duckdb import DuckDBInputAdapter
+from zoopipe.input_adapter.json import JSONInputAdapter
+from zoopipe.input_adapter.parquet import ParquetInputAdapter
+from zoopipe.input_adapter.pygen import PyGeneratorInputAdapter
+from zoopipe.input_adapter.sql import SQLInputAdapter
+from zoopipe.output_adapter.arrow import ArrowOutputAdapter
+from zoopipe.output_adapter.csv import CSVOutputAdapter
+from zoopipe.output_adapter.duckdb import DuckDBOutputAdapter
+from zoopipe.output_adapter.json import JSONOutputAdapter
+from zoopipe.output_adapter.parquet import ParquetOutputAdapter
+from zoopipe.output_adapter.pygen import PyGeneratorOutputAdapter
+from zoopipe.output_adapter.sql import SQLOutputAdapter
+from zoopipe.report import (
+    EntryStatus,
+    EntryTypedDict,
+    FlowReport,
+    FlowStatus,
+    get_logger,
+)
+from zoopipe.zoopipe_rust_core import MultiThreadExecutor, SingleThreadExecutor
+
+__all__ = [
+    "Pipe",
+    "FlowReport",
+    "FlowStatus",
+    "BaseHook",
+    "HookStore",
+    "EntryStatus",
+    "EntryTypedDict",
+    "get_logger",
+    "SingleThreadExecutor",
+    "MultiThreadExecutor",
+    # Input Adapters
+    "ArrowInputAdapter",
+    "CSVInputAdapter",
+    "DuckDBInputAdapter",
+    "JSONInputAdapter",
+    "PyGeneratorInputAdapter",
+    "SQLInputAdapter",
+    "ParquetInputAdapter",
+    # Output Adapters
+    "ArrowOutputAdapter",
+    "CSVOutputAdapter",
+    "DuckDBOutputAdapter",
+    "JSONOutputAdapter",
+    "PyGeneratorOutputAdapter",
+    "SQLOutputAdapter",
+    "ParquetOutputAdapter",
+]
zoopipe/core.py
ADDED
@@ -0,0 +1,137 @@
+import logging
+import threading
+import typing
+
+from pydantic import TypeAdapter
+
+from zoopipe.report import EntryStatus, FlowReport, get_logger
+from zoopipe.zoopipe_rust_core import (
+    MultiThreadExecutor,
+    NativePipe,
+    SingleThreadExecutor,
+)
+
+
+class Pipe:
+    def __init__(
+        self,
+        input_adapter: typing.Any,
+        output_adapter: typing.Any,
+        error_output_adapter: typing.Any = None,
+        schema_model: typing.Any = None,
+        pre_validation_hooks: list[typing.Any] | None = None,
+        post_validation_hooks: list[typing.Any] | None = None,
+        logger: logging.Logger | None = None,
+        report_update_interval: int = 1,
+        executor: typing.Any = None,
+    ) -> None:
+        from zoopipe.zoopipe_rust_core import SingleThreadExecutor
+
+        self.input_adapter = input_adapter
+        self.output_adapter = output_adapter
+        self.error_output_adapter = error_output_adapter
+        self.schema_model = schema_model
+
+        self.pre_validation_hooks = pre_validation_hooks or []
+        self.post_validation_hooks = post_validation_hooks or []
+
+        self.logger = logger or get_logger()
+
+        self.report_update_interval = report_update_interval
+        self.executor = executor or SingleThreadExecutor()
+        self._report = FlowReport()
+        self._thread: threading.Thread | None = None
+        self._store: dict[str, typing.Any] = {}
+        self._validator = TypeAdapter(self.schema_model) if self.schema_model else None
+
+    def _process_batch(self, entries: list[dict]) -> list[dict]:
+        for hook in self.pre_validation_hooks:
+            entries = hook.execute(entries, self._store)
+
+        if self._validator:
+            for entry in entries:
+                try:
+                    processed = self._validator.validate_python(entry["raw_data"])
+                    entry["validated_data"] = (
+                        processed.model_dump()
+                        if hasattr(processed, "model_dump")
+                        else processed
+                    )
+                    entry["status"] = EntryStatus.VALIDATED
+                except Exception as e:
+                    entry["status"] = EntryStatus.FAILED
+                    entry["errors"].append({"msg": str(e), "type": "validation_error"})
+
+        for hook in self.post_validation_hooks:
+            entries = hook.execute(entries, self._store)
+
+        return entries
+
+    @property
+    def report(self) -> FlowReport:
+        return self._report
+
+    def start(self) -> None:
+        if self._thread and self._thread.is_alive():
+            raise RuntimeError("Pipe is already running")
+
+        reader = self.input_adapter.get_native_reader()
+        writer = self.output_adapter.get_native_writer()
+        error_writer = None
+        if self.error_output_adapter:
+            error_writer = self.error_output_adapter.get_native_writer()
+
+        native_pipe = NativePipe(
+            reader=reader,
+            writer=writer,
+            error_writer=error_writer,
+            batch_processor=self._process_batch,
+            report=self._report,
+            report_update_interval=self.report_update_interval,
+            executor=self.executor,
+        )
+
+        self._thread = threading.Thread(
+            target=self._run_native,
+            args=(native_pipe,),
+            daemon=True,
+        )
+        self._thread.start()
+
+    def _run_native(self, native_pipe: NativePipe) -> None:
+        try:
+            for hook in self.pre_validation_hooks:
+                hook.setup(self._store)
+            for hook in self.post_validation_hooks:
+                hook.setup(self._store)
+
+            native_pipe.run()
+        except Exception as e:
+            self.logger.error(f"Pipeline execution failed: {e}")
+            self._report._mark_failed(e)
+            raise
+        finally:
+            for hook in self.pre_validation_hooks:
+                hook.teardown(self._store)
+            for hook in self.post_validation_hooks:
+                hook.teardown(self._store)
+
+    def shutdown(self) -> None:
+        self._report.abort()
+
+    def wait(self, timeout: float | None = None) -> bool:
+        return self._report.wait(timeout)
+
+    def __enter__(self) -> "Pipe":
+        self.start()
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+        if not self._report.is_finished:
+            self.shutdown()
+
+    def __repr__(self) -> str:
+        return f"<Pipe input={self.input_adapter} output={self.output_adapter}>"
+
+
+__all__ = ["Pipe", "SingleThreadExecutor", "MultiThreadExecutor"]
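A note on the lifecycle exposed by core.py: start() spawns the native run in a daemon thread and returns immediately, wait() blocks on the report's finished event, and __exit__ aborts any run that has not yet finished. A minimal usage sketch against that API (the file names are illustrative, not from the package):

```python
from zoopipe import CSVInputAdapter, JSONOutputAdapter, Pipe

# Explicit lifecycle: start() returns immediately; wait() blocks until done.
pipe = Pipe(
    input_adapter=CSVInputAdapter("users.csv"),      # illustrative path
    output_adapter=JSONOutputAdapter("users.json"),  # illustrative path
)
pipe.start()
pipe.wait()
print(pipe.report)

# Context-manager form: __enter__ calls start(). Note that __exit__ calls
# shutdown() (which aborts the run) unless the report is already finished,
# so a run-to-completion flow needs a wait() inside the block.
with Pipe(
    input_adapter=CSVInputAdapter("users.csv"),
    output_adapter=JSONOutputAdapter("users.json"),
) as pipe:
    pipe.wait()
```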
zoopipe/hooks/base.py
ADDED
@@ -0,0 +1,29 @@
+import typing
+
+from zoopipe.report import EntryTypedDict
+
+HookStore = dict[str, typing.Any]
+
+
+class HookPriority:
+    VERY_HIGH = 0
+    HIGH = 25
+    NORMAL = 50
+    LOW = 75
+    VERY_LOW = 100
+
+
+class BaseHook:
+    def __init__(self, priority: int = HookPriority.NORMAL):
+        self.priority = priority
+
+    def setup(self, store: HookStore) -> None:
+        pass
+
+    def execute(
+        self, entries: list[EntryTypedDict], store: HookStore
+    ) -> list[EntryTypedDict]:
+        return entries
+
+    def teardown(self, store: HookStore) -> None:
+        pass
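BaseHook is a no-op template: subclasses override setup, execute, or teardown, and the HookPriority constants order execution. A hypothetical enrichment hook sketched against this API (the class name and the "source" field are invented for illustration):

```python
from zoopipe import BaseHook, EntryTypedDict, HookStore
from zoopipe.hooks.base import HookPriority


class StampSourceHook(BaseHook):
    """Hypothetical hook that tags each entry's raw_data with a source label."""

    def __init__(self, source: str):
        super().__init__(priority=HookPriority.HIGH)
        self.source = source

    def execute(
        self, entries: list[EntryTypedDict], store: HookStore
    ) -> list[EntryTypedDict]:
        # Mutate raw_data before Pydantic validation sees it.
        for entry in entries:
            entry["raw_data"]["source"] = self.source
        return entries
```

Passed as pre_validation_hooks=[StampSourceHook("api")], it runs inside Pipe._process_batch before schema validation.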
zoopipe/input_adapter/__init__.py
ADDED
@@ -0,0 +1,19 @@
+from zoopipe.input_adapter.arrow import ArrowInputAdapter
+from zoopipe.input_adapter.base import BaseInputAdapter
+from zoopipe.input_adapter.csv import CSVInputAdapter
+from zoopipe.input_adapter.duckdb import DuckDBInputAdapter
+from zoopipe.input_adapter.json import JSONInputAdapter
+from zoopipe.input_adapter.parquet import ParquetInputAdapter
+from zoopipe.input_adapter.pygen import PyGeneratorInputAdapter
+from zoopipe.input_adapter.sql import SQLInputAdapter
+
+__all__ = [
+    "BaseInputAdapter",
+    "CSVInputAdapter",
+    "JSONInputAdapter",
+    "DuckDBInputAdapter",
+    "ArrowInputAdapter",
+    "SQLInputAdapter",
+    "ParquetInputAdapter",
+    "PyGeneratorInputAdapter",
+]
zoopipe/input_adapter/arrow.py
ADDED
@@ -0,0 +1,24 @@
+import pathlib
+import typing
+
+from zoopipe.input_adapter.base import BaseInputAdapter
+from zoopipe.zoopipe_rust_core import ArrowReader
+
+
+class ArrowInputAdapter(BaseInputAdapter):
+    def __init__(
+        self,
+        source: typing.Union[str, pathlib.Path],
+        generate_ids: bool = True,
+    ):
+        self.source_path = str(source)
+        self.generate_ids = generate_ids
+
+    def get_native_reader(self) -> ArrowReader:
+        return ArrowReader(
+            self.source_path,
+            generate_ids=self.generate_ids,
+        )
+
+
+__all__ = ["ArrowInputAdapter"]
zoopipe/input_adapter/csv.py
ADDED
@@ -0,0 +1,36 @@
+import pathlib
+import typing
+
+from zoopipe.input_adapter.base import BaseInputAdapter
+from zoopipe.zoopipe_rust_core import CSVReader
+
+
+class CSVInputAdapter(BaseInputAdapter):
+    def __init__(
+        self,
+        source: typing.Union[str, pathlib.Path],
+        delimiter: str = ",",
+        quotechar: str = '"',
+        skip_rows: int = 0,
+        fieldnames: list[str] | None = None,
+        generate_ids: bool = True,
+    ):
+        self.source_path = str(source)
+        self.delimiter = delimiter
+        self.quotechar = quotechar
+        self.skip_rows = skip_rows
+        self.fieldnames = fieldnames
+        self.generate_ids = generate_ids
+
+    def get_native_reader(self) -> CSVReader:
+        return CSVReader(
+            self.source_path,
+            delimiter=ord(self.delimiter),
+            quote=ord(self.quotechar),
+            skip_rows=self.skip_rows,
+            fieldnames=self.fieldnames,
+            generate_ids=self.generate_ids,
+        )
+
+
+__all__ = ["CSVInputAdapter"]
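Worth noting in csv.py: delimiter and quotechar are converted with ord() before reaching the Rust CSVReader, so each must be exactly one character. A configuration sketch (the path and field names are illustrative):

```python
from zoopipe import CSVInputAdapter

# Tab-separated input with the header row skipped and column names supplied
# in code; delimiter and quotechar travel to Rust as byte values via ord().
adapter = CSVInputAdapter(
    "events.tsv",                       # illustrative path
    delimiter="\t",
    skip_rows=1,
    fieldnames=["id", "name", "email"],
)
reader = adapter.get_native_reader()    # native CSVReader, consumed by Pipe
```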
zoopipe/input_adapter/duckdb.py
ADDED
@@ -0,0 +1,38 @@
+import pathlib
+import typing
+
+from zoopipe.input_adapter.base import BaseInputAdapter
+from zoopipe.zoopipe_rust_core import DuckDBReader
+
+
+class DuckDBInputAdapter(BaseInputAdapter):
+    def __init__(
+        self,
+        source: typing.Union[str, pathlib.Path],
+        query: str | None = None,
+        table_name: str | None = None,
+        generate_ids: bool = True,
+    ):
+        self.source_path = str(source)
+        self.generate_ids = generate_ids
+
+        if query is None and table_name is None:
+            raise ValueError("Either query or table_name must be provided")
+
+        if query is not None and table_name is not None:
+            raise ValueError("Only one of query or table_name should be provided")
+
+        if query is not None:
+            self.query = query
+        else:
+            self.query = f"SELECT * FROM {table_name}"
+
+    def get_native_reader(self) -> DuckDBReader:
+        return DuckDBReader(
+            self.source_path,
+            self.query,
+            generate_ids=self.generate_ids,
+        )
+
+
+__all__ = ["DuckDBInputAdapter"]
zoopipe/input_adapter/json.py
ADDED
@@ -0,0 +1,19 @@
+import pathlib
+import typing
+
+from zoopipe.input_adapter.base import BaseInputAdapter
+from zoopipe.zoopipe_rust_core import JSONReader
+
+
+class JSONInputAdapter(BaseInputAdapter):
+    def __init__(
+        self,
+        source: typing.Union[str, pathlib.Path],
+    ):
+        self.source_path = str(source)
+
+    def get_native_reader(self) -> JSONReader:
+        return JSONReader(self.source_path)
+
+
+__all__ = ["JSONInputAdapter"]
zoopipe/input_adapter/parquet.py
ADDED
@@ -0,0 +1,24 @@
+import pathlib
+import typing
+
+from zoopipe.input_adapter.base import BaseInputAdapter
+from zoopipe.zoopipe_rust_core import ParquetReader
+
+
+class ParquetInputAdapter(BaseInputAdapter):
+    def __init__(
+        self,
+        source: typing.Union[str, pathlib.Path],
+        generate_ids: bool = True,
+    ):
+        self.source_path = str(source)
+        self.generate_ids = generate_ids
+
+    def get_native_reader(self) -> ParquetReader:
+        return ParquetReader(
+            self.source_path,
+            generate_ids=self.generate_ids,
+        )
+
+
+__all__ = ["ParquetInputAdapter"]
zoopipe/input_adapter/pygen.py
ADDED
@@ -0,0 +1,23 @@
+import typing
+
+from zoopipe.input_adapter.base import BaseInputAdapter
+from zoopipe.zoopipe_rust_core import PyGeneratorReader
+
+
+class PyGeneratorInputAdapter(BaseInputAdapter):
+    def __init__(
+        self,
+        iterable: typing.Iterable[typing.Any],
+        generate_ids: bool = True,
+    ):
+        self.iterable = iterable
+        self.generate_ids = generate_ids
+
+    def get_native_reader(self) -> PyGeneratorReader:
+        return PyGeneratorReader(
+            self.iterable,
+            generate_ids=self.generate_ids,
+        )
+
+
+__all__ = ["PyGeneratorInputAdapter"]
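PyGeneratorInputAdapter accepts any Python iterable, which makes in-memory or synthetic data easy to pipe. A sketch, assuming the native PyGeneratorReader accepts plain dicts (the raw_data shape used elsewhere in the package suggests it does):

```python
from zoopipe import CSVOutputAdapter, Pipe, PyGeneratorInputAdapter


def records():
    # A generator keeps memory flat regardless of record count.
    for i in range(1_000):
        yield {"user_id": str(i), "username": f"user{i}", "age": 20 + i % 50}


pipe = Pipe(
    input_adapter=PyGeneratorInputAdapter(records()),
    output_adapter=CSVOutputAdapter("generated.csv"),  # illustrative path
)
pipe.start()
pipe.wait()
```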
zoopipe/input_adapter/sql.py
ADDED
@@ -0,0 +1,35 @@
+from zoopipe.input_adapter.base import BaseInputAdapter
+from zoopipe.zoopipe_rust_core import SQLReader
+
+
+class SQLInputAdapter(BaseInputAdapter):
+    def __init__(
+        self,
+        uri: str,
+        query: str | None = None,
+        table_name: str | None = None,
+        generate_ids: bool = True,
+    ):
+        self.uri = uri
+        self.generate_ids = generate_ids
+
+        if query is None and table_name is None:
+            raise ValueError("Either query or table_name must be provided")
+
+        if query is not None and table_name is not None:
+            raise ValueError("Only one of query or table_name should be provided")
+
+        if query is not None:
+            self.query = query
+        else:
+            self.query = f"SELECT * FROM {table_name}"
+
+    def get_native_reader(self) -> SQLReader:
+        return SQLReader(
+            self.uri,
+            self.query,
+            generate_ids=self.generate_ids,
+        )
+
+
+__all__ = ["SQLInputAdapter"]
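sql.py (like the DuckDB adapter above) enforces that exactly one of query and table_name is given; table_name is interpolated verbatim into "SELECT * FROM {table_name}", so it should only ever come from trusted code. Both styles, with an illustrative connection URI (supported schemes depend on the Rust core):

```python
from zoopipe import SQLInputAdapter

# Exactly one of query / table_name: passing both or neither raises ValueError.
by_table = SQLInputAdapter("sqlite://app.db", table_name="users")
by_query = SQLInputAdapter(
    "sqlite://app.db",  # illustrative URI
    query="SELECT id, email FROM users WHERE active = 1",
)
```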
zoopipe/output_adapter/__init__.py
ADDED
@@ -0,0 +1,19 @@
+from zoopipe.output_adapter.arrow import ArrowOutputAdapter
+from zoopipe.output_adapter.base import BaseOutputAdapter
+from zoopipe.output_adapter.csv import CSVOutputAdapter
+from zoopipe.output_adapter.duckdb import DuckDBOutputAdapter
+from zoopipe.output_adapter.json import JSONOutputAdapter
+from zoopipe.output_adapter.parquet import ParquetOutputAdapter
+from zoopipe.output_adapter.pygen import PyGeneratorOutputAdapter
+from zoopipe.output_adapter.sql import SQLOutputAdapter
+
+__all__ = [
+    "BaseOutputAdapter",
+    "CSVOutputAdapter",
+    "JSONOutputAdapter",
+    "DuckDBOutputAdapter",
+    "ArrowOutputAdapter",
+    "SQLOutputAdapter",
+    "ParquetOutputAdapter",
+    "PyGeneratorOutputAdapter",
+]
zoopipe/output_adapter/arrow.py
ADDED
@@ -0,0 +1,20 @@
+import pathlib
+import typing
+
+from zoopipe.output_adapter.base import BaseOutputAdapter
+from zoopipe.zoopipe_rust_core import ArrowWriter
+
+
+class ArrowOutputAdapter(BaseOutputAdapter):
+    def __init__(
+        self,
+        output: typing.Union[str, pathlib.Path],
+    ):
+        self.output_path = str(output)
+
+    def get_native_writer(self) -> ArrowWriter:
+        pathlib.Path(self.output_path).parent.mkdir(parents=True, exist_ok=True)
+        return ArrowWriter(self.output_path)
+
+
+__all__ = ["ArrowOutputAdapter"]
zoopipe/output_adapter/csv.py
ADDED
@@ -0,0 +1,31 @@
+import pathlib
+import typing
+
+from zoopipe.output_adapter.base import BaseOutputAdapter
+from zoopipe.zoopipe_rust_core import CSVWriter
+
+
+class CSVOutputAdapter(BaseOutputAdapter):
+    def __init__(
+        self,
+        output: typing.Union[str, pathlib.Path],
+        delimiter: str = ",",
+        quotechar: str = '"',
+        fieldnames: list[str] | None = None,
+    ):
+        self.output_path = str(output)
+        self.delimiter = delimiter
+        self.quotechar = quotechar
+        self.fieldnames = fieldnames
+
+    def get_native_writer(self) -> CSVWriter:
+        pathlib.Path(self.output_path).parent.mkdir(parents=True, exist_ok=True)
+        return CSVWriter(
+            self.output_path,
+            delimiter=ord(self.delimiter),
+            quote=ord(self.quotechar),
+            fieldnames=self.fieldnames,
+        )
+
+
+__all__ = ["CSVOutputAdapter"]
zoopipe/output_adapter/duckdb.py
ADDED
@@ -0,0 +1,31 @@
+import pathlib
+import typing
+
+from zoopipe.output_adapter.base import BaseOutputAdapter
+from zoopipe.zoopipe_rust_core import DuckDBWriter
+
+
+class DuckDBOutputAdapter(BaseOutputAdapter):
+    def __init__(
+        self,
+        output: typing.Union[str, pathlib.Path],
+        table_name: str,
+        mode: str = "replace",
+    ):
+        self.output_path = str(output)
+        self.table_name = table_name
+        self.mode = mode
+
+        if mode not in ["replace", "append", "fail"]:
+            raise ValueError("mode must be 'replace', 'append', or 'fail'")
+
+    def get_native_writer(self) -> DuckDBWriter:
+        pathlib.Path(self.output_path).parent.mkdir(parents=True, exist_ok=True)
+        return DuckDBWriter(
+            self.output_path,
+            self.table_name,
+            mode=self.mode,
+        )
+
+
+__all__ = ["DuckDBOutputAdapter"]
zoopipe/output_adapter/json.py
ADDED
@@ -0,0 +1,28 @@
+import pathlib
+import typing
+
+from zoopipe.output_adapter.base import BaseOutputAdapter
+from zoopipe.zoopipe_rust_core import JSONWriter
+
+
+class JSONOutputAdapter(BaseOutputAdapter):
+    def __init__(
+        self,
+        output: typing.Union[str, pathlib.Path],
+        format: str = "array",
+        indent: int | None = None,
+    ):
+        self.output_path = str(output)
+        self.format = format
+        self.indent = indent
+
+    def get_native_writer(self) -> JSONWriter:
+        pathlib.Path(self.output_path).parent.mkdir(parents=True, exist_ok=True)
+        return JSONWriter(
+            self.output_path,
+            format=self.format,
+            indent=self.indent,
+        )
+
+
+__all__ = ["JSONOutputAdapter"]
zoopipe/output_adapter/parquet.py
ADDED
@@ -0,0 +1,19 @@
+import pathlib
+import typing
+
+from zoopipe.output_adapter.base import BaseOutputAdapter
+from zoopipe.zoopipe_rust_core import ParquetWriter
+
+
+class ParquetOutputAdapter(BaseOutputAdapter):
+    def __init__(
+        self,
+        path: typing.Union[str, pathlib.Path],
+    ):
+        self.path = str(path)
+
+    def get_native_writer(self) -> ParquetWriter:
+        return ParquetWriter(self.path)
+
+
+__all__ = ["ParquetOutputAdapter"]
zoopipe/output_adapter/pygen.py
ADDED
@@ -0,0 +1,16 @@
+from zoopipe.output_adapter.base import BaseOutputAdapter
+from zoopipe.zoopipe_rust_core import PyGeneratorWriter
+
+
+class PyGeneratorOutputAdapter(BaseOutputAdapter):
+    def __init__(self, queue_size: int = 1000):
+        self._writer = PyGeneratorWriter(queue_size=queue_size)
+
+    def get_native_writer(self) -> PyGeneratorWriter:
+        return self._writer
+
+    def __iter__(self):
+        return self._writer
+
+
+__all__ = ["PyGeneratorOutputAdapter"]
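Unlike the other output adapters, PyGeneratorOutputAdapter builds its native writer eagerly and delegates __iter__ to it, so processed entries can be consumed as a Python stream while the pipe runs. A sketch (the exact shape yielded per entry is determined by the native PyGeneratorWriter):

```python
from zoopipe import CSVInputAdapter, Pipe, PyGeneratorOutputAdapter

output = PyGeneratorOutputAdapter(queue_size=100)
pipe = Pipe(
    input_adapter=CSVInputAdapter("users.csv"),  # illustrative path
    output_adapter=output,
)
pipe.start()

# Iteration pulls entries off the bounded queue as the Rust core fills it;
# get_native_writer() hands the same writer to the pipe, so both sides share it.
for entry in output:
    print(entry)

pipe.wait()
```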
zoopipe/output_adapter/sql.py
ADDED
@@ -0,0 +1,27 @@
+from zoopipe.output_adapter.base import BaseOutputAdapter
+from zoopipe.zoopipe_rust_core import SQLWriter
+
+
+class SQLOutputAdapter(BaseOutputAdapter):
+    def __init__(
+        self,
+        uri: str,
+        table_name: str,
+        mode: str = "replace",
+        batch_size: int = 500,
+    ):
+        self.uri = uri
+        self.table_name = table_name
+        self.mode = mode
+        self.batch_size = batch_size
+
+    def get_native_writer(self) -> SQLWriter:
+        return SQLWriter(
+            self.uri,
+            self.table_name,
+            mode=self.mode,
+            batch_size=self.batch_size,
+        )
+
+
+__all__ = ["SQLOutputAdapter"]
zoopipe/report.py
ADDED
@@ -0,0 +1,108 @@
+import enum
+import logging
+import sys
+import threading
+import typing
+from datetime import datetime
+
+
+def get_logger(name: str = "zoopipe") -> logging.Logger:
+    logger = logging.getLogger(name)
+    if not logger.handlers:
+        handler = logging.StreamHandler(sys.stdout)
+        handler.setFormatter(
+            logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
+        )
+        logger.addHandler(handler)
+        logger.setLevel(logging.INFO)
+    return logger
+
+
+class EntryStatus(enum.Enum):
+    PENDING = "pending"
+    VALIDATED = "validated"
+    FAILED = "failed"
+
+
+class EntryTypedDict(typing.TypedDict):
+    id: typing.Any
+    position: int | None
+    status: EntryStatus
+    raw_data: dict[str, typing.Any]
+    validated_data: dict[str, typing.Any] | None
+    errors: list[dict[str, typing.Any]]
+    metadata: dict[str, typing.Any]
+
+
+class FlowStatus(enum.Enum):
+    PENDING = "pending"
+    RUNNING = "running"
+    COMPLETED = "completed"
+    FAILED = "failed"
+    ABORTED = "aborted"
+
+
+class FlowReport:
+    def __init__(self) -> None:
+        self.status = FlowStatus.PENDING
+        self.total_processed = 0
+        self.success_count = 0
+        self.error_count = 0
+        self.ram_bytes = 0
+        self.exception: Exception | None = None
+        self.start_time: datetime | None = None
+        self.end_time: datetime | None = None
+        self._finished_event = threading.Event()
+
+    @property
+    def duration(self) -> float:
+        start = self.start_time
+        if not start:
+            return 0.0
+        end = self.end_time or datetime.now()
+        return (end - start).total_seconds()
+
+    @property
+    def items_per_second(self) -> float:
+        duration = self.duration
+        if duration == 0:
+            return 0.0
+        return self.total_processed / duration
+
+    @property
+    def is_finished(self) -> bool:
+        return self._finished_event.is_set()
+
+    def wait(self, timeout: float | None = None) -> bool:
+        return self._finished_event.wait(timeout)
+
+    def _mark_running(self) -> None:
+        self.status = FlowStatus.RUNNING
+        self.start_time = datetime.now()
+
+    def _mark_completed(self) -> None:
+        self.status = FlowStatus.COMPLETED
+        self.end_time = datetime.now()
+        self._finished_event.set()
+
+    def abort(self) -> None:
+        self.status = FlowStatus.ABORTED
+        self.end_time = datetime.now()
+        self._finished_event.set()
+
+    def _mark_failed(self, exception: Exception) -> None:
+        self.status = FlowStatus.FAILED
+        self.exception = exception
+        self.end_time = datetime.now()
+        self._finished_event.set()
+
+    def __repr__(self) -> str:
+        return (
+            f"<FlowReport status={self.status.value} "
+            f"processed={self.total_processed} "
+            f"success={self.success_count} "
+            f"error={self.error_count} "
+            f"ram={self.ram_bytes / 1024 / 1024:.2f}MB "
+            f"fps={self.items_per_second:.2f} "
+            f"duration={self.duration:.2f}s>"
+        )
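FlowReport doubles as a completion signal (a threading.Event behind wait() and is_finished) and a live metrics snapshot, so a caller can poll progress while the Rust core runs. A polling sketch (paths illustrative):

```python
from zoopipe import CSVInputAdapter, JSONOutputAdapter, Pipe

pipe = Pipe(
    input_adapter=CSVInputAdapter("big.csv"),      # illustrative path
    output_adapter=JSONOutputAdapter("out.json"),  # illustrative path
)
pipe.start()

# wait(timeout) returns False until the finished event is set, so the loop
# body runs roughly once per second with a fresh snapshot.
while not pipe.report.wait(timeout=1.0):
    r = pipe.report
    print(f"{r.status.value}: {r.total_processed} rows, "
          f"{r.items_per_second:.0f}/s, {r.ram_bytes / 1e6:.1f} MB")

print(pipe.report)  # final repr with totals and duration
```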
zoopipe/zoopipe_rust_core.pyd
Binary file
zoopipe-2026.1.14.dist-info/METADATA
ADDED
@@ -0,0 +1,110 @@
+Metadata-Version: 2.4
+Name: zoopipe
+Version: 2026.1.14
+Requires-Dist: pydantic>=2.12.5
+License-File: LICENSE
+Summary: ZooPipe is a data processing framework that allows you to process data in a declarative way.
+Author-email: Alberto Daniel Badia <alberto_badia@enlacepatagonia.com>
+Requires-Python: >=3.10, <3.14
+Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
+Project-URL: Homepage, https://github.com/albertobadia/zoopipe
+
+# ZooPipe
+
+**ZooPipe** is a lean, ultra-high-performance data processing engine for Python. It leverages a **100% Rust core** to handle I/O and orchestration, while keeping the flexibility of Python for schema validation (via Pydantic) and custom data enrichment (via Hooks).
+
+---
+
+## ✨ Key Features
+
+- 🚀 **100% Native Rust Engine**: The core execution loop, including CSV and JSON parsing/writing, is implemented in Rust for maximum throughput.
+- 🔍 **Declarative Validation**: Use [Pydantic](https://docs.pydantic.dev/) models to define and validate your data structures naturally.
+- 🪝 **Python Hooks**: Transform and enrich data at any stage using standard Python functions or classes.
+- ⚡ **Zero-Copy Intent**: Minimal overhead between the Rust processing engine and Python validation/hooks.
+- 🚨 **Automated Error Routing**: Native support for routing failed records to a dedicated error output.
+- 📊 **Multiple Format Support**: Optimized readers/writers for CSV, JSONL, and SQL databases (via SQLx with batch inserts).
+- 🔧 **Pluggable Executors**: Choose between single-threaded or multi-threaded execution strategies.
+
+---
+
+## 🚀 Quick Start
+
+### Installation
+
+```bash
+uv build
+uv run maturin develop --release
+```
+
+### Simple Example
+
+```python
+from pydantic import BaseModel, ConfigDict
+from zoopipe import CSVInputAdapter, CSVOutputAdapter, Pipe
+
+
+class UserSchema(BaseModel):
+    model_config = ConfigDict(extra="ignore")
+    user_id: str
+    username: str
+    age: int
+
+
+pipe = Pipe(
+    input_adapter=CSVInputAdapter("users.csv"),
+    output_adapter=CSVOutputAdapter("processed_users.csv"),
+    error_output_adapter=CSVOutputAdapter("errors.csv"),
+    schema_model=UserSchema,
+)
+
+pipe.start()
+pipe.wait()
+
+print(f"Finished! Processed {pipe.report.total_processed} items.")
+```
+
+---
+
+## 📚 Documentation
+
+### Core Concepts
+
+- [**Executors Guide**](docs/executors.md) - Choose and configure execution strategies
+
+### Input/Output Adapters
+
+#### File Formats
+
+- [**CSV Adapters**](docs/csv.md) - High-performance CSV reading and writing
+- [**JSON Adapters**](docs/json.md) - JSONL and JSON array format support
+- [**Parquet Adapters**](docs/parquet.md) - Columnar storage for analytics and data lakes
+- [**Arrow Adapters**](docs/arrow.md) - Apache Arrow IPC format for zero-copy interoperability
+
+#### Databases
+
+- [**SQL Adapters**](docs/sql.md) - Read from and write to SQL databases with batch optimization
+- [**DuckDB Adapters**](docs/duckdb.md) - Analytical database for OLAP workloads
+
+#### Advanced
+
+- [**Python Generator Adapters**](docs/pygen.md) - In-memory streaming and testing
+- [**Cloud Storage (S3)**](docs/cloud-storage.md) - Read and write data from Amazon S3 and compatible services
+
+---
+
+## 🛠 Architecture
+
+ZooPipe is designed as a thin Python wrapper around a powerful Rust core:
+
+1. **Python Layer**: Configuration, Pydantic models, and custom Hooks.
+2. **Rust Core**:
+   - **Adapters**: High-speed CSV/JSON/SQL Readers and Writers with optimized batch operations.
+   - **NativePipe**: Orchestrates the loop, fetching chunks, calling a consolidated Python batch processor, and routing result batches.
+   - **Executors**: Single-threaded or multi-threaded batch processing strategies.
+
+---
+
+## 📄 License
+
+This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
+
zoopipe-2026.1.14.dist-info/RECORD
ADDED
@@ -0,0 +1,28 @@
+zoopipe\__init__.py,sha256=DEMJtbHR_PKEMdh6VpEwuYKUNh76DskgZsGBrGJ0UJQ,1778
+zoopipe\core.py,sha256=Cmn_7uS1W7LM27WPG0FGaqs3kzrboX0sLvZb7bZJEyU,4731
+zoopipe\hooks\__init__.py,sha256=N6whYjJte3vYXYlMKsw9Yci-923JjnMdLiokV6hTueY,121
+zoopipe\hooks\base.py,sha256=h1aEqEuoZ9-TQhnBIWqQiSPZCzjcCIXHt47dlk2MFh8,594
+zoopipe\input_adapter\__init__.py,sha256=zjJRWJ1Pry3yXQG1aLB84tMYhogaUM6qzVoJ3Yrcvnw,701
+zoopipe\input_adapter\arrow.py,sha256=61Q_S-UaFXChiROdyG_NCvHUQFoBfSou7wzNeFG7-4A,603
+zoopipe\input_adapter\base.py,sha256=o-KYDzDnKMmmUnV8GXw7WIXxmKZqgEUCTiZcG-mg590,173
+zoopipe\input_adapter\csv.py,sha256=P8nj-xyCVRXRLiHW0E-clTgBMDJSJMHpqDHY6EjCytI,1040
+zoopipe\input_adapter\duckdb.py,sha256=UocSoTkQC6Nf1MnxIHMA152HpFSHNmDD5lCCAlfUhQ4,1117
+zoopipe\input_adapter\json.py,sha256=QR1Mr-9DrWvK5FE7O4YJd4Z_UfLCzRS8VjxIbDvg45c,450
+zoopipe\input_adapter\parquet.py,sha256=4RFyFpCcPCwZ7XnmwNnDjhr50wprk8gNsSbJ1lnXNV8,613
+zoopipe\input_adapter\pygen.py,sha256=krrk5OLn3UEitwHX9glNBl1w1E9HDgdtuaN4ZDati9c,606
+zoopipe\input_adapter\sql.py,sha256=BnfOv7Jz48kiC-FS-rPmRfglpalauoTYDoRBov8D0CY,1014
+zoopipe\output_adapter\__init__.py,sha256=w3fKxuPlS1dvfweRTR0o_aANit0EBWIRR_ZTd-18kCA,725
+zoopipe\output_adapter\arrow.py,sha256=5wsGfxy7cSF6Vf19SdYptOXqM9bMdI8lEAfGaARBwqc,542
+zoopipe\output_adapter\base.py,sha256=0HKXOJDQ3_QVEMgk9F3gF3WungN7QEbuJ_RlwDVep6Y,174
+zoopipe\output_adapter\csv.py,sha256=tqHZSHeBCLca6MKKRAindsl8rhKUCKbvxWaIdr2f9UA,900
+zoopipe\output_adapter\duckdb.py,sha256=4zdsGRdEDvd2glA4yrMk5Ll9Ah65tHS-PCoceXOOmRY,887
+zoopipe\output_adapter\json.py,sha256=I6t1RftpgPNVWDfJU9BXy6MQtBqYMLL0NJvyC_zASy8,756
+zoopipe\output_adapter\parquet.py,sha256=OMpRf4zupxfog686UqMsX1YAJ4IIsira_NjwYItLOwg,452
+zoopipe\output_adapter\pygen.py,sha256=2mihJoMKXKjgS8_o8LnhwbW2g8T4r9TCGpHNkmVzsHc,472
+zoopipe\output_adapter\sql.py,sha256=HnYFTzfUgb_K2MoPLlLJRQyjbvyEIIuGznRmuZ5rQFQ,679
+zoopipe\report.py,sha256=A_efG-4_bmW_qf4ga_fxRExWGFA6tDstO27frzOs6Ew,3169
+zoopipe\zoopipe_rust_core.pyd,sha256=AyZp9fFHvYpluF3-dhHPoyAoZheEhOBJj7N95WLNI5M,50961408
+zoopipe-2026.1.14.dist-info\METADATA,sha256=YAzRMFBk7mwUJW6u2VxVx2tmO70kHMOYAaFQPZ8MqBM,3904
+zoopipe-2026.1.14.dist-info\WHEEL,sha256=ZMDDxh9OPoaLQ4P2dJmgI1XsENYSzjzq8fErKKVw5iE,96
+zoopipe-2026.1.14.dist-info\licenses\LICENSE,sha256=9xT-RlPFyibUgCe3X7Hb8IwG987oONsgy55RpnHw2fI,1098
+zoopipe-2026.1.14.dist-info\RECORD,,
zoopipe-2026.1.14.dist-info/licenses/LICENSE
ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 Alberto Daniel Badia
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.