streamtrace 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,57 @@
1
+ """
2
+ Streamtrace SDK — decorator-based ML pipeline contracts for clinical AI.
3
+
4
+ Usage:
5
+ import streamtrace as st
6
+
7
+ @st.app(title="Cardiac Segmentation", version="1.0.0")
8
+ class MyPipeline:
9
+
10
+ @st.input(title="CT Scan", widget=st.FileInput(...), returns="scan")
11
+ def load_scan(self, path): ...
12
+
13
+ @st.preprocess(title="Normalize", returns="normalized")
14
+ def normalize(self, scan): ...
15
+
16
+ @st.infer(title="Segment", returns="mask", weights_path="model.pt")
17
+ def segment(self, normalized): ...
18
+
19
+ @st.postprocess(title="Clean Mask", returns="clean_mask")
20
+ def clean(self, mask): ...
21
+
22
+ @st.output(title="Result", widget=st.FileOutput(...))
23
+ def save(self, clean_mask): ...
24
+ """
25
+
26
+ from .decorators.app_decorator import app, get_app_metadata, is_app
27
+ from .decorators.input_decorator import input, get_input_metadata, is_input
28
+ from .decorators.preprocess_decorator import preprocess, get_preprocess_metadata, is_preprocess
29
+ from .decorators.infer_decorator import infer, get_infer_metadata, is_infer
30
+ from .decorators.postprocess_decorator import postprocess, get_postprocess_metadata, is_postprocess
31
+ from .decorators.output_decorator import output, get_output_metadata, is_output
32
+
33
+ from .widgets.input_widgets import FileInput, TextInput, InputDataType
34
+ from .widgets.output_widgets import FileOutput, ImageOutput, PlotOutput, MetricOutput, OutputDataType
35
+
36
+ from .dag import build_dag, DAG, Node
37
+
38
# Package version — single source of truth; keep in sync with the wheel metadata.
__version__ = "0.1.0"

# Public API of the streamtrace package. Names listed here are what
# `from streamtrace import *` exposes and what the docs consider stable.
__all__ = [
    # App
    "app", "get_app_metadata", "is_app",
    # Node decorators
    "input", "get_input_metadata", "is_input",
    "preprocess", "get_preprocess_metadata", "is_preprocess",
    "infer", "get_infer_metadata", "is_infer",
    "postprocess", "get_postprocess_metadata", "is_postprocess",
    "output", "get_output_metadata", "is_output",
    # Input widgets
    "FileInput", "TextInput", "InputDataType",
    # Output widgets
    "FileOutput", "ImageOutput", "PlotOutput", "MetricOutput", "OutputDataType",
    # DAG
    "build_dag", "DAG", "Node",
    # Version
    "__version__",
]
streamtrace/dag.py ADDED
@@ -0,0 +1,386 @@
1
+ """
2
+ streamtrace/dag.py
3
+
4
+ Builds a dependency graph from @st.app decorated methods,
5
+ validates the wiring, and provides topological execution order.
6
+
7
+ The core contract: each decorated method's **parameter names** (excluding `self`)
8
+ are matched against other methods' **return aliases** (the `returns=` kwarg on the
9
+ decorator) or, if no alias is set, the method name itself.
10
+
11
+ @st.input(returns="scan")
12
+ def load_scan(self, path): ...
13
+
14
+ @st.preprocess(returns="normalized")
15
+ def normalize(self, scan): ... # ← "scan" resolves to load_scan
16
+
17
+ @st.infer(returns="mask")
18
+ def segment(self, normalized): ... # ← "normalized" resolves to normalize
19
+ """
20
+
21
from __future__ import annotations

import heapq
import inspect
from dataclasses import dataclass, field
from typing import Any, Callable
26
+
27
+
28
+ # ---------------------------------------------------------------------------
29
+ # Node: one decorated method in the pipeline
30
+ # ---------------------------------------------------------------------------
31
+
32
# ---------------------------------------------------------------------------
# Node: one decorated method in the pipeline
# ---------------------------------------------------------------------------

# Pipeline phases in canonical execution order. A phase's index in this tuple
# defines its priority for validation and for topological tie-breaking.
PHASES = ("input", "preprocess", "infer", "postprocess", "output")

# Map from decorator metadata attribute → phase name
_META_ATTR_TO_PHASE: dict[str, str] = {
    "__input_metadata__": "input",
    "__preprocess_metadata__": "preprocess",
    "__infer_metadata__": "infer",
    "__postprocess_metadata__": "postprocess",
    "__output_metadata__": "output",
}


@dataclass
class Node:
    """A single node in the execution DAG."""

    name: str                  # method name on the class
    phase: str                 # one of PHASES
    method: Callable           # the unbound method
    params: list[str]          # parameter names excluding 'self'
    returns_alias: str | None  # the `returns=` value from the decorator, or None
    metadata: Any = None       # the full decorator metadata object

    @property
    def output_key(self) -> str:
        """The name downstream nodes use to reference this node's return value."""
        return self.returns_alias or self.name


# ---------------------------------------------------------------------------
# DAG: the full dependency graph
# ---------------------------------------------------------------------------


@dataclass
class DAG:
    """
    Directed acyclic graph of pipeline nodes.

    Nodes are discovered from class decorator metadata.
    Edges are inferred from parameter name → output key matching.
    """

    nodes: dict[str, Node] = field(default_factory=dict)

    # Computed incrementally (output-key index) / lazily and cached (edges)
    _output_key_to_node: dict[str, str] = field(
        default_factory=dict, repr=False
    )
    _edges: dict[str, list[str]] | None = field(default=None, repr=False)

    # ------------------------------------------------------------------
    # Construction
    # ------------------------------------------------------------------

    def add_node(self, node: Node) -> None:
        """
        Register *node* in the graph.

        Raises:
            ValueError: if the node name or its output key is already taken.
                Both checks run before any state is mutated, so a failed add
                leaves the DAG unchanged.
        """
        if node.name in self.nodes:
            raise ValueError(f"Duplicate node name: '{node.name}'")

        output_key = node.output_key
        if output_key in self._output_key_to_node:
            existing = self._output_key_to_node[output_key]
            raise ValueError(
                f"Duplicate output key '{output_key}': "
                f"both '{existing}' and '{node.name}' produce it"
            )

        self.nodes[node.name] = node
        self._output_key_to_node[output_key] = node.name
        self._edges = None  # invalidate cached edges

    # ------------------------------------------------------------------
    # Edge resolution
    # ------------------------------------------------------------------

    @property
    def edges(self) -> dict[str, list[str]]:
        """
        Map of node_name → list of node_names it depends on.
        Edges are resolved by matching parameter names to output keys.

        Input nodes are excluded from edge resolution because their
        parameters come from the user, not from upstream nodes.
        """
        if self._edges is not None:
            return self._edges

        self._edges = {}
        for name, node in self.nodes.items():
            if node.phase == "input":
                # Input nodes receive user-provided values, not upstream outputs
                self._edges[name] = []
            else:
                deps = []
                for param in node.params:
                    if param in self._output_key_to_node:
                        deps.append(self._output_key_to_node[param])
                    # If param doesn't match any output key, validation catches it
                self._edges[name] = deps

        return self._edges

    # ------------------------------------------------------------------
    # Validation
    # ------------------------------------------------------------------

    def validate(self) -> list[str]:
        """
        Return a list of error strings. Empty list = valid pipeline.

        Checks:
            1. At least one input node exists
            2. At least one output node exists
            3. Every non-input parameter resolves to an upstream output key
            4. No dependency cycles
            5. Phase ordering is respected (no preprocess depending on infer, etc.)
        """
        errors: list[str] = []

        # --- Check: required phases ---
        phases_present = {n.phase for n in self.nodes.values()}
        if "input" not in phases_present:
            errors.append("Pipeline has no @input nodes")
        if "output" not in phases_present:
            errors.append("Pipeline has no @output nodes")

        # --- Check: all parameters resolve ---
        for name, node in self.nodes.items():
            if node.phase == "input":
                continue
            for param in node.params:
                if param not in self._output_key_to_node:
                    errors.append(
                        f"Node '{name}' expects parameter '{param}' "
                        f"but no node produces output key '{param}'. "
                        f"Available keys: {sorted(self._output_key_to_node.keys())}"
                    )

        # --- Check: phase ordering ---
        phase_order = {phase: i for i, phase in enumerate(PHASES)}
        for name, dep_names in self.edges.items():
            node_phase = phase_order[self.nodes[name].phase]
            for dep_name in dep_names:
                dep_phase = phase_order[self.nodes[dep_name].phase]
                if dep_phase > node_phase:
                    errors.append(
                        f"Node '{name}' ({self.nodes[name].phase}) depends on "
                        f"'{dep_name}' ({self.nodes[dep_name].phase}), "
                        f"which is a later phase"
                    )

        # --- Check: cycle detection ---
        cycle = self._detect_cycle()
        if cycle:
            errors.append(f"Dependency cycle detected: {' → '.join(cycle)}")

        return errors

    def _detect_cycle(self) -> list[str] | None:
        """DFS-based cycle detection. Returns the cycle path or None."""
        WHITE, GRAY, BLACK = 0, 1, 2  # unvisited / on current DFS path / done
        color = {name: WHITE for name in self.nodes}
        parent: dict[str, str | None] = {}

        def dfs(u: str) -> list[str] | None:
            color[u] = GRAY
            for v in self.edges.get(u, []):
                if color[v] == GRAY:
                    # Back edge found: walk the parent chain from u back to v
                    # to reconstruct the cycle path for the error message.
                    cycle = [v, u]
                    curr = u
                    while curr != v:
                        curr = parent.get(curr)
                        if curr is None:
                            # curr was the DFS root — chain ends here
                            break
                        cycle.append(curr)
                    cycle.reverse()
                    return cycle
                if color[v] == WHITE:
                    parent[v] = u
                    result = dfs(v)
                    if result:
                        return result
            color[u] = BLACK
            return None

        for name in self.nodes:
            if color[name] == WHITE:
                result = dfs(name)
                if result:
                    return result
        return None

    # ------------------------------------------------------------------
    # Topological sort
    # ------------------------------------------------------------------

    def topological_sort(self) -> list[Node]:
        """
        Return nodes in execution order via Kahn's algorithm.

        Ties within the same depth are broken by phase order first,
        then by name. This ensures inputs run before preprocessing
        even if there's no explicit dependency.

        Implementation note: uses a reverse-adjacency (producer → consumers)
        map and a min-heap keyed on (phase priority, name), giving
        O(E + V log V) instead of rescanning every edge per popped node.
        """
        phase_priority = {phase: i for i, phase in enumerate(PHASES)}

        def heap_key(name: str) -> tuple[int, str]:
            return (phase_priority[self.nodes[name].phase], name)

        # In-degree per node, and producer → consumers reverse adjacency
        in_degree: dict[str, int] = {
            name: len(deps) for name, deps in self.edges.items()
        }
        dependents: dict[str, list[str]] = {name: [] for name in self.nodes}
        for name, deps in self.edges.items():
            for dep in deps:
                dependents[dep].append(name)

        # Seed with zero-degree nodes; heap order == (phase, name)
        heap: list[tuple[int, str]] = [
            heap_key(n) for n, d in in_degree.items() if d == 0
        ]
        heapq.heapify(heap)

        order: list[Node] = []
        while heap:
            _, current = heapq.heappop(heap)
            order.append(self.nodes[current])

            for consumer in dependents[current]:
                in_degree[consumer] -= 1
                if in_degree[consumer] == 0:
                    heapq.heappush(heap, heap_key(consumer))

        if len(order) != len(self.nodes):
            # Should not happen if validate() passed, but safety net
            missing = set(self.nodes.keys()) - {n.name for n in order}
            raise RuntimeError(
                f"Topological sort incomplete — stuck nodes: {missing}. "
                f"This usually indicates a cycle."
            )

        return order

    # ------------------------------------------------------------------
    # Introspection helpers
    # ------------------------------------------------------------------

    def get_input_nodes(self) -> list[Node]:
        """All nodes in the 'input' phase."""
        return [n for n in self.nodes.values() if n.phase == "input"]

    def get_output_nodes(self) -> list[Node]:
        """All nodes in the 'output' phase."""
        return [n for n in self.nodes.values() if n.phase == "output"]

    def get_intermediate_outputs(self) -> list[Node]:
        """Output nodes whose metadata flags them as intermediate results."""
        return [
            n
            for n in self.nodes.values()
            if n.phase == "output"
            and getattr(n.metadata, "intermediate", False)
        ]

    def get_dependencies(self, node_name: str) -> list[Node]:
        """Get the direct upstream dependencies of a node."""
        return [self.nodes[dep] for dep in self.edges.get(node_name, [])]

    def to_schema(self) -> dict:
        """
        Emit a JSON-serializable dict describing the full pipeline.
        Used by `streamtrace push` to register with the backend,
        and by the frontend to render the pipeline visualization.
        """
        nodes_schema = []
        for node in self.topological_sort():
            entry = {
                "name": node.name,
                "phase": node.phase,
                "output_key": node.output_key,
                # depends_on lists upstream *output keys* (not node names) —
                # the frontend wires the graph by key.
                "depends_on": [
                    self.nodes[d].output_key
                    for d in self.edges.get(node.name, [])
                ],
            }
            # Include widget schema for input nodes
            if node.phase == "input" and node.metadata:
                if hasattr(node.metadata, "to_schema"):
                    entry["widget"] = node.metadata.to_schema()

            # Include output metadata
            if node.phase == "output" and node.metadata:
                entry["intermediate"] = getattr(node.metadata, "intermediate", False)
                widget = getattr(node.metadata, "widget", None)
                if widget is not None:
                    entry["widget"] = widget.to_schema()

            nodes_schema.append(entry)

        return {"nodes": nodes_schema}
333
+
334
+
335
+ # ---------------------------------------------------------------------------
336
+ # build_dag: extract a DAG from an @st.app decorated class
337
+ # ---------------------------------------------------------------------------
338
+
339
+
340
def build_dag(app_cls: type) -> DAG:
    """
    Inspect an @st.app class and build the execution DAG from
    its decorated methods.

    Each method decorated with @st.input, @st.preprocess, @st.infer,
    or @st.output becomes a node. Parameter names on non-input nodes
    are matched against the `returns` alias (or method name) of other
    nodes to form edges.
    """
    dag = DAG()

    # vars() yields only attributes defined on the class itself (not
    # inherited ones) and preserves definition order on Python 3.7+.
    for method_name, member in vars(app_cls).items():
        if not callable(member):
            continue

        # A method carries at most one phase marker — stop at the first.
        found: tuple[str, Any] | None = None
        for meta_attr, phase_name in _META_ATTR_TO_PHASE.items():
            candidate = getattr(member, meta_attr, None)
            if candidate is not None:
                found = (phase_name, candidate)
                break

        if found is None:
            continue  # undecorated helper method — not part of the pipeline

        node_phase, node_meta = found

        # Parameter names (minus 'self') drive downstream edge resolution.
        arg_names = [
            p for p in inspect.signature(member).parameters if p != "self"
        ]

        dag.add_node(
            Node(
                name=method_name,
                phase=node_phase,
                method=member,
                params=arg_names,
                returns_alias=getattr(node_meta, "returns", None),
                metadata=node_meta,
            )
        )

    return dag
File without changes
@@ -0,0 +1,77 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Any
5
+
6
+
7
+ # ===================================================================
8
+ # @st.app — class decorator
9
+ # ===================================================================
10
+
11
@dataclass(frozen=True)
class AppMetadata:
    """Metadata for the top-level pipeline class."""

    title: str = "Streamtrace App"
    version: str = "0.1.0"
    description: str = ""
    # Docker base image hint — used by `push` to select the right
    # container. The agent picks this based on what the model needs.
    docker_base: str | None = None
    # Python dependencies beyond what's in the base image
    requirements: list[str] = field(default_factory=list)

    def to_schema(self) -> dict[str, Any]:
        """Serialize to a JSON-compatible dict; optional fields are omitted when unset."""
        schema = {
            "title": self.title,
            "version": self.version,
            "description": self.description,
        }
        if self.docker_base:
            schema["docker_base"] = self.docker_base
        if self.requirements:
            schema["requirements"] = self.requirements
        return schema


def app(
    target=None,
    *,
    title: str = "Streamtrace App",
    version: str = "0.1.0",
    description: str = "",
    docker_base: str | None = None,
    requirements: list[str] | None = None,
):
    """
    Class decorator that marks a class as a Streamtrace app.

    Supports both the bare and the configured form:

        @st.app
        class MyPipeline: ...

        @st.app(title="Cardiac Seg", version="1.0.0",
                docker_base="pytorch/pytorch:2.1.0-cuda12.1-cudnn8-runtime",
                requirements=["nibabel", "scipy"])
        class CardiacSegmentation: ...

    Raises:
        TypeError: if called with a positional argument that is not a class
            (e.g. ``@st.app("My Title")`` — options must be keywords).
    """
    def decorator(cls):
        cls.__app_metadata__ = AppMetadata(
            title=title,
            version=version,
            description=description,
            docker_base=docker_base,
            # Copy into a fresh list so a caller-held list can't mutate
            # the (frozen) metadata after decoration.
            requirements=list(requirements) if requirements else [],
        )
        return cls

    if target is not None:
        # Guard against @st.app("Title") — a common misuse where options are
        # passed positionally. Fail fast with a clear error instead of an
        # obscure AttributeError when metadata is attached to a non-class.
        if not isinstance(target, type):
            raise TypeError(
                "@st.app positional argument must be a class; "
                "pass options as keywords, e.g. @st.app(title=...)"
            )
        return decorator(target)
    return decorator
70
+
71
+
72
def get_app_metadata(cls) -> AppMetadata | None:
    """Return the AppMetadata attached by @st.app, or None for an undecorated class."""
    try:
        return cls.__app_metadata__
    except AttributeError:
        return None
74
+
75
+
76
def is_app(cls) -> bool:
    """True when cls carries @st.app metadata (the marker attribute exists)."""
    try:
        cls.__app_metadata__
    except AttributeError:
        return False
    return True
@@ -0,0 +1,88 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any
5
+
6
+
7
+ # ===================================================================
8
+ # @st.infer — inference/model step decorator
9
+ # ===================================================================
10
+
11
@dataclass(frozen=True)
class InferMetadata:
    """Metadata for an inference node."""

    title: str = "Inference"
    returns: str | None = None
    # Hint for the runtime about GPU requirements:
    # "auto", "cpu", "cuda", "cuda:0", etc. — "auto" picks GPU if available.
    device: str = "auto"
    # Optional path to model weights — used by `push` to
    # ensure weights are available in the container.
    weights_path: str | None = None

    def to_schema(self) -> dict[str, Any]:
        """Serialize to a JSON-compatible dict; optional fields are omitted when unset."""
        schema: dict[str, Any] = {
            "title": self.title,
            "device": self.device,
        }
        if self.returns:
            schema["output_key"] = self.returns
        if self.weights_path:
            schema["weights_path"] = self.weights_path
        return schema


def infer(
    target=None,
    *,
    title: str | None = None,
    returns: str | None = None,
    device: str = "auto",
    weights_path: str | None = None,
):
    """
    Decorator that marks a method as an inference step.

    Inference nodes are where the model runs. They receive preprocessed
    data and return predictions. The `device` hint tells the runtime
    where to run — "auto" picks GPU if available. The `weights_path` is
    informational — `push` uses it to verify weights are bundled or
    accessible in the container. When `title` is omitted it is derived
    from the method name ("run_model" → "Run Model").

    Usage:
        @st.infer(returns="mask",
                  device="cuda",
                  weights_path="checkpoints/best.pth")
        def segment(self, resampled): ...

        @st.infer(returns="embedding", device="cpu")
        def encode(self, text): ...

    Raises:
        TypeError: if called with a non-callable positional argument
            (e.g. ``@st.infer("Segment")`` — options must be keywords).
    """
    def decorator(fn):
        fn.__infer_metadata__ = InferMetadata(
            title=title or fn.__name__.replace("_", " ").title(),
            returns=returns,
            device=device,
            weights_path=weights_path,
        )
        return fn

    if target is not None:
        # Guard against @st.infer("Title") — fail fast with a clear message
        # instead of an AttributeError when attaching metadata to a non-function.
        if not callable(target):
            raise TypeError(
                "@st.infer positional argument must be the decorated function; "
                "pass options as keywords, e.g. @st.infer(title=...)"
            )
        return decorator(target)
    return decorator
81
+
82
+
83
def get_infer_metadata(obj) -> InferMetadata | None:
    """Return the InferMetadata attached by @st.infer, or None if obj is undecorated."""
    try:
        return obj.__infer_metadata__
    except AttributeError:
        return None
85
+
86
+
87
def is_infer(obj) -> bool:
    """True when obj carries @st.infer metadata (the marker attribute exists)."""
    try:
        obj.__infer_metadata__
    except AttributeError:
        return False
    return True