weaveflow 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. weave/__init__.py +63 -0
  2. weave/agent/__init__.py +7 -0
  3. weave/agent/base.py +90 -0
  4. weave/agent/context.py +43 -0
  5. weave/agent/decorators.py +69 -0
  6. weave/cli/__init__.py +5 -0
  7. weave/cli/commands.py +72 -0
  8. weave/cli/main.py +55 -0
  9. weave/cli/templates.py +21 -0
  10. weave/connection/__init__.py +13 -0
  11. weave/connection/matcher.py +51 -0
  12. weave/connection/protocol.py +61 -0
  13. weave/connection/transform.py +108 -0
  14. weave/errors.py +75 -0
  15. weave/guardrails/__init__.py +10 -0
  16. weave/guardrails/hooks.py +62 -0
  17. weave/interop/__init__.py +12 -0
  18. weave/interop/crewai.py +48 -0
  19. weave/interop/foreign.py +144 -0
  20. weave/interop/langchain.py +57 -0
  21. weave/llm/__init__.py +15 -0
  22. weave/llm/_deps.py +38 -0
  23. weave/llm/_retry.py +55 -0
  24. weave/llm/adapters/__init__.py +17 -0
  25. weave/llm/adapters/anthropic_adapter.py +60 -0
  26. weave/llm/adapters/deepseek_adapter.py +25 -0
  27. weave/llm/adapters/google_adapter.py +46 -0
  28. weave/llm/adapters/mistral_adapter.py +25 -0
  29. weave/llm/adapters/ollama_adapter.py +30 -0
  30. weave/llm/adapters/openai_adapter.py +61 -0
  31. weave/llm/base.py +66 -0
  32. weave/llm/factory.py +61 -0
  33. weave/logger.py +104 -0
  34. weave/memory/__init__.py +13 -0
  35. weave/memory/base.py +35 -0
  36. weave/memory/long_term.py +67 -0
  37. weave/memory/short_term.py +39 -0
  38. weave/py.typed +0 -0
  39. weave/runtime/__init__.py +7 -0
  40. weave/runtime/parallel.py +119 -0
  41. weave/runtime/pipeline.py +67 -0
  42. weave/runtime/runner.py +69 -0
  43. weave/schema/__init__.py +7 -0
  44. weave/schema/port.py +49 -0
  45. weave/schema/registry.py +29 -0
  46. weave/schema/validator.py +82 -0
  47. weave/types/__init__.py +17 -0
  48. weave/types/payload.py +44 -0
  49. weave/types/primitives.py +51 -0
  50. weaveflow-1.1.0.dist-info/METADATA +202 -0
  51. weaveflow-1.1.0.dist-info/RECORD +54 -0
  52. weaveflow-1.1.0.dist-info/WHEEL +4 -0
  53. weaveflow-1.1.0.dist-info/entry_points.txt +2 -0
  54. weaveflow-1.1.0.dist-info/licenses/LICENSE +184 -0
weave/__init__.py ADDED
@@ -0,0 +1,63 @@
1
+ """Weave — composable AI agent framework.
2
+
3
+ A standard anatomy for AI agents: typed input/output ports, a swappable LLM brain,
4
+ optional memory, guardrails, and an auto-transforming connection protocol so any
5
+ compliant agent can plug into any other. Build an agent once, connect it everywhere.
6
+
7
+ Quick start
8
+ -----------
9
+ from weave import agent, DataType, Pipeline
10
+
11
+ @agent(name="summarizer", input=DataType.TEXT, output=DataType.TEXT,
12
+ tags=["summarization"], llm="anthropic:claude-opus-4-8")
13
+ async def summarize(ctx):
14
+ return await ctx.complete(f"Summarize:\\n{ctx.input.value}")
15
+
16
+ result = await Pipeline([summarize]).run("a long document ...")
17
+ """
18
+
19
+ from weave.agent import AgentContext, BaseAgent, agent
20
+ from weave.connection import ConnectionProtocol, Router, register_transform
21
+ from weave.errors import WeaveError
22
+ from weave.guardrails import Guardrails
23
+ from weave.interop import from_callable, from_crewai, from_langchain
24
+ from weave.llm import LLMAdapter, create_adapter, register_provider, supported_providers
25
+ from weave.logger import Logger, logger
26
+ from weave.memory import LongTermMemory, Memory, ShortTermMemory
27
+ from weave.runtime import LocalRunner, Parallel, Pipeline
28
+ from weave.schema import PortSchema, SchemaRegistry, validate
29
+ from weave.types import DataType, Payload
30
+
31
+ __version__ = "1.1.0"
32
+
33
+ __all__ = [
34
+ "agent",
35
+ "BaseAgent",
36
+ "AgentContext",
37
+ "DataType",
38
+ "Payload",
39
+ "PortSchema",
40
+ "SchemaRegistry",
41
+ "validate",
42
+ "Pipeline",
43
+ "Parallel",
44
+ "LocalRunner",
45
+ "ConnectionProtocol",
46
+ "Router",
47
+ "register_transform",
48
+ "Guardrails",
49
+ "from_callable",
50
+ "from_langchain",
51
+ "from_crewai",
52
+ "Memory",
53
+ "ShortTermMemory",
54
+ "LongTermMemory",
55
+ "LLMAdapter",
56
+ "create_adapter",
57
+ "register_provider",
58
+ "supported_providers",
59
+ "Logger",
60
+ "logger",
61
+ "WeaveError",
62
+ "__version__",
63
+ ]
@@ -0,0 +1,7 @@
1
+ """Agent layer — base class, context, and decorator."""
2
+
3
+ from weave.agent.base import BaseAgent
4
+ from weave.agent.context import AgentContext
5
+ from weave.agent.decorators import agent
6
+
7
+ __all__ = ["BaseAgent", "AgentContext", "agent"]
weave/agent/base.py ADDED
@@ -0,0 +1,90 @@
1
+ """BaseAgent — the standard agent anatomy and execution lifecycle (PRD §5.1, §5.3).
2
+
3
+ An agent's public interface is its ports + capability tags; its internals (brain,
4
+ memory, guardrails) are private. ``run`` orchestrates the nervous system: validate
5
+ input -> pre-guardrails -> handle -> coerce -> validate output -> post-guardrails,
6
+ with on-error hooks and typed error wrapping (fail fast, never swallow).
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from abc import ABC, abstractmethod
12
+ from typing import Any
13
+
14
+ from weave.agent.context import AgentContext
15
+ from weave.errors import AgentExecutionError, WeaveError
16
+ from weave.guardrails.hooks import Guardrails
17
+ from weave.llm.base import LLMAdapter
18
+ from weave.logger import logger
19
+ from weave.memory.base import Memory
20
+ from weave.schema.port import PortSchema
21
+ from weave.schema.validator import validate
22
+ from weave.types.payload import Payload
23
+
24
+
25
class BaseAgent(ABC):
    """Self-contained capability unit with typed input/output ports.

    Subclasses supply ``handle``; ``run`` drives the lifecycle around it:
    validate input -> pre-guardrails -> handle -> coerce -> validate output ->
    post-guardrails. Any ``Exception`` raised along the way is passed to the
    guardrails' on-error hook and logged before it propagates.
    """

    def __init__(
        self,
        *,
        name: str,
        input_schema: PortSchema,
        output_schema: PortSchema,
        capability_tags: tuple[str, ...] = (),
        brain: LLMAdapter | None = None,
        memory: Memory | None = None,
        guardrails: Guardrails | None = None,
    ) -> None:
        # Public surface: identity, ports, and capability tags.
        self.name = name
        self.input_schema = input_schema
        self.output_schema = output_schema
        self.capability_tags = capability_tags
        # Private internals; brain and memory reach the handler via AgentContext.
        self._brain = brain
        self._memory = memory
        self._guardrails = guardrails or Guardrails()

    @abstractmethod
    async def handle(self, ctx: AgentContext) -> Payload | Any:
        """Run the agent's own logic; return a Payload or a raw value to be wrapped."""

    async def run(self, payload: Payload | Any) -> Payload:
        """Run the full lifecycle for one input and return the post-guardrail output.

        A non-``Payload`` argument is wrapped using the input port's type before
        validation. ``WeaveError`` is re-raised unchanged; any other ``Exception``
        is wrapped in ``AgentExecutionError`` chained to the original.
        """
        incoming = (
            payload
            if isinstance(payload, Payload)
            else Payload(type=self.input_schema.type, value=payload)
        )
        try:
            validate(incoming, self.input_schema)
            ctx = AgentContext(
                input=self._guardrails.run_pre(incoming),
                brain=self._brain,
                memory=self._memory,
            )
            produced = self._coerce(await self.handle(ctx))
            validate(produced, self.output_schema)
            return self._guardrails.run_post(produced)
        except WeaveError as failure:
            # Already a typed framework error: notify, log, and let it through.
            self._guardrails.run_on_error(failure)
            logger.error("Agent failed", agent=self.name, code=failure.code)
            raise
        except Exception as failure:
            # Anything else is wrapped so callers only ever see typed errors.
            self._guardrails.run_on_error(failure)
            logger.error("Agent raised", agent=self.name, error=str(failure))
            message = f"Agent '{self.name}' raised during execution"
            raise AgentExecutionError(message, detail=str(failure)) from failure

    def _coerce(self, raw: Payload | Any) -> Payload:
        """Pass a Payload through; wrap any other value with the output port's type."""
        return raw if isinstance(raw, Payload) else Payload(type=self.output_schema.type, value=raw)

    def manifest(self) -> dict[str, Any]:
        """Portable schema manifest carried by a packaged agent (FR-010)."""
        described: dict[str, Any] = {}
        described["name"] = self.name
        described["input_schema"] = self.input_schema.manifest()
        described["output_schema"] = self.output_schema.manifest()
        described["capability_tags"] = list(self.capability_tags)
        return described
weave/agent/context.py ADDED
@@ -0,0 +1,43 @@
1
+ """Execution context handed to every agent's handler.
2
+
3
+ Bundles the input payload plus the injected Brain, Memory, and Logger so handlers
4
+ never reach for globals (Dependency Injection). Exposes a thin ``complete`` helper
5
+ so the common "ask the LLM" case is one line.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from collections.abc import AsyncIterator
11
+ from dataclasses import dataclass
12
+ from typing import Any
13
+
14
+ from weave.errors import AdapterError
15
+ from weave.llm.base import LLMAdapter
16
+ from weave.logger import Logger
17
+ from weave.logger import logger as default_logger
18
+ from weave.memory.base import Memory
19
+ from weave.types.payload import Payload
20
+
21
+
22
@dataclass(slots=True)
class AgentContext:
    """Per-run bundle handed to a handler: the input payload plus injected collaborators."""

    input: Payload
    brain: LLMAdapter | None = None
    memory: Memory | None = None
    logger: Logger = default_logger

    def require_brain(self) -> LLMAdapter:
        """Return the configured brain, or raise ``AdapterError`` when none is set."""
        adapter = self.brain
        if adapter is not None:
            return adapter
        raise AdapterError(
            "Agent requires an LLM but none was configured",
            detail="pass llm='provider:model' when defining the agent",
        )

    async def complete(self, prompt: str, *, system: str | None = None, **opts: Any) -> str:
        """Await the brain's ``complete`` for *prompt*, forwarding *system* and extra options."""
        adapter = self.require_brain()
        return await adapter.complete(prompt, system=system, **opts)

    def stream(self, prompt: str, *, system: str | None = None, **opts: Any) -> AsyncIterator[str]:
        """Return the brain's ``stream`` result for *prompt*, forwarding *system* and extra options."""
        adapter = self.require_brain()
        return adapter.stream(prompt, system=system, **opts)
@@ -0,0 +1,69 @@
1
+ """The ``@agent`` decorator — the one-liner way to define an agent.
2
+
3
+ Wraps an ``async def handler(ctx)`` into a fully-formed ``BaseAgent`` with typed
4
+ ports, capability tags, and an optional LLM brain. This is the ergonomic surface
5
+ (CrewAI/LangChain-style) over the explicit ``BaseAgent`` subclassing API.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from collections.abc import Awaitable, Callable, Iterable
11
+ from typing import Any
12
+
13
+ from weave.agent.base import BaseAgent
14
+ from weave.agent.context import AgentContext
15
+ from weave.guardrails.hooks import Guardrails
16
+ from weave.llm.base import LLMAdapter
17
+ from weave.llm.factory import create_adapter
18
+ from weave.memory.base import Memory
19
+ from weave.schema.port import PortSchema
20
+ from weave.types.payload import Payload
21
+ from weave.types.primitives import DataType
22
+
23
+ Handler = Callable[[AgentContext], Awaitable[Payload | Any]]
24
+ PortLike = DataType | PortSchema
25
+
26
+
27
class _FunctionAgent(BaseAgent):
    """BaseAgent whose ``handle`` delegates to a wrapped async function."""

    def __init__(self, handler: Handler, **kwargs: Any) -> None:
        # All keyword arguments are forwarded untouched to BaseAgent.
        super().__init__(**kwargs)
        self._handler = handler

    async def handle(self, ctx: AgentContext) -> Payload | Any:
        """Await the wrapped function with *ctx* and return whatever it produced."""
        outcome = await self._handler(ctx)
        return outcome
36
+
37
+
38
def _as_schema(port: PortLike) -> PortSchema:
    """Return *port* unchanged if it is a PortSchema, else build one via ``PortSchema.of``."""
    return port if isinstance(port, PortSchema) else PortSchema.of(port)
42
+
43
+
44
def agent(
    *,
    name: str,
    input: PortLike,
    output: PortLike,
    tags: Iterable[str] = (),
    llm: str | LLMAdapter | None = None,
    memory: Memory | None = None,
    guardrails: Guardrails | None = None,
) -> Callable[[Handler], BaseAgent]:
    """Return a decorator that turns ``async def handler(ctx)`` into a ``BaseAgent``.

    The adapter (when *llm* is given), the port schemas, and the tag tuple are
    all resolved when the decorator is applied, not when ``agent(...)`` is called.
    """

    def _wrap(handler: Handler) -> BaseAgent:
        # Resolve the brain first; ``None`` means the agent runs without an LLM.
        brain = None if llm is None else create_adapter(llm)
        settings: dict[str, Any] = {
            "name": name,
            "input_schema": _as_schema(input),
            "output_schema": _as_schema(output),
            "capability_tags": tuple(tags),
            "brain": brain,
            "memory": memory,
            "guardrails": guardrails,
        }
        return _FunctionAgent(handler, **settings)

    return _wrap
weave/cli/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ """Weave command-line interface."""
2
+
3
+ from weave.cli.main import main
4
+
5
+ __all__ = ["main"]
weave/cli/commands.py ADDED
@@ -0,0 +1,72 @@
1
+ """CLI command implementations: scaffold, validate, package (FR-010).
2
+
3
+ Each command is a small pure-ish function returning an exit code. The loader imports
4
+ a target module by file path and discovers ``BaseAgent`` instances inside it.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import importlib.util
10
+ import json
11
+ import re
12
+ import zipfile
13
+ from collections.abc import Iterable
14
+ from pathlib import Path
15
+
16
+ from weave.agent.base import BaseAgent
17
+ from weave.cli.templates import AGENT_TEMPLATE
18
+ from weave.errors import WeaveError
19
+
20
_IDENT_PATTERN = re.compile(r"\W+")


def _to_ident(name: str) -> str:
    """Turn a free-form agent name into a usable Python function/module name.

    Runs of non-word characters collapse to a single underscore and edge
    underscores are trimmed. An empty result falls back to ``"agent"``. A result
    that is not a valid identifier (e.g. it starts with a digit) or that is a
    reserved keyword gets a leading underscore, so the scaffolded
    ``async def <ident>(ctx)`` is not a syntax error.
    """
    import keyword  # local import: not part of this module's import block

    cleaned = _IDENT_PATTERN.sub("_", name).strip("_")
    if not cleaned:
        return "agent"
    if not cleaned.isidentifier() or keyword.iskeyword(cleaned):
        return f"_{cleaned}"
    return cleaned
26
+
27
+
28
def _load_agents(path: Path) -> tuple[BaseAgent, ...]:
    """Import the module at *path* and return every ``BaseAgent`` instance it defines.

    The module is executed, so any top-level code in it runs. Agents are
    collected from the module's namespace in definition order.

    Raises:
        WeaveError: if *path* is not an existing file, if no import spec/loader
            can be built for it, or if the module defines no agents.
    """
    # Fail with the typed error the CLI entry point handles, rather than letting
    # the loader surface a raw FileNotFoundError traceback.
    if not path.is_file():
        raise WeaveError("Module file not found", detail=str(path))
    spec = importlib.util.spec_from_file_location(path.stem, path)
    if spec is None or spec.loader is None:
        raise WeaveError("Could not load module", detail=str(path))
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    agents = tuple(value for value in vars(module).values() if isinstance(value, BaseAgent))
    if not agents:
        raise WeaveError("No Weave agents found in module", detail=str(path))
    return agents
39
+
40
+
41
def scaffold(name: str, *, directory: str = ".") -> int:
    """Write a starter agent file named after *name* into *directory*; return 0.

    Raises ``WeaveError`` instead of overwriting when the target file exists.
    """
    ident = _to_ident(name)
    target = Path(directory).joinpath(f"{ident}.py")
    if not target.exists():
        rendered = AGENT_TEMPLATE.format(name=name, ident=ident)
        target.write_text(rendered, encoding="utf-8")
        print(f"Scaffolded agent -> {target}")
        return 0
    raise WeaveError("File already exists", detail=str(target))
49
+
50
+
51
def validate(path: str) -> int:
    """Load the agents in the module at *path*, print each manifest as JSON, return 0."""
    loaded = _load_agents(Path(path))
    for item in loaded:
        label = item.name
        manifest_json = json.dumps(item.manifest())
        print(f"valid: {label} {manifest_json}")
    print(f"{len(loaded)} agent(s) validated")
    return 0
57
+
58
+
59
def package(path: str, *, output: str | None = None) -> int:
    """Bundle the module at *path* plus a ``manifest.json`` into a zip archive; return 0.

    Without *output*, the archive sits next to the source with its suffix
    replaced by ``.weave.zip``.
    """
    source = Path(path)
    agents = _load_agents(source)
    if output:
        archive = Path(output)
    else:
        archive = source.with_suffix(".weave.zip")
    described = []
    for entry in agents:
        described.append(entry.manifest())
    manifest = {"version": "1.0", "agents": described}
    with zipfile.ZipFile(archive, "w", zipfile.ZIP_DEFLATED) as bundle:
        # Source file first, then the manifest describing the agents it defines.
        bundle.write(source, arcname=source.name)
        bundle.writestr("manifest.json", json.dumps(manifest, indent=2))
    print(f"Packaged {len(agents)} agent(s) -> {archive}")
    return 0
69
+
70
+
71
def manifest_lines(agents: Iterable[BaseAgent]) -> list[str]:
    """Return one JSON-encoded manifest string per agent, in iteration order."""
    lines: list[str] = []
    for entry in agents:
        lines.append(json.dumps(entry.manifest()))
    return lines
weave/cli/main.py ADDED
@@ -0,0 +1,55 @@
1
+ """CLI entry point.
2
+
3
+ Subcommands are dispatched through a lookup map (no if/elif chain). Each handler
4
+ maps parsed args to a command function and returns a process exit code.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import argparse
10
+ import sys
11
+ from collections.abc import Callable, Sequence
12
+
13
+ from weave import __version__
14
+ from weave.cli import commands
15
+ from weave.errors import WeaveError
16
+
17
+ Handler = Callable[[argparse.Namespace], int]
18
+
19
+ _HANDLERS: dict[str, Handler] = {
20
+ "scaffold": lambda a: commands.scaffold(a.name, directory=a.directory),
21
+ "validate": lambda a: commands.validate(a.path),
22
+ "package": lambda a: commands.package(a.path, output=a.output),
23
+ }
24
+
25
+
26
def _build_parser() -> argparse.ArgumentParser:
    """Build the ``weave`` parser: ``--version`` plus a required subcommand.

    Subcommands are ``scaffold``, ``validate``, and ``package``; the chosen one
    is stored on the namespace as ``command``.
    """
    root = argparse.ArgumentParser(
        prog="weave",
        description="composable AI agent framework CLI",
    )
    root.add_argument("--version", action="version", version=f"weave {__version__}")
    subcommands = root.add_subparsers(dest="command", required=True)

    # scaffold <name> [-d DIR]
    scaffold_cmd = subcommands.add_parser("scaffold", help="Create a starter agent file")
    scaffold_cmd.add_argument("name")
    scaffold_cmd.add_argument("-d", "--directory", default=".")

    # validate <path>
    validate_cmd = subcommands.add_parser("validate", help="Validate agents in a module")
    validate_cmd.add_argument("path")

    # package <path> [-o OUTPUT]
    package_cmd = subcommands.add_parser(
        "package", help="Package agents into a portable archive"
    )
    package_cmd.add_argument("path")
    package_cmd.add_argument("-o", "--output", default=None)

    return root
42
+
43
+
44
def main(argv: Sequence[str] | None = None) -> int:
    """Parse *argv*, dispatch to the chosen subcommand, and return its exit code.

    A ``WeaveError`` from the command is printed to stderr and mapped to exit
    code 1; other exceptions propagate.
    """
    namespace = _build_parser().parse_args(argv)
    run_command = _HANDLERS[namespace.command]
    try:
        exit_code = run_command(namespace)
    except WeaveError as failure:
        print(str(failure), file=sys.stderr)
        return 1
    return exit_code
52
+
53
+
54
+ if __name__ == "__main__":
55
+ raise SystemExit(main())
weave/cli/templates.py ADDED
@@ -0,0 +1,21 @@
1
+ """Scaffold templates for the CLI."""
2
+
3
+ from __future__ import annotations
4
+
5
+ AGENT_TEMPLATE = '''"""Weave agent: {name}."""
6
+
7
+ from weave import DataType, agent
8
+
9
+
10
+ @agent(
11
+ name="{name}",
12
+ input=DataType.TEXT,
13
+ output=DataType.TEXT,
14
+ tags=["{name}"],
15
+ # Swap the brain freely: "openai:gpt-4o", "google:gemini-1.5-pro", "ollama:llama3"
16
+ llm="anthropic:claude-opus-4-8",
17
+ )
18
+ async def {ident}(ctx):
19
+ """Describe what this agent does."""
20
+ return await ctx.complete(f"Process this input:\\n{{ctx.input.value}}")
21
+ '''
@@ -0,0 +1,13 @@
1
+ """Connection layer — protocol engine, router, and transforms."""
2
+
3
+ from weave.connection.matcher import Match, Router
4
+ from weave.connection.protocol import ConnectionProtocol
5
+ from weave.connection.transform import get_transform, register_transform
6
+
7
+ __all__ = [
8
+ "ConnectionProtocol",
9
+ "Router",
10
+ "Match",
11
+ "register_transform",
12
+ "get_transform",
13
+ ]
@@ -0,0 +1,51 @@
1
+ """Router / Matcher (PRD §8).
2
+
3
+ Given a source agent's output port, returns the candidate agents whose input port is
4
+ compatible, ranked by capability-tag affinity. Tag affinity uses Jaccard overlap as a
5
+ dependency-free stand-in for the optional embedding similarity pre-check (§5.4.3);
6
+ swap in an embedding scorer via ``score_fn`` without touching this logic.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from collections.abc import Callable, Iterable, Sequence
12
+ from dataclasses import dataclass
13
+
14
+ from weave.agent.base import BaseAgent
15
+ from weave.types.primitives import DataType, is_compatible
16
+
17
+ TagScorer = Callable[[Sequence[str], Sequence[str]], float]
18
+
19
+
20
+ @dataclass(frozen=True, slots=True)
21
+ class Match:
22
+ agent: BaseAgent
23
+ score: float
24
+
25
+
26
def _jaccard(source_tags: Sequence[str], target_tags: Sequence[str]) -> float:
    """Return the Jaccard overlap of the two tag collections.

    Duplicates are ignored. The score is 0.0 when either side has no tags,
    otherwise ``|intersection| / |union|``.
    """
    left = frozenset(source_tags)
    right = frozenset(target_tags)
    if not (left and right):
        return 0.0
    shared = left.intersection(right)
    combined = left.union(right)
    return len(shared) / len(combined)
32
+
33
+
34
class Router:
    """Finds candidates whose input port accepts a source's output, ranked by score."""

    def __init__(self, score_fn: TagScorer = _jaccard) -> None:
        # Pluggable tag scorer; defaults to Jaccard overlap.
        self._score = score_fn

    def match(
        self,
        source: BaseAgent,
        candidates: Iterable[BaseAgent],
    ) -> tuple[Match, ...]:
        """Return compatible candidates as ``Match`` records, highest score first.

        Candidates whose input type is not compatible with the source's output
        type are dropped. Equal scores keep their original candidate order.
        """
        out_type: DataType = source.output_schema.type
        ranked: list[Match] = []
        for candidate in candidates:
            if not is_compatible(out_type, candidate.input_schema.type):
                continue
            affinity = self._score(source.capability_tags, candidate.capability_tags)
            ranked.append(Match(candidate, affinity))
        ranked.sort(key=lambda found: found.score, reverse=True)
        return tuple(ranked)
@@ -0,0 +1,61 @@
1
+ """Connection Protocol Engine (PRD §5.4).
2
+
3
+ Runs the four-step handshake when one agent's output feeds another's input:
4
+ 1. Schema type match (identical or declared-compatible).
5
+ 2. Shape validation (delegated to the schema validator for structured_json).
6
+ 3. Capability pre-check (delegated to the Router; optional, non-blocking here).
7
+ 4. Transform injection (compatible-but-not-identical types get an auto transform).
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from weave.connection.transform import get_transform
13
+ from weave.errors import ConnectionIncompatibleError
14
+ from weave.llm.base import LLMAdapter
15
+ from weave.schema.port import PortSchema
16
+ from weave.schema.validator import validate
17
+ from weave.types.payload import Payload
18
+ from weave.types.primitives import is_compatible, needs_transform
19
+
20
+
21
class ConnectionProtocol:
    """Validates and executes a single agent-to-agent handoff."""

    def check(self, source: PortSchema, target: PortSchema) -> None:
        """Step 1: raise ``ConnectionIncompatibleError`` unless the port types are compatible."""
        if not is_compatible(source.type, target.type):
            raise ConnectionIncompatibleError(
                "Output port is not compatible with the target input port",
                detail=f"'{source.type.value}' cannot connect to '{target.type.value}'",
            )

    async def handoff(
        self,
        payload: Payload,
        source: PortSchema,
        target: PortSchema,
        *,
        brain: LLMAdapter | None = None,
    ) -> Payload:
        """Check compatibility, transform if needed, and return the validator's result.

        *brain* is forwarded to the transform, which may need it for semantic
        conversions.
        """
        self.check(source, target)
        ready = await self._maybe_transform(payload, source, target, brain)
        return validate(ready, target)

    async def _maybe_transform(
        self,
        payload: Payload,
        source: PortSchema,
        target: PortSchema,
        brain: LLMAdapter | None,
    ) -> Payload:
        """Return *payload* as-is, or the output of the registered transform.

        Raises ``ConnectionIncompatibleError`` when a transform is needed but
        none is registered for the type pair.
        """
        if needs_transform(source.type, target.type):
            converter = get_transform(source.type, target.type)
            if converter is not None:
                return await converter(payload, brain)
            raise ConnectionIncompatibleError(
                "No transform registered for compatible types",
                detail=f"missing '{source.type.value}' -> '{target.type.value}'",
            )
        return payload
@@ -0,0 +1,108 @@
1
+ """Transform registry — auto-injected type converters (PRD §5.4.4).
2
+
3
+ When two ports are compatible but not identical, the protocol injects a transform to
4
+ convert the payload. Transforms live in a lookup map keyed by ``(from, to)`` so a new
5
+ conversion is one registration — never an edit to branching logic (Open/Closed).
6
+
7
+ Deterministic conversions need no LLM; semantic ones (e.g. text -> structured_json)
8
+ require a brain and fail fast with an actionable error when none is supplied.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ import re
15
+ from collections.abc import Awaitable, Callable
16
+ from typing import Any
17
+
18
+ from weave.errors import ConnectionIncompatibleError
19
+ from weave.llm.base import LLMAdapter
20
+ from weave.types.payload import Payload
21
+ from weave.types.primitives import DataType
22
+
23
+ Transform = Callable[[Payload, LLMAdapter | None], Awaitable[Payload]]
24
+
25
+ _EXTRACTION_PROMPT = (
26
+ "Convert the following content into a single valid JSON object. "
27
+ "Respond with JSON only, no prose:\n\n{content}"
28
+ )
29
+
30
+ # Strips ```json … ``` / ``` … ``` fences that models often wrap JSON in.
31
+ _FENCE_PATTERN = re.compile(r"```(?:json)?\s*|\s*```", re.IGNORECASE)
32
+
33
+
34
async def _code_to_text(payload: Payload, _: LLMAdapter | None) -> Payload:
    """Re-tag the payload as TEXT; value and metadata are carried over unchanged."""
    retagged = Payload(type=DataType.TEXT, value=payload.value, metadata=payload.metadata)
    return retagged
36
+
37
+
38
async def _json_to_text(payload: Payload, _: LLMAdapter | None) -> Payload:
    """Serialize the payload value with ``json.dumps`` and tag the result as TEXT."""
    serialized = json.dumps(payload.value)
    return Payload(type=DataType.TEXT, value=serialized, metadata=payload.metadata)
40
+
41
+
42
async def _passthrough_to_text(payload: Payload, _: LLMAdapter | None) -> Payload:
    """Convert the payload value with ``str()`` and tag the result as TEXT."""
    as_text = str(payload.value)
    return Payload(type=DataType.TEXT, value=as_text, metadata=payload.metadata)
44
+
45
+
46
async def _stream_to_text(payload: Payload, _: LLMAdapter | None) -> Payload:
    """Drain the async-iterable payload value and join its chunks into one TEXT payload."""
    pieces = []
    async for piece in payload.value:
        pieces.append(piece)
    joined = "".join(pieces)
    return Payload(type=DataType.TEXT, value=joined, metadata=payload.metadata)
49
+
50
+
51
async def _text_to_code(payload: Payload, _: LLMAdapter | None) -> Payload:
    """Re-tag the payload as CODE; value and metadata are carried over unchanged."""
    retagged = Payload(type=DataType.CODE, value=payload.value, metadata=payload.metadata)
    return retagged
53
+
54
+
55
def _parse_json_object(raw: str) -> dict[str, Any]:
    """Parse a JSON object from an LLM reply, tolerating code fences and prose.

    Candidates are tried in order and the first that decodes to a dict wins:

    1. the reply as-is (trimmed), so a clean JSON reply is never altered even
       if its string values contain triple backticks;
    2. the reply with code fences stripped;
    3. the outermost ``{...}`` span of the fence-stripped text.

    Raises:
        ConnectionIncompatibleError: when no candidate parses to a JSON object
            (never a bare ValueError). The detail carries the first 200
            characters of the reply.
    """
    untouched = raw.strip()
    cleaned = _FENCE_PATTERN.sub("", raw).strip()
    candidates = [untouched]
    if cleaned != untouched:
        candidates.append(cleaned)
    start, end = cleaned.find("{"), cleaned.rfind("}")
    if 0 <= start < end:
        candidates.append(cleaned[start : end + 1])
    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        # Valid JSON that is not an object (list, number, ...) does not qualify.
        if isinstance(parsed, dict):
            return parsed
    raise ConnectionIncompatibleError(
        "text -> structured_json transform could not parse a JSON object",
        detail=f"model returned: {raw[:200]}",
    )
78
+
79
+
80
async def _text_to_json(payload: Payload, brain: LLMAdapter | None) -> Payload:
    """Ask the brain to render the payload value as JSON and return it as STRUCTURED_JSON.

    Raises ``ConnectionIncompatibleError`` when no brain is supplied.
    """
    if brain is not None:
        prompt = _EXTRACTION_PROMPT.format(content=payload.value)
        reply = await brain.complete(prompt)
        parsed = _parse_json_object(reply)
        return Payload(type=DataType.STRUCTURED_JSON, value=parsed, metadata=payload.metadata)
    raise ConnectionIncompatibleError(
        "text -> structured_json transform needs an LLM brain",
        detail="pass an llm to the pipeline/runner to enable semantic transforms",
    )
90
+
91
+
92
+ _TRANSFORMS: dict[tuple[DataType, DataType], Transform] = {
93
+ (DataType.CODE, DataType.TEXT): _code_to_text,
94
+ (DataType.STRUCTURED_JSON, DataType.TEXT): _json_to_text,
95
+ (DataType.DOCUMENT, DataType.TEXT): _passthrough_to_text,
96
+ (DataType.STREAM, DataType.TEXT): _stream_to_text,
97
+ (DataType.TEXT, DataType.CODE): _text_to_code,
98
+ (DataType.TEXT, DataType.STRUCTURED_JSON): _text_to_json,
99
+ }
100
+
101
+
102
def register_transform(source: DataType, target: DataType, transform: Transform) -> None:
    """Register *transform* for the ``(source, target)`` pair, replacing any existing entry."""
    pair = (source, target)
    _TRANSFORMS[pair] = transform
105
+
106
+
107
def get_transform(source: DataType, target: DataType) -> Transform | None:
    """Return the transform registered for ``(source, target)``, or ``None`` if absent."""
    pair = (source, target)
    return _TRANSFORMS.get(pair)