substrate-haystack 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""
|
|
2
|
+
substrate-haystack -- SUBSTRATE memory components for Haystack pipelines.
|
|
3
|
+
|
|
4
|
+
Provides retriever and writer components that connect Haystack to
|
|
5
|
+
SUBSTRATE's causal memory, emotional state, and identity continuity.
|
|
6
|
+
"""
|
|
7
|
+
from substrate_haystack.retriever import SubstrateMemoryRetriever
|
|
8
|
+
from substrate_haystack.writer import SubstrateMemoryWriter
|
|
9
|
+
|
|
10
|
+
__all__ = ["SubstrateMemoryRetriever", "SubstrateMemoryWriter"]
|
|
11
|
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Synchronous HTTP client for the SUBSTRATE MCP server (JSON-RPC over HTTP).
|
|
3
|
+
|
|
4
|
+
The SUBSTRATE MCP endpoint accepts standard JSON-RPC 2.0 requests with
|
|
5
|
+
Bearer token authentication. This client wraps that transport into a
|
|
6
|
+
clean Python interface for use by Haystack components.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import logging
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
import httpx
|
|
16
|
+
|
|
17
|
+
# Module-level logger for the MCP transport layer.
logger = logging.getLogger("substrate_haystack.client")

# Hosted SUBSTRATE MCP endpoint used when no base_url is configured.
_DEFAULT_BASE_URL = "https://substrate.garmolabs.com/mcp-server/mcp"
# Per-request HTTP timeout in seconds.
_DEFAULT_TIMEOUT = 30.0
# Protocol version string placed in every JSON-RPC envelope.
_JSONRPC_VERSION = "2.0"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class SubstrateClientError(Exception):
    """Error raised for failed SUBSTRATE MCP requests.

    The numeric JSON-RPC (or HTTP) error code is kept on ``code``;
    ``-1`` means no specific code was reported.
    """

    def __init__(self, message: str, code: int = -1) -> None:
        # Record the code first, then delegate message handling to Exception.
        self.code = code
        super().__init__(message)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass(frozen=True)
class SubstrateClientConfig:
    """Immutable configuration for the SUBSTRATE MCP client."""

    # SUBSTRATE API key, sent as a Bearer token; required (validated by SubstrateClient).
    api_key: str
    # MCP endpoint URL; defaults to the hosted SUBSTRATE server.
    base_url: str = _DEFAULT_BASE_URL
    # Per-request HTTP timeout in seconds.
    timeout: float = _DEFAULT_TIMEOUT
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class SubstrateClient:
    """
    Synchronous JSON-RPC client for the SUBSTRATE MCP server.

    All tool calls go through ``call_tool`` which handles the JSON-RPC
    envelope, authentication, error handling, and response unwrapping.
    """

    def __init__(self, config: SubstrateClientConfig) -> None:
        # Fail fast on a missing key rather than on the first request.
        if not config.api_key:
            raise ValueError("api_key is required -- set SUBSTRATE_API_KEY")
        self._config = config
        # Monotonically increasing JSON-RPC request id shared by all methods.
        self._request_id = 0
        # base_url is deliberately empty on the httpx client; the full
        # endpoint URL (self._config.base_url) is passed to every post().
        self._http = httpx.Client(
            base_url="",
            timeout=config.timeout,
            headers={
                "Content-Type": "application/json",
                "Authorization": f"Bearer {config.api_key}",
            },
        )
        # MCP session id, captured from the first response that supplies one
        # via the Mcp-Session-Id header and replayed on later requests.
        self._session_id: str | None = None

    # -- Public API --------------------------------------------------------

    def call_tool(self, name: str, arguments: dict[str, Any] | None = None) -> Any:
        """
        Call a SUBSTRATE MCP tool by name and return the result content.

        Raises ``SubstrateClientError`` on JSON-RPC errors or HTTP failures.
        """
        self._request_id += 1
        payload = {
            "jsonrpc": _JSONRPC_VERSION,
            "id": self._request_id,
            "method": "tools/call",
            "params": {
                "name": name,
                # JSON-RPC requires an object here, so None becomes {}.
                "arguments": arguments or {},
            },
        }
        return self._send(payload)

    def initialize(self) -> dict[str, Any]:
        """Perform the MCP initialize handshake."""
        # NOTE(review): call_tool does not invoke this automatically --
        # confirm whether the server requires initialize before tools/call.
        self._request_id += 1
        payload = {
            "jsonrpc": _JSONRPC_VERSION,
            "id": self._request_id,
            "method": "initialize",
            "params": {
                "protocolVersion": "2024-11-05",
                "capabilities": {},
                "clientInfo": {
                    "name": "substrate-haystack",
                    "version": "0.1.0",
                },
            },
        }
        return self._send(payload)

    def close(self) -> None:
        """Close the HTTP client and release resources."""
        self._http.close()

    # -- Internal ----------------------------------------------------------

    def _send(self, payload: dict[str, Any]) -> Any:
        """Send a JSON-RPC request and return the unwrapped result."""
        # Per-request headers: only the session id; auth/content-type are
        # already set on the underlying httpx.Client.
        headers: dict[str, str] = {}
        if self._session_id:
            headers["Mcp-Session-Id"] = self._session_id

        try:
            response = self._http.post(
                self._config.base_url,
                json=payload,
                headers=headers,
            )
        except httpx.HTTPError as exc:
            raise SubstrateClientError(f"HTTP request failed: {exc}") from exc

        # Capture the session id (if any) before the status checks below,
        # so it is retained even when the response is an error.
        session_id = response.headers.get("Mcp-Session-Id")
        if session_id:
            self._session_id = session_id

        # Map well-known HTTP statuses to errors with stable codes.
        if response.status_code == 401:
            raise SubstrateClientError("Authentication failed -- check your SUBSTRATE_API_KEY", code=-32000)

        if response.status_code == 429:
            raise SubstrateClientError("Rate limit exceeded -- slow down or upgrade your plan", code=-32029)

        # 200 (result) and 202 (accepted) are the only success statuses.
        if response.status_code not in (200, 202):
            raise SubstrateClientError(
                f"Unexpected HTTP {response.status_code}: {response.text[:200]}",
                code=response.status_code,
            )

        try:
            body = response.json()
        # json.JSONDecodeError subclasses ValueError; the tuple is redundant
        # but harmless.
        except (json.JSONDecodeError, ValueError) as exc:
            raise SubstrateClientError(f"Invalid JSON response: {exc}") from exc

        # Surface JSON-RPC-level errors as exceptions.
        if "error" in body:
            err = body["error"]
            raise SubstrateClientError(
                err.get("message", "Unknown error"),
                code=err.get("code", -1),
            )

        # Unwrap the JSON-RPC envelope; missing result becomes {}.
        return body.get("result", {})

    def __enter__(self) -> SubstrateClient:
        return self

    def __exit__(self, *_: Any) -> None:
        self.close()
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
"""
|
|
2
|
+
SUBSTRATE Memory Retriever for Haystack.
|
|
3
|
+
|
|
4
|
+
A Haystack component that retrieves documents from SUBSTRATE's causal
|
|
5
|
+
memory engine using hybrid search (semantic + keyword), and optionally
|
|
6
|
+
enriches results with the entity's emotional state vector.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import logging
|
|
12
|
+
import os
|
|
13
|
+
from typing import Any, Optional
|
|
14
|
+
|
|
15
|
+
from haystack import Document, component, default_from_dict, default_to_dict
|
|
16
|
+
|
|
17
|
+
from substrate_haystack.client import (
|
|
18
|
+
SubstrateClient,
|
|
19
|
+
SubstrateClientConfig,
|
|
20
|
+
SubstrateClientError,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger("substrate_haystack.retriever")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _extract_text(result: Any) -> str:
|
|
27
|
+
"""Extract text content from a SUBSTRATE MCP tool result."""
|
|
28
|
+
if isinstance(result, str):
|
|
29
|
+
return result
|
|
30
|
+
if isinstance(result, dict):
|
|
31
|
+
content = result.get("content", [])
|
|
32
|
+
if isinstance(content, list):
|
|
33
|
+
parts = [
|
|
34
|
+
block.get("text", "")
|
|
35
|
+
for block in content
|
|
36
|
+
if isinstance(block, dict) and block.get("type") == "text"
|
|
37
|
+
]
|
|
38
|
+
return "\n".join(parts)
|
|
39
|
+
return json.dumps(result, indent=2)
|
|
40
|
+
return str(result)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _parse_json_safe(text: str) -> dict[str, Any]:
|
|
44
|
+
"""Parse JSON from text, returning an empty dict on failure."""
|
|
45
|
+
try:
|
|
46
|
+
return json.loads(text)
|
|
47
|
+
except (json.JSONDecodeError, TypeError):
|
|
48
|
+
return {}
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@component
class SubstrateMemoryRetriever:
    """
    Retrieves documents from SUBSTRATE's causal memory using hybrid search.

    This component calls SUBSTRATE's ``hybrid_search`` tool (falling back
    to ``memory_search`` on the free tier) and returns results as Haystack
    ``Document`` objects. It also fetches the entity's emotional state
    via ``get_emotion_state`` and exposes it as a separate output.

    Usage in a Haystack pipeline::

        from haystack import Pipeline
        from substrate_haystack import SubstrateMemoryRetriever

        retriever = SubstrateMemoryRetriever(api_key="sk_sub_...")

        pipe = Pipeline()
        pipe.add_component("retriever", retriever)
        pipe.add_component("llm", some_generator)
        pipe.connect("retriever.documents", "llm.documents")

        result = pipe.run({"retriever": {"query": "What did we discuss?"}})
    """

    def __init__(
        self,
        api_key: str = "",
        base_url: str = "https://substrate.garmolabs.com/mcp-server/mcp",
        timeout: float = 30.0,
        include_emotion: bool = True,
    ) -> None:
        """
        Initialize the retriever.

        Args:
            api_key: SUBSTRATE API key. Falls back to ``SUBSTRATE_API_KEY`` env var.
            base_url: MCP server endpoint URL.
            timeout: HTTP request timeout in seconds.
            include_emotion: Whether to fetch emotional context alongside results.

        Raises:
            ValueError: If no API key is given and ``SUBSTRATE_API_KEY`` is unset.
        """
        resolved_key = api_key or os.environ.get("SUBSTRATE_API_KEY", "")
        if not resolved_key:
            raise ValueError(
                "SUBSTRATE API key required. Pass api_key= or set SUBSTRATE_API_KEY env var."
            )
        self._api_key = resolved_key
        self._base_url = base_url
        self._timeout = timeout
        self._include_emotion = include_emotion
        # Created lazily on first run() so construction never opens a connection.
        self._client: SubstrateClient | None = None

    def _get_client(self) -> SubstrateClient:
        """Lazy-initialize the HTTP client."""
        if self._client is None:
            self._client = SubstrateClient(
                SubstrateClientConfig(
                    api_key=self._api_key,
                    base_url=self._base_url,
                    timeout=self._timeout,
                )
            )
        return self._client

    @component.output_types(documents=list[Document], entity_state=dict)
    def run(
        self,
        query: str,
        top_k: int = 5,
    ) -> dict[str, Any]:
        """
        Retrieve documents from SUBSTRATE memory.

        Args:
            query: The search query string.
            top_k: Maximum number of documents to return.

        Returns:
            A dict with ``documents`` (list of Haystack Documents) and
            ``entity_state`` (dict with emotional context if enabled).
        """
        client = self._get_client()
        documents = self._search(client, query, top_k)
        # entity_state stays {} when emotion fetching is disabled.
        entity_state: dict[str, Any] = {}

        if self._include_emotion:
            entity_state = self._get_emotion(client)

        return {"documents": documents, "entity_state": entity_state}

    def _search(
        self, client: SubstrateClient, query: str, top_k: int
    ) -> list[Document]:
        """Execute hybrid search and convert results to Documents."""
        try:
            result = client.call_tool(
                "hybrid_search",
                {"query": query, "top_k": top_k},
            )
            return self._parse_results(result)
        except SubstrateClientError as exc:
            # NOTE(review): code -32000 is also the code SubstrateClient._send
            # assigns to HTTP 401 auth failures, so an invalid API key takes
            # the fallback path (and is then swallowed by _fallback_search)
            # instead of raising -- confirm this is intended.
            if exc.code == -32000 or "not available" in str(exc).lower():
                logger.info("hybrid_search unavailable, falling back to memory_search")
                return self._fallback_search(client, query, top_k)
            raise

    def _fallback_search(
        self, client: SubstrateClient, query: str, top_k: int
    ) -> list[Document]:
        """Use basic memory_search when hybrid_search is unavailable."""
        try:
            # memory_search takes no top_k argument; truncate client-side.
            result = client.call_tool("memory_search", {"query": query})
            return self._parse_results(result)[:top_k]
        except SubstrateClientError as exc:
            # Best-effort: an empty result is preferred over failing the pipeline.
            logger.error("Fallback memory_search failed: %s", exc)
            return []

    def _parse_results(self, result: Any) -> list[Document]:
        """Convert MCP tool output into Haystack Document objects."""
        text = _extract_text(result)
        parsed = _parse_json_safe(text)

        # If the result has a structured "results" list, convert each item
        if isinstance(parsed, dict) and "results" in parsed:
            docs: list[Document] = []
            for item in parsed["results"]:
                if isinstance(item, dict):
                    # Prefer "text", then "content"; otherwise keep the raw JSON.
                    content = item.get("text", item.get("content", json.dumps(item)))
                    # Remaining keys become Document metadata.
                    meta = {
                        k: v for k, v in item.items() if k not in ("text", "content")
                    }
                    meta["source"] = "substrate"
                    docs.append(Document(content=content, meta=meta))
                else:
                    # Non-dict items (strings, numbers) are stored verbatim.
                    docs.append(
                        Document(content=str(item), meta={"source": "substrate"})
                    )
            return docs

        # Single text result
        if text.strip():
            return [Document(content=text, meta={"source": "substrate"})]
        return []

    def _get_emotion(self, client: SubstrateClient) -> dict[str, Any]:
        """Fetch the entity's emotional state vector."""
        try:
            result = client.call_tool("get_emotion_state")
            text = _extract_text(result)
            parsed = _parse_json_safe(text)
            # Fall back to the raw text when the payload is not JSON.
            return parsed if parsed else {"raw": text}
        except SubstrateClientError as exc:
            # Emotion context is optional; report the error in-band instead
            # of failing the retrieval.
            logger.warning("Could not fetch emotion state: %s", exc)
            return {"error": str(exc)}

    def to_dict(self) -> dict[str, Any]:
        """Serialize for Haystack pipeline export."""
        # NOTE(review): api_key is exported in plain text here; exported
        # pipelines will embed the secret -- consider Haystack's Secret
        # handling in a future version.
        return default_to_dict(
            self,
            api_key=self._api_key,
            base_url=self._base_url,
            timeout=self._timeout,
            include_emotion=self._include_emotion,
        )

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> SubstrateMemoryRetriever:
        """Deserialize from Haystack pipeline export."""
        return default_from_dict(cls, data)
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
"""
|
|
2
|
+
SUBSTRATE Memory Writer for Haystack.
|
|
3
|
+
|
|
4
|
+
A Haystack component that stores documents into SUBSTRATE's causal memory
|
|
5
|
+
engine via the ``respond`` tool. Each document is processed through the
|
|
6
|
+
entity's causal memory, values, and reflection layers.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import logging
|
|
12
|
+
import os
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
from haystack import Document, component, default_from_dict, default_to_dict
|
|
16
|
+
|
|
17
|
+
from substrate_haystack.client import (
|
|
18
|
+
SubstrateClient,
|
|
19
|
+
SubstrateClientConfig,
|
|
20
|
+
SubstrateClientError,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger("substrate_haystack.writer")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _extract_text(result: Any) -> str:
|
|
27
|
+
"""Extract text content from a SUBSTRATE MCP tool result."""
|
|
28
|
+
if isinstance(result, str):
|
|
29
|
+
return result
|
|
30
|
+
if isinstance(result, dict):
|
|
31
|
+
content = result.get("content", [])
|
|
32
|
+
if isinstance(content, list):
|
|
33
|
+
parts = [
|
|
34
|
+
block.get("text", "")
|
|
35
|
+
for block in content
|
|
36
|
+
if isinstance(block, dict) and block.get("type") == "text"
|
|
37
|
+
]
|
|
38
|
+
return "\n".join(parts)
|
|
39
|
+
return json.dumps(result, indent=2)
|
|
40
|
+
return str(result)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@component
class SubstrateMemoryWriter:
    """
    Writes documents into SUBSTRATE's causal memory via the ``respond`` tool.

    Each document's content is sent as a message to the SUBSTRATE entity,
    which processes it through its full cognitive pipeline (causal memory,
    values alignment, reflection layers) before storing.

    Usage in a Haystack pipeline::

        from haystack import Pipeline
        from substrate_haystack import SubstrateMemoryWriter

        writer = SubstrateMemoryWriter(api_key="sk_sub_...")

        pipe = Pipeline()
        pipe.add_component("writer", writer)

        pipe.run({"writer": {"documents": [Document(content="Remember this.")]}})
    """

    def __init__(
        self,
        api_key: str = "",
        base_url: str = "https://substrate.garmolabs.com/mcp-server/mcp",
        timeout: float = 30.0,
    ) -> None:
        """
        Initialize the writer.

        Args:
            api_key: SUBSTRATE API key. Falls back to ``SUBSTRATE_API_KEY`` env var.
            base_url: MCP server endpoint URL.
            timeout: HTTP request timeout in seconds.

        Raises:
            ValueError: If no API key is given and ``SUBSTRATE_API_KEY`` is unset.
        """
        resolved_key = api_key or os.environ.get("SUBSTRATE_API_KEY", "")
        if not resolved_key:
            raise ValueError(
                "SUBSTRATE API key required. Pass api_key= or set SUBSTRATE_API_KEY env var."
            )
        self._api_key = resolved_key
        self._base_url = base_url
        self._timeout = timeout
        # Created lazily on first run() so construction never opens a connection.
        self._client: SubstrateClient | None = None

    def _get_client(self) -> SubstrateClient:
        """Lazy-initialize the HTTP client."""
        if self._client is None:
            self._client = SubstrateClient(
                SubstrateClientConfig(
                    api_key=self._api_key,
                    base_url=self._base_url,
                    timeout=self._timeout,
                )
            )
        return self._client

    @component.output_types(documents_written=int, responses=list[str])
    def run(self, documents: list[Document]) -> dict[str, Any]:
        """
        Store documents in SUBSTRATE memory.

        Each document is sent to the entity via ``respond``. Document
        metadata is serialized and appended to the message for context.

        Args:
            documents: List of Haystack Documents to store.

        Returns:
            A dict with ``documents_written`` (count) and ``responses``
            (the entity's response for each stored document).
        """
        client = self._get_client()
        responses: list[str] = []
        written = 0

        for doc in documents:
            message = doc.content or ""
            # Empty/whitespace-only documents are skipped entirely: no call
            # is made and no entry is added to responses.
            if not message.strip():
                logger.warning("Skipping empty document")
                continue

            # Append metadata if present
            if doc.meta:
                filtered_meta = {
                    k: v
                    for k, v in doc.meta.items()
                    if k != "source"  # Don't echo back our own source tag
                }
                if filtered_meta:
                    # NOTE(review): assumes metadata values are JSON-serializable
                    # -- a non-serializable value would raise TypeError here.
                    message = f"{message}\n\n[metadata: {json.dumps(filtered_meta)}]"

            try:
                result = client.call_tool("respond", {"message": message})
                response_text = _extract_text(result)
                responses.append(response_text)
                written += 1
            except SubstrateClientError as exc:
                # A failed document does not abort the batch; the error is
                # recorded in-band and excluded from the written count.
                logger.error("Failed to store document: %s", exc)
                responses.append(f"[error: {exc}]")

        return {"documents_written": written, "responses": responses}

    def to_dict(self) -> dict[str, Any]:
        """Serialize for Haystack pipeline export."""
        # NOTE(review): api_key is exported in plain text here; exported
        # pipelines will embed the secret -- consider Haystack's Secret
        # handling in a future version.
        return default_to_dict(
            self,
            api_key=self._api_key,
            base_url=self._base_url,
            timeout=self._timeout,
        )

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> SubstrateMemoryWriter:
        """Deserialize from Haystack pipeline export."""
        return default_from_dict(cls, data)
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: substrate-haystack
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: SUBSTRATE memory components for Haystack -- causal memory, emotion, and identity retrieval and storage.
|
|
5
|
+
Project-URL: Homepage, https://garmolabs.com/substrate.html
|
|
6
|
+
Project-URL: Documentation, https://github.com/PKaldone/substrate-mcp
|
|
7
|
+
Project-URL: Repository, https://github.com/PKaldone/substrate-mcp
|
|
8
|
+
Project-URL: Issues, https://garmolabs.com/substrate.html
|
|
9
|
+
Author-email: Garmo Labs <hello@garmolabs.com>
|
|
10
|
+
License-Expression: MIT
|
|
11
|
+
Keywords: ai,causal-memory,emotional-state,garmo-labs,haystack,mcp,memory,retriever,substrate
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
|
+
Classifier: Typing :: Typed
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Requires-Dist: haystack-ai>=2.0
|
|
24
|
+
Requires-Dist: httpx>=0.25
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
|
|
27
|
+
# substrate-haystack
|
|
28
|
+
|
|
29
|
+
SUBSTRATE memory components for [Haystack](https://haystack.deepset.ai/) pipelines. Retriever and writer that connect Haystack to [SUBSTRATE](https://garmolabs.com/substrate.html)'s causal memory, emotional state, and identity continuity.
|
|
30
|
+
|
|
31
|
+
## What SUBSTRATE adds to Haystack
|
|
32
|
+
|
|
33
|
+
- **Causal memory retrieval** -- episodes linked by cause-effect rules, not just vector similarity
|
|
34
|
+
- **Emotional context output** -- retriever exposes the entity's emotional state alongside documents
|
|
35
|
+
- **Persistent storage** -- writer stores documents through the entity's full cognitive pipeline
|
|
36
|
+
- **Hybrid search** -- semantic + keyword retrieval across the entity's full knowledge store
|
|
37
|
+
- **Identity continuity** -- cryptographically verified entity state persists across sessions
|
|
38
|
+
|
|
39
|
+
## Installation
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
pip install substrate-haystack
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Quick start: Retrieval pipeline
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
import os
|
|
49
|
+
from haystack import Pipeline
|
|
50
|
+
from haystack.components.generators import OpenAIGenerator
|
|
51
|
+
from haystack.components.builders import PromptBuilder
|
|
52
|
+
from substrate_haystack import SubstrateMemoryRetriever
|
|
53
|
+
|
|
54
|
+
retriever = SubstrateMemoryRetriever(
|
|
55
|
+
api_key=os.environ["SUBSTRATE_API_KEY"],
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
prompt_builder = PromptBuilder(
|
|
59
|
+
template="""
|
|
60
|
+
Context from SUBSTRATE memory:
|
|
61
|
+
{% for doc in documents %}
|
|
62
|
+
- {{ doc.content }}
|
|
63
|
+
{% endfor %}
|
|
64
|
+
|
|
65
|
+
Entity emotional state: {{ entity_state }}
|
|
66
|
+
|
|
67
|
+
Question: {{ query }}
|
|
68
|
+
Answer:
|
|
69
|
+
"""
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
generator = OpenAIGenerator(model="gpt-4o")
|
|
73
|
+
|
|
74
|
+
pipe = Pipeline()
|
|
75
|
+
pipe.add_component("retriever", retriever)
|
|
76
|
+
pipe.add_component("prompt_builder", prompt_builder)
|
|
77
|
+
pipe.add_component("llm", generator)
|
|
78
|
+
|
|
79
|
+
pipe.connect("retriever.documents", "prompt_builder.documents")
|
|
80
|
+
pipe.connect("retriever.entity_state", "prompt_builder.entity_state")
|
|
81
|
+
pipe.connect("prompt_builder", "llm")
|
|
82
|
+
|
|
83
|
+
result = pipe.run({
|
|
84
|
+
"retriever": {"query": "What patterns have we identified?"},
|
|
85
|
+
"prompt_builder": {"query": "What patterns have we identified?"},
|
|
86
|
+
})
|
|
87
|
+
|
|
88
|
+
print(result["llm"]["replies"][0])
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Quick start: Writing to memory
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
import os
from haystack import Document, Pipeline
|
|
95
|
+
from substrate_haystack import SubstrateMemoryWriter
|
|
96
|
+
|
|
97
|
+
writer = SubstrateMemoryWriter(
|
|
98
|
+
api_key=os.environ["SUBSTRATE_API_KEY"],
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
pipe = Pipeline()
|
|
102
|
+
pipe.add_component("writer", writer)
|
|
103
|
+
|
|
104
|
+
result = pipe.run({
|
|
105
|
+
"writer": {
|
|
106
|
+
"documents": [
|
|
107
|
+
Document(content="The team decided to use event sourcing for the audit trail."),
|
|
108
|
+
Document(content="Performance testing showed 99th percentile at 45ms."),
|
|
109
|
+
]
|
|
110
|
+
}
|
|
111
|
+
})
|
|
112
|
+
|
|
113
|
+
print(f"Stored {result['writer']['documents_written']} documents")
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
## Components
|
|
117
|
+
|
|
118
|
+
### SubstrateMemoryRetriever
|
|
119
|
+
|
|
120
|
+
Retrieves documents from SUBSTRATE memory using hybrid search.
|
|
121
|
+
|
|
122
|
+
**Inputs:**
|
|
123
|
+
| Name | Type | Description |
|
|
124
|
+
|---------|-------|--------------------------------------|
|
|
125
|
+
| `query` | `str` | Search query string |
|
|
126
|
+
| `top_k` | `int` | Maximum results to return (default 5)|
|
|
127
|
+
|
|
128
|
+
**Outputs:**
|
|
129
|
+
| Name | Type | Description |
|
|
130
|
+
|----------------|------------------|------------------------------------------|
|
|
131
|
+
| `documents` | `list[Document]` | Retrieved documents from SUBSTRATE memory|
|
|
132
|
+
| `entity_state` | `dict` | Entity emotional state (UASV dimensions) |
|
|
133
|
+
|
|
134
|
+
**Parameters:**
|
|
135
|
+
| Parameter | Default | Description |
|
|
136
|
+
|-------------------|---------------------------------------------------|-------------------------------------|
|
|
137
|
+
| `api_key` | `$SUBSTRATE_API_KEY` | Your SUBSTRATE API key |
|
|
138
|
+
| `base_url` | `https://substrate.garmolabs.com/mcp-server/mcp` | MCP server endpoint |
|
|
139
|
+
| `timeout` | `30.0` | HTTP request timeout (seconds) |
|
|
140
|
+
| `include_emotion` | `True` | Fetch emotional context with results|
|
|
141
|
+
|
|
142
|
+
### SubstrateMemoryWriter
|
|
143
|
+
|
|
144
|
+
Stores documents into SUBSTRATE memory via the `respond` tool.
|
|
145
|
+
|
|
146
|
+
**Inputs:**
|
|
147
|
+
| Name | Type | Description |
|
|
148
|
+
|-------------|------------------|------------------------------|
|
|
149
|
+
| `documents` | `list[Document]` | Documents to store in memory |
|
|
150
|
+
|
|
151
|
+
**Outputs:**
|
|
152
|
+
| Name | Type | Description |
|
|
153
|
+
|---------------------|--------------|----------------------------------------|
|
|
154
|
+
| `documents_written` | `int` | Number of documents successfully stored|
|
|
155
|
+
| `responses` | `list[str]` | Entity responses for each document |
|
|
156
|
+
|
|
157
|
+
## API key
|
|
158
|
+
|
|
159
|
+
Get your API key at [garmolabs.com](https://garmolabs.com). The free tier includes `memory_search` and `get_emotion_state`. Upgrade to Pro for `hybrid_search` and `get_trust_state`.
|
|
160
|
+
|
|
161
|
+
## License
|
|
162
|
+
|
|
163
|
+
MIT -- see [LICENSE](LICENSE) for details.
|
|
164
|
+
|
|
165
|
+
Built by [Garmo Labs](https://garmolabs.com).
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
substrate_haystack/__init__.py,sha256=dJwNnK_IBoyNh3QdOL0sYNTzXr5iMqW0vKZcPMcDXvo,431
|
|
2
|
+
substrate_haystack/client.py,sha256=jyPDS1-gbI7izPtXTrKbVC5c_GyVjDAIb5rdqFJ4IYk,4978
|
|
3
|
+
substrate_haystack/retriever.py,sha256=OswJNX2nK_fI1o7MSY7MjPRdVz9WiKQ-_eWZuNzTUi0,7782
|
|
4
|
+
substrate_haystack/writer.py,sha256=MwDs1381JgJv0kX7PtsyJn3vXnRmwxxZ-wOadfNKWy8,5247
|
|
5
|
+
substrate_haystack-0.1.1.dist-info/METADATA,sha256=w2C9es5VM1K5SdAO8TQ97bgIgyadVZJsrckTqITFlEk,6143
|
|
6
|
+
substrate_haystack-0.1.1.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
7
|
+
substrate_haystack-0.1.1.dist-info/RECORD,,
|