somm-langchain 0.6.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,38 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.egg-info/
6
+ *.egg
7
+ build/
8
+ dist/
9
+
10
+ # Environments
11
+ .venv/
12
+ .env
13
+ .env.*
14
+
15
+ # Tooling caches
16
+ .pytest_cache/
17
+ .mypy_cache/
18
+ .ruff_cache/
19
+
20
+ # Local Claude session id log (per-machine, not source of truth)
21
+ sessions.txt
22
+
23
+ # Local data (never commit telemetry)
24
+ .somm/
25
+ *.sqlite
26
+ *.sqlite-wal
27
+ *.sqlite-shm
28
+
29
+ # Author-local notes not for open source
30
+ notes/
31
+ .claude/
32
+
33
+ # Editor
34
+ .vscode/
35
+ .idea/
36
+ *.swp
37
+
38
+ # Archived internal design/process docs (see docs/BLUEPRINT.md for the public design doc)
@@ -0,0 +1,41 @@
1
+ Metadata-Version: 2.4
2
+ Name: somm-langchain
3
+ Version: 0.6.1
4
+ Summary: LangChain BaseChatModel adapter for somm — lets LangChain/LangGraph/deepagents apps use somm as their LLM substrate
5
+ Project-URL: Homepage, https://github.com/lavallee/somm
6
+ Project-URL: Repository, https://github.com/lavallee/somm
7
+ Project-URL: Issues, https://github.com/lavallee/somm/issues
8
+ Project-URL: Changelog, https://github.com/lavallee/somm/blob/main/CHANGELOG.md
9
+ Author: Marc Lavallee
10
+ License: MIT
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Classifier: Topic :: Software Development :: Libraries
16
+ Requires-Python: >=3.12
17
+ Requires-Dist: langchain-core>=0.3
18
+ Requires-Dist: somm-core==0.6.1
19
+ Requires-Dist: somm==0.6.1
20
+ Description-Content-Type: text/markdown
21
+
22
+ # somm-langchain
23
+
24
+ LangChain `BaseChatModel` adapter for [`somm`](https://github.com/lavallee/somm).
25
+
26
+ Lets LangChain / LangGraph / `deepagents` apps use somm as their LLM substrate — telemetry, routing, cost tracking, provider fallback, sommelier model memory — all without changing the agent-framework call sites.
27
+
28
+ ```python
29
+ import somm
30
+ from somm_langchain import SommChatModel
31
+
32
+ llm = somm.llm(project="my_agent")
33
+ chat = SommChatModel(somm_llm=llm, workload="agent_thinking")
34
+
35
+ from deepagents import create_deep_agent
36
+ agent = create_deep_agent(model=chat, tools=[...])
37
+ ```
38
+
39
+ `SommChatModel` supports `bind_tools()` and routes tool-using calls through somm's neutral tool-calling shape (see [docs/tool-calling.md](https://github.com/lavallee/somm/blob/main/docs/tool-calling.md)). The adapter is thin: message translation in, `somm.llm().generate()` through, `ChatGeneration` out.
40
+
41
+ Built for agent orchestrators that run on `deepagents` and similar LangChain-based stacks, which mandate a tool-calling `BaseChatModel`.
@@ -0,0 +1,20 @@
1
+ # somm-langchain
2
+
3
+ LangChain `BaseChatModel` adapter for [`somm`](https://github.com/lavallee/somm).
4
+
5
+ Lets LangChain / LangGraph / `deepagents` apps use somm as their LLM substrate — telemetry, routing, cost tracking, provider fallback, sommelier model memory — all without changing the agent-framework call sites.
6
+
7
+ ```python
8
+ import somm
9
+ from somm_langchain import SommChatModel
10
+
11
+ llm = somm.llm(project="my_agent")
12
+ chat = SommChatModel(somm_llm=llm, workload="agent_thinking")
13
+
14
+ from deepagents import create_deep_agent
15
+ agent = create_deep_agent(model=chat, tools=[...])
16
+ ```
17
+
18
+ `SommChatModel` supports `bind_tools()` and routes tool-using calls through somm's neutral tool-calling shape (see [docs/tool-calling.md](https://github.com/lavallee/somm/blob/main/docs/tool-calling.md)). The adapter is thin: message translation in, `somm.llm().generate()` through, `ChatGeneration` out.
19
+
20
+ Built for agent orchestrators that run on `deepagents` and similar LangChain-based stacks, which mandate a tool-calling `BaseChatModel`.
@@ -0,0 +1,33 @@
1
+ [project]
2
+ name = "somm-langchain"
3
+ version = "0.6.1"
4
+ description = "LangChain BaseChatModel adapter for somm — lets LangChain/LangGraph/deepagents apps use somm as their LLM substrate"
5
+ requires-python = ">=3.12"
6
+ license = { text = "MIT" }
7
+ readme = "README.md"
8
+ authors = [{ name = "Marc Lavallee" }]
9
+ classifiers = [
10
+ "Development Status :: 4 - Beta",
11
+ "License :: OSI Approved :: MIT License",
12
+ "Programming Language :: Python :: 3.12",
13
+ "Programming Language :: Python :: 3.13",
14
+ "Topic :: Software Development :: Libraries",
15
+ ]
16
+ dependencies = [
17
+ "somm-core==0.6.1",
18
+ "somm==0.6.1",
19
+ "langchain-core>=0.3",
20
+ ]
21
+
22
+ [project.urls]
23
+ Homepage = "https://github.com/lavallee/somm"
24
+ Repository = "https://github.com/lavallee/somm"
25
+ Issues = "https://github.com/lavallee/somm/issues"
26
+ Changelog = "https://github.com/lavallee/somm/blob/main/CHANGELOG.md"
27
+
28
+ [build-system]
29
+ requires = ["hatchling"]
30
+ build-backend = "hatchling.build"
31
+
32
+ [tool.hatch.build.targets.wheel]
33
+ packages = ["src/somm_langchain"]
@@ -0,0 +1,5 @@
1
+ """somm-langchain — LangChain BaseChatModel adapter for somm."""
2
+
3
+ from somm_langchain.chat_model import SommChatModel
4
+
5
+ __all__ = ["SommChatModel"]
@@ -0,0 +1,295 @@
1
+ """SommChatModel — LangChain BaseChatModel implementation that routes through somm.
2
+
3
+ Lets LangChain / LangGraph / deepagents apps treat somm as their LLM substrate.
4
+ Thin adapter: message translation in, somm.llm().generate() through, ChatGeneration out.
5
+
6
+ Tool-calling routes through somm's neutral tool shape (see docs/tool-calling.md),
7
+ which is currently implemented for the Anthropic and OpenAI-compatible providers; Gemini and Ollama support is to follow.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from typing import Any, ClassVar, cast
13
+
14
+ from langchain_core.callbacks.manager import CallbackManagerForLLMRun
15
+ from langchain_core.language_models.chat_models import BaseChatModel
16
+ from langchain_core.messages import (
17
+ AIMessage,
18
+ BaseMessage,
19
+ HumanMessage,
20
+ SystemMessage,
21
+ ToolMessage,
22
+ )
23
+ from langchain_core.outputs import ChatGeneration, ChatResult
24
+ from langchain_core.runnables import Runnable
25
+ from langchain_core.tools import BaseTool
26
+ from langchain_core.utils.function_calling import convert_to_openai_tool
27
+ from pydantic import ConfigDict
28
+ from somm.client import SommLLM
29
+ from somm_core import Outcome
30
+
31
+
32
class SommChatModel(BaseChatModel):
    """LangChain chat model that delegates to a `SommLLM`.

    Usage:
    ```python
    import somm
    from somm_langchain import SommChatModel

    llm = somm.llm(project="my_agent")
    chat = SommChatModel(somm_llm=llm, workload="agent_thinking")
    ```

    Pass to `deepagents.create_deep_agent(model=chat, ...)` or any LangChain
    runnable expecting a chat model. `bind_tools()` works — tool schemas are
    converted to somm-neutral form and routed via `SommLLM.generate(tools=...)`.

    All telemetry, provider routing, cost tracking, and fallback semantics are
    somm's — this adapter adds no behavior of its own.
    """

    # Allow non-pydantic types (SommLLM) as fields.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    somm_llm: SommLLM
    workload: str = "agent"
    # Pin a specific (model, provider) when set; otherwise somm's router picks.
    somm_model: str | None = None
    somm_provider: str | None = None
    temperature: float = 0.2
    max_tokens: int = 4096

    # Whether SommLLM.generate() failures (outcome != OK) should raise a
    # RuntimeError or return as an AIMessage with the error in
    # response_metadata ("outcome", "error_kind", "error_detail").
    # Default: raise — matches LangChain convention so retry /
    # circuit-breaker middleware in the agent stack engages.
    raise_on_failure: bool = True

    # ClassVar so pydantic doesn't treat it as a field
    LC_ROLE_BLOCK_KEYS: ClassVar[set[str]] = {"text", "tool_use", "tool_result", "image"}

    # ------------------------------------------------------------------
    # LangChain BaseChatModel interface

    @property
    def _llm_type(self) -> str:
        """Identifier LangChain uses to label this model type."""
        return "somm"

    def _generate(
        self,
        messages: list[BaseMessage],
        stop: list[str] | None = None,
        run_manager: CallbackManagerForLLMRun | None = None,
        **kwargs: Any,
    ) -> ChatResult:
        """Run one chat completion through `SommLLM.generate()`.

        Args:
            messages: LangChain conversation history to translate and send.
            stop: Accepted for interface compatibility; this adapter does not
                forward it to somm.
            run_manager: Accepted for interface compatibility; unused here.
            **kwargs: Per-call overrides. Recognized keys are `tools`,
                `tool_choice`, `model`, `temperature`, and `max_tokens`.

        Returns:
            A `ChatResult` holding a single `ChatGeneration` whose `AIMessage`
            carries the text, tool calls, somm provenance in
            `response_metadata`, and token counts in `usage_metadata`.

        Raises:
            RuntimeError: If the somm outcome is not OK and
                `raise_on_failure` is true.
        """
        system, somm_messages = self._translate_messages(messages)

        # Tools come from bind_tools() via Runnable.bind kwargs. They were
        # converted to OpenAI shape there; unwrap to somm-neutral here.
        raw_tools = kwargs.get("tools") or []
        somm_tools = [self._unwrap_oai_tool(t) for t in raw_tools]

        somm_tool_choice = self._translate_tool_choice_lc_to_somm(
            kwargs.get("tool_choice")
        )

        result = self.somm_llm.generate(
            # `prompt` is required by somm's signature but ignored when
            # `messages` is set — we pass an empty string as a marker.
            prompt="",
            messages=somm_messages or None,
            system=system,
            workload=self.workload,
            model=kwargs.get("model") or self.somm_model,
            provider=self.somm_provider,
            temperature=kwargs.get("temperature", self.temperature),
            max_tokens=kwargs.get("max_tokens", self.max_tokens),
            tools=somm_tools if somm_tools else None,
            tool_choice=somm_tool_choice,
        )

        failed = result.outcome != Outcome.OK
        if failed and self.raise_on_failure:
            raise RuntimeError(
                f"somm call failed: workload={self.workload} "
                f"outcome={result.outcome.value} kind={result.error_kind} — "
                f"{result.error_detail}"
            )

        # `or []` keeps a missing tool-call list from breaking iteration.
        lc_tool_calls = [
            {"name": tc.name, "args": tc.arguments, "id": tc.id}
            for tc in (result.tool_calls or [])
        ]

        # Preserve reasoning_content (DeepSeek v4 thinking models) in
        # additional_kwargs so it survives in LangGraph's message history and
        # can be echoed back on the next turn (_translate_ai_message). DeepSeek
        # 400s on the 2nd turn of a tool-calling loop without it.
        additional_kwargs: dict[str, Any] = {}
        if result.reasoning_content:
            additional_kwargs["reasoning_content"] = result.reasoning_content

        response_metadata: dict[str, Any] = {
            "provider": result.provider,
            "model": result.model,
            "tokens_in": result.tokens_in,
            "tokens_out": result.tokens_out,
            "latency_ms": result.latency_ms,
            "cost_usd": result.cost_usd,
            "stop_reason": result.stop_reason,
            "outcome": result.outcome.value,
            "call_id": result.call_id,
        }
        if failed:
            # Non-raising failure path: surface the same error details the
            # RuntimeError message would have carried, so callers that opted
            # out of exceptions can still see what went wrong.
            response_metadata["error_kind"] = result.error_kind
            response_metadata["error_detail"] = result.error_detail

        # Treat missing token counts as zero so the sum cannot fail.
        tokens_in = result.tokens_in or 0
        tokens_out = result.tokens_out or 0

        ai_message = AIMessage(
            content=result.text,
            tool_calls=lc_tool_calls,
            additional_kwargs=additional_kwargs,
            response_metadata=response_metadata,
            usage_metadata={
                "input_tokens": tokens_in,
                "output_tokens": tokens_out,
                "total_tokens": tokens_in + tokens_out,
            },
        )
        return ChatResult(generations=[ChatGeneration(message=ai_message)])

    def bind_tools(
        self,
        tools: list[dict[str, Any] | type | BaseTool],
        *,
        tool_choice: str | dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> Runnable:
        """Bind tools to this model. Returns a Runnable that injects them on call.

        Tools are converted to OpenAI shape (the most common cross-LangChain
        format) here; `_generate` unwraps them to somm-neutral before calling
        SommLLM. This keeps interop wide: any LangChain-shaped tool spec works.

        Args:
            tools: Tool specs accepted by `convert_to_openai_tool`.
            tool_choice: Optional tool-choice directive; forwarded as a bound
                kwarg only when it is not None.
            **kwargs: Extra keyword arguments forwarded to `self.bind`.
        """
        formatted_tools = [convert_to_openai_tool(t) for t in tools]
        if tool_choice is not None:
            kwargs["tool_choice"] = tool_choice
        return self.bind(tools=formatted_tools, **kwargs)

    # ------------------------------------------------------------------
    # Translation helpers — LangChain ⇄ somm-neutral

    def _translate_messages(
        self, messages: list[BaseMessage]
    ) -> tuple[str, list[dict[str, Any]]]:
        """LangChain messages → (system, somm-neutral messages).

        somm takes system separately, so SystemMessage(s) are extracted and
        joined into the `system` field. Remaining messages go into the
        Anthropic-shaped neutral format. The OpenAI-compat provider adapter
        translates the neutral shape back into OpenAI's tool_calls + role:tool
        message form at the provider layer.
        """
        system_parts: list[str] = []
        out: list[dict[str, Any]] = []
        for m in messages:
            if isinstance(m, SystemMessage):
                system_parts.append(self._stringify_content(m.content))
            elif isinstance(m, HumanMessage):
                # Human content is forwarded untouched (string or block list).
                out.append({"role": "user", "content": m.content})
            elif isinstance(m, AIMessage):
                out.append(self._translate_ai_message(m))
            elif isinstance(m, ToolMessage):
                # Tool results travel as a user turn with a tool_result block.
                out.append(
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "tool_result",
                                "tool_use_id": m.tool_call_id,
                                "content": self._stringify_content(m.content),
                            }
                        ],
                    }
                )
            else:
                # Unknown message subclass — best-effort string forward
                out.append(
                    {"role": "user", "content": self._stringify_content(m.content)}
                )
        # Empty system fragments are skipped so they add no blank separators.
        return "\n\n".join(p for p in system_parts if p), out

    def _translate_ai_message(self, m: AIMessage) -> dict[str, Any]:
        """AIMessage → somm-neutral assistant message with optional tool_use blocks."""
        blocks: list[dict[str, Any]] = []
        text = self._stringify_content(m.content)
        if text:
            blocks.append({"type": "text", "text": text})
        for tc in m.tool_calls or []:
            blocks.append(
                {
                    "type": "tool_use",
                    "id": tc.get("id") or "",
                    "name": tc.get("name") or "",
                    "input": tc.get("args") or {},
                }
            )
        # Echo reasoning_content (DeepSeek v4 thinking models) back on the
        # assistant turn — required or DeepSeek 400s on the next turn. Carried
        # as a top-level key the OpenAI-compat provider emits and the Anthropic
        # provider strips. A turn that carries reasoning always keeps the
        # block-list form, so the collapse below never drops it.
        reasoning = (m.additional_kwargs or {}).get("reasoning_content")

        # Collapse to plain-string content when the only block is text.
        if len(blocks) == 1 and blocks[0].get("type") == "text" and not reasoning:
            return {"role": "assistant", "content": cast(str, blocks[0]["text"])}
        msg: dict[str, Any] = {"role": "assistant", "content": blocks}
        if reasoning:
            msg["reasoning_content"] = reasoning
        return msg

    @staticmethod
    def _stringify_content(content: Any) -> str:
        """Flatten LangChain message content into a plain string.

        Strings pass through. For a list, text blocks (`{"type": "text"}`) and
        bare strings are concatenated; every other block is ignored. `None`
        becomes the empty string and anything else goes through `str()`.
        """
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            parts: list[str] = []
            for block in content:
                if isinstance(block, dict) and block.get("type") == "text":
                    parts.append(block.get("text", ""))
                elif isinstance(block, str):
                    parts.append(block)
            return "".join(parts)
        return str(content) if content is not None else ""

    @staticmethod
    def _unwrap_oai_tool(t: dict[str, Any]) -> dict[str, Any]:
        """OpenAI `{type:function, function:{...}}` → somm-neutral
        `{name, description, parameters}`. Pass through if already neutral."""
        if t.get("type") == "function" and isinstance(t.get("function"), dict):
            # Shallow copy so the bound tool spec is never mutated downstream.
            return dict(t["function"])
        return t

    @staticmethod
    def _translate_tool_choice_lc_to_somm(
        choice: str | dict[str, Any] | bool | None,
    ) -> str | dict[str, Any] | None:
        """LangChain tool_choice (per bind_tools convention) → somm-neutral.

        LangChain accepts: None, "auto", "any", "none", "required" (OpenAI
        alias for any), a tool name string, a boolean, or a dict like
        `{"type":"function","function":{"name":"X"}}` / `{"type":"tool","name":"X"}`.

        Returns:
            None for no constraint, one of "auto" / "any" / "none", or a
            `{"type": "tool", "name": ...}` dict. Dicts that are neither
            somm-neutral nor a named OpenAI function are returned unchanged.
        """
        if choice is None:
            return None
        if isinstance(choice, bool):
            # True means "must call some tool"; False means no constraint.
            # Without this branch a boolean matched nothing below and was
            # silently dropped.
            return "any" if choice else None
        if isinstance(choice, str):
            if choice in ("auto", "any", "none"):
                return choice
            if choice == "required":
                return "any"  # somm uses "any" for "must call SOME tool"
            # Anything else: assume it's a specific tool name
            return {"type": "tool", "name": choice}
        if isinstance(choice, dict):
            # Already somm-neutral?
            if choice.get("type") == "tool" and "name" in choice:
                return choice
            # OpenAI-shaped — extract the function name
            if choice.get("type") == "function":
                fn = choice.get("function") or {}
                name = fn.get("name")
                if name:
                    return {"type": "tool", "name": name}
            return choice
        # Unsupported type: impose no constraint (same result as before).
        return None
File without changes
@@ -0,0 +1,367 @@
1
+ """Tests for SommChatModel — LangChain ⇄ somm translation, bind_tools, error path."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ import pytest
8
+ from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
9
+ from langchain_core.tools import tool
10
+ from somm.client import SommLLM
11
+ from somm.providers.base import ProviderHealth, SommResponse
12
+ from somm.providers.base import ToolCall as ProviderToolCall
13
+ from somm_core.config import Config
14
+ from somm_langchain import SommChatModel
15
+
16
+ # ---------------------------------------------------------------------------
17
+ # Recording fake provider — captures every SommRequest it sees
18
+
19
+
20
class _RecordingProvider:
    """Fake somm provider that logs each request and replies with canned data."""

    name = "recorder"

    def __init__(
        self,
        text: str = "",
        tool_calls: list[ProviderToolCall] | None = None,
        stop_reason: str = "end_turn",
    ):
        # Canned reply pieces returned by every generate() call.
        self._text = text
        self._tool_calls = tool_calls if tool_calls else []
        self._stop_reason = stop_reason
        # Every request seen, in call order, for tests to inspect.
        self.received: list = []

    def generate(self, request):
        """Record `request` and return the canned response."""
        self.received.append(request)
        reply_model = request.model or "fake-model"
        return SommResponse(
            text=self._text,
            model=reply_model,
            tokens_in=3,
            tokens_out=2,
            latency_ms=5,
            tool_calls=self._tool_calls,
            stop_reason=self._stop_reason,
        )

    def stream(self, request):  # pragma: no cover
        """Placeholder generator; streaming is not exercised by these tests."""
        yield

    def health(self):
        """Always report the provider as available."""
        return ProviderHealth(available=True)

    def models(self):
        """Advertise no models."""
        return []

    def estimate_tokens(self, text, model):
        """Return a constant token estimate."""
        return 1
57
+
58
+
59
def _tmp_llm(tmp_path: Path, provider: _RecordingProvider) -> SommLLM:
    """Build a SommLLM wired to `provider`, storing its data under `tmp_path`."""
    config = Config()
    config.project = "somm-lc-test"
    # Keep the database and spool inside the per-test temp directory.
    config.db_dir = tmp_path / ".somm"
    config.spool_dir = config.db_dir / "spool"
    return SommLLM(
        config=config,
        providers=[provider],
        on_error=lambda _e: None,
    )
65
+
66
+
67
+ # ---------------------------------------------------------------------------
68
+ # Message translation
69
+
70
+
71
def test_system_message_extracted_to_system_field(tmp_path):
    """A SystemMessage lands in the request's `system` field, not in `messages`."""
    recorder = _RecordingProvider(text="ok")
    llm = _tmp_llm(tmp_path, recorder)
    try:
        model = SommChatModel(somm_llm=llm, workload="t")
        conversation = [SystemMessage("be brief"), HumanMessage("hi")]
        model.invoke(conversation)
        first_request = recorder.received[0]
        assert first_request.system == "be brief"
        assert first_request.messages == [{"role": "user", "content": "hi"}]
    finally:
        llm.close()
82
+
83
+
84
def test_multiple_system_messages_concatenate(tmp_path):
    """Several SystemMessages are joined with a blank line between them."""
    recorder = _RecordingProvider(text="ok")
    llm = _tmp_llm(tmp_path, recorder)
    try:
        model = SommChatModel(somm_llm=llm, workload="t")
        conversation = [
            SystemMessage("rule one"),
            SystemMessage("rule two"),
            HumanMessage("hi"),
        ]
        model.invoke(conversation)
        sent_system = recorder.received[0].system
        assert sent_system == "rule one\n\nrule two"
    finally:
        llm.close()
93
+
94
+
95
def test_ai_message_with_tool_calls_becomes_tool_use_blocks(tmp_path):
    """A tool-calling exchange is sent as text + tool_use blocks, then a tool_result."""
    recorder = _RecordingProvider(text="next")
    llm = _tmp_llm(tmp_path, recorder)
    try:
        model = SommChatModel(somm_llm=llm, workload="t")
        assistant_turn = AIMessage(
            content="checking",
            tool_calls=[{"name": "get_weather", "args": {"location": "SF"}, "id": "tu_01"}],
        )
        tool_turn = ToolMessage(content="62F sunny", tool_call_id="tu_01")
        model.invoke([HumanMessage("weather?"), assistant_turn, tool_turn])

        sent = recorder.received[0].messages
        user_msg, assistant_msg, tool_msg = sent[0], sent[1], sent[2]

        # User text
        assert user_msg == {"role": "user", "content": "weather?"}

        # Assistant with text + tool_use blocks (mixed → list form)
        blocks = assistant_msg["content"]
        assert assistant_msg["role"] == "assistant"
        assert {"type": "text", "text": "checking"} in blocks
        tool_use_blocks = [b for b in blocks if b.get("type") == "tool_use"]
        assert tool_use_blocks[0] == {
            "type": "tool_use",
            "id": "tu_01",
            "name": "get_weather",
            "input": {"location": "SF"},
        }

        # Tool result becomes user message with tool_result block
        expected_result_block = {
            "type": "tool_result",
            "tool_use_id": "tu_01",
            "content": "62F sunny",
        }
        assert tool_msg == {"role": "user", "content": [expected_result_block]}
    finally:
        llm.close()
133
+
134
+
135
def test_ai_message_text_only_collapses_to_string(tmp_path):
    """Text-only assistant turns use plain-string content (interoperable shape)."""
    recorder = _RecordingProvider(text="ok")
    llm = _tmp_llm(tmp_path, recorder)
    try:
        model = SommChatModel(somm_llm=llm, workload="t")
        history = [HumanMessage("hi"), AIMessage(content="hello there")]
        model.invoke(history)
        assistant_msg = recorder.received[0].messages[1]
        assert assistant_msg == {"role": "assistant", "content": "hello there"}
    finally:
        llm.close()
146
+
147
+
148
def test_ai_message_tool_calls_only_no_content(tmp_path):
    """Assistant with only tool_calls (no text) — blocks list contains only tool_use."""
    recorder = _RecordingProvider(text="ok")
    llm = _tmp_llm(tmp_path, recorder)
    try:
        model = SommChatModel(somm_llm=llm, workload="t")
        silent_tool_call = AIMessage(
            content="",
            tool_calls=[{"name": "f", "args": {"a": 1}, "id": "tu_2"}],
        )
        model.invoke(
            [
                HumanMessage("x"),
                silent_tool_call,
                ToolMessage(content="r", tool_call_id="tu_2"),
            ]
        )
        assistant_msg = recorder.received[0].messages[1]
        assert assistant_msg["role"] == "assistant"
        # No text block is emitted for empty content.
        expected_blocks = [
            {"type": "tool_use", "id": "tu_2", "name": "f", "input": {"a": 1}}
        ]
        assert assistant_msg["content"] == expected_blocks
    finally:
        llm.close()
171
+
172
+
173
+ # ---------------------------------------------------------------------------
174
+ # bind_tools — LangChain tools → somm-neutral; tool_choice routes correctly
175
+
176
+
177
@tool
def get_weather(location: str) -> str:
    """Get the current weather for a location."""
    # The docstring above doubles as the tool description, so it stays as-is.
    return "Sunny in {}".format(location)
181
+
182
+
183
def test_bind_tools_routes_through_to_somm_neutral_shape(tmp_path):
    """Tools bound via bind_tools() reach the provider unwrapped from OpenAI shape."""
    recorder = _RecordingProvider(text="ok")
    llm = _tmp_llm(tmp_path, recorder)
    try:
        model = SommChatModel(somm_llm=llm, workload="t")
        model.bind_tools([get_weather]).invoke([HumanMessage("weather?")])

        forwarded = recorder.received[0].tools
        assert len(forwarded) == 1
        # Neutral shape (post-unwrap), NOT OpenAI's {type:function,...}
        only_tool = forwarded[0]
        assert only_tool["name"] == "get_weather"
        assert "parameters" in only_tool
        assert "type" not in only_tool  # would be present if OpenAI wrapping leaked
    finally:
        llm.close()
200
+
201
+
202
def test_bind_tools_with_specific_tool_choice(tmp_path):
    """A tool name passed as tool_choice becomes a somm `{"type": "tool"}` dict."""
    recorder = _RecordingProvider(text="ok")
    llm = _tmp_llm(tmp_path, recorder)
    try:
        model = SommChatModel(somm_llm=llm, workload="t")
        with_tools = model.bind_tools([get_weather], tool_choice="get_weather")
        with_tools.invoke([HumanMessage("weather?")])
        sent_choice = recorder.received[0].tool_choice
        assert sent_choice == {"type": "tool", "name": "get_weather"}
    finally:
        llm.close()
212
+
213
+
214
def test_bind_tools_with_required(tmp_path):
    """OpenAI alias 'required' maps to somm 'any'."""
    recorder = _RecordingProvider(text="ok")
    llm = _tmp_llm(tmp_path, recorder)
    try:
        model = SommChatModel(somm_llm=llm, workload="t")
        with_tools = model.bind_tools([get_weather], tool_choice="required")
        with_tools.invoke([HumanMessage("weather?")])
        sent_choice = recorder.received[0].tool_choice
        assert sent_choice == "any"
    finally:
        llm.close()
225
+
226
+
227
+ # ---------------------------------------------------------------------------
228
+ # Response translation — SommResult → AIMessage
229
+
230
+
231
def test_response_with_tool_calls_returns_aimessage_with_tool_calls(tmp_path):
    """Provider tool calls, provenance and token counts surface on the AIMessage."""
    canned_calls = [
        ProviderToolCall(id="tu_5", name="get_weather", arguments={"location": "NYC"})
    ]
    recorder = _RecordingProvider(
        text="checking", tool_calls=canned_calls, stop_reason="tool_use"
    )
    llm = _tmp_llm(tmp_path, recorder)
    try:
        model = SommChatModel(somm_llm=llm, workload="t")
        reply = model.invoke([HumanMessage("weather?")])
        assert isinstance(reply, AIMessage)
        assert reply.content == "checking"

        assert len(reply.tool_calls) == 1
        first_call = reply.tool_calls[0]
        assert first_call["id"] == "tu_5"
        assert first_call["name"] == "get_weather"
        assert first_call["args"] == {"location": "NYC"}

        # Metadata exposes somm provenance
        provenance = reply.response_metadata
        assert provenance["provider"] == "recorder"
        assert provenance["stop_reason"] == "tool_use"

        usage = reply.usage_metadata
        assert usage["input_tokens"] == 3
        assert usage["output_tokens"] == 2
    finally:
        llm.close()
252
+
253
+
254
def test_text_only_response_returns_aimessage_with_text(tmp_path):
    """A plain-text provider reply yields the text and an empty tool_calls list."""
    recorder = _RecordingProvider(text="hello there")
    llm = _tmp_llm(tmp_path, recorder)
    try:
        model = SommChatModel(somm_llm=llm, workload="t")
        reply = model.invoke([HumanMessage("hi")])
        assert reply.content == "hello there"
        assert reply.tool_calls == []
    finally:
        llm.close()
264
+
265
+
266
+ # ---------------------------------------------------------------------------
267
+ # Error semantics
268
+
269
+
270
class _FailingProvider(_RecordingProvider):
    """Recording provider whose generate() always raises a transient error."""

    name = "broken"

    def generate(self, request):
        """Record `request`, then raise `SommTransientError`."""
        self.received.append(request)
        from somm.errors import SommTransientError

        failure = SommTransientError("upstream down", cooldown_s=3600)
        raise failure
278
+
279
+
280
def test_raise_on_failure_true_raises(tmp_path):
    """With the default raise_on_failure, a failed call raises RuntimeError."""
    broken = _FailingProvider()
    llm = _tmp_llm(tmp_path, broken)
    try:
        model = SommChatModel(somm_llm=llm, workload="t")
        prompt = [HumanMessage("hi")]
        with pytest.raises(RuntimeError, match="somm call failed"):
            model.invoke(prompt)
    finally:
        llm.close()
289
+
290
+
291
def test_raise_on_failure_false_returns_error_in_metadata(tmp_path):
    """With raise_on_failure=False, a failed call returns a message, not an exception."""
    broken = _FailingProvider()
    llm = _tmp_llm(tmp_path, broken)
    try:
        model = SommChatModel(somm_llm=llm, workload="t", raise_on_failure=False)
        reply = model.invoke([HumanMessage("hi")])
        # outcome is not OK; we still return a message rather than raising
        reported_outcome = reply.response_metadata["outcome"]
        assert reported_outcome != "ok"
        assert reply.content == ""
    finally:
        llm.close()
302
+
303
+
304
def test_model_and_provider_pinning(tmp_path):
    """somm_model + somm_provider on the adapter pin the underlying SommLLM call."""
    p = _RecordingProvider(text="ok")
    # Override the class-level name on this instance so it can be pinned by name.
    p.name = "pinprovider"
    llm = _tmp_llm(tmp_path, p)
    try:
        chat = SommChatModel(
            somm_llm=llm,
            workload="t",
            somm_model="my-model-id",
            somm_provider="pinprovider",
        )
        result = chat.invoke([HumanMessage("hi")])
        # Model pin: the provider saw exactly one request, for the pinned model.
        assert len(p.received) == 1
        req = p.received[0]
        assert req.model == "my-model-id"
        # Provider pin: the reply's provenance names the pinned provider.
        # Previously only the model half of the docstring's claim was checked.
        assert result.response_metadata["provider"] == "pinprovider"
    finally:
        llm.close()
319
+
320
+
321
+ # ---------------------------------------------------------------------------
322
+ # reasoning_content round-trip (DeepSeek v4 thinking models)
323
+
324
+
325
def test_reasoning_content_surfaced_to_aimessage(tmp_path):
    """A provider response with reasoning_content lands in AIMessage.additional_kwargs."""
    recorder = _RecordingProvider(text="answer")
    # _RecordingProvider builds the SommResponse itself, so wrap its generate()
    # on this instance to attach reasoning_content without adding a subclass.
    original_generate = recorder.generate

    def generate_with_reasoning(request):
        response = original_generate(request)
        response.reasoning_content = "let me think... 2+2=4"
        return response

    recorder.generate = generate_with_reasoning
    llm = _tmp_llm(tmp_path, recorder)
    try:
        model = SommChatModel(somm_llm=llm, workload="t")
        reply = model.invoke([HumanMessage("2+2?")])
        surfaced = reply.additional_kwargs.get("reasoning_content")
        assert surfaced == "let me think... 2+2=4"
    finally:
        llm.close()
343
+
344
+
345
def test_reasoning_content_echoed_on_assistant_turn(tmp_path):
    """An AIMessage carrying reasoning_content (with tool_calls) re-serializes it
    onto the somm-neutral assistant message so the provider can echo it."""
    recorder = _RecordingProvider(text="ok")
    llm = _tmp_llm(tmp_path, recorder)
    try:
        model = SommChatModel(somm_llm=llm, workload="t")
        earlier_turn = AIMessage(
            content="checking",
            tool_calls=[{"name": "get_weather", "args": {"location": "SF"}, "id": "tu_1"}],
            additional_kwargs={"reasoning_content": "I should call get_weather"},
        )
        history = [
            HumanMessage("weather?"),
            earlier_turn,
            ToolMessage(content="62F", tool_call_id="tu_1"),
        ]
        model.invoke(history)
        assistant_msg = recorder.received[0].messages[1]
        assert assistant_msg["role"] == "assistant"
        assert assistant_msg.get("reasoning_content") == "I should call get_weather"
    finally:
        llm.close()