xcoding 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xcode/__init__.py +3 -0
- xcode/__main__.py +4 -0
- xcode/agent.py +341 -0
- xcode/backends.py +141 -0
- xcode/cli.py +600 -0
- xcode/config.py +72 -0
- xcode/hooks.py +74 -0
- xcode/input_bar.py +357 -0
- xcode/mcp.py +157 -0
- xcode/memory.py +34 -0
- xcode/permissions.py +80 -0
- xcode/session.py +67 -0
- xcode/tools.py +451 -0
- xcode/ui.py +349 -0
- xcoding-0.1.0.dist-info/METADATA +190 -0
- xcoding-0.1.0.dist-info/RECORD +20 -0
- xcoding-0.1.0.dist-info/WHEEL +5 -0
- xcoding-0.1.0.dist-info/entry_points.txt +2 -0
- xcoding-0.1.0.dist-info/licenses/LICENSE +21 -0
- xcoding-0.1.0.dist-info/top_level.txt +1 -0
xcode/__init__.py
ADDED
xcode/__main__.py
ADDED
xcode/agent.py
ADDED
|
@@ -0,0 +1,341 @@
|
|
|
1
|
+
"""The agent loop: stream the model's reply, run any tool calls it asks for,
|
|
2
|
+
feed the results back, and repeat until it stops calling tools.
|
|
3
|
+
|
|
4
|
+
Also handles context compaction, project-memory injection, and the todo plan.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
from typing import Callable, Optional
|
|
11
|
+
|
|
12
|
+
from .backends import Backend
|
|
13
|
+
from .config import (COMPACT_AT, CONTEXT_TOKENS, KEEP_RECENT, MAX_AGENT_STEPS,
|
|
14
|
+
SYSTEM_PROMPT, estimate_tokens)
|
|
15
|
+
from . import tools
|
|
16
|
+
|
|
17
|
+
# Hooks the CLI provides so the agent stays UI-agnostic.
# ``Confirm``, ``OnToken``, ``OnTurnEnd`` and ``OnTool`` are required by
# ``Agent.__init__``; the remaining hooks are optional there and fall back to
# no-op defaults (``OnAsk`` falls back to picking the first option).
Confirm = Callable[[str, str, str], bool]        # (kind, target, detail) -> allow?
OnToken = Callable[[str], None]                  # streamed assistant text delta
OnTurnEnd = Callable[[], None]                   # assistant text turn finished
OnTool = Callable[[str, dict], None]             # (tool_name, args) about to run
OnToolResult = Callable[[str, dict, str], None]  # (tool_name, args, result) done
OnTodos = Callable[[list], None]                 # the todo list changed
OnNotice = Callable[[str], None]                 # out-of-band status line
OnAsk = Callable[[str, list], str]               # (question, options) -> chosen
OnWaitStart = Callable[[], None]                 # model call started — show spinner
OnWaitEnd = Callable[[], None]                   # model call produced output — hide it
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class Agent:
    """Drive the model <-> tool loop for a single conversation.

    The agent owns the running message list, streams each model reply,
    executes the tool calls the model requests, appends their results, and
    repeats until the model answers without tool calls or the per-turn step
    limit (``MAX_AGENT_STEPS``) is reached. Everything user-facing is
    delegated to the callbacks handed to the constructor.
    """

    def __init__(self, backend: Backend, confirm: Confirm, on_token: OnToken,
                 on_turn_end: OnTurnEnd, on_tool: OnTool,
                 on_todos: Optional[OnTodos] = None,
                 on_notice: Optional[OnNotice] = None,
                 on_tool_result: Optional[OnToolResult] = None,
                 on_ask: Optional[OnAsk] = None,
                 on_wait_start: Optional[OnWaitStart] = None,
                 on_wait_end: Optional[OnWaitEnd] = None,
                 project_memory: str = "", settings=None, mcp=None,
                 depth: int = 0):
        """Store the backend and UI hooks and start an empty conversation.

        Args:
            backend: Provides ``client`` (OpenAI-style SDK client) and ``model``.
            confirm: Permission callback forwarded to ``tools.dispatch``.
            on_token: Receives streamed assistant text (and command output).
            on_turn_end: Called after a reply that produced visible text.
            on_tool: Called with ``(name, args)`` just before a tool runs.
            on_todos: Called with the normalized todo list when it changes.
            on_notice: Receives out-of-band status lines.
            on_tool_result: Called with ``(name, args, result)`` after a tool.
            on_ask: Asks the user a question; the default picks the first
                option, or ``""`` when there are none.
            on_wait_start: Called when a streamed model call begins.
            on_wait_end: Called when that wait is over.
            project_memory: Extra text appended to the system prompt.
            settings: Hook settings passed to ``hooks.run_hooks``, or None.
            mcp: MCP manager used for extra tools, or None.
            depth: 0 for the top-level agent; spawned children are deeper.
        """
        self.backend = backend
        self.confirm = confirm
        self.on_token = on_token
        self.on_turn_end = on_turn_end
        self.on_tool = on_tool
        # Optional hooks default to no-ops so call sites never test for None.
        self.on_tool_result = on_tool_result or (lambda n, a, r: None)
        self.on_ask = on_ask or (lambda q, o: (o[0] if o else ""))
        self.on_todos = on_todos or (lambda t: None)
        self.on_notice = on_notice or (lambda s: None)
        self.on_wait_start = on_wait_start or (lambda: None)
        self.on_wait_end = on_wait_end or (lambda: None)
        self.project_memory = project_memory
        self.settings = settings  # hooks.Settings or None
        self.mcp = mcp            # mcp.McpManager or None
        self.depth = depth        # 0 = top-level; children are deeper
        self.todos: list[dict] = []
        self.messages: list[dict] = [self._system()]

    # ---- public ------------------------------------------------------------
    def reset(self) -> None:
        """Drop the conversation and todo plan, keeping a fresh system prompt."""
        self.messages = [self._system()]
        self.todos = []

    def context_tokens(self) -> int:
        """Return the estimated token count of every stored message."""
        return estimate_tokens(self.messages)

    def conversation_tokens(self) -> int:
        """Tokens from the actual conversation, excluding the system prompt
        and project memory — so a fresh session reads 0."""
        body = [m for m in self.messages if m.get("role") != "system"]
        return estimate_tokens(body) if body else 0

    def load_messages(self, messages: list[dict]) -> None:
        """Adopt a restored transcript, refreshing the system prompt."""
        body = [m for m in messages if m.get("role") != "system"]
        self.messages = [self._system(), *body]

    def send(self, user_input: str) -> None:
        """Run one full turn: user message -> (model <-> tools)* -> final reply."""
        self.messages.append({"role": "user", "content": user_input})
        self._maybe_compact()

        for _ in range(MAX_AGENT_STEPS):
            content, tool_calls = self._stream_once()

            entry: dict = {"role": "assistant", "content": content}
            if tool_calls:
                entry["tool_calls"] = [
                    {"id": tc["id"], "type": "function",
                     "function": {"name": tc["name"], "arguments": tc["arguments"]}}
                    for tc in tool_calls
                ]
            self.messages.append(entry)

            if not tool_calls:
                return  # plain answer: the turn is complete

            # Run every requested tool and feed each result back to the model.
            for tc in tool_calls:
                args = _parse_args(tc["arguments"])
                self.on_tool(tc["name"], args)
                result = self._run_tool(tc["name"], args)
                self.on_tool_result(tc["name"], args, result)
                self.messages.append({"role": "tool",
                                      "tool_call_id": tc["id"],
                                      "content": result})

        # Only reached when every step ended in tool calls.
        self.on_token("\n[stopped: hit the max step limit for this turn]")
        self.on_turn_end()

    # ---- internals ---------------------------------------------------------
    def _system(self) -> dict:
        """Build the system message: ``SYSTEM_PROMPT`` plus project memory."""
        content = SYSTEM_PROMPT
        if self.project_memory:
            content += "\n\n" + self.project_memory
        return {"role": "system", "content": content}

    def _run_tool(self, name: str, args: dict) -> str:
        """Execute one tool call and return its textual result.

        ``update_todos``, ``ask_user`` and ``spawn_agent`` are handled here;
        names claimed by the MCP manager go to it; everything else goes
        through ``tools.dispatch`` followed by the post-tool hooks.
        """
        if name == "update_todos":
            self.todos = _normalize_todos(args.get("todos", []))
            self.on_todos(self.todos)
            done = sum(t["status"] == "completed" for t in self.todos)
            return f"OK: todos updated ({done}/{len(self.todos)} complete)"
        if name == "ask_user":
            choice = self.on_ask(args.get("question", ""),
                                 args.get("options", []) or [])
            return f"User chose: {choice}"
        if name == "spawn_agent":
            return self._spawn(args.get("task", ""))
        if self.mcp and self.mcp.handles(name):
            return self.mcp.call(name, args)

        # Pass streaming callback for run_command
        if name == "run_command":
            result = tools.dispatch(name, args, self.confirm, on_output=self.on_token)
        else:
            result = tools.dispatch(name, args, self.confirm)
        return self._fire_hooks(name, args, result)

    def _fire_hooks(self, name: str, args: dict, result: str) -> str:
        """Run the configured post-tool hook and append its note to *result*.

        Hooks only fire when settings exist, the result starts with ``"OK"``,
        and the tool is one of write_file / edit_file / run_command.
        """
        if not self.settings or not result.startswith("OK"):
            return result
        from . import hooks
        event = {"write_file": "after_write", "edit_file": "after_edit",
                 "run_command": "after_command"}.get(name)
        if not event:
            return result
        note = hooks.run_hooks(self.settings, event,
                               path=args.get("path", ""),
                               command=args.get("command", ""))
        return f"{result}\n{note}" if note else result

    def _spawn(self, task: str) -> str:
        """Run *task* in a child agent and return its last assistant message.

        Children (``depth >= 1``) may not spawn further agents. Failures are
        reported as ``"ERROR ..."`` strings rather than raised, so the parent
        turn can continue.
        """
        if self.depth >= 1:
            return "ERROR: sub-agents cannot spawn more sub-agents."
        # The task comes straight from model-produced JSON, so it may be null
        # or a non-string; treat that like an empty task instead of crashing.
        if not isinstance(task, str) or not task.strip():
            return "ERROR: spawn_agent needs a task."
        self.on_notice(f"sub-agent ▷ {task[:70]}")
        child = Agent(self.backend,
                      confirm=self.confirm,
                      on_token=lambda s: self.on_token(s),
                      on_turn_end=self.on_turn_end,
                      on_tool=lambda n, a: self.on_notice(f" sub · {n}"),
                      on_notice=self.on_notice, on_ask=self.on_ask,
                      project_memory=self.project_memory,
                      settings=self.settings, mcp=self.mcp, depth=self.depth + 1)
        try:
            child.send(task)
        except Exception as e:
            return f"ERROR in sub-agent: {e}"
        # The report is the child's most recent non-empty assistant message.
        report = next((m["content"] for m in reversed(child.messages)
                       if m["role"] == "assistant" and m.get("content")),
                      "(sub-agent produced no report)")
        return f"[sub-agent report]\n{report}"

    def _schemas(self) -> list[dict]:
        """Return the tool schemas offered to the model for this agent."""
        schemas = list(tools.TOOL_SCHEMAS)  # copy: never mutate the shared list
        if self.depth >= 1:  # children can't spawn further
            schemas = [s for s in schemas
                       if s["function"]["name"] != "spawn_agent"]
        if self.mcp:
            schemas += self.mcp.schemas()
        return schemas

    def _stream_once(self):
        """One streamed model call. Returns (content, tool_calls list).

        ``tool_calls`` is a list of ``{"id", "name", "arguments"}`` dicts
        assembled from the streamed fragments, ordered by stream index.
        ``on_wait_end`` is guaranteed to fire once per call, even when the
        request itself fails.
        """
        self.on_wait_start()
        waiting = True

        content_parts: list[str] = []
        calls: dict[int, dict] = {}
        printed_any = False

        try:
            # Issued inside the try so a failed request (backend down, bad
            # model name, ...) still reaches the finally and ends the wait.
            stream = self.backend.client.chat.completions.create(
                model=self.backend.model,
                messages=self.messages,
                tools=self._schemas(),
                temperature=0.2,
                stream=True,
            )
            for chunk in stream:
                if not chunk.choices:
                    continue
                delta = chunk.choices[0].delta

                # NOTE(review): text deltas do not call on_wait_end here;
                # presumably the UI ends the wait inside on_token — confirm.
                if getattr(delta, "content", None):
                    self.on_token(delta.content)
                    content_parts.append(delta.content)
                    printed_any = True

                for tc in (getattr(delta, "tool_calls", None) or []):
                    if waiting:  # first tool token: drop the spinner
                        self.on_wait_end()
                        waiting = False
                    # Fragments of one call share an index; merge them.
                    slot = calls.setdefault(tc.index,
                                            {"id": "", "name": "", "arguments": ""})
                    if tc.id:
                        slot["id"] = tc.id
                    if tc.function:
                        if tc.function.name:
                            slot["name"] += tc.function.name
                        if tc.function.arguments:
                            slot["arguments"] += tc.function.arguments
        finally:
            if waiting:
                self.on_wait_end()

        if printed_any:
            self.on_turn_end()

        ordered = [calls[i] for i in sorted(calls)]
        for n, c in enumerate(ordered):
            if not c["id"]:
                c["id"] = f"call_{n}"  # synthesize an id when none was streamed
        return "".join(content_parts), ordered

    def compact(self, force: bool = False) -> bool:
        """Public entry point for compaction; returns True if it happened."""
        return self._maybe_compact(force=force)

    def _maybe_compact(self, force: bool = False) -> bool:
        """Summarize older messages once the context grows too large.

        Compaction runs when *force* is set or the estimated size reaches
        ``CONTEXT_TOKENS * COMPACT_AT``, and only if more than ``KEEP_RECENT``
        non-system messages exist. Returns True when messages were replaced.
        """
        budget = CONTEXT_TOKENS
        if not force and estimate_tokens(self.messages) < budget * COMPACT_AT:
            return False

        body = self.messages[1:]  # everything after system
        if len(body) <= KEEP_RECENT:
            return False

        # Snap the split to a clean user-turn boundary so we never orphan a
        # tool result from its assistant tool_call.
        split = max(0, len(body) - KEEP_RECENT)
        while split < len(body) and body[split]["role"] != "user":
            split += 1
        to_summarize, tail = body[:split], body[split:]
        if not to_summarize:
            return False

        self.on_notice("compacting earlier conversation…")
        summary = self._summarize(to_summarize)
        self.messages = [
            self._system(),
            {"role": "user",
             "content": "[Summary of earlier conversation]\n" + summary},
            {"role": "assistant", "content": "Got it — continuing from there."},
            *tail,
        ]
        return True

    def _summarize(self, msgs: list[dict]) -> str:
        """Ask the model for a summary of *msgs*.

        Only the first 12000 characters of the rendered transcript are sent.
        Never raises: failures come back as a ``"(summary failed: ...)"`` string.
        """
        transcript = _render(msgs)
        try:
            resp = self.backend.client.chat.completions.create(
                model=self.backend.model,
                messages=[
                    {"role": "system",
                     "content": "Summarize this coding-session transcript so work "
                                "can continue. Capture: the user's goals, decisions "
                                "made, files created/changed, and any open TODOs. "
                                "Be concise and factual."},
                    {"role": "user", "content": transcript[:12000]},
                ],
                temperature=0.1,
                stream=False,
            )
            return resp.choices[0].message.content or "(summary unavailable)"
        except Exception as e:
            return f"(summary failed: {e})"
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
_VALID_STATUS = {"pending", "in_progress", "completed"}


def _normalize_todos(raw) -> list[dict]:
    """Coerce whatever the model sent into ``[{content, status}]``.

    Smaller models often return bare strings, a JSON-encoded string, a single
    dict, a ``{"todos": [...]}`` wrapper, or an unknown status. Accepted shapes:

    * a list whose items are strings or dicts (other item types are ignored);
    * a dict: either one todo or a ``{"todos": ...}`` wrapper;
    * a string: decoded as JSON when possible, otherwise taken as one todo.

    Dict items read their text from ``content``, ``task`` or ``text``. Blank
    items are dropped and unknown statuses become ``"pending"``. Never raises
    for JSON-shaped input; anything unusable yields ``[]``.
    """
    if isinstance(raw, str):
        try:
            decoded = json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            raw = [raw]  # not JSON at all: the text itself is the todo
        else:
            if decoded is None or isinstance(decoded, (list, dict)):
                raw = decoded
            elif isinstance(decoded, str):
                raw = [decoded]  # JSON-encoded string: one todo, not chars
            else:
                raw = [raw]  # bare number/bool: keep the original text
    if isinstance(raw, dict):  # single todo, or {"todos": [...]}
        raw = raw["todos"] if "todos" in raw else [raw]
    if isinstance(raw, str):  # e.g. {"todos": "one item"}
        raw = [raw]
    elif not isinstance(raw, (list, tuple)):
        raw = []  # None, numbers, ...: nothing to iterate

    out = []
    for item in raw:
        if isinstance(item, str):
            content, status = item.strip(), "pending"
        elif isinstance(item, dict):
            # str() guards against models sending numbers as the todo text.
            content = str(item.get("content") or item.get("task")
                          or item.get("text") or "").strip()
            status = str(item.get("status", "pending")).lower()
            if status not in _VALID_STATUS:
                status = "pending"
        else:
            continue
        if content:
            out.append({"content": content, "status": status})
    return out
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def _render(msgs: list[dict]) -> str:
    """Flatten chat messages into a plain-text transcript, one entry per line.

    Tool results are cut to 400 characters and tool-call arguments to 200.
    Messages with any role other than tool/assistant/user are skipped.
    Tool calls with missing, null or non-string ``function``/``arguments``
    (possible in restored transcripts) are rendered instead of raising.
    """
    out = []
    for m in msgs:
        role = m.get("role")
        if role == "tool":
            out.append(f"[tool result] {(m.get('content') or '')[:400]}")
        elif role == "assistant":
            if m.get("content"):
                out.append(f"[assistant] {m['content']}")
            for tc in m.get("tool_calls") or []:
                fn = tc.get("function") or {}
                # Arguments are normally a JSON string; coerce so that a null
                # or already-decoded value cannot break the slice.
                arguments = str(fn.get("arguments") or "")
                out.append(f"[assistant called {fn.get('name')}] {arguments[:200]}")
        elif role == "user":
            out.append(f"[user] {m.get('content', '')}")
    return "\n".join(out)
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
def _parse_args(raw: str | None) -> dict:
    """Decode a tool call's JSON argument string into a dict.

    Returns ``{}`` for empty input, invalid JSON, or valid JSON that is not
    an object (list, string, number, null) — callers use ``.get`` on the
    result, so anything other than a dict would crash them.
    """
    if not raw:
        return {}
    try:
        parsed = json.loads(raw)
    except (json.JSONDecodeError, TypeError):
        return {}
    return parsed if isinstance(parsed, dict) else {}
|
xcode/backends.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
"""Backend auto-detection and client construction.
|
|
2
|
+
|
|
3
|
+
Both Ollama and llama.cpp's ``llama-server`` expose an OpenAI-compatible
|
|
4
|
+
``/v1`` endpoint, so we can drive either of them through the ``openai`` SDK.
|
|
5
|
+
This module figures out which one is actually running and what model to use.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
|
|
13
|
+
import httpx
|
|
14
|
+
from openai import OpenAI
|
|
15
|
+
|
|
16
|
+
# (label, base_url-without-/v1, how-to-list-models)
# Probed in this order by detect_backend() and list_models().
# NOTE(review): both loops unpack the third element as ``_`` and pick the
# listing helper by label instead, so the path here is informational only.
_CANDIDATES = [
    ("ollama", "http://localhost:11434", "/api/tags"),
    ("llama.cpp", "http://localhost:8080", "/v1/models"),
]
|
|
21
|
+
|
|
22
|
+
# Name fragments of models that tend to handle tool-calling well, highest
# priority first. Lets us choose a sensible default rather than whatever the
# backend happens to list first.
_PREFERRED = [
    "qwen2.5-coder", "qwen3-coder", "qwen2.5", "qwen3",
    "llama3.1", "llama3.3", "mistral-nemo", "mistral", "deepseek-coder",
    "command-r", "firefunction", "gpt-oss",
]


def _pick_default(models: list[str]) -> str:
    """Choose the default model from *models*.

    Walks ``_PREFERRED`` in priority order and returns the first model whose
    lower-cased name contains the fragment; falls back to ``models[0]`` when
    nothing matches. *models* must be non-empty.
    """
    for fragment in _PREFERRED:
        hit = next((name for name in models if fragment in name.lower()), None)
        if hit is not None:
            return hit
    return models[0]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass
class Backend:
    """A resolved model endpoint: where it lives, which model, and its client."""

    name: str      # "ollama" | "llama.cpp" | "custom" (XCODE_BASE_URL override)
    base_url: str  # OpenAI-compatible endpoint, e.g. http://localhost:11434/v1
    model: str     # model identifier sent with each request
    client: OpenAI

    def describe(self) -> str:
        """Return a one-line ``name · model · base_url`` summary."""
        return "{} · {} · {}".format(self.name, self.model, self.base_url)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _list_ollama_models(root: str) -> list[str]:
    """Return the model names reported by ``GET {root}/api/tags``.

    Uses a 1-second timeout. Raises whatever ``httpx`` raises for transport
    failures or non-success status codes; a response without a ``models``
    key yields an empty list.
    """
    response = httpx.get(f"{root}/api/tags", timeout=1.0)
    response.raise_for_status()
    payload = response.json()
    return [entry["name"] for entry in payload.get("models", [])]
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _list_openai_models(root: str) -> list[str]:
    """Return the model ids reported by ``GET {root}/v1/models``.

    Uses a 1-second timeout. Raises whatever ``httpx`` raises for transport
    failures or non-success status codes; a response without a ``data`` key
    yields an empty list.
    """
    response = httpx.get(f"{root}/v1/models", timeout=1.0)
    response.raise_for_status()
    payload = response.json()
    return [entry["id"] for entry in payload.get("data", [])]
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _probe(root: str) -> bool:
    """Report whether anything answers an HTTP GET at *root* within 0.5 s.

    Any response counts as reachable — the status code is not inspected.
    Every exception from the request is treated as "not reachable".
    """
    try:
        httpx.get(root, timeout=0.5)
    except Exception:
        return False
    return True
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def detect_backend() -> Backend:
    """Locate a usable backend, or raise RuntimeError explaining what to start.

    Environment overrides:
        XCODE_BASE_URL — use this OpenAI-compatible /v1 endpoint directly and
                         skip probing; the backend is then named "custom"
        XCODE_MODEL    — use this model name instead of auto-selecting one
        XCODE_API_KEY  — token for the endpoint (defaults to "local")

    Without XCODE_BASE_URL the entries of ``_CANDIDATES`` are tried one after
    another, and the first that is reachable and has a model wins.
    """
    forced_model = os.getenv("XCODE_MODEL")
    api_key = os.getenv("XCODE_API_KEY", "local")
    base_override = os.getenv("XCODE_BASE_URL")

    if base_override:
        base_url = base_override.rstrip("/")
        # Preference: explicit model, then the endpoint's first listed model,
        # then a placeholder name.
        model = forced_model or _first_model_at(base_url) or "local-model"
        client = OpenAI(base_url=base_url, api_key=api_key)
        return Backend("custom", base_url, model, client)

    errors = []
    for name, root, _ in _CANDIDATES:
        if not _probe(root):
            errors.append(f" - {name}: nothing listening at {root}")
            continue

        lister = _list_ollama_models if name == "ollama" else _list_openai_models
        try:
            models = lister(root)
        except Exception as exc:  # responded but couldn't list models
            errors.append(f" - {name}: reachable but model list failed ({exc})")
            continue

        # An empty list is acceptable only when the user named a model.
        if not (models or forced_model):
            errors.append(f" - {name}: running but no models pulled")
            continue

        model = forced_model or _pick_default(models)
        base_url = f"{root}/v1"
        client = OpenAI(base_url=base_url, api_key=api_key)
        return Backend(name, base_url, model, client)

    guidance = (
        "\n\nStart one of:\n"
        " Ollama : `ollama serve` then `ollama pull qwen2.5-coder`\n"
        " llama.cpp: `llama-server -m model.gguf` (listens on :8080)\n"
        "Or set XCODE_BASE_URL to any OpenAI-compatible endpoint."
    )
    raise RuntimeError(
        "No local model backend found.\n" + "\n".join(errors) + guidance
    )
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _first_model_at(base_url: str) -> str | None:
    """Return the first model id listed at *base_url*, or None.

    A trailing ``/v1`` is removed before querying because the listing helper
    appends ``/v1/models`` itself. Any failure while listing yields None.
    """
    if base_url.endswith("/v1"):
        root = base_url[:-3]
    else:
        root = base_url
    try:
        models = _list_openai_models(root)
    except Exception:
        return None
    return models[0] if models else None
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def list_models() -> dict[str, list[str]]:
    """Map each reachable backend label to the models it lists (diagnostics).

    Backends that do not answer the probe are left out; backends that answer
    but fail to list models map to an empty list.
    """
    found: dict[str, list[str]] = {}
    for name, root, _ in _CANDIDATES:
        if not _probe(root):
            continue
        lister = _list_ollama_models if name == "ollama" else _list_openai_models
        try:
            found[name] = lister(root)
        except Exception:
            found[name] = []
    return found
|