xcoding 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
xcode/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """xcode — a local-model coding agent (Claude Code, but on Ollama / llama.cpp)."""
2
+
3
# Version string of the xcode package.
__version__ = "0.1.0"
xcode/__main__.py ADDED
@@ -0,0 +1,4 @@
1
+ from .cli import main
2
+
3
# Start the CLI only when this module is executed as the entry point
# (not when it is merely imported).
if __name__ == "__main__":
    main()
xcode/agent.py ADDED
@@ -0,0 +1,341 @@
1
+ """The agent loop: stream the model's reply, run any tool calls it asks for,
2
+ feed the results back, and repeat until it stops calling tools.
3
+
4
+ Also handles context compaction, project-memory injection, and the todo plan.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ from typing import Callable, Optional
11
+
12
+ from .backends import Backend
13
+ from .config import (COMPACT_AT, CONTEXT_TOKENS, KEEP_RECENT, MAX_AGENT_STEPS,
14
+ SYSTEM_PROMPT, estimate_tokens)
15
+ from . import tools
16
+
17
# Hooks the CLI provides so the agent stays UI-agnostic.
# Each alias below names the signature of one callback accepted by
# ``Agent.__init__``.
Confirm = Callable[[str, str, str], bool]  # (kind, target, detail) -> allow?
OnToken = Callable[[str], None]  # streamed assistant text delta
OnTurnEnd = Callable[[], None]  # assistant text turn finished
OnTool = Callable[[str, dict], None]  # (tool_name, args) about to run
OnToolResult = Callable[[str, dict, str], None]  # (tool_name, args, result) done
OnTodos = Callable[[list], None]  # the todo list changed
OnNotice = Callable[[str], None]  # out-of-band status line
OnAsk = Callable[[str, list], str]  # (question, options) -> chosen
OnWaitStart = Callable[[], None]  # model call started — show spinner
OnWaitEnd = Callable[[], None]  # model call produced output — hide it
28
+
29
+
30
class Agent:
    """Conversation driver: streams model replies and executes tool calls.

    The agent keeps the running transcript in ``self.messages`` (the first
    entry is always the system message) and the current plan in
    ``self.todos``. All user-interface work is delegated to the callbacks
    passed to the constructor, so the class itself never prints anything.
    """

    def __init__(self, backend: Backend, confirm: Confirm, on_token: OnToken,
                 on_turn_end: OnTurnEnd, on_tool: OnTool,
                 on_todos: Optional[OnTodos] = None,
                 on_notice: Optional[OnNotice] = None,
                 on_tool_result: Optional[OnToolResult] = None,
                 on_ask: Optional[OnAsk] = None,
                 on_wait_start: Optional[OnWaitStart] = None,
                 on_wait_end: Optional[OnWaitEnd] = None,
                 project_memory: str = "", settings=None, mcp=None,
                 depth: int = 0):
        """Store the backend and UI hooks and start a fresh transcript.

        Optional hooks that are omitted are replaced with no-ops; a missing
        ``on_ask`` answers with the first offered option (or ``""`` when no
        options are given).
        """
        self.backend = backend
        self.confirm = confirm
        self.on_token = on_token
        self.on_turn_end = on_turn_end
        self.on_tool = on_tool
        self.on_tool_result = on_tool_result or (lambda n, a, r: None)
        self.on_ask = on_ask or (lambda q, o: (o[0] if o else ""))
        self.on_todos = on_todos or (lambda t: None)
        self.on_notice = on_notice or (lambda s: None)
        self.on_wait_start = on_wait_start or (lambda: None)
        self.on_wait_end = on_wait_end or (lambda: None)
        self.project_memory = project_memory
        self.settings = settings  # hooks.Settings or None
        self.mcp = mcp  # mcp.McpManager or None
        self.depth = depth  # 0 = top-level; children are deeper
        self.todos: list[dict] = []
        self.messages: list[dict] = [self._system()]

    # ---- public ------------------------------------------------------------
    def reset(self) -> None:
        """Drop the whole conversation and the todo plan."""
        self.messages = [self._system()]
        self.todos = []

    def context_tokens(self) -> int:
        """Return the estimated token count of the full transcript."""
        return estimate_tokens(self.messages)

    def conversation_tokens(self) -> int:
        """Tokens from the actual conversation, excluding the system prompt
        and project memory — so a fresh session reads 0."""
        body = [m for m in self.messages if m.get("role") != "system"]
        return estimate_tokens(body) if body else 0

    def load_messages(self, messages: list[dict]) -> None:
        """Adopt a restored transcript, refreshing the system prompt."""
        body = [m for m in messages if m.get("role") != "system"]
        self.messages = [self._system(), *body]

    def send(self, user_input: str) -> None:
        """Run one full turn: user message -> (model <-> tools)* -> final reply.

        The loop ends as soon as the model answers without requesting any
        tool. If ``MAX_AGENT_STEPS`` model calls all asked for tools, a
        "stopped" notice is emitted through ``on_token`` instead.
        """
        self.messages.append({"role": "user", "content": user_input})
        self._maybe_compact()

        for _ in range(MAX_AGENT_STEPS):
            content, tool_calls = self._stream_once()

            entry: dict = {"role": "assistant", "content": content}
            if tool_calls:
                entry["tool_calls"] = [
                    {"id": tc["id"], "type": "function",
                     "function": {"name": tc["name"],
                                  "arguments": tc["arguments"]}}
                    for tc in tool_calls
                ]
            self.messages.append(entry)

            if not tool_calls:
                return

            for tc in tool_calls:
                args = _parse_args(tc["arguments"])
                if not isinstance(args, dict):
                    # Valid JSON that is not an object (e.g. a list) cannot
                    # be used as keyword-style tool arguments.
                    args = {}
                self.on_tool(tc["name"], args)
                result = self._run_tool(tc["name"], args)
                self.on_tool_result(tc["name"], args, result)
                self.messages.append({"role": "tool",
                                      "tool_call_id": tc["id"],
                                      "content": result})

        self.on_token("\n[stopped: hit the max step limit for this turn]")
        self.on_turn_end()

    # ---- internals ---------------------------------------------------------
    def _system(self) -> dict:
        """Build the system message: base prompt plus project memory, if any."""
        content = SYSTEM_PROMPT
        if self.project_memory:
            content += "\n\n" + self.project_memory
        return {"role": "system", "content": content}

    def _run_tool(self, name: str, args: dict) -> str:
        """Execute one tool call and return its textual result.

        ``update_todos``, ``ask_user`` and ``spawn_agent`` are handled here;
        tools claimed by the MCP manager go to it; everything else is
        dispatched to the ``tools`` module and then passed through the hooks.
        """
        if name == "update_todos":
            self.todos = _normalize_todos(args.get("todos", []))
            self.on_todos(self.todos)
            done = sum(t["status"] == "completed" for t in self.todos)
            return f"OK: todos updated ({done}/{len(self.todos)} complete)"
        if name == "ask_user":
            choice = self.on_ask(args.get("question", ""),
                                 args.get("options", []) or [])
            return f"User chose: {choice}"
        if name == "spawn_agent":
            return self._spawn(args.get("task", ""))
        if self.mcp and self.mcp.handles(name):
            return self.mcp.call(name, args)

        # Pass streaming callback for run_command
        if name == "run_command":
            result = tools.dispatch(name, args, self.confirm,
                                    on_output=self.on_token)
        else:
            result = tools.dispatch(name, args, self.confirm)
        return self._fire_hooks(name, args, result)

    def _fire_hooks(self, name: str, args: dict, result: str) -> str:
        """Run the configured after-hook for a successful write/edit/command.

        Hooks only fire when settings are present and the result starts with
        ``"OK"``. Any note returned by the hook is appended to the result.
        """
        if not self.settings or not result.startswith("OK"):
            return result
        from . import hooks
        event = {"write_file": "after_write", "edit_file": "after_edit",
                 "run_command": "after_command"}.get(name)
        if not event:
            return result
        note = hooks.run_hooks(self.settings, event,
                               path=args.get("path", ""),
                               command=args.get("command", ""))
        return f"{result}\n{note}" if note else result

    def _spawn(self, task: str) -> str:
        """Run ``task`` in a child agent and return its last non-empty reply.

        Children (``depth >= 1``) may not spawn further agents. Errors raised
        by the child are reported as an ``ERROR`` string rather than raised.
        """
        if self.depth >= 1:
            return "ERROR: sub-agents cannot spawn more sub-agents."
        # The task comes straight from model-produced JSON, so it may be
        # null or a non-string; coerce it before calling str methods.
        if task is None:
            task = ""
        elif not isinstance(task, str):
            task = str(task)
        if not task.strip():
            return "ERROR: spawn_agent needs a task."
        self.on_notice(f"sub-agent ▷ {task[:70]}")
        child = Agent(self.backend,
                      confirm=self.confirm,
                      on_token=lambda s: self.on_token(s),
                      on_turn_end=self.on_turn_end,
                      on_tool=lambda n, a: self.on_notice(f" sub · {n}"),
                      on_notice=self.on_notice, on_ask=self.on_ask,
                      project_memory=self.project_memory,
                      settings=self.settings, mcp=self.mcp,
                      depth=self.depth + 1)
        try:
            child.send(task)
        except Exception as e:
            return f"ERROR in sub-agent: {e}"
        report = next((m["content"] for m in reversed(child.messages)
                       if m["role"] == "assistant" and m.get("content")),
                      "(sub-agent produced no report)")
        return f"[sub-agent report]\n{report}"

    def _schemas(self) -> list[dict]:
        """Return the tool schemas offered to the model for this agent."""
        schemas = list(tools.TOOL_SCHEMAS)
        if self.depth >= 1:  # children can't spawn further
            schemas = [s for s in schemas
                       if s["function"]["name"] != "spawn_agent"]
        if self.mcp:
            schemas += self.mcp.schemas()
        return schemas

    def _stream_once(self):
        """One streamed model call. Returns (content, tool_calls list).

        Each tool call is a dict with ``id``, ``name`` and ``arguments``
        (the raw argument text), assembled from the streamed fragments and
        ordered by their stream index. Calls that arrive without an id get a
        positional ``call_<n>`` id.
        """
        self.on_wait_start()
        waiting = True
        content_parts: list[str] = []
        calls: dict[int, dict] = {}
        printed_any = False

        try:
            # The request is created inside the try block so that a failing
            # call still reaches the finally clause and ends the wait state.
            stream = self.backend.client.chat.completions.create(
                model=self.backend.model,
                messages=self.messages,
                tools=self._schemas(),
                temperature=0.2,
                stream=True,
            )
            for chunk in stream:
                if not chunk.choices:
                    continue
                delta = chunk.choices[0].delta

                # NOTE(review): text deltas do not end the wait state here;
                # presumably the CLI's on_token hides the spinner — confirm.
                if getattr(delta, "content", None):
                    self.on_token(delta.content)
                    content_parts.append(delta.content)
                    printed_any = True

                for tc in (getattr(delta, "tool_calls", None) or []):
                    if waiting:  # first tool token: drop the spinner
                        self.on_wait_end()
                        waiting = False
                    slot = calls.setdefault(
                        tc.index, {"id": "", "name": "", "arguments": ""})
                    if tc.id:
                        slot["id"] = tc.id
                    if tc.function:
                        if tc.function.name:
                            slot["name"] += tc.function.name
                        if tc.function.arguments:
                            slot["arguments"] += tc.function.arguments
        finally:
            if waiting:
                self.on_wait_end()

        if printed_any:
            self.on_turn_end()

        ordered = [calls[i] for i in sorted(calls)]
        for n, c in enumerate(ordered):
            if not c["id"]:
                c["id"] = f"call_{n}"
        return "".join(content_parts), ordered

    def compact(self, force: bool = False) -> bool:
        """Public entry point for compaction; see ``_maybe_compact``."""
        return self._maybe_compact(force=force)

    def _maybe_compact(self, force: bool = False) -> bool:
        """Replace older messages with a model-written summary.

        Compaction runs when ``force`` is true or the estimated transcript
        size reaches ``CONTEXT_TOKENS * COMPACT_AT``. Returns True when the
        transcript was rewritten, False when nothing was done.
        """
        budget = CONTEXT_TOKENS
        if not force and estimate_tokens(self.messages) < budget * COMPACT_AT:
            return False

        body = self.messages[1:]  # everything after system
        if len(body) <= KEEP_RECENT:
            return False

        # Snap the split to a clean user-turn boundary so we never orphan a
        # tool result from its assistant tool_call.
        split = max(0, len(body) - KEEP_RECENT)
        while split < len(body) and body[split]["role"] != "user":
            split += 1
        to_summarize, tail = body[:split], body[split:]
        if not to_summarize:
            return False

        self.on_notice("compacting earlier conversation…")
        summary = self._summarize(to_summarize)
        self.messages = [
            self._system(),
            {"role": "user",
             "content": "[Summary of earlier conversation]\n" + summary},
            {"role": "assistant", "content": "Got it — continuing from there."},
            *tail,
        ]
        return True

    def _summarize(self, msgs: list[dict]) -> str:
        """Ask the model for a summary of ``msgs``.

        Only the first 12000 characters of the rendered transcript are sent.
        Any failure is reported inside the returned text instead of raised.
        """
        transcript = _render(msgs)
        try:
            resp = self.backend.client.chat.completions.create(
                model=self.backend.model,
                messages=[
                    {"role": "system",
                     "content": "Summarize this coding-session transcript so work "
                                "can continue. Capture: the user's goals, decisions "
                                "made, files created/changed, and any open TODOs. "
                                "Be concise and factual."},
                    {"role": "user", "content": transcript[:12000]},
                ],
                temperature=0.1,
                stream=False,
            )
            return resp.choices[0].message.content or "(summary unavailable)"
        except Exception as e:
            return f"(summary failed: {e})"
287
+
288
+
289
+ _VALID_STATUS = {"pending", "in_progress", "completed"}
290
+
291
+
292
+ def _normalize_todos(raw) -> list[dict]:
293
+ """Coerce whatever the model sent into [{content, status}], since smaller
294
+ models often return bare strings, a JSON-encoded string, or a bad status."""
295
+ if isinstance(raw, str):
296
+ try:
297
+ raw = json.loads(raw)
298
+ except (json.JSONDecodeError, TypeError):
299
+ raw = [raw]
300
+ if isinstance(raw, dict): # single todo, or {"todos": [...]}
301
+ raw = raw.get("todos", [raw]) if "todos" in raw else [raw]
302
+ out = []
303
+ for item in raw or []:
304
+ if isinstance(item, str):
305
+ out.append({"content": item, "status": "pending"})
306
+ elif isinstance(item, dict):
307
+ content = (item.get("content") or item.get("task")
308
+ or item.get("text") or "").strip()
309
+ if not content:
310
+ continue
311
+ status = str(item.get("status", "pending")).lower()
312
+ if status not in _VALID_STATUS:
313
+ status = "pending"
314
+ out.append({"content": content, "status": status})
315
+ return out
316
+
317
+
318
+ def _render(msgs: list[dict]) -> str:
319
+ out = []
320
+ for m in msgs:
321
+ role = m.get("role")
322
+ if role == "tool":
323
+ out.append(f"[tool result] {(m.get('content') or '')[:400]}")
324
+ elif role == "assistant":
325
+ if m.get("content"):
326
+ out.append(f"[assistant] {m['content']}")
327
+ for tc in m.get("tool_calls", []) or []:
328
+ fn = tc.get("function", {})
329
+ out.append(f"[assistant called {fn.get('name')}] {fn.get('arguments','')[:200]}")
330
+ elif role == "user":
331
+ out.append(f"[user] {m.get('content','')}")
332
+ return "\n".join(out)
333
+
334
+
335
+ def _parse_args(raw: str | None) -> dict:
336
+ if not raw:
337
+ return {}
338
+ try:
339
+ return json.loads(raw)
340
+ except (json.JSONDecodeError, TypeError):
341
+ return {}
xcode/backends.py ADDED
@@ -0,0 +1,141 @@
1
+ """Backend auto-detection and client construction.
2
+
3
+ Both Ollama and llama.cpp's ``llama-server`` expose an OpenAI-compatible
4
+ ``/v1`` endpoint, so we can drive either of them through the ``openai`` SDK.
5
+ This module figures out which one is actually running and what model to use.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import os
11
+ from dataclasses import dataclass
12
+
13
+ import httpx
14
+ from openai import OpenAI
15
+
16
# (label, base_url-without-/v1, how-to-list-models)
# The loops in this module unpack the third field as ``_`` and pick the
# model-listing helper by label, so that field is informational only.
_CANDIDATES = [
    ("ollama", "http://localhost:11434", "/api/tags"),
    ("llama.cpp", "http://localhost:8080", "/v1/models"),
]
21
+
22
+ # Substrings (in priority order) of models that tend to be good at tool-calling.
23
+ # Used to pick a sensible default instead of just grabbing the first model.
24
+ _PREFERRED = [
25
+ "qwen2.5-coder", "qwen3-coder", "qwen2.5", "qwen3",
26
+ "llama3.1", "llama3.3", "mistral-nemo", "mistral", "deepseek-coder",
27
+ "command-r", "firefunction", "gpt-oss",
28
+ ]
29
+
30
+
31
+ def _pick_default(models: list[str]) -> str:
32
+ for needle in _PREFERRED:
33
+ for m in models:
34
+ if needle in m.lower():
35
+ return m
36
+ return models[0]
37
+
38
+
39
@dataclass
class Backend:
    """A resolved model backend: where it lives and how to talk to it."""

    name: str  # "ollama" | "llama.cpp"
    base_url: str  # e.g. http://localhost:11434/v1
    model: str
    client: OpenAI

    def describe(self) -> str:
        """Return a one-line ``name · model · base_url`` summary."""
        return "{} · {} · {}".format(self.name, self.model, self.base_url)
48
+
49
+
50
def _list_ollama_models(root: str) -> list[str]:
    """Return the ``name`` of each entry under ``models`` from ``/api/tags``.

    Uses a 1-second timeout; a non-success HTTP status raises via
    ``raise_for_status``.
    """
    response = httpx.get("{}/api/tags".format(root), timeout=1.0)
    response.raise_for_status()
    payload = response.json()
    return [entry["name"] for entry in payload.get("models", [])]
54
+
55
+
56
def _list_openai_models(root: str) -> list[str]:
    """Return the ``id`` of each entry under ``data`` from ``/v1/models``.

    Uses a 1-second timeout; a non-success HTTP status raises via
    ``raise_for_status``.
    """
    response = httpx.get("{}/v1/models".format(root), timeout=1.0)
    response.raise_for_status()
    payload = response.json()
    return [entry["id"] for entry in payload.get("data", [])]
60
+
61
+
62
def _probe(root: str) -> bool:
    """Report whether a GET to ``root`` completes within half a second.

    Any response counts as reachable, whatever its status code; any
    exception counts as unreachable.
    """
    try:
        httpx.get(root, timeout=0.5)
    except Exception:
        return False
    return True
68
+
69
+
70
def detect_backend() -> Backend:
    """Locate a running local backend, or raise RuntimeError with guidance.

    Environment overrides:
        XCODE_BASE_URL — point straight at an OpenAI-compatible /v1 endpoint
        XCODE_MODEL    — force a specific model name
        XCODE_API_KEY  — token, if your endpoint needs one (defaults to "local")

    Without XCODE_BASE_URL, the known candidates are tried one after another
    and the first one that is reachable and has a usable model wins.
    """
    api_key = os.getenv("XCODE_API_KEY", "local")
    forced_model = os.getenv("XCODE_MODEL")
    base_override = os.getenv("XCODE_BASE_URL")

    if base_override:
        base_url = base_override.rstrip("/")
        # Prefer the forced model, then whatever the endpoint lists first,
        # then a placeholder name.
        model = forced_model or _first_model_at(base_url) or "local-model"
        client = OpenAI(base_url=base_url, api_key=api_key)
        return Backend("custom", base_url, model, client)

    problems = []
    for name, root, _ in _CANDIDATES:
        if not _probe(root):
            problems.append(f" - {name}: nothing listening at {root}")
            continue

        lister = _list_ollama_models if name == "ollama" else _list_openai_models
        try:
            models = lister(root)
        except Exception as e:  # responded but couldn't list models
            problems.append(f" - {name}: reachable but model list failed ({e})")
            continue

        if not (models or forced_model):
            problems.append(f" - {name}: running but no models pulled")
            continue

        model = forced_model or _pick_default(models)
        base_url = f"{root}/v1"
        client = OpenAI(base_url=base_url, api_key=api_key)
        return Backend(name, base_url, model, client)

    raise RuntimeError(
        "No local model backend found.\n"
        + "\n".join(problems)
        + "\n\nStart one of:\n"
        " Ollama : `ollama serve` then `ollama pull qwen2.5-coder`\n"
        " llama.cpp: `llama-server -m model.gguf` (listens on :8080)\n"
        "Or set XCODE_BASE_URL to any OpenAI-compatible endpoint."
    )
120
+
121
+
122
def _first_model_at(base_url: str) -> str | None:
    """Return the first model id listed at ``base_url``, or None.

    A trailing ``/v1`` is removed before listing, because the listing helper
    appends ``/v1/models`` itself. Any failure, or an empty listing, yields
    None.
    """
    suffix = "/v1"
    if base_url.endswith(suffix):
        root = base_url[:-len(suffix)]
    else:
        root = base_url
    try:
        found = _list_openai_models(root)
        return found[0] if found else None
    except Exception:
        return None
128
+
129
+
130
def list_models() -> dict[str, list[str]]:
    """For diagnostics: map each reachable backend's label to its models.

    Backends that do not answer the probe are left out; a backend that
    answers but whose listing fails is reported with an empty list.
    """
    found: dict[str, list[str]] = {}
    for name, root, _ in _CANDIDATES:
        if _probe(root):
            try:
                if name == "ollama":
                    listing = _list_ollama_models(root)
                else:
                    listing = _list_openai_models(root)
                found[name] = listing
            except Exception:
                found[name] = []
    return found