utim-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3209 @@
1
+ """
2
+ UTIM Orchestrator — Manages the full agentic loop.
3
+
4
+ Architecture:
5
+ - Maintains local message history (system prompt + conversation)
6
+ - For each user message, runs a ReAct loop:
7
+ 1. Calls LLM via the UTIM server (/completions, streaming) — keeps API key off client
8
+ 2. Content tokens are written to stdout in real-time as they arrive
9
+ 3. If the LLM returns tool_calls, executes them locally (filesystem tools)
10
+ 4. Feeds tool results back into the loop
11
+ 5. Repeats until the LLM responds with plain text (no more tool calls)
12
+ - Falls back to calling OpenRouter directly if the server is unreachable
13
+ """
14
+ from __future__ import annotations
15
+
16
+ import difflib
17
+ import json
18
+ import os
19
+ import re
20
+ import shutil
21
+ import subprocess
22
+ import sys
23
+ import threading
24
+ import time
25
+ from concurrent.futures import ThreadPoolExecutor, as_completed
26
+ from typing import Any, Dict, List, Optional, Tuple
27
+
28
+ import requests
29
+ # openai SDK removed — we call OpenRouter directly via requests (no Rust/jiter needed)
30
+ from rich.console import Console
31
+ from rich.live import Live
32
+ from rich.markdown import Markdown
33
+ from rich.panel import Panel
34
+ from rich.rule import Rule
35
+ from rich.spinner import Spinner
36
+ from rich.text import Text
37
+
38
+ from .billing import CreditManager
39
+ from .tools import TOOL_FUNCTIONS, UTIM_TOOLS
40
+ import utim_cli.tools as _tools_module # for injecting cancel_event
41
+ from .config import config
42
+
43
+ # ─── Dynamic Context Budget ─────────────────────────────────────────────────
44
+ def _get_compression_threshold(model_id: str, context_window: int) -> int:
45
+ """Calculate dynamic compression threshold based on model's context window.
46
+
47
+ Strategy:
48
+ - Small windows (<= 32k): Compress at 70% of context window (minimum 16k)
49
+ - Medium windows (32k - 128k): Compress at 75% of context window
50
+ - Large windows (128k - 512k): Compress at 80% of context window
51
+ - Huge windows (>512k): Compress at 85% of context window
52
+ """
53
+ if not context_window or context_window <= 0:
54
+ context_window = 128_000
55
+
56
+ if context_window <= 32_000:
57
+ threshold_pct = 0.70
58
+ elif context_window <= 128_000:
59
+ threshold_pct = 0.75
60
+ elif context_window <= 512_000:
61
+ threshold_pct = 0.80
62
+ else:
63
+ threshold_pct = 0.85
64
+
65
+ threshold = int(context_window * threshold_pct)
66
+
67
+ # Ensure minimum threshold for safety
68
+ if threshold < 16_000:
69
+ threshold = 16_000
70
+
71
+ return threshold
72
+
73
# Base system prompt: the static persona and operating directives for the agent.
# It is plain runtime text handed to the model, so any edit to the wording
# changes agent behaviour.
SYSTEM_PROMPT = """You are UTIM AI, a high-agency senior software engineer operating autonomously inside a CLI. You focus purely on the technical project or task at hand.

### CORE DIRECTIVES:
1. **Explore & Route**: Classify the task immediately:
- *Surgical/Localized* (styling, copy edits, single-file changes): Limit context gathering to the target file and central conventions. Use `query_codebase`. Avoid global listings.
- *Architectural/System-Wide*: Map project structure and dependencies.
2. **Planning/Autonomous Tooling**: Do not wait for permissions. Use tools (`edit_file`, `write_file`, `run_command`, `project_res`, `plan_project`, `manage_todos`) proactively to achieve the goal. No placeholders or stubs.
3. **Think-Create-Verify**: ALWAYS use the `plan_project` tool to formulate a plan before taking any mutating actions. NEVER outline or generate the plan yourself in plain text. Track tasks, write code, and ALWAYS verify with execution (`run_command` -> build/test/run) rather than just reading files.
4. **Manifesto Reference**: Detailed engineering rules, coding standards, and safety/sandbox instructions are in .utim/UTIM.md. Read/reference it only when specific guidance is needed.
5. **Output**: Be concise and professional. Summarize all changes and test results when complete. Speak colloquially and warmly in the user's language.
6. **Execution**: When executing tools to get the project ready always create a todo list and follow them to the end. Add todos based on detailed steps.
7. **Tool Calling format**: You MUST use the native JSON-schema function-calling mechanism provided by the API to invoke tools. NEVER output raw JSON, <think to=...> tags, <|message|> tags, or other raw tool-call markup in your text response. Any tool usage must be strictly structured via the API.
8. **Premium Web Design**: When creating, updating, or styling web user interfaces, read/reference `.utim/DESIGN.md` to apply modern visual design conventions (such as premium typography, HSL color themes, glassmorphism, and smooth animations) instead of default browser styles or raw colors.
"""
88
+
89
+ # ─── Runtime environment detection ───────────────────────────────────────────
90
+
91
+ def _detect_environment() -> str:
92
+ """Detect the runtime environment and return a context string for the prompt."""
93
+ import platform, os
94
+
95
+ is_termux = os.path.isdir("/data/data/com.termux")
96
+ is_wsl = "microsoft" in platform.uname().release.lower()
97
+ system = platform.system() # 'Linux', 'Windows', 'Darwin'
98
+ machine = platform.machine() # 'x86_64', 'aarch64', etc.
99
+ home = os.path.expanduser("~")
100
+ cwd = os.getcwd()
101
+ shell = os.environ.get("SHELL", os.environ.get("COMSPEC", "unknown"))
102
+
103
+ lines = ["\n\nRUNTIME ENVIRONMENT (auto-detected):"]
104
+ lines.append(f"- OS: {system} ({machine})")
105
+ lines.append(f"- Shell: {shell}")
106
+ lines.append(f"- Home: {home}")
107
+ lines.append(f"- Working directory: {cwd}")
108
+
109
+ if is_termux:
110
+ lines += [
111
+ "- Platform: Android Termux",
112
+ "- Package manager: pkg (use `pkg install <name>` not apt/brew/choco)",
113
+ "- Home path: /data/data/com.termux/files/home",
114
+ "- No sudo — Termux is already a user-level Linux environment",
115
+ "- Node.js, Python, git, curl all available via `pkg install`",
116
+ "- The user is on a MOBILE DEVICE (Android). Keep file paths short,",
117
+ " avoid opening browsers or GUIs, prefer terminal-based workflows.",
118
+ "- Do NOT suggest desktop editors (VS Code, etc.) — use nano/vim instead.",
119
+ ]
120
+ elif is_wsl:
121
+ lines += [
122
+ "- Platform: Windows Subsystem for Linux (WSL)",
123
+ "- Package manager: apt (sudo apt install <name>)",
124
+ "- Windows drives mounted at /mnt/c, /mnt/d, etc.",
125
+ "- Can run both Linux and Windows commands",
126
+ ]
127
+ elif system == "Windows":
128
+ lines += [
129
+ "- Platform: Windows (native PowerShell/CMD)",
130
+ "- Package manager: winget, choco, or scoop",
131
+ "- Use PowerShell syntax for shell commands",
132
+ "- Use backslashes or raw strings for paths when needed",
133
+ "- **CRITICAL**: '&&' and '||' are NOT valid in PowerShell. Use ';' to chain commands.",
134
+ " WRONG: npm test && npm run build",
135
+ " RIGHT: npm test ; npm run build",
136
+ ]
137
+ elif system == "Darwin":
138
+ lines += [
139
+ "- Platform: macOS",
140
+ "- Package manager: brew (brew install <name>)",
141
+ ]
142
+ elif system == "Linux":
143
+ lines += [
144
+ "- Platform: Linux",
145
+ "- Package manager: apt / dnf / pacman depending on distro",
146
+ ]
147
+
148
+ return "\n".join(lines)
149
+
150
+
151
# Append the detected runtime environment exactly once, at import time; the
# environment is treated as stable for the lifetime of the process.
SYSTEM_PROMPT += _detect_environment()
153
+
154
# Greetings, acknowledgements and REPL-style commands that count as small talk.
# Entries are lowercase and carry no trailing "?", "." or "!" because the
# prompt is normalised that way before it is compared.
_CASUAL_PHRASES = frozenset({
    "hello", "hi", "hey", "yo", "sup", "hola", "greetings", "good morning",
    "good afternoon", "good evening", "how are you", "how's it going",
    "howdy", "hi there", "hello there", "test", "testing", "ping", "clear",
    "exit", "quit", "menu", "help", "restart", "reset", "ok", "okay", "yes",
    "no", "thanks", "thank you", "nice", "cool", "sure", "fine", "awesome",
    "perfect", "good", "great",
})

# A casual phrase occurring as a whole word/phrase, never inside a longer word
# (so "no" does not fire on "node" and "ok" does not fire on "token").
_CASUAL_PHRASE_RE = re.compile(
    r"(?<!\w)(?:"
    + "|".join(
        re.escape(phrase)
        for phrase in sorted(_CASUAL_PHRASES, key=len, reverse=True)
    )
    + r")(?!\w)"
)

# Characters that hint at code, file paths or other technical content.
_TECHNICAL_CHARS_RE = re.compile(r"[./_\\{}()\[\]=+\-*<>]")

_LETTERS_AND_SPACES_RE = re.compile(r"[a-z\s]+")


def is_casual_message(prompt: str) -> bool:
    """Return True when *prompt* looks like small talk rather than a task.

    The prompt is trimmed, lowercased and stripped of trailing ``?``, ``.``
    and ``!`` before it is examined. It counts as casual when:

    * it is empty (or empty after normalisation);
    * it is exactly one of the known casual phrases; or
    * it is at most 15 characters long, contains none of the technical
      symbols ``. / _ \\ { } ( ) [ ] = + - * < >``, and either contains a
      casual phrase as a whole word or consists only of letters and
      whitespace.

    Casual phrases are matched on word boundaries. Plain substring matching
    would misclassify short technical prompts such as "node 18" (contains
    "no") or "token 401" (contains "ok") as casual.
    """
    if not prompt:
        return True

    text = prompt.strip().lower().rstrip("?.!")
    if not text:
        return True

    if text in _CASUAL_PHRASES:
        return True

    # The looser heuristics below only apply to short, symbol-free messages.
    if len(text) > 15 or _TECHNICAL_CHARS_RE.search(text):
        return False

    if _CASUAL_PHRASE_RE.search(text):
        return True

    return _LETTERS_AND_SPACES_RE.fullmatch(text) is not None
185
+
186
def get_system_prompt(user_prompt: str = "", current_iteration: int = 0, elapsed_seconds: int = 0, turn_history: Optional[List[Dict]] = None) -> str:
    """Assemble the system prompt for the next LLM call.

    The result is ``SYSTEM_PROMPT`` followed by two optional sections:

    * an MCP notification block, when
      ``mcp_manager.get_notification_context()`` returns something truthy;
    * a ``[RELEVANT LESSONS]`` line built from up to two experiences fetched
      from vector memory for ``user_prompt``, each flattened to one line and
      cut to 100 characters.

    Both lookups are best-effort: any exception leaves the corresponding
    section empty. ``current_iteration``, ``elapsed_seconds`` and
    ``turn_history`` are accepted but not used by this function.
    """
    mcp_section = ""
    try:
        from utim_cli.mcp_client import mcp_manager
        notification = mcp_manager.get_notification_context()
        if notification:
            mcp_section = f"\n\n### MCP SERVERS AND TOOLS NOTIFICATION ###\n{notification}\n"
    except Exception:
        pass

    lessons_section = ""
    if user_prompt:
        try:
            from utim_cli.vector_memory import fetch_relevant_experiences
            recalled = fetch_relevant_experiences(user_prompt, top_k=2)
            # Collapse each experience to a single line; skip empty ones.
            flattened = (
                str(item.get("content", "")).strip().replace("\n", " ")
                for item in (recalled or ())
            )
            # Ultra-lightweight truncation keeps the prompt overhead small.
            bullets = [f"- {text[:100]}" for text in flattened if text]
            if bullets:
                lessons_section = "\n\n[RELEVANT LESSONS]: " + " | ".join(bullets) + "\n"
        except Exception:
            pass

    return SYSTEM_PROMPT + mcp_section + lessons_section
215
+
216
# ── Context management settings ───────────────────────────────────────────────
KEEP_FULL_TURNS = 10 # last N turns (including current) kept with full fidelity
TOKEN_BUDGET = 90_000 # hard token cap for messages sent to the LLM per call


# Tool display metadata
# Color constants for consolidated 3-color palette.
# Each value is a "#rrggbb" hex string; every tool below uses one of the three.
PURPLE = "#cba6f7"
BLUE = "#42bcf5"
YELLOW = "#f9e2af"

# Accent colour per tool (subtle, no bold labels).
# Maps a tool name to one of the palette constants above.
TOOL_COLOR: Dict[str, str] = {
    "read_file": BLUE,
    "write_file": YELLOW,
    "edit_file": YELLOW,
    "move_file": BLUE,
    "delete_file": PURPLE,
    "run_command": YELLOW,
    "list_directory": PURPLE,
    "get_background_output": BLUE,
    "send_background_input": YELLOW,
    "stop_background_process": PURPLE,
    "list_background_processes": BLUE,
    "web_search": YELLOW,
    "manage_todos": PURPLE,
    "query_codebase": YELLOW,
    "generate_image": YELLOW,
}
245
+
246
+
247
class _ServerUnavailableError(RuntimeError):
    """Raised when the UTIM server cannot be reached and no local key is configured.

    Caught in run_task to display a clean user-facing message (no traceback).
    """
251
+
252
+
253
+ class Orchestrator:
254
+ """Runs the full ReAct agentic loop, proxying LLM calls through the UTIM server."""
255
+
256
def __init__(self, console: Console):
    """Set up session state, load ``.env`` files and read the OpenRouter key.

    Args:
        console: Rich console used for all user-facing output.
    """
    self.console = console

    # Start the MCP manager; a failure here must not prevent startup.
    try:
        from utim_cli.mcp_client import mcp_manager
        mcp_manager.start()
    except Exception:
        pass

    self.credits = CreditManager()
    self.server_url = "https://utim-cli-production.up.railway.app"
    self.session_id: Optional[str] = None
    # Primary model — falls back through config.fallback_models on failure
    self.model_id: str = "cohere/north-mini-code:free"
    self._current_line_len = 0
    self.tool_results: List[Dict[str, Any]] = []
    self.turn_step_timings: List[Dict[str, Any]] = []

    # Session start timestamp, for elapsed-time awareness.
    self._session_start_time: float = time.time()

    # Compression threshold as computed by _get_dynamic_threshold().
    self._compression_threshold = self._get_dynamic_threshold()

    # Local conversation history — the single source of truth for this
    # session. Commands like /clear and /resume operate on this list directly.
    self.messages: List[Dict[str, Any]] = [
        {"role": "system", "content": get_system_prompt()}
    ]

    # ── .env loading ──────────────────────────────────────────────────────
    # Files are consulted in this order:
    #   1. <cwd>/.env, addressed by explicit absolute path and loaded with
    #      override=True, so a project-local value replaces one that is
    #      already present in the process environment. When that file does
    #      not exist, load_dotenv() is called without a path and without
    #      overriding anything.
    #   2. ./.utim/.env   (override=False: existing variables win)
    #   3. ~/.utim/.env   (override=False: existing variables win)
    cwd_env_path = os.path.join(os.getcwd(), ".env")
    try:
        from dotenv import load_dotenv as _load_dotenv
        if os.path.isfile(cwd_env_path):
            _load_dotenv(cwd_env_path, override=True)
        else:
            _load_dotenv(override=False)
    except Exception:
        pass

    import pathlib

    def _load_optional_env(env_file) -> None:
        # Load *env_file* without overriding existing variables; best-effort.
        if not env_file.exists():
            return
        try:
            from dotenv import load_dotenv as _load_dotenv
            _load_dotenv(env_file, override=False)
        except Exception:
            pass

    # Project-local and then global key files written by the setup wizard / /auth.
    _load_optional_env(pathlib.Path(".utim").resolve() / ".env")
    _load_optional_env(pathlib.Path.home() / ".utim" / ".env")

    # User identity from config (can be removed later)
    self.email = config.email or os.getenv("UTIM_EMAIL", "local@utim.dev")
    self.token = config.token

    # Local API key used for OpenRouter. Read only after every .env file
    # above has been processed.
    self._local_api_key: Optional[str] = os.getenv("OPENROUTER_API_KEY")
    if not self._local_api_key:
        self.console.print(
            "\n[bold yellow]Warning: OPENROUTER_API_KEY not found in environment "
            f"or .env file (looked in {cwd_env_path!r} then .utim/.env).[/bold yellow]\n"
        )

    self._local_client: bool = bool(self._local_api_key)

    # OpenRouter base URL (can be overridden per-model for custom providers)
    self._openrouter_base_url = "https://openrouter.ai/api/v1/chat/completions"

    # Cancellation flag
    self.cancel_event = threading.Event()

    # Lock intended to guard self.messages, which is touched by both the
    # main agent loop and the background summarisation thread.
    self._messages_lock = threading.Lock()

    # Manual-mode confirm hook; the default returns None.
    self._get_confirm_fn = lambda: None

    # Turn-level file-change tracking
    self.turn_history: List[Dict[str, Any]] = []
    self.redo_history: List[Dict[str, Any]] = []
    self._turn_changes: List[Dict[str, Any]] = []
    self._current_turn_start: int = 1

    # Eager session create removed for local mode - sessions are created by _persist_messages
368
+
369
+
370
+ # LLM calling
371
+
372
def _persist_messages(self, in_progress_turn: Optional[Dict] = None) -> None:
    """Snapshot the conversation and save it to local history on a daemon thread.

    A local session is created first when ``self.session_id`` is not set; if
    that fails the call returns without saving. Keys starting with ``_`` are
    private tracking fields and are stripped from every message before it is
    stored. Errors while saving are swallowed — persistence is best-effort
    and must never interrupt the agent loop.

    Args:
        in_progress_turn: Optional turn that is still running; when truthy it
            is appended to the saved turn history.
    """
    if not self.session_id:
        # Create a local session if we don't have one yet.
        try:
            from utim_cli.server.history import HistoryManager
            manager = HistoryManager()
            # Use the orchestrator's email (which defaults to local@utim.dev)
            owner = self.email or os.getenv("UTIM_EMAIL", "local@utim.dev")
            self.session_id = manager.create_session(self.model_id, email=owner)
        except Exception:
            return

    def _public_only(message):
        # Drop the private _-prefixed keys added for tracking.
        return {key: value for key, value in message.items() if not key.startswith("_")}

    def _sanitised_turn(turn):
        # Shallow-copy the turn and clean its message list, if it has one.
        cleaned = dict(turn)
        if "messages" in cleaned:
            cleaned["messages"] = [_public_only(m) for m in cleaned["messages"]]
        return cleaned

    # The first user message doubles as the conversation title.
    title = next(
        (m.get("content", "") or "" for m in self.messages if m.get("role") == "user"),
        "",
    )
    if isinstance(title, list):
        # Multi-part content: join the text of the dict parts.
        title = " ".join(part.get("text", "") for part in title if isinstance(part, dict))

    messages_snapshot = [_public_only(m) for m in self.messages]

    turns_snapshot = [_sanitised_turn(turn) for turn in self.turn_history]
    if in_progress_turn:
        turns_snapshot.append(_sanitised_turn(in_progress_turn))

    redo_snapshot = (
        [_sanitised_turn(turn) for turn in self.redo_history]
        if hasattr(self, "redo_history")
        else []
    )

    def _save_local():
        # Runs on the background thread: write history, then back up state.
        try:
            from utim_cli.server.history import HistoryManager
            HistoryManager().add_messages(
                self.session_id,
                messages_snapshot,
                self.email,
                title,
                turn_history=turns_snapshot,
                redo_history=redo_snapshot
            )
            from utim_cli.backup import backup_state
            backup_state()
        except Exception:
            pass  # best-effort — never crash the agent loop

    threading.Thread(target=_save_local, daemon=True).start()
452
+
453
# Pre-think marker patterns that qwen3.6-plus and similar models emit
# OUTSIDE their <think> tags — these should be hidden too.
# Matches a whole line (MULTILINE, case-insensitive) that consists only of
# "* Thinking..", "thinking.." (two or three dots), or "..." optionally
# followed by "thinking", with optional surrounding whitespace.
_PRE_THINK_PATTERNS = re.compile(
    r"^\s*(\*\s*Thinking\.\.\.?|\.\.\.(\s*thinking)?|thinking\.\.\.?)\s*$",
    re.IGNORECASE | re.MULTILINE,
)
459
+
460
+ # ── Custom-provider endpoint resolution ─────────────────────────────────
461
def _resolve_model_endpoint(self, model_id: str) -> tuple:
    """Look up where requests for *model_id* should be sent.

    Returns a ``(chat_completions_url, api_key)`` pair. A custom model
    (one known to ``config.get_custom_model``) supplies its own ``base_url``
    and, optionally, ``api_key``; the local OpenRouter key is used when the
    custom entry has none. Every other model resolves to the OpenRouter URL
    and the local key. The key is an empty string when none is available.
    """
    custom_entry = config.get_custom_model(model_id)
    if not custom_entry:
        # Built-in / OpenRouter model
        return self._openrouter_base_url, self._local_api_key or ""

    suffix = "/chat/completions"
    endpoint = custom_entry.get("base_url", "").rstrip("/")
    # Accept either a bare base path or a full chat-completions URL.
    if not endpoint.endswith(suffix):
        endpoint += suffix

    return endpoint, custom_entry.get("api_key") or self._local_api_key or ""
479
+
480
+ def _call_llm(self, messages: List[Dict], override_tools: Optional[List[Dict]] = None, override_model: Optional[str] = None, silent: bool = False) -> Tuple[Dict[str, Any], bool]:
481
+ """POST /chat/completions to OpenRouter (or a custom provider) with real-time streaming."""
482
+ if self.cancel_event.is_set():
483
+ return {
484
+ "role": "assistant",
485
+ "content": "[Aborted by user]",
486
+ "tool_calls": None,
487
+ "was_cut_off": True,
488
+ "aborted": True,
489
+ }, False
490
+
491
+ # Pre-flight quota check
492
+ api_key = config.get("api_key")
493
+ if api_key:
494
+ try:
495
+ from utim_cli.auth import SERVER_URL
496
+ resp = requests.get(
497
+ f"{SERVER_URL}/quota",
498
+ headers={"X-API-Key": api_key},
499
+ timeout=5,
500
+ )
501
+ if resp.status_code == 200:
502
+ quota = resp.json()
503
+
504
+ # 1. Check if quota is exhausted
505
+ used = quota.get("credits_used", quota.get("requests_used", 0.0))
506
+ limit = quota.get("credits_limit", quota.get("requests_limit", 1000))
507
+ if used >= limit:
508
+ self.console.print("\n[bold red]✗ Monthly credit quota exhausted.[/bold red]")
509
+ self.console.print(f" Resets at: {quota['reset_at']} • run [bold]utim upgrade[/bold] to upgrade.\n")
510
+ return {
511
+ "role": "assistant",
512
+ "content": "Monthly credit quota exhausted. Please upgrade your plan.",
513
+ "tool_calls": None,
514
+ }, False
515
+
516
+ # 2. Check if chosen model is allowed, if not downgrade
517
+ models_allowed = quota["models_allowed"]
518
+ chosen_model = override_model if override_model else self.model_id
519
+
520
+ if models_allowed != ["all"] and chosen_model not in models_allowed:
521
+ fallback_model = "cohere/north-mini-code:free" # default free fallback
522
+ self.console.print(f"\n[bold yellow]⚠ Model '{chosen_model}' is gated under your current '{quota['display_name']}' plan.[/bold yellow]")
523
+ self.console.print(f" Downgrading to default allowed model: '{fallback_model}' for this request.")
524
+ if override_model:
525
+ override_model = fallback_model
526
+ else:
527
+ self.model_id = fallback_model
528
+ except Exception:
529
+ pass
530
+
531
+ # Determine models to try for fallback support
532
+ primary_model = override_model if override_model else self.model_id
533
+
534
+ # Setup fallback for layer 2 (always include fallback models unless override_model is set)
535
+ if not override_model:
536
+ fallback_models = config.fallback_models
537
+ fallback_list = [m for m in fallback_models if m != primary_model]
538
+ models_to_try = [primary_model] + fallback_list
539
+ else:
540
+ models_to_try = [primary_model]
541
+
542
+ last_exc = None
543
+
544
+ for model_idx, current_model in enumerate(models_to_try):
545
+ if self.cancel_event.is_set():
546
+ break
547
+
548
+ current_is_custom = bool(config.get_custom_model(current_model))
549
+ # Check for API key only if we need it for this built-in/OpenRouter model
550
+ if not current_is_custom and not self._local_api_key and not config.get("api_key"):
551
+ continue
552
+
553
+ if model_idx > 0 and not silent:
554
+ self.console.print(f"\n[bold yellow]🔄 Falling back to model: {current_model}...[/bold yellow]")
555
+
556
+
557
+ model_retries = 2
558
+ for attempt in range(model_retries + 1):
559
+ if self.cancel_event.is_set():
560
+ break
561
+
562
+ mcp_tools = []
563
+ try:
564
+ from utim_cli.mcp_client import mcp_manager
565
+ mcp_tools = mcp_manager.get_tools()
566
+ except Exception:
567
+ pass
568
+
569
+ all_tools = (override_tools if override_tools is not None else UTIM_TOOLS) + mcp_tools
570
+ # Filter disabled tools
571
+ disabled = config.get("disabled_tools", [])
572
+ all_tools = [t for t in all_tools if t["function"]["name"] not in disabled]
573
+
574
+ payload = {
575
+ "model": current_model,
576
+ "messages": messages,
577
+ "stream": True,
578
+ "max_tokens": 8192,
579
+ }
580
+ if all_tools:
581
+ payload["tools"] = all_tools
582
+ printed_header = False
583
+ in_think = False
584
+ native_reasoning = False
585
+ display_buf = ""
586
+ _think_buf = ""
587
+ _proxy = sys.stdout
588
+ _term_width = self.console.width or 80
589
+ _line_buf = ""
590
+
591
+ try:
592
+ start_time = time.time()
593
+ # last_content_time: updated only when real content/tool-call data arrives.
594
+ # Intentionally NOT reset by keep-alive pings (empty lines) so stall
595
+ # detection isn't fooled by the server sending blank heartbeats.
596
+ last_content_time = start_time
597
+ _api_key = config.get("api_key")
598
+ if _api_key and not current_is_custom:
599
+ from utim_cli.auth import SERVER_URL
600
+ _endpoint_url = f"{SERVER_URL}/completions"
601
+ _headers = {
602
+ "X-API-Key": _api_key,
603
+ "Content-Type": "application/json"
604
+ }
605
+ request_payload = {
606
+ "messages": messages,
607
+ "model_id": current_model,
608
+ "tools": all_tools or None,
609
+ "session_id": self.session_id,
610
+ }
611
+ else:
612
+ _endpoint_url, _endpoint_key = self._resolve_model_endpoint(current_model)
613
+ _headers = {
614
+ "Authorization": f"Bearer {_endpoint_key}",
615
+ "Content-Type": "application/json"
616
+ }
617
+ request_payload = payload
618
+
619
+ with requests.post(
620
+ _endpoint_url,
621
+ json=request_payload,
622
+ headers=_headers,
623
+ stream=True,
624
+ timeout=(15, 300), # 300s per-chunk socket timeout — models need time to process large contexts
625
+ ) as resp:
626
+ resp.raise_for_status()
627
+ resp.encoding = "utf-8"
628
+
629
+ final_content = ""
630
+ final_tool_calls = []
631
+ was_cut_off = False
632
+
633
+ try:
634
+ # Dynamic thinking phases — cycle through contextual messages
635
+ # during the TTFT wait so the spinner feels alive, not stuck.
636
+ _THINKING_PHASES = [
637
+ "Analyzing context...",
638
+ "Reasoning through approach...",
639
+ "Evaluating options...",
640
+ "Structuring response...",
641
+ "Processing deeply...",
642
+ "Connecting patterns...",
643
+ "Formulating plan...",
644
+ "Almost there...",
645
+ ]
646
+ _phase_idx = 0
647
+ _last_phase_time = start_time
648
+
649
+ for raw_line in resp.iter_lines(decode_unicode=True):
650
+ if self.cancel_event.is_set():
651
+ return {
652
+ "role": "assistant",
653
+ "content": "[Aborted by user]",
654
+ "tool_calls": None,
655
+ "was_cut_off": True,
656
+ "aborted": True,
657
+ }, False
658
+
659
+ now = time.time()
660
+
661
+ # Cycle thinking topic every 8s during TTFT wait
662
+ if not final_content and not final_tool_calls:
663
+ if now - _last_phase_time > 8:
664
+ try:
665
+ from utim_cli.utim import STATE
666
+ STATE["thinking_topic"] = _THINKING_PHASES[_phase_idx % len(_THINKING_PHASES)]
667
+ _phase_idx += 1
668
+ _last_phase_time = now
669
+ except Exception:
670
+ pass
671
+
672
+ # Stall detection runs on EVERY iteration (including empty
673
+ # keep-alive lines) so a true stream stall is always caught.
674
+ if not final_content and not final_tool_calls:
675
+ # Hard 180-second timeout for Time-To-First-Token
676
+ # Models often need 60-120s to process large tool outputs before streaming
677
+ if now - start_time > 180:
678
+ raise requests.exceptions.Timeout("Hard TTFT timeout exceeded 180s")
679
+ else:
680
+ # Inter-content stall detection: abort if no real content
681
+ # has arrived for 120 seconds, even during keep-alive pings.
682
+ if now - last_content_time > 120:
683
+ raise requests.exceptions.Timeout("Inter-token stall timeout exceeded 120s")
684
+
685
+ if not raw_line:
686
+ continue
687
+
688
+ # NOTE: last_content_time is updated further below, only when
689
+ # actual content or tool-call data is parsed from the chunk.
690
+
691
+ if not raw_line.startswith("data: "):
692
+ # Catch non-streaming JSON responses or errors
693
+ stripped_line = raw_line.strip()
694
+ if stripped_line.startswith("{"):
695
+ try:
696
+ chunk = json.loads(stripped_line)
697
+ # Check if it's UTIM server response format
698
+ if "type" in chunk:
699
+ last_content_time = time.time()
700
+ t = chunk["type"]
701
+ if t == "content_delta":
702
+ text = chunk.get("text", "")
703
+ final_content += text
704
+ if not silent:
705
+ display_buf += text
706
+ printed_header = True
707
+ elif t == "done":
708
+ if "error" in chunk and chunk["error"]:
709
+ raise RuntimeError(f"Server completion error: {chunk['error']}")
710
+ final_content = chunk.get("content") or final_content
711
+ final_tool_calls = chunk.get("tool_calls") or final_tool_calls
712
+ break
713
+ continue
714
+
715
+ # Otherwise fall back to original OpenRouter logic
716
+ if "error" in chunk:
717
+ raise RuntimeError(f"OpenRouter error: {chunk['error'].get('message', str(chunk['error']))}")
718
+ if "choices" in chunk and len(chunk["choices"]) > 0:
719
+ choice = chunk["choices"][0]
720
+ if choice.get("finish_reason") == "length":
721
+ was_cut_off = True
722
+ msg = choice.get("message", {})
723
+ if "content" in msg and msg["content"]:
724
+ last_content_time = time.time()
725
+ final_content += msg["content"]
726
+ if not silent:
727
+ display_buf += msg["content"]
728
+ printed_header = True
729
+ if "tool_calls" in msg and msg["tool_calls"]:
730
+ last_content_time = time.time()
731
+ final_tool_calls = msg["tool_calls"]
732
+ break
733
+ except json.JSONDecodeError:
734
+ pass
735
+ continue
736
+
737
+ data_str = raw_line[6:]
738
+ if data_str == "[DONE]":
739
+ break
740
+ try:
741
+ chunk = json.loads(data_str)
742
+ except json.JSONDecodeError:
743
+ continue
744
+
745
+ # Check for API errors returned mid-stream
746
+ if "error" in chunk:
747
+ raise RuntimeError(f"OpenRouter error: {chunk['error'].get('message', str(chunk['error']))}")
748
+
749
+ if not chunk.get("choices"):
750
+ continue
751
+
752
+ choice = chunk["choices"][0]
753
+ if choice.get("finish_reason") == "length":
754
+ was_cut_off = True
755
+ delta = choice.get("delta", {})
756
+
757
+ # Handle tool calls accumulation
758
+ if "tool_calls" in delta:
759
+ last_content_time = time.time() # real data arrived
760
+ for tc in delta["tool_calls"]:
761
+ idx = tc.get("index", 0)
762
+ while len(final_tool_calls) <= idx:
763
+ final_tool_calls.append({"id": "", "type": "function", "function": {"name": "", "arguments": ""}})
764
+ if tc.get("id"):
765
+ final_tool_calls[idx]["id"] = tc["id"]
766
+ if tc.get("function"):
767
+ f = tc["function"]
768
+ if "name" in f:
769
+ final_tool_calls[idx]["function"]["name"] += f["name"]
770
+ if "arguments" in f:
771
+ final_tool_calls[idx]["function"]["arguments"] += f["arguments"]
772
+
773
+ # Handle content streaming
774
+ chunk_text = delta.get("content")
775
+ reasoning_text = delta.get("reasoning")
776
+
777
+ if reasoning_text:
778
+ last_content_time = time.time() # real data arrived
779
+ if not in_think:
780
+ in_think = True
781
+ native_reasoning = True
782
+ final_content += "<think>\n"
783
+ final_content += reasoning_text
784
+ _think_buf += reasoning_text
785
+ try:
786
+ from utim_cli.utim import STATE
787
+ lines = [l.strip() for l in _think_buf.split('\n') if l.strip()]
788
+ if lines:
789
+ topic = lines[-1]
790
+ if len(topic) > 60:
791
+ topic = topic[:57] + "..."
792
+ STATE["thinking_topic"] = topic
793
+ except Exception:
794
+ pass
795
+ continue
796
+
797
+ if chunk_text is not None and chunk_text != "":
798
+ last_content_time = time.time() # real data arrived
799
+ if native_reasoning:
800
+ native_reasoning = False
801
+ in_think = False
802
+ final_content += "\n</think>\n"
803
+
804
+ final_content += chunk_text
805
+
806
+ display = ""
807
+ remaining = chunk_text
808
+ while remaining:
809
+ if in_think:
810
+ for closing in ("</think>", "</thinking>", "[/THINKING]"):
811
+ end_idx = remaining.find(closing)
812
+ if end_idx >= 0:
813
+ _think_buf += remaining[:end_idx]
814
+ remaining = remaining[end_idx + len(closing):]
815
+ in_think = False
816
+ break
817
+ else:
818
+ _think_buf += remaining
819
+ remaining = ""
820
+
821
+ try:
822
+ from utim_cli.utim import STATE
823
+ lines = [l.strip() for l in _think_buf.split('\n') if l.strip()]
824
+ if lines:
825
+ topic = lines[-1]
826
+ if len(topic) > 60:
827
+ topic = topic[:57] + "..."
828
+ STATE["thinking_topic"] = topic
829
+ except Exception:
830
+ pass
831
+ else:
832
+ open_found = False
833
+ for opening in ("<think>", "<thinking>", "[THINKING]"):
834
+ start_idx = remaining.find(opening)
835
+ if start_idx >= 0:
836
+ display += remaining[:start_idx]
837
+ remaining = remaining[start_idx + len(opening):]
838
+ in_think = True
839
+ open_found = True
840
+ break
841
+ if not open_found:
842
+ display += remaining
843
+ remaining = ""
844
+
845
+ if display and not silent:
846
+ cleaned = self._PRE_THINK_PATTERNS.sub("", display)
847
+ if cleaned:
848
+ if not printed_header:
849
+ printed_header = True
850
+ display_buf += cleaned
851
+ except Exception as stream_exc:
852
+ # If we have received some content/tool calls, recover gracefully
853
+ if final_content or final_tool_calls:
854
+ if not silent:
855
+ self.console.print(f"\n[dim yellow]⚠ Stream interrupted: {stream_exc}. Returning partial response.[/dim yellow]\n")
856
+ was_cut_off = True
857
+ else:
858
+ raise stream_exc
859
+
860
+ # ── End of `with resp` streaming block ────────────────────────────
861
+ # Render the fully-buffered response as rich Markdown (tables, bold, code, etc.)
862
+ if printed_header and display_buf and not silent:
863
+ self.console.print()
864
+ self.console.print(Markdown(display_buf))
865
+ self.console.print()
866
+
867
+ clean_content = re.sub(
868
+ r"<think(?:ing)?>.*?</think(?:ing)?>", "", final_content, flags=re.DOTALL
869
+ ).strip()
870
+ clean_content = self._PRE_THINK_PATTERNS.sub("", clean_content).strip()
871
+
872
+ # Failsafe: if the model ONLY output reasoning and no actual content,
873
+ # use the reasoning as the content so the user sees it.
874
+ if not clean_content and final_content.strip():
875
+ clean_content = final_content.strip()
876
+ clean_content = re.sub(r"</?think(?:ing)?>", "", clean_content).strip()
877
+
878
+ clean_content = clean_content if clean_content else None
879
+ final_tool_calls = final_tool_calls if final_tool_calls else None
880
+
881
+ if not clean_content and not final_tool_calls:
882
+ if model_idx < len(models_to_try) - 1:
883
+ raise RuntimeError("Model returned an empty response (no content and no tool calls).")
884
+
885
+ final_msg = {
886
+ "role": "assistant",
887
+ "content": clean_content,
888
+ "tool_calls": final_tool_calls,
889
+ "was_cut_off": was_cut_off,
890
+ }
891
+ return final_msg, True
892
+
893
+ except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as exc:
894
+ last_exc = exc
895
+ if not silent:
896
+ self.console.print(f"\n[yellow]⚠ Model {current_model} failed (Connection/Timeout error: {exc}). Trying next model...[/yellow]")
897
+ break # try next model
898
+ except requests.exceptions.HTTPError as exc:
899
+ last_exc = exc
900
+ code = exc.response.status_code if exc.response is not None else "?"
901
+ if code == 400 and exc.response is not None:
902
+ try:
903
+ error_details = exc.response.json()
904
+ self.console.print(f"\n[red]HTTP 400 Error Details from {current_model}: {error_details}[/red]")
905
+ except Exception:
906
+ self.console.print(f"\n[red]HTTP 400 Error Details from {current_model}: {exc.response.text}[/red]")
907
+ if code == 429:
908
+ if not silent:
909
+ self.console.print(f"\n[yellow]⚠ Model {current_model} rate-limited (429). Trying next model...[/yellow]")
910
+ break
911
+ if attempt < model_retries:
912
+ delay = 3 * (attempt + 1)
913
+ if not silent:
914
+ self.console.print(f"\n[dim yellow]⟳ Model {current_model} returned HTTP {code}. Retrying in {delay}s (attempt {attempt+1}/{model_retries})...[/dim yellow]")
915
+ time.sleep(delay)
916
+ continue
917
+ if not silent:
918
+ self.console.print(f"\n[yellow]⚠ Model {current_model} failed (HTTP {code}). Trying next model...[/yellow]")
919
+ break # try next model
920
+ except RuntimeError as exc:
921
+ # Mid-stream API errors (e.g. model overloaded) or custom empty response error
922
+ last_exc = exc
923
+ if not silent:
924
+ self.console.print(f"\n[yellow]⚠ Model {current_model} failed: {exc}. Trying next model...[/yellow]")
925
+ break # try next model
926
+ except Exception as exc:
927
+ last_exc = exc
928
+ if not silent:
929
+ self.console.print(f"\n[yellow]⚠ Model {current_model} failed (unexpected error: {exc}). Trying next model...[/yellow]")
930
+ break # try next model
931
+
932
+ # If we exit the loop, all models failed
933
+ if isinstance(last_exc, requests.exceptions.HTTPError):
934
+ code = last_exc.response.status_code if last_exc.response is not None else "?"
935
+ raise _ServerUnavailableError(f"Model API returned an error after trying all fallbacks (HTTP {code}).") from last_exc
936
+ elif last_exc:
937
+ raise _ServerUnavailableError(f"Cannot reach model API after trying all fallback models. Last error: {last_exc}") from last_exc
938
+ else:
939
+ raise _ServerUnavailableError("OPENROUTER_API_KEY is missing. Please set it in your .env file.")
940
+
941
+ # Tool display helpers
942
+
943
# Icon shown next to each built-in tool. Some emoji carry a trailing space on
# purpose; it is part of the literal and must be kept.
TOOL_ICON: Dict[str, str] = {
    "read_file": "📄",
    "write_file": "✏️ ",
    "edit_file": "🔧",
    "move_file": "📦",
    "delete_file": "🗑️ ",
    "run_command": "⚡",
    "list_directory": "📁",
    "get_background_output": "📤",
    "send_background_input": "⌨️ ",
    "stop_background_process": "⏹️ ",
    "list_background_processes": "📋",
    "web_search": "🔍",
    "plan_project": "🧠",
    "manage_todos": "📝",
    "query_codebase": "🧠",
    "generate_image": "🎨",
}

# User-friendly display name for each built-in tool. Keep the key set in step
# with TOOL_ICON: a tool missing here is rendered under its raw identifier.
TOOL_DISPLAY_NAME: Dict[str, str] = {
    "read_file": "Reading file",
    "write_file": "Writing file",
    "edit_file": "Editing file",
    "move_file": "Moving file",
    "delete_file": "Deleting file",
    "run_command": "Running command",
    "list_directory": "Listing directory",
    "get_background_output": "Reading background output",
    "send_background_input": "Sending background input",
    "stop_background_process": "Stopping background process",
    "list_background_processes": "Listing background processes",
    "web_search": "Searching web",
    "plan_project": "Planning project",
    "manage_todos": "Managing To-Dos",
    "query_codebase": "Querying Codebase",
    "generate_image": "Generating image",
}
980
+
981
+ @staticmethod
982
+ def _get_display_arg(func_name: str, arguments: Dict) -> str:
983
+ """Extract the most informative single argument to display inline."""
984
+ if "__" in func_name:
985
+ return ", ".join(f"{k}={v}" for k, v in arguments.items())[:60]
986
+ if func_name in ("read_file", "write_file", "delete_file"):
987
+ path = arguments.get("filepath", arguments.get("path", ""))
988
+ # Append line range for read_file when a range was requested
989
+ if func_name == "read_file":
990
+ s = arguments.get("start_line")
991
+ e = arguments.get("end_line")
992
+ if s or e:
993
+ path = f"{path}:{s or ''}–{e or ''}"
994
+ return path
995
+ if func_name == "edit_file":
996
+ return arguments.get("filepath", arguments.get("path", ""))
997
+ if func_name == "run_command":
998
+ cmd = arguments.get("command", "")
999
+ if not cmd:
1000
+ cmds = arguments.get("commands", [])
1001
+ if cmds and isinstance(cmds, list):
1002
+ cmd = "; ".join(cmds)
1003
+ display = (cmd or "")[:80] + ("…" if len(cmd or "") > 80 else "")
1004
+ dir_p = arguments.get("dir_path", "")
1005
+ if dir_p:
1006
+ display += f" [{dir_p}]"
1007
+ return display
1008
+ if func_name == "list_directory":
1009
+ return arguments.get("path", ".")
1010
+ if func_name == "move_file":
1011
+ src = arguments.get("source", arguments.get("src", ""))
1012
+ dst = arguments.get("destination", arguments.get("dst", ""))
1013
+ return f"{src} → {dst}"
1014
+ if func_name == "web_search":
1015
+ return arguments.get("prompt", arguments.get("query", ""))
1016
+ if func_name == "generate_image":
1017
+ return arguments.get("prompt", "")[:40]
1018
+ if func_name == "plan_project":
1019
+ return f"{arguments.get('plan_part', 'general')} - {arguments.get('prompt', '')[:30]}"
1020
+ if func_name == "query_codebase":
1021
+ return arguments.get('query', '')[:40]
1022
+ if func_name == "manage_todos":
1023
+ ops = arguments.get('operations', [])
1024
+ if ops:
1025
+ return f"{len(ops)} operations"
1026
+
1027
+ action = arguments.get('action', '')
1028
+ tid = arguments.get('task_id', '')
1029
+ desc = arguments.get('description', '')[:30]
1030
+ if action == 'add': return f"Add: {desc}"
1031
+ if action in ('mark_done', 'mark_pending', 'delete'): return f"{action}: {tid}"
1032
+ return action
1033
+ if func_name == "get_background_output":
1034
+ return f"process #{arguments.get('process_id', '?')}"
1035
+ if func_name == "send_background_input":
1036
+ return f"to #{arguments.get('process_id', '?')}: {arguments.get('input_text', '')[:30]}"
1037
+ return ""
1038
+
1039
def _render_result(self, func_name: str, arguments: Dict, result: str, color: str, user_confirmed: bool = False) -> None:
    """Print the outcome of a tool call in the layout suited to that tool.

    Args:
        func_name: Name of the tool that ran.
        arguments: The tool-call arguments. For ``write_file`` the private
            ``_old_content`` key, when present, is used to build a diff.
        result: The tool's textual result.
        color: Rich style used for the tool name and the panel border.
        user_confirmed: True when the user already approved a preview of this
            change; file-modifying tools then get a single compact line
            instead of a panel.
    """
    result = result if isinstance(result, str) else str(result)
    width = min(self.console.width - 2, 120)
    display_name = self.TOOL_DISPLAY_NAME.get(func_name, func_name)
    display_arg = self._get_display_arg(func_name, arguments)

    # The user already saw and approved the diff in the confirm dialog, so a
    # one-line acknowledgement is enough.
    if user_confirmed and func_name in ("edit_file", "write_file", "delete_file", "move_file"):
        line = Text()
        line.append(" ✓ ", style=f"bold {color}")
        line.append(display_name, style=f"{color}")
        if display_arg:
            line.append(f" {display_arg}", style="white")
        self.console.print(line)
        return

    # To-do updates are printed as plain text, without a panel.
    if func_name == "manage_todos":
        icon = self.TOOL_ICON.get(func_name, "●")
        self.console.print(f" {icon} [bold {color}]{display_name}[/bold {color}] [dim italic](Ctrl+O to expand)[/dim italic]")
        self.console.print(Text(result.strip(), style="dim white"))
        self.console.print()
        return

    header = Text()
    header.append("✓ ", style="bold white")
    header.append(f"{display_name}", style=f"bold {color}")
    if display_arg:
        header.append(f" {display_arg}", style="white")

    if func_name == "edit_file":
        body = self._render_edit_body(arguments)
    elif func_name == "write_file":
        body = self._render_write_body(arguments)
    elif func_name == "run_command":
        body = self._render_command_body(result)
    elif func_name == "list_directory":
        body = self._render_listing_body(result)
    elif func_name == "read_file":
        body = self._render_read_body(result)
    else:
        body = None

    if body is None:
        # Generic tools: show the (clipped) result as plain text.
        summary = result.strip()
        if len(summary) > 300:
            summary = summary[:300] + "…"
        body = Text(summary, style="dim white")
        expandable = "…" in summary
    else:
        expandable = "…" in body.plain or "diff truncated" in body.plain

    if expandable:
        header.append(" (Ctrl+O to expand)", style="dim italic")
    self.console.print(Panel(
        body,
        title=header,
        title_align="left",
        border_style=color,
        padding=(0, 1),
        width=width,
    ))

@staticmethod
def _render_edit_body(arguments: Dict) -> "Text":
    """Build the -/+ preview and line-count footer for an edit_file call."""
    replacements = arguments.get("replacements")
    if replacements and isinstance(replacements, list):
        # Multi-replacement form: pool the lines of every replacement.
        pairs = [(r.get("old_str"), r.get("new_str")) for r in replacements if isinstance(r, dict)]
    else:
        pairs = [(arguments.get("old_str"), arguments.get("new_str"))]
    # Values may be null in model-generated JSON; treat that as empty text.
    old_lines = [ln for old, _ in pairs for ln in str(old or "").splitlines()]
    new_lines = [ln for _, new in pairs for ln in str(new or "").splitlines()]
    removed = len(old_lines)
    added = len(new_lines)

    body = Text()
    # Show up to 4 removed lines, then up to 4 added lines.
    for ln in old_lines[:4]:
        body.append(f"- {ln}\n", style="bold red")
    if removed > 4:
        body.append(f" … ({removed - 4} more lines)\n", style="dim red")
    for ln in new_lines[:4]:
        body.append(f"+ {ln}\n", style="bold green")
    if added > 4:
        body.append(f" … ({added - 4} more lines)\n", style="dim green")
    # Stat footer
    body.append("\n")
    body.append(f" -{removed} lines", style="bold red")
    body.append(" ", style="dim")
    body.append(f"+{added} lines", style="bold green")
    return body

@staticmethod
def _render_write_body(arguments: Dict) -> "Text":
    """Build a unified-diff preview (first 30 lines) for a write_file call."""
    old_content = arguments.get("_old_content") or ""
    new_content = arguments.get("content") or ""
    diff_lines = list(difflib.unified_diff(
        str(old_content).splitlines(keepends=True),
        str(new_content).splitlines(keepends=True),
        lineterm="",
    ))
    # unified_diff emits the two file headers first. Dropping them by position
    # (not by "---"/"+++" prefix) keeps real content lines that happen to
    # start with "--" or "++".
    hunk_lines = [ln.rstrip("\n") for ln in diff_lines[2:]]

    body = Text()
    if not hunk_lines:
        # No change (same content written again)
        body.append(" (no changes)", style="dim")
        return body

    for shown, ln in enumerate(hunk_lines):
        if shown >= 30:
            body.append(" … (diff truncated)\n", style="dim")
            break
        if ln.startswith("+"):
            style = "bold green"
        elif ln.startswith("-"):
            style = "bold red"
        else:
            style = "dim white"
        body.append(f"{ln}\n", style=style)

    # Stat footer: totals cover the whole diff, not only the lines shown.
    added = sum(1 for ln in hunk_lines if ln.startswith("+"))
    removed = sum(1 for ln in hunk_lines if ln.startswith("-"))
    body.append("\n")
    if removed:
        body.append(f" -{removed} lines", style="bold red")
        body.append(" ", style="dim")
    body.append(f"+{added} lines", style="bold green")
    return body

@staticmethod
def _render_command_body(result: str) -> "Text":
    """Build the exit-code badge and stdout/stderr previews for run_command."""
    exit_code: Optional[str] = None
    sections: Dict[str, List[str]] = {"stdout": [], "stderr": []}
    current: Optional[str] = None
    # Structured result: "[exit_code: N]", then "[stdout]" / "[stderr]" sections.
    for line in result.strip().splitlines():
        if line.startswith("[exit_code:"):
            exit_code = line.strip().lstrip("[").rstrip("]").split(":", 1)[1].strip()
        elif line == "[stdout]":
            current = "stdout"
        elif line == "[stderr]":
            current = "stderr"
        elif current is not None:
            sections[current].append(line)

    stdout_lines = sections["stdout"]
    stderr_lines = sections["stderr"]
    has_stdout = any(ln.strip() for ln in stdout_lines)
    has_stderr = any(ln.strip() for ln in stderr_lines)

    body = Text()
    if exit_code is not None:
        code_int = int(exit_code) if exit_code.lstrip("-").isdigit() else None
        failed = code_int is not None and code_int != 0
        body.append(f"exit {exit_code}\n", style="bold red" if failed else "bold green")

    if has_stdout:
        body.append("\n".join(stdout_lines[:20]), style="dim white")
        if len(stdout_lines) > 20:
            body.append(f"\n\u2026 ({len(stdout_lines) - 20} more lines)", style="dim")
        body.append("\n")

    # Stderr is tinted yellow to tell it apart from stdout.
    if has_stderr:
        body.append("\n[stderr]\n", style="bold yellow")
        body.append("\n".join(stderr_lines[:10]), style="dim #f9e2af")
        if len(stderr_lines) > 10:
            body.append(f"\n\u2026 ({len(stderr_lines) - 10} more lines)", style="dim")

    if not has_stdout and not has_stderr:
        body.append("(no output)", style="dim")
    return body

@staticmethod
def _render_listing_body(result: str) -> "Text":
    """Build a preview of at most 30 entries for list_directory."""
    lines = result.strip().splitlines()
    # Skip the "Contents of X:" line; the panel title already names the path.
    items = lines[1:] if lines and lines[0].startswith("Contents") else lines
    body = Text()
    for item in items[:30]:
        body.append(f" {item}\n", style="dim white")
    if len(items) > 30:
        body.append(f" … ({len(items) - 30} more items)", style="dim")
    return body

@staticmethod
def _render_read_body(result: str) -> "Text":
    """Build a 15-line preview for read_file, keeping a leading "[...]" metadata line."""
    all_lines = result.splitlines()
    meta = all_lines[0] if all_lines and all_lines[0].startswith("[") else ""
    content_lines = all_lines[1:] if meta else all_lines
    body = Text()
    if meta:
        body.append(meta + "\n", style="dim #585b70")
    for line in content_lines[:15]:
        body.append(line + "\n", style="dim white")
    if len(content_lines) > 15:
        body.append(f"… ({len(content_lines) - 15} more lines in this chunk)", style="dim")
    return body
1297
+
1298
+ # Tool execution
1299
+
1300
+ def _execute_tool_timed(self, tool_call: Dict) -> str:
1301
+ """Execute a tool call without measuring its duration."""
1302
+ return self._execute_tool(tool_call)
1303
+
1304
def _execute_tool(self, tool_call: Dict) -> str:
    """Run one tool call end to end and return its result as text.

    Steps, in order: normalise the call's name and arguments (written back
    into ``tool_call``), route ``server__tool`` names to a connected MCP
    server, snapshot files about to be modified for rewind tracking, ask for
    approval before state-changing tools, run the tool from
    ``TOOL_FUNCTIONS``, record the after-state, render the result and append
    it to ``self.tool_results``.

    Args:
        tool_call: ``{"function": {"name": ..., "arguments": <JSON text>}}``.
            Mutated in place with the cleaned name and arguments.

    Returns:
        The tool's result, or a message describing an unknown tool, a user
        rejection or a failure. Exceptions raised by the tool are rendered
        and reported in the return value, not propagated.
    """
    func_name, arguments = self._normalize_tool_call(tool_call)

    color = TOOL_COLOR.get(func_name, "#888888")
    icon = self.TOOL_ICON.get(func_name, "●")

    # ── MCP tools are addressed as "<server>__<tool>" ────────────────────
    if "__" in func_name:
        server_name, actual_tool_name = func_name.split("__", 1)
        try:
            from utim_cli.mcp_client import mcp_manager
            if server_name in mcp_manager.sessions:
                color = "#cba6f7"  # purple accent for MCP
                icon = "🔌"
                display_name = f"{server_name} ➔ {actual_tool_name}"

                # Print running indicator
                self.console.print(f" {icon} Calling MCP tool {display_name}...", style=f"dim {color}")

                # Call tool synchronously
                result = mcp_manager.call_tool(server_name, actual_tool_name, arguments)

                # Register display metadata so the renderer can look it up
                self.TOOL_DISPLAY_NAME[func_name] = display_name
                self.TOOL_ICON[func_name] = icon
                TOOL_COLOR[func_name] = color

                self._render_result(func_name, arguments, result, color)
                return result
        except Exception as e:
            self.console.print(Panel(
                Text(f"Error executing MCP tool {func_name}: {str(e)}", style="red"),
                border_style="red", padding=(0, 1),
            ))
            return f"Error executing MCP tool {func_name}: {str(e)}"

    if func_name not in TOOL_FUNCTIONS:
        self.console.print(Panel(
            Text(f"Unknown tool: {func_name}", style="red"),
            border_style="red", padding=(0, 1),
        ))
        return f"Unknown tool: {func_name}"

    display_arg = self._get_display_arg(func_name, arguments)

    # ── Capture before-state for /rewind tracking ────────────────────────
    rewind_entry = self._snapshot_for_rewind(func_name, arguments)

    # For write_file: read the old content before overwriting so the result
    # can be shown as a diff. None means the file did not exist before.
    if func_name == "write_file":
        filepath = arguments.get("filepath", arguments.get("path", ""))
        arguments["_old_content"] = self._read_text_or_none(filepath)
        if rewind_entry:
            rewind_entry["before"] = arguments["_old_content"]

    # Tools do not accept the private "_"-prefixed keys; strip them.
    call_args = {k: v for k, v in arguments.items() if not k.startswith("_")}

    # ── Manual-mode confirmation ─────────────────────────────────────────
    rejected = f"[User rejected {func_name}. Do NOT retry this action — ask the user what they want instead.]"
    user_confirmed = False
    if func_name in ("write_file", "edit_file", "delete_file", "move_file", "run_command"):
        confirm_fn = self._get_confirm_fn()
        if confirm_fn is not None:
            decision = confirm_fn(func_name, arguments, self._build_diff_preview(func_name, arguments))
            if decision == "reject":
                return rejected
            # 'allow' or 'allow_session' → user saw and approved the diff
            user_confirmed = True
        elif sys.stdin.isatty():
            # No dialog available: fall back to a plain terminal prompt.
            if not self._confirm_on_terminal(func_name, arguments):
                self.console.print("[bold red]✗ Execution cancelled by user.[/bold red]")
                return rejected

    # ── Silent tools: skip all visual output ─────────────────────────────
    if func_name in {"manage_memory", "recall_experience", "store_experience"}:
        try:
            result = TOOL_FUNCTIONS[func_name](**call_args)
        except Exception as exc:
            result = f"Error executing {func_name}: {exc}"
        self.tool_results.append({
            "func_name": func_name,
            "arguments": arguments,
            "result": str(result),
            "color": color
        })
        return str(result)

    # Print a single static "running" line so the user knows which tool is
    # executing. Rich Live/Spinner is avoided on purpose: it animates at
    # 12 fps and conflicts with prompt_toolkit's own redraws.
    running = Text()
    running.append(f" {icon} ", style=color)
    running.append(func_name, style=f"bold {color}")
    if display_arg:
        running.append(f" {display_arg}", style="dim white")
    running.append(" …", style="dim")
    self.console.print(running)

    try:
        self._show_tool_topic(func_name, arguments, display_arg)
        result = TOOL_FUNCTIONS[func_name](**call_args)
        self._set_thinking_topic("Evaluating tool results...")
    except Exception as exc:
        self.console.print(Panel(
            Text(str(exc), style="red"),
            title=Text(f"✗ {func_name}", style="bold red"),
            title_align="left",
            border_style="red",
            padding=(0, 1),
        ))
        return f"Error executing {func_name}: {exc}"

    # ── Record after-state for rewind tracking ───────────────────────────
    if rewind_entry:
        if func_name == "delete_file":
            rewind_entry["after"] = None  # file no longer exists
        else:
            # For move_file "path" is the destination; the source is gone.
            rewind_entry["after"] = self._read_text_or_none(rewind_entry["path"])
        self._turn_changes.append(rewind_entry)

    # Render the result panel (compact if user already approved via dialog)
    self._render_result(func_name, arguments, str(result), color, user_confirmed=user_confirmed)

    self.tool_results.append({
        "func_name": func_name,
        "arguments": arguments,
        "result": str(result),
        "color": color
    })

    return str(result)

@staticmethod
def _normalize_tool_call(tool_call: Dict) -> Tuple[str, Dict]:
    """Clean a tool call's name and arguments and write them back into it.

    Some proxies translate XML tool calls badly and deliver names such as
    ``read_file filepath=".utim/UTIM.md" />``; the attributes are folded into
    the arguments and the bare tool name is kept.
    """
    func_name = tool_call["function"]["name"]
    raw_args = tool_call["function"].get("arguments", "{}")

    arguments: Dict = {}
    if isinstance(raw_args, dict):
        # Already decoded: keep it instead of losing it to a failed json.loads.
        arguments = dict(raw_args)
    elif raw_args:
        try:
            parsed = json.loads(raw_args)
        except (TypeError, ValueError, RecursionError):
            parsed = None  # malformed JSON: fall back to no arguments
        if isinstance(parsed, dict):
            arguments = parsed

    cleaned = func_name.strip("<> ")
    if cleaned:
        parts = cleaned.split(None, 1)
        if len(parts) > 1:
            attr_string = parts[1].rstrip("/> ")
            pattern = r'(\w+)\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([^\s>]+))'
            for key, dq, sq, bare in re.findall(pattern, attr_string):
                arguments[key] = dq or sq or bare or ""
        func_name = parts[0]

    tool_call["function"]["name"] = func_name
    tool_call["function"]["arguments"] = json.dumps(arguments)
    return func_name, arguments

@staticmethod
def _read_text_or_none(path: Any) -> Optional[str]:
    """Return the UTF-8 text of ``path``, or None when it cannot be read."""
    try:
        with open(path, "r", encoding="utf-8") as handle:
            return handle.read()
    except (OSError, ValueError, TypeError):
        # Missing file, undecodable content, or a non-path argument.
        return None

def _snapshot_for_rewind(self, func_name: str, arguments: Dict) -> Optional[Dict[str, Any]]:
    """Describe the pre-execution state of the file a modifying tool will touch."""
    if func_name not in ("write_file", "edit_file", "delete_file", "move_file"):
        return None

    if func_name == "move_file":
        src = arguments.get("src", arguments.get("source", ""))
        dst = arguments.get("dst", arguments.get("destination", ""))
        # Uses the module-level ``os``. A function-scope ``import os`` later in
        # the same function would make ``os`` local and unbound at this point.
        src_existed = isinstance(src, str) and os.path.exists(src)
        src_content = self._read_text_or_none(src) if src_existed else None
        if src_existed and src_content is None:
            src_existed = False  # unreadable source is treated as absent
        return {
            "action": "move_file",
            "path": dst,              # destination path (where file will end up)
            "before_path": src,       # original source path
            "before": src_content,    # content of source before move (if existed)
            "before_existed": src_existed,
            "after": None,
        }

    path = arguments.get("filepath", arguments.get("path",
           arguments.get("dst", arguments.get("destination", ""))))
    if func_name == "write_file":
        # "before" is filled in by the caller once the old content is read.
        return {"action": func_name, "path": path, "before": None, "after": None}

    before = self._read_text_or_none(path)
    return {"action": func_name, "path": path, "before": before if before is not None else "", "after": None}

@staticmethod
def _build_diff_preview(func_name: str, arguments: Dict) -> list:
    """Build the compact diff lines shown in the confirmation dialog."""
    preview: list = []
    if func_name == "edit_file":
        repls = arguments.get("replacements")
        if repls and isinstance(repls, list):
            for idx, repl in enumerate(repls[:3]):
                repl = repl if isinstance(repl, dict) else {}
                preview.append(f"--- Replacement #{idx+1} ---")
                preview.extend(f"- {ln}" for ln in str(repl.get("old_str", "") or "").splitlines()[:2])
                preview.extend(f"+ {ln}" for ln in str(repl.get("new_str", "") or "").splitlines()[:2])
            if len(repls) > 3:
                preview.append(f"... and {len(repls) - 3} more replacements")
        else:
            preview.extend(f"- {ln}" for ln in str(arguments.get("old_str", "") or "").splitlines()[:5])
            preview.extend(f"+ {ln}" for ln in str(arguments.get("new_str", "") or "").splitlines()[:5])
    elif func_name == "write_file":
        old_c = str(arguments.get("_old_content") or "")
        new_c = str(arguments.get("content") or "")
        preview = [
            ln for ln in list(difflib.unified_diff(
                old_c.splitlines(), new_c.splitlines(), lineterm="",
            ))[:15]
            if not ln.startswith("---") and not ln.startswith("+++")
        ]
    return preview

def _confirm_on_terminal(self, func_name: str, arguments: Dict) -> bool:
    """Show a plain-terminal summary of the action and ask whether to proceed.

    Model-generated text is printed through ``Text`` objects rather than
    markup strings, so brackets in commands or file content are shown
    verbatim and cannot be parsed as Rich markup tags.
    """
    from rich.prompt import Confirm

    self.console.print(f"\n[bold yellow]⬡ Approval Required for {func_name}:[/bold yellow]")
    if func_name == "run_command":
        cmd = arguments.get("command") or arguments.get("commands")
        line = Text(" Command: ")
        line.append(str(cmd), style="bold white")
        self.console.print(line)
    elif func_name in ("write_file", "edit_file", "delete_file", "move_file"):
        filepath = (arguments.get("filepath") or arguments.get("path")
                    or arguments.get("src") or arguments.get("source")
                    or arguments.get("dst") or arguments.get("destination"))
        line = Text(" File Action: ")
        line.append(f"{func_name} on {filepath}", style="bold white")
        self.console.print(line)

        old_c = str(arguments.get("_old_content") or "")
        new_c = str(arguments.get("content") or "")
        repls = arguments.get("replacements")
        if func_name == "edit_file":
            if repls and isinstance(repls, list):
                for repl in repls[:2]:
                    repl = repl if isinstance(repl, dict) else {}
                    line = Text(" - Replace: ")
                    line.append(repr(repl.get("old_str")), style="red")
                    line.append(" with ")
                    line.append(repr(repl.get("new_str")), style="green")
                    self.console.print(line)
            else:
                old_c = str(arguments.get("old_str", "") or "")
                new_c = str(arguments.get("new_str", "") or "")
        if func_name == "write_file" or (func_name == "edit_file" and not repls):
            diff_lines = list(difflib.unified_diff(
                old_c.splitlines(), new_c.splitlines(), lineterm=""
            ))[:10]
            for dl in diff_lines:
                if dl.startswith("+"):
                    self.console.print(Text(f" {dl}", style="green"))
                elif dl.startswith("-"):
                    self.console.print(Text(f" {dl}", style="red"))
                else:
                    self.console.print(Text(f" {dl}"))

    return bool(Confirm.ask("Do you want to proceed?"))

@staticmethod
def _set_thinking_topic(topic: str) -> None:
    """Best-effort update of the shared "thinking" indicator text."""
    try:
        # Imported lazily, as elsewhere in this file.
        from utim_cli.utim import STATE
        STATE["thinking_topic"] = topic
    except Exception:
        # The indicator is cosmetic; it must never break tool execution.
        pass

def _show_tool_topic(self, func_name: str, arguments: Dict, display_arg: str) -> None:
    """Point the thinking indicator at the tool that is about to run."""
    def short(value: Any, limit: int) -> str:
        value = "" if value is None else str(value)
        return value if len(value) <= limit else value[:limit - 3] + "..."

    def target() -> str:
        return os.path.basename(str(arguments.get("filepath") or "file"))

    try:
        if func_name == "run_command":
            topic = f"Running: {short(arguments.get('command') or display_arg, 30)}"
        elif func_name == "plan_project":
            topic = f"Architecting {arguments.get('plan_part', 'project')}..."
        elif func_name in ("web_search", "search_web"):
            query = arguments.get("prompt", arguments.get("query", ""))
            topic = f"Searching web for '{short(query, 25)}'..."
        elif func_name == "read_file":
            topic = f"Reading {target()}..."
        elif func_name == "write_file":
            topic = f"Writing to {target()}..."
        elif func_name in ("edit_file", "multi_replace_file_content"):
            topic = f"Editing {target()}..."
        else:
            topic = f"Executing {func_name}..."
    except Exception:
        # Cosmetic only: fall back to a generic topic.
        topic = f"Executing {func_name}..."
    self._set_thinking_topic(topic)
1621
+
1622
def _execute_tools_parallel(self, tool_calls: List[Dict]) -> List[Tuple[Dict, str]]:
    """Execute a batch of tool calls, running parallel-safe calls concurrently.

    The calls are walked in the order they were issued. Every maximal run of
    consecutive ``PARALLEL_SAFE`` calls is dispatched to a thread pool; any
    other call acts as a barrier: it only starts once everything issued
    before it has finished, and it runs alone on the calling thread. This
    keeps e.g. a ``read_file`` that follows a ``write_file`` from observing
    the pre-write content.

    Args:
        tool_calls: Tool-call dicts; the tool name is read from
            ``tc["function"]["name"]``.

    Returns:
        ``(tool_call, result)`` tuples in the same order as ``tool_calls``.
        Calls skipped because ``self.cancel_event`` is set get the result
        ``"[Aborted by user]"``; a parallel call that raises gets an
        ``"Error executing <name>: <exc>"`` string instead of propagating.
    """
    # Tools that may run concurrently with each other. Anything not listed
    # here is treated as state-modifying and is executed on its own.
    # NOTE(review): another method in this file matches the tool name
    # "search_web" while this set lists "web_search" — confirm which name the
    # tool registry actually uses.
    PARALLEL_SAFE = {"read_file", "list_directory", "query_codebase", "web_search",
                     "project_res", "plan_project", "manage_todos", "manage_memory",
                     "analyze_image", "analyze_blast_radius", "generate_image"}

    results = [None] * len(tool_calls)  # pre-allocated so output order == input order
    if not tool_calls:
        return results

    # Publish the cancel event to the tools module once, before any tool runs
    # (it is loop-invariant, and parallel tools should see it as well).
    _tools_module._cancel_event = self.cancel_event

    def _run_concurrently(batch):
        """Run one run of parallel-safe calls and store results by original index."""
        if not batch:
            return
        if self.cancel_event.is_set():
            for idx, tc in batch:
                results[idx] = (tc, "[Aborted by user]")
            return

        self.console.print()
        self.console.print(f"[dim cyan]⊘ Executing {len(batch)} tool(s) in parallel...[/dim cyan]")

        with ThreadPoolExecutor(max_workers=min(len(batch), 8)) as executor:
            future_to_idx = {executor.submit(self._execute_tool_timed, tc): (idx, tc)
                             for idx, tc in batch}
            for future in as_completed(future_to_idx):
                idx, tc = future_to_idx[future]
                try:
                    results[idx] = (tc, future.result())
                except Exception as exc:
                    func_name = tc.get("function", {}).get("name", "unknown")
                    results[idx] = (tc, f"Error executing {func_name}: {exc}")

    batch = []
    for idx, tc in enumerate(tool_calls):
        if tc.get("function", {}).get("name", "") in PARALLEL_SAFE:
            batch.append((idx, tc))
            continue

        # Barrier: finish every earlier parallel-safe call before mutating state.
        _run_concurrently(batch)
        batch = []

        if self.cancel_event.is_set():
            results[idx] = (tc, "[Aborted by user]")
            continue
        results[idx] = (tc, self._execute_tool_timed(tc))
        self._current_line_len = 0

    _run_concurrently(batch)
    return results
1683
+
1684
+ # ── Rewind support ────────────────────────────────────────────────────────
1685
+
1686
@staticmethod
def _change_stats(changes: List[Dict]) -> str:
    """Return a short '<n> files changed +A -R' summary for a list of changes.

    Line counts are an order-insensitive multiset difference between each
    change's ``before`` and ``after`` text: a line counts as added for every
    copy present in ``after`` beyond the copies in ``before`` (and vice versa
    for removed). Unlike a plain set-membership test this also counts
    duplicated lines (e.g. a second blank line or a repeated ``pass``).

    Args:
        changes: Change dicts with a ``"path"`` key and optional ``"before"``
            / ``"after"`` text (``None`` or missing is treated as empty).

    Returns:
        A Rich-markup string such as
        ``"2 files changed [bold green]+3[/bold green] [bold red]-1[/bold red]"``,
        or ``"No files changed"`` when ``changes`` is empty.
    """
    from collections import Counter

    add_total = del_total = 0
    for ch in changes:
        before_counts = Counter((ch.get("before") or "").splitlines())
        after_counts = Counter((ch.get("after") or "").splitlines())
        # Counter subtraction keeps only positive counts, i.e. the surplus.
        add_total += sum((after_counts - before_counts).values())
        del_total += sum((before_counts - after_counts).values())

    n_files = len({ch["path"] for ch in changes})
    parts = []
    if n_files:
        parts.append(f"{n_files} file{'s' if n_files != 1 else ''} changed")
    if add_total:
        parts.append(f"[bold green]+{add_total}[/bold green]")
    if del_total:
        parts.append(f"[bold red]-{del_total}[/bold red]")
    return " ".join(parts) if parts else "No files changed"
1708
+
1709
def rewind_single_turn(self, turn_idx: int, revert_code: bool = True,
                       revert_msgs: bool = True) -> Dict[str, Any]:
    """Rewind exactly one turn, leaving all other turns in place.

    Args:
        turn_idx: Index into ``self.turn_history``. Negative values count
            from the end (``-1`` is the last turn); an index outside the
            history is a no-op.
        revert_code: Undo the turn's recorded file changes (newest first).
        revert_msgs: Remove the turn's slice from ``self.messages`` and shift
            the ``msg_start`` / ``msg_end`` offsets of all later turns.

    Returns:
        A dict with ``"reverted"`` (paths / move descriptions that were
        undone), ``"errors"`` (``"<path>: <exc>"`` strings) and, when
        ``revert_msgs`` is set, ``"msgs_removed"``. The rewound turn is
        removed from ``turn_history`` and appended to ``self.redo_history``.
    """
    res: Dict[str, Any] = {"reverted": [], "errors": []}

    # Normalise negative indices up front. Without this, ``turn_history[-1]``
    # would select the last turn while ``range(turn_idx + 1, ...)`` below
    # would shift the message offsets of *every* turn.
    n_turns = len(self.turn_history)
    if turn_idx < 0:
        turn_idx += n_turns
    if not 0 <= turn_idx < n_turns:
        return res

    turn = self.turn_history[turn_idx]

    if revert_code:
        # Undo newest-first so several edits to one file unwind correctly.
        for ch in reversed(turn["changes"]):
            path = ch["path"]
            try:
                if ch["action"] == "move_file":
                    src = ch["before_path"]
                    if os.path.exists(path):
                        os.makedirs(os.path.dirname(os.path.abspath(src)), exist_ok=True)
                        shutil.move(path, src)
                        res["reverted"].append(f"{path} → {src}")
                elif ch.get("before") is None:
                    # File was created (or didn't exist) — delete it if it exists now
                    if os.path.exists(path):
                        os.remove(path)
                    res["reverted"].append(path)
                else:
                    os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)
                    with open(path, "w", encoding="utf-8") as wf:
                        wf.write(ch["before"])
                    res["reverted"].append(path)
            except Exception as e:
                # Best effort: record the failure and keep reverting the rest.
                res["errors"].append(f"{path}: {e}")

    if revert_msgs:
        msg_start = turn["msg_start"]
        msg_end = turn["msg_end"]
        msgs_removed = msg_end - msg_start
        res["msgs_removed"] = msgs_removed

        # Cut this turn's messages out of the conversation.
        self.messages = self.messages[:msg_start] + self.messages[msg_end:]

        # Later turns now start ``msgs_removed`` positions earlier.
        for later in self.turn_history[turn_idx + 1:]:
            later["msg_start"] -= msgs_removed
            later["msg_end"] -= msgs_removed

    # Move the turn from the undo history to the redo history.
    undone_turn = self.turn_history.pop(turn_idx)
    if not hasattr(self, "redo_history"):
        self.redo_history = []
    self.redo_history.append(undone_turn)

    return res
1766
+
1767
def rewind_to_turn(self, turn_idx: int, revert_code: bool = True,
                   revert_msgs: bool = True) -> Dict[str, Any]:
    """Revert everything from ``turn_idx`` onward.

    Args:
        turn_idx: First turn (index into ``self.turn_history``) to rewind;
            that turn and every later one are undone.
        revert_code: Undo the recorded file changes of those turns.
        revert_msgs: Truncate ``self.messages`` at the first rewound turn's
            ``msg_start``.

    Returns:
        A dict with ``"reverted"`` (each path / move description once),
        ``"errors"`` (``"<path>: <exc>"`` strings) and, when ``revert_msgs``
        is set, ``"msgs_removed"``. The rewound turns are pushed onto
        ``self.redo_history`` newest-first, so popping from its end replays
        them in their original order.
    """
    turns = self.turn_history[turn_idx:]
    res: Dict[str, Any] = {"reverted": [], "errors": []}
    if not turns:
        return res

    if revert_code:
        reported: set = set()

        def _report(entry: str) -> None:
            """Record a reverted path once, even if it was touched repeatedly."""
            if entry not in reported:
                reported.add(entry)
                res["reverted"].append(entry)

        # Undo EVERY change, newest first. Skipping all but the newest change
        # per path would restore a file touched several times to an
        # intermediate state (the content before its *last* edit) instead of
        # the content it had before ``turn_idx``.
        for turn in reversed(turns):
            for ch in reversed(turn["changes"]):
                path = ch["path"]
                try:
                    if ch["action"] == "move_file":
                        src = ch["before_path"]
                        if os.path.exists(path):
                            os.makedirs(os.path.dirname(os.path.abspath(src)), exist_ok=True)
                            shutil.move(path, src)
                            _report(f"{path} → {src}")
                    elif ch.get("before") is None:
                        # File was created (or didn't exist) — delete it if it exists now
                        if os.path.exists(path):
                            os.remove(path)
                        _report(path)
                    else:
                        os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)
                        with open(path, "w", encoding="utf-8") as wf:
                            wf.write(ch["before"])
                        _report(path)
                except Exception as e:
                    # Best effort: record the failure and keep reverting the rest.
                    res["errors"].append(f"{path}: {e}")

    if revert_msgs:
        target = turns[0]["msg_start"]
        res["msgs_removed"] = len(self.messages) - target
        self.messages = self.messages[:target]

    # Push the rewound turns newest-first so that pop() yields the oldest one.
    if not hasattr(self, "redo_history"):
        self.redo_history = []
    self.redo_history.extend(reversed(turns))

    self.turn_history = self.turn_history[:turn_idx]

    return res
1819
+
1820
def undo_last_turn(self) -> Dict[str, Any]:
    """Undo the most recent turn: both its messages and its code changes.

    Returns:
        The result dict of ``rewind_single_turn`` for the last turn, or a
        dict carrying a single error message when there is no turn history.
        Messages are persisted after a successful rewind.
    """
    if self.turn_history:
        newest = len(self.turn_history) - 1
        outcome = self.rewind_single_turn(newest, revert_code=True, revert_msgs=True)
        self._persist_messages()
        return outcome

    return {"reverted": [], "errors": ["No turns to undo."]}
1829
+
1830
def redo_last_undone_turn(self) -> Dict[str, Any]:
    """Re-apply the turn most recently pushed onto ``self.redo_history``.

    The turn's recorded file changes are replayed in recording order, its
    stored ``"messages"`` are appended to ``self.messages``, and the turn is
    put back at the end of ``self.turn_history`` with fresh ``msg_start`` /
    ``msg_end`` offsets. Messages are persisted afterwards.

    Returns:
        ``{"redone_code": [...], "errors": [...]}`` on success, or a dict with
        a single error message when there is nothing to redo.
    """
    if not getattr(self, "redo_history", None):
        return {"reverted": [], "errors": ["No undone turns to redo."]}

    undone = self.redo_history.pop()
    outcome: Dict[str, Any] = {"redone_code": [], "errors": []}
    redone = outcome["redone_code"]
    failures = outcome["errors"]

    # Replay the file operations of the turn.
    for change in undone.get("changes", []):
        target = change["path"]
        kind = change["action"]
        try:
            if kind == "move_file":
                origin = change["before_path"]
                if not os.path.exists(origin):
                    continue  # nothing left to move
                os.makedirs(os.path.dirname(os.path.abspath(target)), exist_ok=True)
                shutil.move(origin, target)
                redone.append(f"{origin} → {target}")
            elif change.get("after") is None:
                # The turn had deleted this file.
                if os.path.exists(target):
                    os.remove(target)
                redone.append(f"deleted {target}")
            else:
                # The turn had written or edited this file.
                os.makedirs(os.path.dirname(os.path.abspath(target)), exist_ok=True)
                with open(target, "w", encoding="utf-8") as handle:
                    handle.write(change["after"])
                redone.append(target)
        except Exception as err:
            failures.append(f"{target}: {err}")

    # Put the turn's messages back at the end of the conversation.
    first_new = len(self.messages)
    self.messages.extend(undone.get("messages", []))

    # Re-register the turn with offsets matching its new position.
    undone["msg_start"] = first_new
    undone["msg_end"] = len(self.messages)
    self.turn_history.append(undone)

    self._persist_messages()
    return outcome
1877
+
1878
def redo_up_to_turn(self, redo_idx: int) -> Dict[str, Any]:
    """Re-apply the undone turns at positions ``0..redo_idx`` of ``self.redo_history``.

    Each selected turn has its file changes replayed, its stored
    ``"messages"`` appended to ``self.messages`` and is re-registered at the
    end of ``self.turn_history``. The remaining entries stay in
    ``redo_history``; messages are persisted at the end.

    NOTE(review): turns are replayed in list order starting at index 0,
    whereas ``redo_last_undone_turn`` pops from the END of the same list —
    confirm callers index ``redo_history`` with that in mind.

    Returns:
        ``{"redone_code": [...], "errors": [...]}`` on success, or a dict with
        a single error message when ``redo_idx`` is past the end or there is
        nothing to redo.
    """
    pending = getattr(self, "redo_history", None)
    if not pending or redo_idx >= len(pending):
        return {"reverted": [], "errors": ["No undone turns to redo."]}

    # Split the redo stack: the head is replayed now, the tail stays undone.
    cut = redo_idx + 1
    batch, self.redo_history = pending[:cut], pending[cut:]

    outcome: Dict[str, Any] = {"redone_code": [], "errors": []}
    redone = outcome["redone_code"]
    failures = outcome["errors"]

    for undone in batch:
        # Replay the file operations of this turn.
        for change in undone.get("changes", []):
            target = change["path"]
            kind = change["action"]
            try:
                if kind == "move_file":
                    origin = change["before_path"]
                    if not os.path.exists(origin):
                        continue  # nothing left to move
                    os.makedirs(os.path.dirname(os.path.abspath(target)), exist_ok=True)
                    shutil.move(origin, target)
                    redone.append(f"{origin} → {target}")
                elif change.get("after") is None:
                    if os.path.exists(target):
                        os.remove(target)
                    redone.append(f"deleted {target}")
                else:
                    os.makedirs(os.path.dirname(os.path.abspath(target)), exist_ok=True)
                    with open(target, "w", encoding="utf-8") as handle:
                        handle.write(change["after"])
                    redone.append(target)
            except Exception as err:
                failures.append(f"{target}: {err}")

        # Put the turn's messages back and re-register it with fresh offsets.
        first_new = len(self.messages)
        self.messages.extend(undone.get("messages", []))
        undone["msg_start"] = first_new
        undone["msg_end"] = len(self.messages)
        self.turn_history.append(undone)

    self._persist_messages()
    return outcome
1928
+
1929
+ # ── Context compression ──────────────────────────────────────────────────
1930
+
1931
@staticmethod
def _estimate_tokens(obj) -> int:
    """Approximate the token count of ``obj`` as a quarter of its text length.

    The object is serialised to JSON (non-ASCII characters kept as-is); if
    that fails for any reason its ``str()`` form is measured instead.
    """
    try:
        serialised = json.dumps(obj, ensure_ascii=False)
    except Exception:
        serialised = str(obj)
    return len(serialised) // 4
1938
+
1939
def _get_dynamic_threshold(self) -> int:
    """Return the compression threshold for the currently selected model.

    The model entry for ``self.model_id`` is looked up in the model registry
    and its context window is passed to ``_get_compression_threshold``. Any
    failure along the way (registry not importable, unknown model, ...)
    yields the fixed default of 65,000.
    """
    try:
        from .server.models import get_model

        window = get_model(self.model_id).context_window
        threshold = _get_compression_threshold(self.model_id, window)
    except Exception:
        # Registry unavailable or lookup failed — use the safe default.
        threshold = 65_000
    return threshold
1948
+
1949
def _update_model_threshold(self, new_model_id: str) -> None:
    """Switch to ``new_model_id`` and recompute the compression threshold.

    The model id is stored first because ``_get_dynamic_threshold`` reads
    ``self.model_id`` to pick the threshold.
    """
    self.model_id = new_model_id
    refreshed = self._get_dynamic_threshold()
    self._compression_threshold = refreshed
1953
+
1954
def _trigger_bg_summarization(self) -> None:
    """Start a background thread that folds older turns into the rolling LLM summary.

    Only turns that fall outside the most recent ``KEEP_FULL_TURNS``
    not-yet-summarised turns are compressed. At most one summarisation runs
    at a time (guarded by ``self._summarizing``); on success the worker
    replaces ``self._llm_summary`` and advances ``self._summarized_turns``.
    Failures are reported on stderr and leave the previous summary in place.
    """
    # Lazily create the summary state the first time this is called.
    if not hasattr(self, "_llm_summary"):
        self._llm_summary = ""
        self._summarized_turns = 0
        self._summarizing = False

    if self._summarizing:
        return

    history = self.turn_history
    already_done = self._summarized_turns
    backlog = len(history) - already_done

    # Nothing to do while every unsummarised turn still fits the keep-window.
    if backlog <= KEEP_FULL_TURNS:
        return

    overflow = backlog - KEEP_FULL_TURNS
    batch = history[already_done : already_done + overflow]
    prior_summary = self._llm_summary

    self._summarizing = True

    def _worker():
        try:
            sections = []
            for turn in batch:
                request = turn.get("user_msg", "").strip()
                stats = self._change_stats(turn.get("changes", []))

                # Prefer the per-turn message snapshot stored with the turn;
                # the absolute msg_start/msg_end offsets can be stale once
                # self.messages has been rewritten by compression.
                turn_msgs = turn.get("messages", [])
                if not turn_msgs:
                    # No snapshot: fall back to slicing the live list under the lock.
                    with self._messages_lock:
                        turn_msgs = list(self.messages[turn["msg_start"]: turn["msg_end"]])

                # The last non-empty assistant message is taken as the conclusion.
                conclusion = ""
                for message in turn_msgs:
                    body = message.get("content")
                    if message.get("role") == "assistant" and body:
                        conclusion = body.strip()

                sections.append(f"User: {request}\nChanges: {stats}\nAssistant: {conclusion}\n---")

            transcript = "\n".join(sections)

            sys_prompt = "You are a highly analytical AI core memory compressor. Your job is to compress conversational history into a dense, highly technical narrative paragraph. Retain all factual details, architectural decisions, file paths, and current project state. Do not use conversational filler."

            if prior_summary:
                user_prompt = f"Existing Memory Summary:\n{prior_summary}\n\nNew Interactions to Merge:\n{transcript}\n\nUpdate the memory summary to incorporate these new interactions seamlessly. Return ONLY the new summary."
            else:
                user_prompt = f"New Interactions:\n{transcript}\n\nCreate a dense memory summary of these interactions. Return ONLY the summary."

            # The compression call with model fallback lives in context_pruner.
            from utim_cli.context_pruner import _call_compression_model_with_fallback
            prompt_messages = [
                {"role": "system", "content": sys_prompt},
                {"role": "user", "content": user_prompt},
            ]
            fresh_summary = _call_compression_model_with_fallback(
                messages=prompt_messages,
                llm_key=self._local_api_key,
                max_tokens=2000,
                primary_model=self.model_id,
            )

            if not fresh_summary:
                print(f"[WARNING] Context summarization returned None - all fallback models failed", file=sys.stderr)
            else:
                self._llm_summary = fresh_summary
                self._summarized_turns += len(batch)
        except Exception as e:
            print(f"[ERROR] Context summarization failed: {e}", file=sys.stderr)
        finally:
            # Always release the guard so a later call can retry.
            self._summarizing = False

    import threading
    worker_thread = threading.Thread(target=_worker, daemon=True)
    worker_thread.start()
2034
+
2035
def _get_send_messages(self, turn_msg_start: Optional[int] = None) -> List[Dict]:
    """Build the message payload to send to the LLM for the current step.

    The payload is a copy of the system message — rebuilt and extended with
    the current timestamp, the planning/autonomous mode notice, an active
    context checklist and the rolling LLM summary — followed by the messages
    of the not-yet-summarised completed turns and the current turn.
    ``self.messages`` itself is not modified.

    Args:
        turn_msg_start: Absolute index in ``self.messages`` where the current
            user turn begins. When omitted, ``self._current_turn_start`` is
            used, which may be stale after ``_compress_intra_turn`` has
            rewritten ``self.messages``.

    Returns:
        ``[system_msg] + recent_history + current_turn_messages``.
    """
    # Prefer the caller-supplied index; fall back to the cached attribute only
    # for call sites that do not pass it.
    effective_turn_start = turn_msg_start if turn_msg_start is not None else self._current_turn_start

    with self._messages_lock:
        messages_snapshot = list(self.messages)

    # Normalise the turn start. ``None`` (no turn recorded yet) means "no
    # current turn": without this the range comparison below raises TypeError
    # and ``messages_snapshot[None:]`` would duplicate the whole history.
    # Clamping to >= 1 keeps the system message (index 0) from being sent
    # twice; clamping to <= len keeps a stale index harmless.
    if effective_turn_start is None:
        effective_turn_start = len(messages_snapshot)
    effective_turn_start = max(1, min(effective_turn_start, len(messages_snapshot)))

    system_msg = dict(messages_snapshot[0])

    # The current user prompt is passed to get_system_prompt below.
    user_prompt = ""
    if effective_turn_start < len(messages_snapshot):
        for m in messages_snapshot[effective_turn_start:]:
            if m.get("role") == "user":
                user_prompt = m.get("content", "")
                break

    # Rebuild the system prompt; if both call forms fail, the stored system
    # prompt is kept as-is.
    task_elapsed = int(time.time() - getattr(self, "task_start_time", time.time()))
    task_iter = getattr(self, "current_iteration", 0)
    try:
        system_msg["content"] = get_system_prompt(user_prompt, task_iter, task_elapsed, self.turn_history)
    except Exception:
        try:
            system_msg["content"] = get_system_prompt(user_prompt, turn_history=self.turn_history)
        except Exception:
            pass

    # Inject current timestamp so the model lives in the present
    from datetime import datetime
    current_ts = datetime.now().strftime("%A, %B %d, %Y at %I:%M %p")
    system_msg["content"] = f"Current date/time: {current_ts}\n\n" + system_msg["content"]

    # Mode notice; best effort, skipped entirely if STATE cannot be imported.
    try:
        from utim_cli.utim import STATE
        if STATE.get("planning_mode", True):
            system_msg["content"] += (
                "\n\n### PLANNING MODE ACTIVE (User Review Required):\n"
                "Before implementing any architectural, complex, or multi-file changes, you MUST:\n"
                "1. Formulate a detailed technical plan (e.g. using `plan_project` or outlining it in text).\n"
                "2. Present this plan to the user clearly and ask for explicit approval, modification, or rejection.\n"
                "3. DO NOT modify the codebase or run mutating terminal commands until the user approves the plan."
            )
        else:
            system_msg["content"] += (
                "\n\n### AUTONOMOUS MODE ACTIVE (Direct Execution):\n"
                "You are operating in fully autonomous mode.\n"
                "1. Formulate a technical plan internally (using `plan_project` or by tracking subtasks).\n"
                "2. Proceed directly to implement the code and execute actions without presenting the plan or waiting for user approval."
            )
    except Exception:
        pass

    # ── 1. ACTIVE CONTEXT CHECKLIST (Checklist-based Focus) ────────────────
    active_context = "\n\n### ACTIVE CONTEXT CHECKLIST:"

    # A. Active file: the file argument of the most recent assistant message
    #    that issued a write_file / edit_file / read_file call.
    active_file = ""
    for msg in reversed(messages_snapshot):
        if msg.get("role") == "assistant" and msg.get("tool_calls"):
            for tc in msg["tool_calls"]:
                func = tc.get("function", {})
                if func.get("name") in ("write_file", "edit_file", "read_file"):
                    try:
                        args = json.loads(func.get("arguments", "{}"))
                        active_file = args.get("filepath", args.get("path", ""))
                        if active_file:
                            break
                    except Exception:
                        # Malformed arguments: try the next tool call.
                        pass
        if active_file:
            break
    if active_file:
        active_context += f"\n- **Current File**: {active_file}"

    # B. Latest command status from the most recent run_command output
    last_command_output = ""
    for msg in reversed(messages_snapshot):
        if msg.get("role") == "tool" and msg.get("name") == "run_command":
            content = msg.get("content", "") or ""
            lines = [l.strip() for l in content.splitlines() if l.strip()]
            if lines:
                exit_code_line = next((l for l in lines if "exit_code:" in l), "")
                err_lines = [l for l in lines if "error" in l.lower() or "failed" in l.lower() or "exception" in l.lower()]
                last_command_output = f"Command exit: {exit_code_line or 'unknown'}"
                if err_lines:
                    last_command_output += f" | Errors: {'; '.join(err_lines[:2])}"
                else:
                    last_command_output += f" | Output: {'; '.join(lines[:2])}"
            break
    if last_command_output:
        active_context += f"\n- **Latest Command Status**: {last_command_output}"

    # C. Active todos from todos.json (best effort: unreadable or unexpected
    #    content simply yields no checklist).
    active_todo_checklist = ""
    todo_file = ".utim_tmp/todos.json"
    if os.path.exists(todo_file):
        try:
            with open(todo_file, "r", encoding="utf-8") as f:
                todos = json.load(f)
            if todos:
                active_todo_checklist = "\n### ACTIVE TASK CHECKLIST:\n"
                for t in todos.values():
                    status_mark = "[x]" if t.get("status") == "done" else "[ ]"
                    active_todo_checklist += f"{status_mark} {t.get('description', '')}\n"
        except Exception:
            pass

    if active_todo_checklist:
        active_context += active_todo_checklist
    else:
        # Without todos, fall back to the first message of the current turn as
        # the objective — only when the index really points into the snapshot.
        if 0 < effective_turn_start < len(messages_snapshot):
            obj = (messages_snapshot[effective_turn_start].get("content") or "")[:200]
            active_context += f"\n- **Active Objective**: {obj}..."

    system_msg["content"] += active_context

    # ── 2. PASSIVE MEMORY SUMMARY (Whole memory rollup) ───────────────────
    if getattr(self, "_llm_summary", ""):
        system_msg["content"] += "\n\n### PASSIVE MEMORY SUMMARY (Older events):\n" + self._llm_summary

    # Completed turns that are not yet folded into the summary are sent in full.
    completed = self.turn_history
    n_full = max(0, len(completed) - getattr(self, "_summarized_turns", 0))
    recent = completed[-n_full:] if n_full > 0 else []

    if recent:
        rec_slice = messages_snapshot[recent[0]["msg_start"]: effective_turn_start]
    else:
        rec_slice = messages_snapshot[1: effective_turn_start]

    cur_msgs = messages_snapshot[effective_turn_start:]

    return [system_msg] + rec_slice + cur_msgs
2185
+
2186
+ # Main agentic loop
2187
+
2188
+ def _compress_intra_turn(self, turn_msg_start: int, instruction: str = "") -> None:
2189
+ """Stable, synchronous compression of the current turn's tool calls if it gets too long or if requested.
2190
+
2191
+ Uses importance-weighted pruning (score >= 0.75 = preserved verbatim) so that
2192
+ high-signal context such as file reads and error messages is carried forward
2193
+ in full, while low-signal chatter is condensed by a compression model.
2194
+ sanitize_message_sequence() is applied on the rebuilt list to keep
2195
+ assistant/tool-call pairs structurally intact.
2196
+ """
2197
+ current_turn_msgs = self.messages[turn_msg_start:]
2198
+
2199
+ # We need at least 5 messages to justify compression
2200
+ if len(current_turn_msgs) < 5:
2201
+ return
2202
+
2203
+ # If no explicit instruction is given, check multiple conditions for proactive compression:
2204
+ # 1. Token estimate > dynamic threshold (based on model's context window)
2205
+ # 2. Message count > dynamic limit (scaled based on model's context window)
2206
+ threshold = getattr(self, "_compression_threshold", 65_000)
2207
+ if not instruction:
2208
+ est_tokens = self._estimate_tokens(self.messages)
2209
+ msg_count = len(self.messages)
2210
+
2211
+ # Determine dynamic message count limit
2212
+ try:
2213
+ from .server.models import get_model
2214
+ model_entry = get_model(self.model_id)
2215
+ context_window = model_entry.context_window
2216
+ except Exception:
2217
+ context_window = 128_000
2218
+
2219
+ if context_window <= 64_000:
2220
+ msg_count_limit = 35
2221
+ elif context_window <= 300_000:
2222
+ msg_count_limit = 100
2223
+ else:
2224
+ msg_count_limit = 200
2225
+
2226
+ if est_tokens < threshold and msg_count < msg_count_limit:
2227
+ return
2228
+
2229
+ tail_keep = 8
2230
+
2231
+ # Build a compact state anchor so compression never drops the active objective.
2232
+ latest_user = ""
2233
+ latest_assistant = ""
2234
+ latest_tool_plan = ""
2235
+ for m in reversed(current_turn_msgs):
2236
+ if not latest_user and m.get("role") == "user":
2237
+ latest_user = (m.get("content", "") or "")[:1200]
2238
+ if m.get("role") == "assistant":
2239
+ if not latest_assistant and (m.get("content", "") or "").strip():
2240
+ latest_assistant = (m.get("content", "") or "")[:1200]
2241
+ if not latest_tool_plan and m.get("tool_calls"):
2242
+ tc_names = [tc.get("function", {}).get("name", "") for tc in m.get("tool_calls", [])]
2243
+ latest_tool_plan = ", ".join([n for n in tc_names if n])[:500]
2244
+ if latest_user and latest_assistant and latest_tool_plan:
2245
+ break
2246
+
2247
+ state_anchor = {
2248
+ "role": "user",
2249
+ "content": (
2250
+ "### SYSTEM NOTE: TASK STATE ANCHOR (MUST PRESERVE)\n"
2251
+ f"Current user objective (latest):\n{latest_user or '[not found]'}\n\n"
2252
+ f"Most recent assistant intent/progress:\n{latest_assistant or '[not found]'}\n\n"
2253
+ f"Most recent pending/attempted tool actions:\n{latest_tool_plan or '[none]'}\n\n"
2254
+ "Continue from this exact objective. Do not restart completed steps."
2255
+ ),
2256
+ }
2257
+
2258
+ # ── Unified importance-weighted compression ──────────────────────────
2259
+ # Threshold 0.75: messages scoring at or above are kept verbatim so that
2260
+ # file-read payloads, error traces, and key facts survive into context.
2261
+ # Everything below goes to the compression model for condensation.
2262
+ try:
2263
+ from utim_cli.context_pruner import score_message_importance, sanitize_message_sequence
2264
+
2265
+ candidate_msgs = current_turn_msgs[1:-tail_keep]
2266
+ scored_msgs = [(score_message_importance(m), m) for m in candidate_msgs]
2267
+
2268
+ # Split into verbatim-keep (high) and compress (low) pools
2269
+ preserved_messages = [m for score, m in scored_msgs if score >= 0.75]
2270
+ to_summarize = [m for score, m in scored_msgs if score < 0.75]
2271
+
2272
+ # Cap verbatim-keep to 10 to prevent bloat; extras go to compress pool
2273
+ if len(preserved_messages) > 10:
2274
+ sorted_by_val = sorted(enumerate(scored_msgs), key=lambda x: (x[1][0], x[0]), reverse=True)
2275
+ keep_indices = {idx for idx, _ in sorted_by_val[:10]}
2276
+ new_preserved, extra = [], []
2277
+ for idx, (score, m) in enumerate(scored_msgs):
2278
+ (new_preserved if idx in keep_indices else extra).append(m)
2279
+ preserved_messages = new_preserved
2280
+ to_summarize.extend(extra)
2281
+
2282
+ if to_summarize:
2283
+ text_parts = []
2284
+ import re as _re
2285
+ for m in to_summarize:
2286
+ role = m.get("role", "")
2287
+ if role == "assistant":
2288
+ content = _re.sub(
2289
+ r"<think(?:ing)?>.*?</think(?:ing)?>",
2290
+ "[thought process]",
2291
+ (m.get("content", "") or ""),
2292
+ flags=_re.DOTALL,
2293
+ ).strip()
2294
+ tcs = m.get("tool_calls", [])
2295
+ tc_str = ", ".join(
2296
+ f"{tc['function']['name']}({tc['function'].get('arguments', '')})"
2297
+ for tc in tcs
2298
+ )
2299
+ if content or tc_str:
2300
+ text_parts.append(f"Action: {content}\nTools Called: {tc_str}")
2301
+ elif role == "tool":
2302
+ name = m.get("name", "tool")
2303
+ content = m.get("content", "")
2304
+
2305
+ # FIX #3: Intelligent truncation - preserve critical parts
2306
+ # Detect if content has critical markers that should never be truncated
2307
+ critical_patterns = [
2308
+ r"error", r"exception", r"failed", r"failure",
2309
+ r"traceback", r"undefined", r"not found",
2310
+ r"def \w+", r"class \w+", r"import ",
2311
+ r'"[^"]*":\s*', r"'[^']*':\s*", # Key-value pairs
2312
+ ]
2313
+
2314
+ # Check if this is critical content that needs full preservation
2315
+ is_critical = any(re.search(p, content, re.I) for p in critical_patterns)
2316
+
2317
+ # Higher char limit for critical content, but still respect reasonable bounds
2318
+ if len(content) > 1500:
2319
+ if is_critical:
2320
+ # For critical content, try to find and preserve the key part
2321
+ # Look for error lines, function definitions, etc.
2322
+ lines = content.split('\n')
2323
+ critical_lines = []
2324
+ for line in lines:
2325
+ if any(re.search(p, line, re.I) for p in critical_patterns):
2326
+ critical_lines.append(line)
2327
+
2328
+ if critical_lines:
2329
+ # Preserve the critical lines plus context
2330
+ content = (content[:800] +
2331
+ "\n... [critical excerpts preserved] ...\n" +
2332
+ "\n".join(critical_lines[:20]))
2333
+ else:
2334
+ content = content[:1500] + "... [truncated]"
2335
+ else:
2336
+ content = content[:1200] + "... [truncated]"
2337
+ text_parts.append(f"Result of {name}: {content}")
2338
+
2339
+ if text_parts:
2340
+ if instruction:
2341
+ self.console.print("\n[dim magenta]⊘ Agent requested context compression: condensing intermediate tool logs...[/dim magenta]")
2342
+ else:
2343
+ est_tokens = self._estimate_tokens(self.messages)
2344
+ msg_count = len(self.messages)
2345
+ triggers = []
2346
+ if est_tokens >= threshold:
2347
+ triggers.append(f"tokens ~{est_tokens}")
2348
+ if msg_count >= 50:
2349
+ triggers.append(f"msg count {msg_count}")
2350
+ trigger_str = ", ".join(triggers) if triggers else "context"
2351
+ self.console.print(f"\n[dim magenta]⊘ Proactive compression (high {trigger_str}): condensing intermediate tool logs...[/dim magenta]")
2352
+
2353
+ raw_log = "\n---\n".join(text_parts)
2354
+
2355
+ sys_prompt = (
2356
+ "You are an internal context stabilizer for an autonomous AI agent.\n"
2357
+ "The agent has been running tool calls in a loop. Summarize intermediate steps "
2358
+ "while preserving strict technical continuity.\n"
2359
+ "Required sections:\n"
2360
+ "1) GOAL\n2) COMPLETED\n3) IN_PROGRESS\n4) BLOCKERS/FAILURES\n5) NEXT_ACTION\n"
2361
+ "CRITICAL: Preserve ALL specific file paths, line numbers, variable names, "
2362
+ "error messages, and facts learned from file reads verbatim. No filler.\n\n"
2363
+ "HALLUCINATION PREVENTION RULES:\n"
2364
+ "- Do NOT add facts not present in the source logs\n"
2365
+ "- Do NOT make up file paths, variable names, or error messages\n"
2366
+ "- Do NOT invent technical details not explicitly stated\n"
2367
+ "- When in doubt, use verbatim quotes from the source\n"
2368
+ "- If you cannot determine a fact, state 'not specified' rather than guessing"
2369
+ )
2370
+ if instruction:
2371
+ sys_prompt += (
2372
+ f"\n\nCRITICAL PRESERVATION RULES FROM THE AGENT:\n{instruction}\n"
2373
+ "You MUST strictly preserve these facts, constraints, and code snippets."
2374
+ )
2375
+
2376
+ from utim_cli.context_pruner import _call_compression_model_with_fallback
2377
+ # Pass raw_log for deduplication tracking
2378
+ summary = _call_compression_model_with_fallback(
2379
+ messages=[
2380
+ {"role": "system", "content": sys_prompt},
2381
+ {"role": "user", "content": f"Intermediate Logs to Compress:\n{raw_log}"},
2382
+ ],
2383
+ llm_key=self._local_api_key,
2384
+ max_tokens=1500,
2385
+ content_hint=raw_log[:1000], # Use first 1000 chars for dedup hash
2386
+ primary_model=self.model_id
2387
+ )
2388
+
2389
+ if summary:
2390
+ summary_msg = {
2391
+ "role": "user",
2392
+ "content": (
2393
+ "### SYSTEM NOTE: INTERMEDIATE STEPS COMPRESSED\n"
2394
+ "The following earlier steps in this task were compressed to save memory:\n"
2395
+ f"{summary}\n\n"
2396
+ "Continue from IN_PROGRESS/NEXT_ACTION and finish unresolved work."
2397
+ ),
2398
+ }
2399
+ recent_tail = current_turn_msgs[-tail_keep:]
2400
+
2401
+ # BUG 3 FIX: Use object identity (id()) instead of value
2402
+ # equality (==) for deduplication. Dict value-equality
2403
+ # was silently dropping preserved_messages entries whose
2404
+ # content happened to match a message in recent_tail
2405
+ # (e.g. repeated read_file of the same file).
2406
+ merged_tail = []
2407
+ seen_ids: set = set()
2408
+ for msg in preserved_messages + recent_tail:
2409
+ if id(msg) not in seen_ids:
2410
+ seen_ids.add(id(msg))
2411
+ merged_tail.append(msg)
2412
+
2413
+ with self._messages_lock:
2414
+ new_messages = (
2415
+ self.messages[:turn_msg_start]
2416
+ + [current_turn_msgs[0], state_anchor, summary_msg]
2417
+ + merged_tail
2418
+ )
2419
+ self.messages = sanitize_message_sequence(new_messages)
2420
+ # BUG 1 FIX: After rewriting self.messages the
2421
+ # turn_msg_start boundary is still valid (we only
2422
+ # shrank the current-turn slice, not the prefix).
2423
+ # Refresh _current_turn_start so _get_send_messages
2424
+ # slices at the correct position on the next call.
2425
+ self._current_turn_start = turn_msg_start
2426
+ return
2427
+ else:
2428
+ self.console.print(
2429
+ "\n[dim red]⊘ Warning: Context compression failed "
2430
+ "(no response from fallback models). Continuing with full context.[/dim red]"
2431
+ )
2432
+ else:
2433
+ # Nothing to compress — just sanitize the existing list
2434
+ self.messages = sanitize_message_sequence(self.messages)
2435
+ return
2436
+
2437
+ except Exception as e:
2438
+ self.console.print(
2439
+ f"\n[dim red]⊘ Warning: Importance-weighted compression failed ({e}). "
2440
+ "Continuing with full context.[/dim red]"
2441
+ )
2442
+
2443
+
2444
+ # ── Cleanup utilities ───────────────────────────────────────────────────
2445
+
2446
+ def _cleanup_tmp_folder(self, keep_current_session: bool = True) -> int:
2447
+ """Clean up the .utim_tmp folder to remove files from previous runs.
2448
+
2449
+ Args:
2450
+ keep_current_session: If True, preserve files from the current session.
2451
+
2452
+ Returns:
2453
+ Number of files removed.
2454
+ """
2455
+ import glob
2456
+
2457
+ tmp_dir = ".utim_tmp"
2458
+ if not os.path.exists(tmp_dir):
2459
+ return 0
2460
+
2461
+ removed_count = 0
2462
+ errors = []
2463
+ # Define cleanup rules - files/patterns to remove
2464
+ cleanup_patterns = [
2465
+ # Research files older than 1 day
2466
+ (os.path.join(tmp_dir, "research"), "dir"),
2467
+ # Plan files are kept for /rewind functionality
2468
+ # But we can clean up very old ones
2469
+ ]
2470
+
2471
+ # Remove old research directory contents
2472
+ research_dir = os.path.join(tmp_dir, "research")
2473
+ if os.path.exists(research_dir):
2474
+ try:
2475
+ # Remove files older than 1 day
2476
+ now = time.time()
2477
+ for root, dirs, files in os.walk(research_dir):
2478
+ for f in files:
2479
+ fp = os.path.join(root, f)
2480
+ try:
2481
+ if os.path.getmtime(fp) < now - 86400: # 1 day old
2482
+ os.remove(fp)
2483
+ removed_count += 1
2484
+ except OSError as e:
2485
+ errors.append(str(e))
2486
+ except Exception as e:
2487
+ errors.append(str(e))
2488
+
2489
+ # Clean up old reflection files (keep last 10)
2490
+ reflection_file = os.path.join(tmp_dir, "task_reflections.json")
2491
+ if os.path.exists(reflection_file):
2492
+ try:
2493
+ import json
2494
+ with open(reflection_file, 'r') as f:
2495
+ reflections = json.load(f)
2496
+ if len(reflections) > 50:
2497
+ # Keep only the most recent 50
2498
+ reflections = reflections[-50:]
2499
+ with open(reflection_file, 'w') as f:
2500
+ json.dump(reflections, f)
2501
+ removed_count += len(reflections) - 50
2502
+ except Exception:
2503
+ pass
2504
+
2505
+ return removed_count
2506
+
2507
+ def _detect_and_run_tests(self) -> Optional[str]:
2508
+ import subprocess
2509
+ import os
2510
+ import json
2511
+
2512
+ # 1. Check for Python/pytest
2513
+ if os.path.exists("pytest.ini") or os.path.exists("conftest.py") or os.path.isdir("tests"):
2514
+ try:
2515
+ res = subprocess.run(["pytest"], capture_output=True, text=True, timeout=60)
2516
+ if res.returncode != 0 and res.returncode != 5:
2517
+ return f"pytest failed:\n{res.stdout}\n{res.stderr}"
2518
+ return None
2519
+ except subprocess.TimeoutExpired:
2520
+ return "pytest timed out (took longer than 60 seconds)"
2521
+ except Exception:
2522
+ pass
2523
+
2524
+ # 2. Check for package.json / npm test
2525
+ if os.path.exists("package.json"):
2526
+ try:
2527
+ with open("package.json", "r", encoding="utf-8") as f:
2528
+ pkg = json.load(f)
2529
+ if "scripts" in pkg and "test" in pkg["scripts"]:
2530
+ res = subprocess.run(["npm", "test"], capture_output=True, text=True, timeout=60, shell=True)
2531
+ if res.returncode != 0:
2532
+ return f"npm test failed:\n{res.stdout}\n{res.stderr}"
2533
+ return None
2534
+ except subprocess.TimeoutExpired:
2535
+ return "npm test timed out"
2536
+ except Exception:
2537
+ pass
2538
+
2539
+ # 3. Check for tox.ini
2540
+ if os.path.exists("tox.ini"):
2541
+ try:
2542
+ res = subprocess.run(["tox"], capture_output=True, text=True, timeout=90)
2543
+ if res.returncode != 0:
2544
+ return f"tox failed:\n{res.stdout}\n{res.stderr}"
2545
+ return None
2546
+ except Exception:
2547
+ pass
2548
+
2549
+ # 4. Check for Cargo.toml
2550
+ if os.path.exists("Cargo.toml"):
2551
+ try:
2552
+ res = subprocess.run(["cargo", "test"], capture_output=True, text=True, timeout=60)
2553
+ if res.returncode != 0:
2554
+ return f"cargo test failed:\n{res.stdout}\n{res.stderr}"
2555
+ return None
2556
+ except Exception:
2557
+ pass
2558
+
2559
+ # 5. Check for go.mod
2560
+ if os.path.exists("go.mod"):
2561
+ try:
2562
+ res = subprocess.run(["go", "test", "./..."], capture_output=True, text=True, timeout=60)
2563
+ if res.returncode != 0:
2564
+ return f"go test failed:\n{res.stdout}\n{res.stderr}"
2565
+ return None
2566
+ except Exception:
2567
+ pass
2568
+
2569
+ return None
2570
+
2571
+
2572
+
2573
+ def run_task(self, user_message: str, max_iterations: int = 500) -> None:
2574
+ """Append user_message to history and run the full ReAct loop until the
2575
+ model stops issuing tool calls or we hit max_iterations.
2576
+ """
2577
+ self.turn_step_timings = []
2578
+
2579
+
2580
+ # Refresh console width at start of task
2581
+ try:
2582
+ import shutil
2583
+ width = shutil.get_terminal_size().columns
2584
+ if width > 0:
2585
+ self.console.width = width
2586
+ except:
2587
+ pass
2588
+
2589
+ self.cancel_event.clear()
2590
+ self.pre_prompt_text = ""
2591
+ try:
2592
+ pre_prompt_file = ".utim/pre_prompt_thoughts.json"
2593
+ if os.path.exists(pre_prompt_file):
2594
+ os.remove(pre_prompt_file)
2595
+ except Exception:
2596
+ pass
2597
+ try:
2598
+ from utim_cli.utim import STATE
2599
+ STATE["thinking_topic"] = ""
2600
+ except Exception:
2601
+ pass
2602
+
2603
+ turn_msg_start = len(self.messages) # snapshot before user msg is appended
2604
+ self._current_turn_start = turn_msg_start # used by _get_send_messages()
2605
+ self._turn_changes = []
2606
+
2607
+ # Analyze previous turn feedback and user sentiment
2608
+ prev_assistant_content = ""
2609
+ prev_iteration_count = 0
2610
+ prev_elapsed_time = 0
2611
+ if self.turn_history:
2612
+ prev_turn = self.turn_history[-1]
2613
+ prev_iteration_count = prev_turn.get("iteration_count", 0)
2614
+ prev_elapsed_time = prev_turn.get("elapsed_time", 0)
2615
+
2616
+ if self.messages:
2617
+ for msg in reversed(self.messages):
2618
+ if msg.get("role") == "assistant" and msg.get("content"):
2619
+ prev_assistant_content = msg["content"]
2620
+ break
2621
+
2622
+
2623
+ # Inject secret guidance hint if cached in global CLI state
2624
+ try:
2625
+ from utim_cli.utim import STATE
2626
+ hint = STATE.pop("hint", None)
2627
+ if hint:
2628
+ user_message = f"[Secret Hint Guidance: {hint}]\n{user_message}"
2629
+ except Exception:
2630
+ pass
2631
+
2632
+ self.messages.append({"role": "user", "content": user_message})
2633
+ self.redo_history = [] # Clear redo history on new user action
2634
+ self._persist_messages(in_progress_turn={
2635
+ "user_msg": user_message,
2636
+ "msg_start": turn_msg_start,
2637
+ "msg_end": len(self.messages),
2638
+ "messages": list(self.messages[turn_msg_start:]),
2639
+ "changes": [],
2640
+ })
2641
+ task_start_time = time.time()
2642
+ self.task_start_time = task_start_time
2643
+ self._test_run_attempts = 0
2644
+
2645
+ _empty_response_streak = 0 # tracks consecutive empty (no content, no tools) responses
2646
+ turn_iteration = 0
2647
+ for iteration in range(max_iterations):
2648
+ self.current_iteration = iteration
2649
+ turn_iteration = iteration + 1
2650
+ # Check for cancellation before each LLM call
2651
+ if self.cancel_event.is_set():
2652
+ self.console.print("\n[dim yellow]⊘ Aborted.[/dim yellow]\n")
2653
+ self.messages.pop() # Roll back the unsent user message if first iter
2654
+ break
2655
+
2656
+ # ── Resilient LLM call with per-iteration retry ────────────────────
2657
+ # We allow up to 3 transient-error retries per iteration before
2658
+ # giving up for real. This prevents a single network blip from
2659
+ # silently killing a long-running task.
2660
+ _llm_retries = 0
2661
+ _llm_max_retries = 3
2662
+ msg = None
2663
+ while _llm_retries <= _llm_max_retries:
2664
+ try:
2665
+ # Make the thinking indicator interactive before TTFT
2666
+ from utim_cli.utim import STATE
2667
+ if iteration == 0:
2668
+ if is_casual_message(user_message):
2669
+ STATE["thinking_topic"] = "Formulating greeting..."
2670
+ else:
2671
+ STATE["thinking_topic"] = "Formulating response..."
2672
+ draft_text = getattr(self, "_pre_computation_text", "").strip()
2673
+ actual_text = user_message.strip()
2674
+
2675
+ def get_similarity(s1, s2):
2676
+ s1_clean = "".join(c for c in s1.lower() if c.isalnum() or c.isspace()).strip()
2677
+ s2_clean = "".join(c for c in s2.lower() if c.isalnum() or c.isspace()).strip()
2678
+ s1_words = s1_clean.split()
2679
+ s2_words = s2_clean.split()
2680
+ if not s1_words or not s2_words:
2681
+ return 0.0
2682
+ w1 = set(s1_words)
2683
+ w2 = set(s2_words)
2684
+ intersection = w1.intersection(w2)
2685
+ union = w1.union(w2)
2686
+ return len(intersection) / len(union)
2687
+
2688
+ is_match = False
2689
+ match_reason = ""
2690
+ if draft_text:
2691
+ if draft_text == actual_text:
2692
+ is_match = True
2693
+ match_reason = "exact match"
2694
+ elif actual_text.startswith(draft_text) and len(actual_text) - len(draft_text) < 20:
2695
+ is_match = True
2696
+ match_reason = "prefix match"
2697
+ else:
2698
+ similarity = get_similarity(draft_text, actual_text)
2699
+ if similarity >= 0.80:
2700
+ is_match = True
2701
+ match_reason = f"fuzzy match ({similarity:.1%} similarity)"
2702
+
2703
+ if is_match:
2704
+ if (self._pre_computation_thread and
2705
+ self._pre_computation_thread.is_alive() and
2706
+ not self._pre_computation_done):
2707
+ STATE["thinking_topic"] = "Anticipating response (finishing background reasoning)..."
2708
+ self._pre_computation_thread.join(timeout=30)
2709
+
2710
+ if self._pre_computation_done and self._pre_computation_result:
2711
+ self.console.print(f"[bold green]⚡ Anticipatory Cache HIT: Reused background reasoning ({match_reason}).[/bold green]")
2712
+ msg = self._pre_computation_result
2713
+ was_streamed = True
2714
+ clean_content = msg.get("content") or ""
2715
+ if clean_content:
2716
+ self.console.print()
2717
+ self.console.print(Markdown(clean_content))
2718
+ self.console.print()
2719
+ self.turn_step_timings.append({
2720
+ "step": turn_iteration,
2721
+ "reasoning_time": 0.0,
2722
+ "tool_time": 0.0,
2723
+ "tools": []
2724
+ })
2725
+ break
2726
+ else:
2727
+ STATE["thinking_topic"] = "Evaluating tool results & logic..."
2728
+
2729
+ # BUG 1 FIX: Pass the live turn_msg_start so _get_send_messages
2730
+ # always slices at the correct boundary, even after compression
2731
+ # has rewritten self.messages and potentially shifted indices.
2732
+ send_msgs = self._get_send_messages(turn_msg_start)
2733
+ STATE["thinking_topic"] = "Synthesizing response..."
2734
+ t_llm_start = time.time()
2735
+ msg, was_streamed = self._call_llm(send_msgs)
2736
+ reasoning_duration = time.time() - t_llm_start
2737
+ break # success
2738
+ except _ServerUnavailableError as exc:
2739
+ if _llm_retries < _llm_max_retries:
2740
+ _llm_retries += 1
2741
+ wait_s = 5 * _llm_retries
2742
+ self.console.print(
2743
+ f"\n[bold yellow]⚠ All models unreachable (attempt {_llm_retries}/{_llm_max_retries}). "
2744
+ f"Retrying in {wait_s}s...[/bold yellow]"
2745
+ )
2746
+ time.sleep(wait_s)
2747
+ continue
2748
+ # All retries exhausted — show error and abort turn
2749
+ self.console.print()
2750
+ self.console.print(Panel(
2751
+ Text.from_markup(
2752
+ f"[bold #FFE066]⚠ UTIM Server Unavailable[/bold #FFE066]\n\n"
2753
+ f"[white]{exc}[/white]\n\n"
2754
+ "[dim]All retry attempts failed. The task has been paused.\n"
2755
+ "Type your message again when the connection is restored.[/dim]"
2756
+ ),
2757
+ border_style="#FFE066",
2758
+ padding=(0, 2),
2759
+ expand=False,
2760
+ width=min(70, self.console.width - 4),
2761
+ ))
2762
+ self.console.print()
2763
+ del self.messages[turn_msg_start:]
2764
+ return
2765
+ except Exception as exc:
2766
+ if _llm_retries < _llm_max_retries:
2767
+ _llm_retries += 1
2768
+ wait_s = 3 * _llm_retries
2769
+ self.console.print(
2770
+ f"\n[dim yellow]⟳ Transient error on iteration {iteration+1} "
2771
+ f"(attempt {_llm_retries}/{_llm_max_retries}): {exc}. "
2772
+ f"Retrying in {wait_s}s...[/dim yellow]"
2773
+ )
2774
+ time.sleep(wait_s)
2775
+ continue
2776
+ # All retries exhausted — log and abort turn cleanly
2777
+ self.console.print(f"\n[bold red]Error (all retries failed):[/bold red] {exc}\n")
2778
+ del self.messages[turn_msg_start:]
2779
+ return
2780
+
2781
+ if msg is None or msg.get("aborted") or self.cancel_event.is_set():
2782
+ self.console.print("\n[dim yellow]⊘ Aborted.[/dim yellow]\n")
2783
+ del self.messages[turn_msg_start:]
2784
+ return
2785
+
2786
+
2787
+
2788
+ content: str = msg.get("content") or ""
2789
+ tool_calls: List[Dict] = msg.get("tool_calls") or []
2790
+
2791
+ # Print content that wasn't already streamed live
2792
+ if not was_streamed and content and content.strip():
2793
+ self.console.print()
2794
+ self.console.print(Markdown(content))
2795
+ self._current_line_len = 0
2796
+ if not tool_calls:
2797
+ self.console.print()
2798
+ elif was_streamed and content:
2799
+ # We finished streaming. The cursor is at some position on the current line.
2800
+ # No extra newline here - let the next block handle it
2801
+ pass
2802
+
2803
+ # Parse text-based tool calls fallback if native tool calls are empty
2804
+ if not tool_calls and content:
2805
+ parsed_calls = []
2806
+ try:
2807
+ from utim_cli.tools import TOOL_FUNCTIONS
2808
+ tool_names = set(TOOL_FUNCTIONS.keys())
2809
+ import json
2810
+ decoder = json.JSONDecoder()
2811
+ pos = 0
2812
+ while pos < len(content):
2813
+ start = content.find('{', pos)
2814
+ if start == -1:
2815
+ break
2816
+ try:
2817
+ obj, end_idx = decoder.raw_decode(content[start:])
2818
+ extracted = []
2819
+ # 1. Standard OpenAI format
2820
+ if "function" in obj and isinstance(obj["function"], dict):
2821
+ func_obj = obj["function"]
2822
+ name = func_obj.get("name")
2823
+ if name in tool_names:
2824
+ args = func_obj.get("arguments", "{}")
2825
+ if isinstance(args, dict):
2826
+ args = json.dumps(args)
2827
+ extracted = [{
2828
+ "id": obj.get("id", f"call_parsed_{iteration}"),
2829
+ "type": "function",
2830
+ "function": {"name": name, "arguments": args}
2831
+ }]
2832
+ # 2. Simplified formats
2833
+ if not extracted:
2834
+ name_keys = ["name", "tool", "function", "action", "tool_name"]
2835
+ name = None
2836
+ for k in name_keys:
2837
+ if k in obj and isinstance(obj[k], str) and obj[k] in tool_names:
2838
+ name = obj[k]
2839
+ break
2840
+ if name:
2841
+ args_obj = {}
2842
+ args_keys = ["arguments", "args", "parameters", "params"]
2843
+ for k in args_keys:
2844
+ if k in obj and isinstance(obj[k], dict):
2845
+ args_obj = obj[k]
2846
+ break
2847
+ else:
2848
+ args_obj = {k: v for k, v in obj.items() if k not in name_keys}
2849
+ extracted = [{
2850
+ "id": f"call_parsed_{iteration}",
2851
+ "type": "function",
2852
+ "function": {"name": name, "arguments": json.dumps(args_obj)}
2853
+ }]
2854
+ if extracted:
2855
+ parsed_calls.extend(extracted)
2856
+ pos = start + end_idx
2857
+ except json.JSONDecodeError:
2858
+ pos = start + 1
2859
+ except Exception:
2860
+ pass
2861
+ if parsed_calls:
2862
+ tool_calls = parsed_calls
2863
+ self.console.print(f"\n[bold yellow]🔧 Parsed {len(tool_calls)} tool call(s) from assistant text response.[/bold yellow]")
2864
+
2865
+ # If the model response was cut off mid-turn due to length/token limits, nudge it to continue
2866
+ if msg.get("was_cut_off"):
2867
+ self.console.print("\n[bold yellow]⚠ Response truncated by token limits. Continuing response...[/bold yellow]\n")
2868
+ self.messages.append(
2869
+ {
2870
+ "role": "assistant",
2871
+ "content": content or None,
2872
+ "tool_calls": tool_calls if tool_calls else None,
2873
+ }
2874
+ )
2875
+ self.messages.append(
2876
+ {
2877
+ "role": "user",
2878
+ "content": "You were cut off mid-response (token limit reached). Please continue your response exactly where you left off. Do not repeat yourself; just resume writing from the cutoff point."
2879
+ }
2880
+ )
2881
+ continue
2882
+
2883
+ # No tool calls → potentially done
2884
+ if not tool_calls:
2885
+ # Gather recent tool names from this turn to build context-aware nudges
2886
+ _recent_tool_names = []
2887
+ for _prev_msg in reversed(self.messages[turn_msg_start:]):
2888
+ if _prev_msg.get("role") == "tool":
2889
+ _tname = _prev_msg.get("name", "")
2890
+ if _tname and _tname not in {"recall_experience", "store_experience", "manage_memory"}:
2891
+ _recent_tool_names.append(_tname)
2892
+ elif _prev_msg.get("role") == "user":
2893
+ break # don't look past the user's message
2894
+ _had_tools_this_turn = bool(_recent_tool_names)
2895
+
2896
+ # If the model stopped with empty OR trivially short response after running tools
2897
+ _is_empty = not content.strip()
2898
+ _is_lazy_transition = False
2899
+
2900
+ if _had_tools_this_turn and not _is_empty and iteration > 0:
2901
+ _lower_content = content.strip().lower()
2902
+
2903
+ # Strong indicators that the model forgot to output a tool call
2904
+ _ends_with_cliffhanger = content.strip().endswith(":") or content.strip().endswith("...")
2905
+
2906
+ _has_lazy_phrases = any(phrase in _lower_content for phrase in [
2907
+ "i will now", "let's run", "next, i'll", "running the",
2908
+ "i am going to", "i'll now", "let me check", "let me run",
2909
+ "continuing", "my bad", "apologies", "proceeding to",
2910
+ "now i will", "next i will", "moving on to", "let's proceed",
2911
+ "i will execute", "let's execute", "i will use", "executing the"
2912
+ ])
2913
+
2914
+ _is_lazy_transition = (
2915
+ (len(_lower_content) < 400 and _has_lazy_phrases) or
2916
+ _ends_with_cliffhanger
2917
+ )
2918
+
2919
+ if len(_lower_content) < 50:
2920
+ _is_lazy_transition = True
2921
+
2922
+ if (_is_empty or _is_lazy_transition) and iteration < max_iterations - 1:
2923
+ _empty_response_streak += 1
2924
+ if _empty_response_streak >= 4:
2925
+ # Model stuck in a loop — give up
2926
+ self.console.print(
2927
+ f"\n[bold yellow]⚠ The model got stuck providing lazy or empty responses ({_empty_response_streak} times). "
2928
+ "It has been paused. Try nudging it manually or switching models.[/bold yellow]\n"
2929
+ )
2930
+ break
2931
+
2932
+ # Build a context-aware continuation nudge
2933
+ if _had_tools_this_turn:
2934
+ tool_list = ", ".join(dict.fromkeys(reversed(_recent_tool_names))) # dedupe, preserve order
2935
+ nudge = (
2936
+ f"You just executed tool(s) [{tool_list}]. You then wrote a short response without "
2937
+ f"calling any further tools. If the task is incomplete, you MUST output the required JSON tool calls. "
2938
+ f"Do NOT just tell me what you are going to do — actually DO IT by calling the tool. "
2939
+ f"If the task is truly complete, provide a comprehensive final summary."
2940
+ )
2941
+ else:
2942
+ nudge = (
2943
+ "You stopped without writing anything or taking action. The user's request was: "
2944
+ f"\"{user_message[:200]}\". Please provide a substantive response or take action."
2945
+ )
2946
+
2947
+ self.console.print(f"\n[dim yellow]⚠ Model provided lazy response without tools. Auto-nudging (attempt {_empty_response_streak}/3)...[/dim yellow]")
2948
+ self.messages.append({"role": "assistant", "content": content or " "})
2949
+ self.messages.append({"role": "user", "content": nudge})
2950
+ continue
2951
+
2952
+ # Got a valid response — reset streak counter
2953
+ _empty_response_streak = 0
2954
+
2955
+ # ── Automated Regression Testing Loop ───────────────────────
2956
+ import utim_cli.tools as _t
2957
+ if self._turn_changes and not _t._DRY_RUN and getattr(self, "_test_run_attempts", 0) < 3:
2958
+ self.console.print("\n[bold yellow]🔍 Running automated regression tests to verify changes...[/bold yellow]")
2959
+ test_error = self._detect_and_run_tests()
2960
+ if test_error:
2961
+ self._test_run_attempts = getattr(self, "_test_run_attempts", 0) + 1
2962
+ self.console.print(f"[bold red]❌ Automated tests failed (Attempt {self._test_run_attempts}/3). Nudging agent to self-heal...[/bold red]")
2963
+ self.messages.append({"role": "assistant", "content": content})
2964
+ self.messages.append({
2965
+ "role": "user",
2966
+ "content": f"Automated regression testing failed after your changes. Please fix the failing test(s) or compilation error(s) shown below:\n\n{test_error}"
2967
+ })
2968
+ continue
2969
+ else:
2970
+ self.console.print("[bold green]✓ All automated tests passed successfully![/bold green]\n")
2971
+
2972
+ self.messages.append({"role": "assistant", "content": content})
2973
+ self.turn_step_timings.append({
2974
+ "step": turn_iteration,
2975
+ "reasoning_time": reasoning_duration,
2976
+ "tool_time": 0.0,
2977
+ "tools": []
2978
+ })
2979
+
2980
+ break
2981
+
2982
+ # Append assistant message (with tool_calls) to history
2983
+ self.messages.append(
2984
+ {
2985
+ "role": "assistant",
2986
+ "content": content or None,
2987
+ "tool_calls": tool_calls,
2988
+ }
2989
+ )
2990
+ # Real-time persistence: save the assistant response & tool calls immediately
2991
+ self._persist_messages(in_progress_turn={
2992
+ "user_msg": user_message,
2993
+ "msg_start": turn_msg_start,
2994
+ "msg_end": len(self.messages),
2995
+ "messages": list(self.messages[turn_msg_start:]),
2996
+ "changes": list(self._turn_changes),
2997
+ })
2998
+
2999
+ # Execute tools - use parallel execution for better performance
3000
+ t_tool_start = time.time()
3001
+ compression_instruction = ""
3002
+
3003
+ # Extract compression instruction before parallel execution
3004
+ for tc in tool_calls:
3005
+ func_name = tc.get("function", {}).get("name", "")
3006
+ if func_name == "compress_context":
3007
+ try:
3008
+ args = json.loads(tc["function"].get("arguments", "{}"))
3009
+ compression_instruction = args.get("preservation_rules", "Keep critical facts and architecture decisions.")
3010
+ except:
3011
+ pass
3012
+
3013
+ # ── Two-phase execution: Knowledge-first gate ─────────────────
3014
+ # When recall_experience is called alongside MUTATING tools
3015
+ # (run_command, write_file, edit_file, etc.), the model has already
3016
+ # decided on those tool arguments BEFORE seeing the recall results.
3017
+ # This creates a race condition where knowledge arrives too late.
3018
+ #
3019
+ # Fix: execute ONLY recall_experience first, inject its results,
3020
+ # DROP the remaining planned calls, and force a re-plan so the
3021
+ # model can use the recalled knowledge to make better decisions.
3022
+ MUTATING_TOOLS = {"run_command", "write_file", "edit_file", "delete_file", "move_file"}
3023
+ KNOWLEDGE_TOOLS = {"recall_experience"}
3024
+
3025
+ knowledge_calls = [tc for tc in tool_calls
3026
+ if tc.get("function", {}).get("name", "") in KNOWLEDGE_TOOLS]
3027
+ mutating_calls = [tc for tc in tool_calls
3028
+ if tc.get("function", {}).get("name", "") in MUTATING_TOOLS]
3029
+
3030
+ if knowledge_calls and mutating_calls:
3031
+ # Phase 1: Execute ONLY the knowledge tools (silently)
3032
+
3033
+ for ktc in knowledge_calls:
3034
+ result = self._execute_tool_timed(ktc)
3035
+ tc_id = ktc.get("id") or str(ktc.get("index", "0"))
3036
+ func_name = ktc.get("function", {}).get("name", "")
3037
+ self.messages.append({
3038
+ "role": "tool",
3039
+ "tool_call_id": tc_id,
3040
+ "name": func_name,
3041
+ "content": result,
3042
+ })
3043
+
3044
+ # Phase 2: Tell the model the remaining calls were NOT executed
3045
+ # and ask it to re-plan with the new knowledge
3046
+ dropped_names = [tc.get("function", {}).get("name", "?") for tc in mutating_calls]
3047
+ non_knowledge_non_mutating = [tc for tc in tool_calls
3048
+ if tc.get("function", {}).get("name", "") not in KNOWLEDGE_TOOLS
3049
+ and tc.get("function", {}).get("name", "") not in MUTATING_TOOLS]
3050
+
3051
+ # Execute non-mutating, non-knowledge tools normally (they're safe)
3052
+ for safe_tc in non_knowledge_non_mutating:
3053
+ result = self._execute_tool_timed(safe_tc)
3054
+ tc_id = safe_tc.get("id") or str(safe_tc.get("index", "0"))
3055
+ func_name = safe_tc.get("function", {}).get("name", "")
3056
+ self.messages.append({
3057
+ "role": "tool",
3058
+ "tool_call_id": tc_id,
3059
+ "name": func_name,
3060
+ "content": result,
3061
+ })
3062
+
3063
+ # Insert placeholder results for dropped mutating calls so the API
3064
+ # doesn't complain about missing tool_call_id responses
3065
+ for mtc in mutating_calls:
3066
+ tc_id = mtc.get("id") or str(mtc.get("index", "0"))
3067
+ func_name = mtc.get("function", {}).get("name", "?")
3068
+ self.messages.append({
3069
+ "role": "tool",
3070
+ "tool_call_id": tc_id,
3071
+ "name": func_name,
3072
+ "content": f"[NOT EXECUTED] This {func_name} call was held back. "
3073
+ f"Review the recall_experience results above — they may "
3074
+ f"contain constraints that affect how you should call this tool. "
3075
+ f"Please re-plan and re-issue the call with any necessary adjustments.",
3076
+ })
3077
+
3078
+ # Force the model to re-plan by continuing the loop
3079
+ tool_duration = time.time() - t_tool_start
3080
+ self.turn_step_timings.append({
3081
+ "step": turn_iteration,
3082
+ "reasoning_time": reasoning_duration,
3083
+ "tool_time": tool_duration,
3084
+ "tools": [tc.get("function", {}).get("name", "") for tc in tool_calls]
3085
+ })
3086
+ continue
3087
+
3088
+ # Execute tools in parallel when beneficial
3089
+ if len(tool_calls) > 1:
3090
+ # Use parallel execution for multiple tools
3091
+ parallel_results = self._execute_tools_parallel(tool_calls)
3092
+ for slot in parallel_results:
3093
+ if self.cancel_event.is_set():
3094
+ break
3095
+ # Guard against None slots (defensive: shouldn't happen but prevents crash)
3096
+ if slot is None:
3097
+ continue
3098
+ tc, result = slot
3099
+
3100
+ tc_id = tc.get("id") or str(tc.get("index", "0"))
3101
+ func_name = tc.get("function", {}).get("name", "")
3102
+
3103
+
3104
+
3105
+ self.messages.append(
3106
+ {
3107
+ "role": "tool",
3108
+ "tool_call_id": tc_id,
3109
+ "name": func_name,
3110
+ "content": result,
3111
+ }
3112
+ )
3113
+ else:
3114
+ # Single tool - execute directly
3115
+ for tc in tool_calls:
3116
+ if self.cancel_event.is_set():
3117
+ break
3118
+
3119
+ func_name = tc.get("function", {}).get("name", "")
3120
+ _tools_module._cancel_event = self.cancel_event
3121
+ result = self._execute_tool_timed(tc)
3122
+ self._current_line_len = 0
3123
+ tc_id = tc.get("id") or str(tc.get("index", "0"))
3124
+
3125
+
3126
+
3127
+ self.messages.append(
3128
+ {
3129
+ "role": "tool",
3130
+ "tool_call_id": tc_id,
3131
+ "name": func_name,
3132
+ "content": result,
3133
+ }
3134
+ )
3135
+
3136
+ # Attempt to compress context if requested or if token limit is breached.
3137
+ # turn_msg_start is passed so that after compression rewrites
3138
+ # self.messages the method can refresh _current_turn_start.
3139
+ self._compress_intra_turn(turn_msg_start, compression_instruction)
3140
+
3141
+ # Real-time persistence: save tool results and file diffs immediately
3142
+ self._persist_messages(in_progress_turn={
3143
+ "user_msg": user_message,
3144
+ "msg_start": turn_msg_start,
3145
+ "msg_end": len(self.messages),
3146
+ "messages": list(self.messages[turn_msg_start:]),
3147
+ "changes": list(self._turn_changes),
3148
+ })
3149
+
3150
+ tool_duration = time.time() - t_tool_start
3151
+ self.turn_step_timings.append({
3152
+ "step": turn_iteration,
3153
+ "reasoning_time": reasoning_duration,
3154
+ "tool_time": tool_duration,
3155
+ "tools": [tc.get("function", {}).get("name", "") for tc in tool_calls]
3156
+ })
3157
+
3158
+ else:
3159
+ if not self.cancel_event.is_set():
3160
+ self.console.print(f"\n[bold yellow]⚠ Agent paused after reaching maximum iterations ({max_iterations}).[/bold yellow]")
3161
+ self.console.print("[dim]You can type 'continue' to resume the task.[/dim]\n")
3162
+
3163
+ elapsed = int(time.time() - task_start_time)
3164
+ elapsed_str = (
3165
+ f"{elapsed // 60}m {elapsed % 60}s" if elapsed >= 60 else f"{elapsed}s"
3166
+ )
3167
+ self.console.print(Rule(f"[dim]⚙ {elapsed_str}[/dim]"))
3168
+
3169
+ # Save turn snapshot for /rewind (even if cancelled — partial work matters)
3170
+ # ALWAYS save the turn, even if there are no code changes, so the user can rewind the conversation
3171
+ if not self.cancel_event.is_set():
3172
+ turn_entry = {
3173
+ "user_msg": user_message,
3174
+ "msg_start": turn_msg_start,
3175
+ "msg_end": len(self.messages),
3176
+ "messages": list(self.messages[turn_msg_start:]), # Save messages slice!
3177
+ "changes": list(self._turn_changes),
3178
+ "iteration_count": turn_iteration,
3179
+ "elapsed_time": elapsed,
3180
+ "step_timings": list(self.turn_step_timings),
3181
+ }
3182
+ self.turn_history.append(turn_entry)
3183
+ # Persist messages to server for /resume (background, non-blocking)
3184
+ self._persist_messages()
3185
+ self._trigger_bg_summarization()
3186
+
3187
+ # Automated Reflection Phase powered by Hugging Face Vector DB
3188
+ try:
3189
+ from utim_cli.reflection import run_reflection_phase
3190
+ run_reflection_phase(
3191
+ user_message=user_message or "",
3192
+ assistant_content=final_answer or "",
3193
+ tool_results=self._turn_changes or [],
3194
+ elapsed_seconds=int(elapsed),
3195
+ iterations=turn_iteration
3196
+ )
3197
+ except Exception:
3198
+ pass
3199
+
3200
+ # ── Cleanup old tmp files ─────────────────────────────────────────────
3201
+ # Remove stale files to prevent accumulation across sessions
3202
+ try:
3203
+ removed = self._cleanup_tmp_folder()
3204
+ if removed > 0:
3205
+ self.console.print(f"[dim]⊘ Cleaned up {removed} stale file(s) from .utim_tmp[/dim]")
3206
+ except Exception:
3207
+ pass # Cleanup failures should be silent
3208
+
3209
+ self._turn_changes = []