PyPI - utim-cli - Versions diffs - 1.0.0__py3-none-any.whl - Mend

utim-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

utim_cli/__init__.py +40 -0
utim_cli/agent.py +359 -0
utim_cli/auth.py +208 -0
utim_cli/backup.py +101 -0
utim_cli/billing.py +40 -0
utim_cli/blender_agent.py +1018 -0
utim_cli/bootstrap.py +324 -0
utim_cli/client_utils.py +135 -0
utim_cli/config.py +194 -0
utim_cli/context_pruner.py +504 -0
utim_cli/doctor.py +118 -0
utim_cli/knowledge_graph.py +462 -0
utim_cli/logger.py +121 -0
utim_cli/mcp_clean_wrapper.py +55 -0
utim_cli/mcp_client.py +198 -0
utim_cli/mcp_registry.json +1102 -0
utim_cli/orchestrator.py +3209 -0
utim_cli/reflection.py +200 -0
utim_cli/report.py +100 -0
utim_cli/scrapy_search.py +229 -0
utim_cli/share.py +320 -0
utim_cli/share_tui.py +554 -0
utim_cli/situational_scoring.py +269 -0
utim_cli/state.py +15 -0
utim_cli/tools.py +3381 -0
utim_cli/utim.py +4051 -0
utim_cli/vector_memory.py +629 -0
utim_cli/workspace.py +33 -0
utim_cli-1.0.0.dist-info/METADATA +134 -0
utim_cli-1.0.0.dist-info/RECORD +34 -0
utim_cli-1.0.0.dist-info/WHEEL +5 -0
utim_cli-1.0.0.dist-info/entry_points.txt +2 -0
utim_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
utim_cli-1.0.0.dist-info/top_level.txt +1 -0

utim_cli/orchestrator.py ADDED Viewed

@@ -0,0 +1,3209 @@
+"""
+UTIM Orchestrator — Manages the full agentic loop.
+Architecture:
+  - Maintains local message history (system prompt + conversation)
+  - For each user message, runs a ReAct loop:
+      1. Calls LLM via the UTIM server (/completions, streaming) — keeps API key off client
+      2. Content tokens are written to stdout in real-time as they arrive
+      3. If the LLM returns tool_calls, executes them locally (filesystem tools)
+      4. Feeds tool results back into the loop
+      5. Repeats until the LLM responds with plain text (no more tool calls)
+  - Falls back to calling OpenRouter directly if the server is unreachable
+"""
+from __future__ import annotations
+import difflib
+import json
+import os
+import re
+import shutil
+import subprocess
+import sys
+import threading
+import time
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from typing import Any, Dict, List, Optional, Tuple
+import requests
+# openai SDK removed — we call OpenRouter directly via requests (no Rust/jiter needed)
+from rich.console import Console
+from rich.live import Live
+from rich.markdown import Markdown
+from rich.panel import Panel
+from rich.rule import Rule
+from rich.spinner import Spinner
+from rich.text import Text
+from .billing import CreditManager
+from .tools import TOOL_FUNCTIONS, UTIM_TOOLS
+import utim_cli.tools as _tools_module   # for injecting cancel_event
+from .config import config
+# ─── Dynamic Context Budget ─────────────────────────────────────────────────
+def _get_compression_threshold(model_id: str, context_window: int) -> int:
+    """Return the token count at which conversation compression should kick in.
+    The threshold is a fraction of the model's context window, and the fraction
+    grows with the window size:
+    - up to 32k tokens:   70%
+    - up to 128k tokens:  75%
+    - up to 512k tokens:  80%
+    - anything larger:    85%
+    A missing, zero or negative window is treated as 128k. The result is never
+    lower than 16k tokens. ``model_id`` is accepted but not consulted here.
+    """
+    window = 128_000 if (not context_window or context_window <= 0) else context_window
+    # (inclusive upper bound of the tier, fraction of the window to use)
+    tiers = ((32_000, 0.70), (128_000, 0.75), (512_000, 0.80))
+    for upper_bound, fraction in tiers:
+        if window <= upper_bound:
+            break
+    else:
+        # Larger than every listed tier.
+        fraction = 0.85
+    # Safety floor: never report a threshold below 16k tokens.
+    return max(int(window * fraction), 16_000)
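+# Base system prompt (static part); the auto-detected runtime environment is appended to it further below at import time.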
+SYSTEM_PROMPT = """You are UTIM AI, a high-agency senior software engineer operating autonomously inside a CLI. You focus purely on the technical project or task at hand.
+### CORE DIRECTIVES:
+1. **Explore & Route**: Classify the task immediately:
+   - *Surgical/Localized* (styling, copy edits, single-file changes): Limit context gathering to the target file and central conventions. Use `query_codebase`. Avoid global listings.
+   - *Architectural/System-Wide*: Map project structure and dependencies.
+2. **Planning/Autonomous Tooling**: Do not wait for permissions. Use tools (`edit_file`, `write_file`, `run_command`, `project_res`, `plan_project`, `manage_todos`) proactively to achieve the goal. No placeholders or stubs.
+3. **Think-Create-Verify**: ALWAYS use the `plan_project` tool to formulate a plan before taking any mutating actions. NEVER outline or generate the plan yourself in plain text. Track tasks, write code, and ALWAYS verify with execution (`run_command` -> build/test/run) rather than just reading files.
+4. **Manifesto Reference**: Detailed engineering rules, coding standards, and safety/sandbox instructions are in .utim/UTIM.md. Read/reference it only when specific guidance is needed.
+5. **Output**: Be concise and professional. Summarize all changes and test results when complete. Speak colloquially and warmly in the user's language.
+6. **Execution**: When executing tools to get the project ready always create a todo list and follow them to the end. Add todos based on detailed steps.
+7. **Tool Calling format**: You MUST use the native JSON-schema function-calling mechanism provided by the API to invoke tools. NEVER output raw JSON, <think to=...> tags, <|message|> tags, or other raw tool-call markup in your text response. Any tool usage must be strictly structured via the API.
+8. **Premium Web Design**: When creating, updating, or styling web user interfaces, read/reference `.utim/DESIGN.md` to apply modern visual design conventions (such as premium typography, HSL color themes, glassmorphism, and smooth animations) instead of default browser styles or raw colors.
+"""
+# ─── Runtime environment detection ───────────────────────────────────────────
+def _detect_environment() -> str:
+    """Describe the host platform as a text block meant to be appended to the prompt.
+    The block always lists OS, CPU architecture, shell, home directory and
+    working directory. Platform-specific guidance follows for Termux, WSL,
+    native Windows, macOS or Linux; any other platform gets only the generic lines.
+    """
+    import os
+    import platform
+    system = platform.system()   # 'Linux', 'Windows', 'Darwin'
+    on_termux = os.path.isdir("/data/data/com.termux")
+    on_wsl = "microsoft" in platform.uname().release.lower()
+    shell = os.environ.get("SHELL", os.environ.get("COMSPEC", "unknown"))
+    report = [
+        "\n\nRUNTIME ENVIRONMENT (auto-detected):",
+        f"- OS: {system} ({platform.machine()})",   # machine: 'x86_64', 'aarch64', etc.
+        f"- Shell: {shell}",
+        f"- Home: {os.path.expanduser('~')}",
+        f"- Working directory: {os.getcwd()}",
+    ]
+    guidance = {
+        "Termux": [
+            "- Platform: Android Termux",
+            "- Package manager: pkg (use `pkg install <name>` not apt/brew/choco)",
+            "- Home path: /data/data/com.termux/files/home",
+            "- No sudo — Termux is already a user-level Linux environment",
+            "- Node.js, Python, git, curl all available via `pkg install`",
+            "- The user is on a MOBILE DEVICE (Android). Keep file paths short,",
+            "  avoid opening browsers or GUIs, prefer terminal-based workflows.",
+            "- Do NOT suggest desktop editors (VS Code, etc.) — use nano/vim instead.",
+        ],
+        "WSL": [
+            "- Platform: Windows Subsystem for Linux (WSL)",
+            "- Package manager: apt (sudo apt install <name>)",
+            "- Windows drives mounted at /mnt/c, /mnt/d, etc.",
+            "- Can run both Linux and Windows commands",
+        ],
+        "Windows": [
+            "- Platform: Windows (native PowerShell/CMD)",
+            "- Package manager: winget, choco, or scoop",
+            "- Use PowerShell syntax for shell commands",
+            "- Use backslashes or raw strings for paths when needed",
+            "- **CRITICAL**: '&&' and '||' are NOT valid in PowerShell. Use ';' to chain commands.",
+            "  WRONG: npm test && npm run build",
+            "  RIGHT: npm test ; npm run build",
+        ],
+        "Darwin": [
+            "- Platform: macOS",
+            "- Package manager: brew (brew install <name>)",
+        ],
+        "Linux": [
+            "- Platform: Linux",
+            "- Package manager: apt / dnf / pacman depending on distro",
+        ],
+    }
+    # Termux and WSL both report a Linux kernel, so they are checked before the plain OS name.
+    if on_termux:
+        flavour = "Termux"
+    elif on_wsl:
+        flavour = "WSL"
+    else:
+        flavour = system
+    report.extend(guidance.get(flavour, ()))
+    return "\n".join(report)
+# Build the prompt once at import time (environment is stable per process)
+SYSTEM_PROMPT = SYSTEM_PROMPT + _detect_environment()
+# Words/phrases that mark a message as small talk or a bare command word.
+# (Entries never need trailing "?", "." or "!" — those are stripped before lookup.)
+_CASUAL_WORDS = frozenset({
+    "hello", "hi", "hey", "yo", "sup", "hola", "greetings", "good morning", "good afternoon", "good evening",
+    "how are you", "how's it going", "howdy", "hi there", "hello there", "test", "testing", "ping", "clear",
+    "exit", "quit", "menu", "help", "restart", "reset", "ok", "okay", "yes", "no", "thanks", "thank you",
+    "nice", "cool", "sure", "fine", "awesome", "perfect", "good", "great",
+})
+# Matches any casual phrase as a WHOLE word/phrase (not as part of a longer word).
+_CASUAL_PHRASE_RE = re.compile(
+    r"(?<!\w)(?:"
+    + "|".join(re.escape(w) for w in sorted(_CASUAL_WORDS, key=lambda w: (-len(w), w)))
+    + r")(?!\w)"
+)
+# Characters that suggest code, paths or other technical content.
+_TECHNICAL_SYMBOL_RE = re.compile(r'[./_\\{}()\[\]=+\-*<>]')
+_LETTERS_ONLY_RE = re.compile(r'^[a-z\s]+$')
+def is_casual_message(prompt: str) -> bool:
+    """Return True when *prompt* looks like small talk rather than a technical request.
+    The prompt is stripped, lower-cased and has trailing "?", "." and "!" removed.
+    It counts as casual when:
+    - it is empty (or None), or
+    - it equals one of the known casual words/phrases, or
+    - it is at most 15 characters long, contains no technical symbols
+      (slashes, dots, underscores, brackets, operators) and either contains a
+      casual word as a whole word or consists only of letters and whitespace.
+    Casual words are matched on word boundaries, so "node 18 setup" is not
+    treated as casual merely because "node" contains "no".
+    """
+    if not prompt:
+        return True
+    p = prompt.strip().lower().rstrip("?.!")
+    if not p:
+        return True
+    if p in _CASUAL_WORDS:
+        return True
+    # Only very short messages without code/path symbols qualify for the looser heuristics.
+    if len(p) > 15 or _TECHNICAL_SYMBOL_RE.search(p):
+        return False
+    if _CASUAL_PHRASE_RE.search(p):
+        return True
+    # Otherwise casual only if it is purely letters and spaces.
+    return bool(_LETTERS_ONLY_RE.match(p))
+def get_system_prompt(user_prompt: str = "", current_iteration: int = 0, elapsed_seconds: int = 0, turn_history: Optional[List[Dict]] = None) -> str:
+    """Assemble the system prompt for the next LLM call.
+    Starts from the static SYSTEM_PROMPT, then appends (when available):
+    - the MCP manager's notification context, and
+    - up to two stored experiences relevant to *user_prompt*, each cut to 100 characters.
+    Both additions are best-effort: any failure while gathering them is swallowed
+    and that section is simply left out. ``current_iteration``, ``elapsed_seconds``
+    and ``turn_history`` are accepted but not used by this function.
+    """
+    sections = [SYSTEM_PROMPT]
+    try:
+        from utim_cli.mcp_client import mcp_manager
+        notice = mcp_manager.get_notification_context()
+        if notice:
+            sections.append(f"\n\n### MCP SERVERS AND TOOLS NOTIFICATION ###\n{notice}\n")
+    except Exception:
+        pass
+    if user_prompt:
+        try:
+            from utim_cli.vector_memory import fetch_relevant_experiences
+            lessons = []
+            for record in fetch_relevant_experiences(user_prompt, top_k=2) or []:
+                flattened = str(record.get("content", "")).strip().replace("\n", " ")
+                if flattened:
+                    # Keep each lesson tiny so it barely costs any context.
+                    lessons.append(f"- {flattened[:100]}")
+            if lessons:
+                sections.append("\n\n[RELEVANT LESSONS]: " + " | ".join(lessons) + "\n")
+        except Exception:
+            pass
+    return "".join(sections)
+# ── Context management settings ───────────────────────────────────────────────
+KEEP_FULL_TURNS = 10      # last N turns (including current) kept with full fidelity
+TOKEN_BUDGET    = 90_000  # hard token cap for messages sent to the LLM per call
+# Tool display metadata
+# Color constants for consolidated 3-color palette
+PURPLE = "#cba6f7"
+BLUE = "#42bcf5"
+YELLOW = "#f9e2af"
+# Accent colour per tool (subtle, no bold labels)
+TOOL_COLOR: Dict[str, str] = {
+    "read_file":             BLUE,
+    "write_file":            YELLOW,
+    "edit_file":             YELLOW,
+    "move_file":             BLUE,
+    "delete_file":           PURPLE,
+    "run_command":           YELLOW,
+    "list_directory":        PURPLE,
+    "get_background_output": BLUE,
+    "send_background_input": YELLOW,
+    "stop_background_process": PURPLE,
+    "list_background_processes": BLUE,
+    "web_search":            YELLOW,
+    "manage_todos":            PURPLE,
+    "query_codebase":          YELLOW,
+    "generate_image":          YELLOW,
+}
+class _ServerUnavailableError(RuntimeError):
+    """Raised when the UTIM server cannot be reached and no local key is configured.
+    A plain RuntimeError subclass that adds no behaviour of its own.
+    Caught in run_task (defined outside this part of the file) to display a
+    clean user-facing message (no traceback).
+    """
+class Orchestrator:
+    """Runs the full ReAct agentic loop, proxying LLM calls through the UTIM server."""
+    def __init__(self, console: Console) -> None:
+        """Initialise session state, load API keys from .env files and set up change tracking.
+        Args:
+            console: Rich console used for user-facing output (e.g. the
+                missing-API-key warning printed below).
+        """
+        self.console = console
+        # Start MCP Manager (best-effort: any failure is ignored)
+        try:
+            from utim_cli.mcp_client import mcp_manager
+            mcp_manager.start()
+        except Exception:
+            pass
+        self.credits = CreditManager()
+        self.server_url = "https://utim-cli-production.up.railway.app"
+        self.session_id: Optional[str] = None
+        # Primary model — falls back through config.fallback_models on failure
+        self.model_id: str = "cohere/north-mini-code:free"
+        self._current_line_len = 0
+        self.tool_results: List[Dict[str, Any]] = []
+        self.turn_step_timings: List[Dict[str, Any]] = []
+        # Track session start for elapsed time awareness
+        self._session_start_time: float = time.time()
+        # Dynamic compression threshold based on model's context window
+        self._compression_threshold = self._get_dynamic_threshold()
+        # Local conversation history — the single source of truth for this session.
+        # Commands like /clear, /resume operate on this list directly.
+        self.messages: List[Dict[str, Any]] = [
+            {"role": "system", "content": get_system_prompt()}
+        ]
+        # ── API key / .env loading ────────────────────────────────────────────
+        # Effective priority as implemented below (highest to lowest):
+        #   1. .env file in the CURRENT WORKING DIRECTORY  (loaded with override=True,
+        #      so it beats variables already present in the environment)
+        #   2. Environment variables already set (e.g. exported in the shell)
+        #   3. .utim/.env under the current working directory  (override=False)
+        #   4. ~/.utim/.env  (global fallback, override=False)
+        #
+        # IMPORTANT: We load the CWD .env with override=True so that a
+        # project-local key always beats any key inherited from a previous
+        # utim installation in a different folder (which was the root cause of
+        # "Server unavailable" errors when running `utim` from random folders).
+        # We also load it by EXPLICIT absolute path — not by letting dotenv
+        # walk up the directory tree — so there is no ambiguity about which
+        # file wins.
+        _cwd_env = os.path.join(os.getcwd(), ".env")
+        try:
+            from dotenv import load_dotenv as _load_dotenv
+            if os.path.isfile(_cwd_env):
+                _load_dotenv(_cwd_env, override=True)
+            else:
+                # No local .env — call load_dotenv without a path so python-dotenv
+                # uses its own default .env lookup, but do NOT override variables
+                # that are already set in the environment.
+                _load_dotenv(override=False)
+        except Exception:
+            # python-dotenv missing or unreadable file: continue with the existing environment.
+            pass
+        # Load user-saved API key from local .utim/.env (written by the setup wizard / /auth)
+        import pathlib
+        _user_env = pathlib.Path(".utim").resolve() / ".env"
+        if _user_env.exists():
+            try:
+                from dotenv import load_dotenv as _load_dotenv
+                _load_dotenv(_user_env, override=False)  # override=False: env vars win
+            except Exception:
+                pass
+        # Load user-saved API key from global ~/.utim/.env (written by the setup wizard / /auth)
+        _global_env = pathlib.Path.home() / ".utim" / ".env"
+        if _global_env.exists():
+            try:
+                from dotenv import load_dotenv as _load_dotenv
+                _load_dotenv(_global_env, override=False)  # override=False: env vars win
+            except Exception:
+                pass
+        # User identity from config (can be removed later)
+        self.email = config.email or os.getenv("UTIM_EMAIL", "local@utim.dev")
+        self.token = config.token
+        # Local API key used for OpenRouter.
+        # Read AFTER all .env files have been loaded so the correct key wins.
+        self._local_api_key: Optional[str] = os.getenv("OPENROUTER_API_KEY")
+        if not self._local_api_key:
+            self.console.print(
+                "\n[bold yellow]Warning: OPENROUTER_API_KEY not found in environment "
+                f"or .env file (looked in {_cwd_env!r} then .utim/.env).[/bold yellow]\n"
+            )
+        # True when a local OpenRouter key is available.
+        self._local_client: bool = bool(self._local_api_key)
+        # OpenRouter base URL (can be overridden per-model for custom providers)
+        self._openrouter_base_url = "https://openrouter.ai/api/v1/chat/completions"
+        # Cancellation flag
+        self.cancel_event = threading.Event()
+        # Lock protecting self.messages from concurrent reads/writes.
+        # The background summarisation thread and the main agent loop both
+        # access self.messages; without this lock they can race and produce
+        # hallucinated summaries or corrupt the message list.
+        self._messages_lock = threading.Lock()
+        # Manual-mode confirm hook (default: a callable that returns None)
+        self._get_confirm_fn = lambda: None
+        # Turn-level file-change tracking
+        self.turn_history: List[Dict[str, Any]] = []
+        self.redo_history: List[Dict[str, Any]] = []
+        self._turn_changes: List[Dict[str, Any]] = []
+        self._current_turn_start: int = 1
+        # Eager session create removed for local mode - sessions are created by _persist_messages
+    # LLM calling
+    def _persist_messages(self, in_progress_turn: Optional[Dict] = None) -> None:
+        """Save the conversation to the local history store on a background thread.
+        A session is created first when none exists yet; if that fails the call
+        returns without saving. Keys starting with "_" (private tracking fields)
+        are removed from every message before saving. The turn history (plus
+        *in_progress_turn*, when given) and the redo history are saved alongside
+        the messages. All errors are swallowed: persistence is best-effort and
+        must never interrupt the agent loop.
+        """
+        if not self.session_id:
+            # No session yet — create a local one, or give up quietly.
+            try:
+                from utim_cli.server.history import HistoryManager
+                store = HistoryManager()
+                # Use the orchestrator's email (which defaults to local@utim.dev)
+                owner = self.email or os.getenv("UTIM_EMAIL", "local@utim.dev")
+                self.session_id = store.create_session(self.model_id, email=owner)
+            except Exception:
+                return
+        def _public_fields(message):
+            # Drop the private _-prefixed keys added for internal tracking.
+            return {key: value for key, value in message.items() if not key.startswith("_")}
+        def _scrub_turn(turn):
+            # Shallow-copy the turn and clean its message list, if it has one.
+            scrubbed = dict(turn)
+            if "messages" in scrubbed:
+                scrubbed["messages"] = [_public_fields(msg) for msg in scrubbed["messages"]]
+            return scrubbed
+        # The first user message serves as the conversation title.
+        first_user = ""
+        for message in self.messages:
+            if message.get("role") == "user":
+                first_user = message.get("content", "") or ""
+                break
+        if isinstance(first_user, list):
+            # Multi-part content: join the text of the dict parts.
+            first_user = " ".join(part.get("text", "") for part in first_user if isinstance(part, dict))
+        clean_messages = [_public_fields(message) for message in self.messages]
+        clean_turn_history = [_scrub_turn(turn) for turn in self.turn_history]
+        if in_progress_turn:
+            clean_turn_history.append(_scrub_turn(in_progress_turn))
+        clean_redo_history = [_scrub_turn(turn) for turn in getattr(self, "redo_history", [])]
+        def _save_local():
+            # Runs on the worker thread: write to the local database, then back up.
+            try:
+                from utim_cli.server.history import HistoryManager
+                store = HistoryManager()
+                store.add_messages(
+                    self.session_id,
+                    clean_messages,
+                    self.email,
+                    first_user,
+                    turn_history=clean_turn_history,
+                    redo_history=clean_redo_history
+                )
+                from utim_cli.backup import backup_state
+                backup_state()
+            except Exception:
+                pass  # best-effort — never crash the agent loop
+        threading.Thread(target=_save_local, daemon=True).start()
+    # Pre-think marker patterns that qwen3.6-plus and similar models emit
+    # OUTSIDE their <think> tags — these should be hidden too.
+    _PRE_THINK_PATTERNS = re.compile(
+        r"^\s*(\*\s*Thinking\.\.\.?|\.\.\.(\s*thinking)?|thinking\.\.\.?)\s*$",
+        re.IGNORECASE | re.MULTILINE,
+    )
+    # ── Custom-provider endpoint resolution ─────────────────────────────────
+    def _resolve_model_endpoint(self, model_id: str) -> tuple:
+        """Work out where to send chat-completion requests for *model_id*.
+        Returns a ``(chat_completions_url, api_key)`` pair. Models registered as
+        custom in the config supply their own ``base_url`` (with
+        "/chat/completions" appended unless already present) and ``api_key``;
+        a custom model without a key uses the local OpenRouter key. Every other
+        model uses the OpenRouter URL and the local key. The key is "" when
+        none is available.
+        """
+        entry = config.get_custom_model(model_id)
+        if not entry:
+            # Built-in / OpenRouter model
+            return self._openrouter_base_url, self._local_api_key or ""
+        suffix = "/chat/completions"
+        root = entry.get("base_url", "").rstrip("/")
+        # Accept either a bare base path or a full chat-completions URL.
+        endpoint = root if root.endswith(suffix) else root + suffix
+        return endpoint, entry.get("api_key") or self._local_api_key or ""
+    def _call_llm(self, messages: List[Dict], override_tools: Optional[List[Dict]] = None, override_model: Optional[str] = None, silent: bool = False) -> Tuple[Dict[str, Any], bool]:
+        """POST /chat/completions to OpenRouter (or a custom provider) with real-time streaming."""
+        if self.cancel_event.is_set():
+            return {
+                "role": "assistant",
+                "content": "[Aborted by user]",
+                "tool_calls": None,
+                "was_cut_off": True,
+                "aborted": True,
+            }, False
+        # Pre-flight quota check
+        api_key = config.get("api_key")
+        if api_key:
+            try:
+                from utim_cli.auth import SERVER_URL
+                resp = requests.get(
+                    f"{SERVER_URL}/quota",
+                    headers={"X-API-Key": api_key},
+                    timeout=5,
+                )
+                if resp.status_code == 200:
+                    quota = resp.json()
+                    # 1. Check if quota is exhausted
+                    used = quota.get("credits_used", quota.get("requests_used", 0.0))
+                    limit = quota.get("credits_limit", quota.get("requests_limit", 1000))
+                    if used >= limit:
+                        self.console.print("\n[bold red]✗ Monthly credit quota exhausted.[/bold red]")
+                        self.console.print(f"  Resets at: {quota['reset_at']}  •  run [bold]utim upgrade[/bold] to upgrade.\n")
+                        return {
+                            "role": "assistant",
+                            "content": "Monthly credit quota exhausted. Please upgrade your plan.",
+                            "tool_calls": None,
+                        }, False
+                    # 2. Check if chosen model is allowed, if not downgrade
+                    models_allowed = quota["models_allowed"]
+                    chosen_model = override_model if override_model else self.model_id
+                    if models_allowed != ["all"] and chosen_model not in models_allowed:
+                        fallback_model = "cohere/north-mini-code:free"  # default free fallback
+                        self.console.print(f"\n[bold yellow]⚠ Model '{chosen_model}' is gated under your current '{quota['display_name']}' plan.[/bold yellow]")
+                        self.console.print(f"  Downgrading to default allowed model: '{fallback_model}' for this request.")
+                        if override_model:
+                            override_model = fallback_model
+                        else:
+                            self.model_id = fallback_model
+            except Exception:
+                pass
+        # Determine models to try for fallback support
+        primary_model = override_model if override_model else self.model_id
+        # Setup fallback for layer 2 (always include fallback models unless override_model is set)
+        if not override_model:
+            fallback_models = config.fallback_models
+            fallback_list = [m for m in fallback_models if m != primary_model]
+            models_to_try = [primary_model] + fallback_list
+        else:
+            models_to_try = [primary_model]
+        last_exc = None
+        for model_idx, current_model in enumerate(models_to_try):
+            if self.cancel_event.is_set():
+                break
+            current_is_custom = bool(config.get_custom_model(current_model))
+            # Check for API key only if we need it for this built-in/OpenRouter model
+            if not current_is_custom and not self._local_api_key and not config.get("api_key"):
+                continue
+            if model_idx > 0 and not silent:
+                self.console.print(f"\n[bold yellow]🔄 Falling back to model: {current_model}...[/bold yellow]")
+            model_retries = 2
+            for attempt in range(model_retries + 1):
+                if self.cancel_event.is_set():
+                    break
+                mcp_tools = []
+                try:
+                    from utim_cli.mcp_client import mcp_manager
+                    mcp_tools = mcp_manager.get_tools()
+                except Exception:
+                    pass
+                all_tools = (override_tools if override_tools is not None else UTIM_TOOLS) + mcp_tools
+                # Filter disabled tools
+                disabled = config.get("disabled_tools", [])
+                all_tools = [t for t in all_tools if t["function"]["name"] not in disabled]
+                payload = {
+                    "model": current_model,
+                    "messages": messages,
+                    "stream": True,
+                    "max_tokens": 8192,
+                }
+                if all_tools:
+                    payload["tools"] = all_tools
+                printed_header = False
+                in_think = False
+                native_reasoning = False
+                display_buf = ""
+                _think_buf = ""
+                _proxy = sys.stdout
+                _term_width = self.console.width or 80
+                _line_buf = ""
+                try:
+                    start_time = time.time()
+                    # last_content_time: updated only when real content/tool-call data arrives.
+                    # Intentionally NOT reset by keep-alive pings (empty lines) so stall
+                    # detection isn't fooled by the server sending blank heartbeats.
+                    last_content_time = start_time
+                    _api_key = config.get("api_key")
+                    if _api_key and not current_is_custom:
+                        from utim_cli.auth import SERVER_URL
+                        _endpoint_url = f"{SERVER_URL}/completions"
+                        _headers = {
+                            "X-API-Key": _api_key,
+                            "Content-Type": "application/json"
+                        }
+                        request_payload = {
+                            "messages": messages,
+                            "model_id": current_model,
+                            "tools": all_tools or None,
+                            "session_id": self.session_id,
+                        }
+                    else:
+                        _endpoint_url, _endpoint_key = self._resolve_model_endpoint(current_model)
+                        _headers = {
+                            "Authorization": f"Bearer {_endpoint_key}",
+                            "Content-Type": "application/json"
+                        }
+                        request_payload = payload
+                    with requests.post(
+                        _endpoint_url,
+                        json=request_payload,
+                        headers=_headers,
+                        stream=True,
+                        timeout=(15, 300),  # 300s per-chunk socket timeout — models need time to process large contexts
+                    ) as resp:
+                        resp.raise_for_status()
+                        resp.encoding = "utf-8"
+                        final_content = ""
+                        final_tool_calls = []
+                        was_cut_off = False
+                        try:
+                            # Dynamic thinking phases — cycle through contextual messages
+                            # during the TTFT wait so the spinner feels alive, not stuck.
+                            _THINKING_PHASES = [
+                                "Analyzing context...",
+                                "Reasoning through approach...",
+                                "Evaluating options...",
+                                "Structuring response...",
+                                "Processing deeply...",
+                                "Connecting patterns...",
+                                "Formulating plan...",
+                                "Almost there...",
+                            ]
+                            _phase_idx = 0
+                            _last_phase_time = start_time
+                            for raw_line in resp.iter_lines(decode_unicode=True):
+                                if self.cancel_event.is_set():
+                                    return {
+                                        "role": "assistant",
+                                        "content": "[Aborted by user]",
+                                        "tool_calls": None,
+                                        "was_cut_off": True,
+                                        "aborted": True,
+                                    }, False
+                                now = time.time()
+                                # Cycle thinking topic every 8s during TTFT wait
+                                if not final_content and not final_tool_calls:
+                                    if now - _last_phase_time > 8:
+                                        try:
+                                            from utim_cli.utim import STATE
+                                            STATE["thinking_topic"] = _THINKING_PHASES[_phase_idx % len(_THINKING_PHASES)]
+                                            _phase_idx += 1
+                                            _last_phase_time = now
+                                        except Exception:
+                                            pass
+                                # Stall detection runs on EVERY iteration (including empty
+                                # keep-alive lines) so a true stream stall is always caught.
+                                if not final_content and not final_tool_calls:
+                                    # Hard 180-second timeout for Time-To-First-Token
+                                    # Models often need 60-120s to process large tool outputs before streaming
+                                    if now - start_time > 180:
+                                        raise requests.exceptions.Timeout("Hard TTFT timeout exceeded 180s")
+                                else:
+                                    # Inter-content stall detection: abort if no real content
+                                    # has arrived for 120 seconds, even during keep-alive pings.
+                                    if now - last_content_time > 120:
+                                        raise requests.exceptions.Timeout("Inter-token stall timeout exceeded 120s")
+                                if not raw_line:
+                                    continue
+                                # NOTE: last_content_time is updated further below, only when
+                                # actual content or tool-call data is parsed from the chunk.
+                                if not raw_line.startswith("data: "):
+                                    # Catch non-streaming JSON responses or errors
+                                    stripped_line = raw_line.strip()
+                                    if stripped_line.startswith("{"):
+                                        try:
+                                            chunk = json.loads(stripped_line)
+                                            # Check if it's UTIM server response format
+                                            if "type" in chunk:
+                                                last_content_time = time.time()
+                                                t = chunk["type"]
+                                                if t == "content_delta":
+                                                    text = chunk.get("text", "")
+                                                    final_content += text
+                                                    if not silent:
+                                                        display_buf += text
+                                                        printed_header = True
+                                                elif t == "done":
+                                                    if "error" in chunk and chunk["error"]:
+                                                        raise RuntimeError(f"Server completion error: {chunk['error']}")
+                                                    final_content = chunk.get("content") or final_content
+                                                    final_tool_calls = chunk.get("tool_calls") or final_tool_calls
+                                                    break
+                                                continue
+                                            # Otherwise fall back to original OpenRouter logic
+                                            if "error" in chunk:
+                                                raise RuntimeError(f"OpenRouter error: {chunk['error'].get('message', str(chunk['error']))}")
+                                            if "choices" in chunk and len(chunk["choices"]) > 0:
+                                                choice = chunk["choices"][0]
+                                                if choice.get("finish_reason") == "length":
+                                                    was_cut_off = True
+                                                msg = choice.get("message", {})
+                                                if "content" in msg and msg["content"]:
+                                                    last_content_time = time.time()
+                                                    final_content += msg["content"]
+                                                    if not silent:
+                                                        display_buf += msg["content"]
+                                                        printed_header = True
+                                                if "tool_calls" in msg and msg["tool_calls"]:
+                                                    last_content_time = time.time()
+                                                    final_tool_calls = msg["tool_calls"]
+                                                break
+                                        except json.JSONDecodeError:
+                                            pass
+                                    continue
+                                data_str = raw_line[6:]
+                                if data_str == "[DONE]":
+                                    break
+                                try:
+                                    chunk = json.loads(data_str)
+                                except json.JSONDecodeError:
+                                    continue
+                                # Check for API errors returned mid-stream
+                                if "error" in chunk:
+                                    raise RuntimeError(f"OpenRouter error: {chunk['error'].get('message', str(chunk['error']))}")
+                                if not chunk.get("choices"):
+                                    continue
+                                choice = chunk["choices"][0]
+                                if choice.get("finish_reason") == "length":
+                                    was_cut_off = True
+                                delta = choice.get("delta", {})
+                                # Handle tool calls accumulation
+                                if "tool_calls" in delta:
+                                    last_content_time = time.time()  # real data arrived
+                                    for tc in delta["tool_calls"]:
+                                        idx = tc.get("index", 0)
+                                        while len(final_tool_calls) <= idx:
+                                            final_tool_calls.append({"id": "", "type": "function", "function": {"name": "", "arguments": ""}})
+                                        if tc.get("id"):
+                                            final_tool_calls[idx]["id"] = tc["id"]
+                                        if tc.get("function"):
+                                            f = tc["function"]
+                                            if "name" in f:
+                                                final_tool_calls[idx]["function"]["name"] += f["name"]
+                                            if "arguments" in f:
+                                                final_tool_calls[idx]["function"]["arguments"] += f["arguments"]
+                                # Handle content streaming
+                                chunk_text = delta.get("content")
+                                reasoning_text = delta.get("reasoning")
+                                if reasoning_text:
+                                    last_content_time = time.time()  # real data arrived
+                                    if not in_think:
+                                        in_think = True
+                                        native_reasoning = True
+                                        final_content += "<think>\n"
+                                    final_content += reasoning_text
+                                    _think_buf += reasoning_text
+                                    try:
+                                        from utim_cli.utim import STATE
+                                        lines = [l.strip() for l in _think_buf.split('\n') if l.strip()]
+                                        if lines:
+                                            topic = lines[-1]
+                                            if len(topic) > 60:
+                                                topic = topic[:57] + "..."
+                                            STATE["thinking_topic"] = topic
+                                    except Exception:
+                                        pass
+                                    continue
+                                if chunk_text is not None and chunk_text != "":
+                                    last_content_time = time.time()  # real data arrived
+                                    if native_reasoning:
+                                        native_reasoning = False
+                                        in_think = False
+                                        final_content += "\n</think>\n"
+                                    final_content += chunk_text
+                                    display = ""
+                                    remaining = chunk_text
+                                    while remaining:
+                                        if in_think:
+                                            for closing in ("</think>", "</thinking>", "[/THINKING]"):
+                                                end_idx = remaining.find(closing)
+                                                if end_idx >= 0:
+                                                    _think_buf += remaining[:end_idx]
+                                                    remaining = remaining[end_idx + len(closing):]
+                                                    in_think = False
+                                                    break
+                                            else:
+                                                _think_buf += remaining
+                                                remaining = ""
+                                            try:
+                                                from utim_cli.utim import STATE
+                                                lines = [l.strip() for l in _think_buf.split('\n') if l.strip()]
+                                                if lines:
+                                                    topic = lines[-1]
+                                                    if len(topic) > 60:
+                                                        topic = topic[:57] + "..."
+                                                    STATE["thinking_topic"] = topic
+                                            except Exception:
+                                                pass
+                                        else:
+                                            open_found = False
+                                            for opening in ("<think>", "<thinking>", "[THINKING]"):
+                                                start_idx = remaining.find(opening)
+                                                if start_idx >= 0:
+                                                    display += remaining[:start_idx]
+                                                    remaining = remaining[start_idx + len(opening):]
+                                                    in_think = True
+                                                    open_found = True
+                                                    break
+                                            if not open_found:
+                                                display += remaining
+                                                remaining = ""
+                                    if display and not silent:
+                                        cleaned = self._PRE_THINK_PATTERNS.sub("", display)
+                                        if cleaned:
+                                            if not printed_header:
+                                                printed_header = True
+                                            display_buf += cleaned
+                        except Exception as stream_exc:
+                            # If we have received some content/tool calls, recover gracefully
+                            if final_content or final_tool_calls:
+                                if not silent:
+                                    self.console.print(f"\n[dim yellow]⚠ Stream interrupted: {stream_exc}. Returning partial response.[/dim yellow]\n")
+                                was_cut_off = True
+                            else:
+                                raise stream_exc
+                        # ── End of `with resp` streaming block ────────────────────────────
+                        # Render the fully-buffered response as rich Markdown (tables, bold, code, etc.)
+                        if printed_header and display_buf and not silent:
+                            self.console.print()
+                            self.console.print(Markdown(display_buf))
+                            self.console.print()
+                        clean_content = re.sub(
+                            r"<think(?:ing)?>.*?</think(?:ing)?>", "", final_content, flags=re.DOTALL
+                        ).strip()
+                        clean_content = self._PRE_THINK_PATTERNS.sub("", clean_content).strip()
+                        # Failsafe: if the model ONLY output reasoning and no actual content,
+                        # use the reasoning as the content so the user sees it.
+                        if not clean_content and final_content.strip():
+                            clean_content = final_content.strip()
+                            clean_content = re.sub(r"</?think(?:ing)?>", "", clean_content).strip()
+                        clean_content = clean_content if clean_content else None
+                        final_tool_calls = final_tool_calls if final_tool_calls else None
+                        if not clean_content and not final_tool_calls:
+                            if model_idx < len(models_to_try) - 1:
+                                raise RuntimeError("Model returned an empty response (no content and no tool calls).")
+                        final_msg = {
+                            "role": "assistant",
+                            "content": clean_content,
+                            "tool_calls": final_tool_calls,
+                            "was_cut_off": was_cut_off,
+                        }
+                        return final_msg, True
+                except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as exc:
+                    last_exc = exc
+                    if not silent:
+                        self.console.print(f"\n[yellow]⚠ Model {current_model} failed (Connection/Timeout error: {exc}). Trying next model...[/yellow]")
+                    break  # try next model
+                except requests.exceptions.HTTPError as exc:
+                    last_exc = exc
+                    code = exc.response.status_code if exc.response is not None else "?"
+                    if code == 400 and exc.response is not None:
+                        try:
+                            error_details = exc.response.json()
+                            self.console.print(f"\n[red]HTTP 400 Error Details from {current_model}: {error_details}[/red]")
+                        except Exception:
+                            self.console.print(f"\n[red]HTTP 400 Error Details from {current_model}: {exc.response.text}[/red]")
+                    if code == 429:
+                        if not silent:
+                            self.console.print(f"\n[yellow]⚠ Model {current_model} rate-limited (429). Trying next model...[/yellow]")
+                        break
+                    if attempt < model_retries:
+                        delay = 3 * (attempt + 1)
+                        if not silent:
+                            self.console.print(f"\n[dim yellow]⟳ Model {current_model} returned HTTP {code}. Retrying in {delay}s (attempt {attempt+1}/{model_retries})...[/dim yellow]")
+                        time.sleep(delay)
+                        continue
+                    if not silent:
+                        self.console.print(f"\n[yellow]⚠ Model {current_model} failed (HTTP {code}). Trying next model...[/yellow]")
+                    break  # try next model
+                except RuntimeError as exc:
+                    # Mid-stream API errors (e.g. model overloaded) or custom empty response error
+                    last_exc = exc
+                    if not silent:
+                        self.console.print(f"\n[yellow]⚠ Model {current_model} failed: {exc}. Trying next model...[/yellow]")
+                    break  # try next model
+                except Exception as exc:
+                    last_exc = exc
+                    if not silent:
+                        self.console.print(f"\n[yellow]⚠ Model {current_model} failed (unexpected error: {exc}). Trying next model...[/yellow]")
+                    break  # try next model
+        # If we exit the loop, all models failed
+        if isinstance(last_exc, requests.exceptions.HTTPError):
+            code = last_exc.response.status_code if last_exc.response is not None else "?"
+            raise _ServerUnavailableError(f"Model API returned an error after trying all fallbacks (HTTP {code}).") from last_exc
+        elif last_exc:
+            raise _ServerUnavailableError(f"Cannot reach model API after trying all fallback models. Last error: {last_exc}") from last_exc
+        else:
+            raise _ServerUnavailableError("OPENROUTER_API_KEY is missing. Please set it in your .env file.")
+    # ── Tool display helpers ──────────────────────────────────────────────────
+    # Emoji shown next to each built-in tool; lookups fall back to a default
+    # glyph via `.get()` for any tool that is not listed here.
+    # NOTE: several values end with a trailing space that is part of the value
+    # (presumably to even out terminal cell width — confirm before removing).
+    TOOL_ICON: Dict[str, str] = {
+        # File operations
+        "read_file": "📄",
+        "write_file": "✏️ ",
+        "edit_file": "🔧",
+        "move_file": "📦",
+        "delete_file": "🗑️ ",
+        # Shell and directory listing
+        "run_command": "⚡",
+        "list_directory": "📁",
+        # Background process management
+        "get_background_output": "📤",
+        "send_background_input": "⌨️ ",
+        "stop_background_process": "⏹️ ",
+        "list_background_processes": "📋",
+        # Search, planning and generation
+        "web_search": "🔍",
+        "plan_project": "🧠",
+        "manage_todos": "📝",
+        "query_codebase": "🧠",
+        "generate_image": "🎨",
+    }
+    # User-friendly display names for tools.
+    # Covers the same tool names as TOOL_ICON so that no built-in tool falls
+    # back to its raw identifier when a display name is looked up with `.get()`.
+    TOOL_DISPLAY_NAME: Dict[str, str] = {
+        "read_file":                 "Reading file",
+        "write_file":                "Writing file",
+        "edit_file":                 "Editing file",
+        "move_file":                 "Moving file",
+        "delete_file":               "Deleting file",
+        "run_command":               "Running command",
+        "list_directory":            "Listing directory",
+        "get_background_output":     "Reading background output",
+        "send_background_input":     "Sending background input",
+        "stop_background_process":   "Stopping background process",
+        "list_background_processes": "Listing background processes",
+        "web_search":                "Searching web",
+        "plan_project":              "Planning project",
+        "manage_todos":              "Managing To-Dos",
+        "query_codebase":            "Querying Codebase",
+        "generate_image":            "Generating image",
+    }
+    @staticmethod
+    def _get_display_arg(func_name: str, arguments: Dict) -> str:
+        """Extract the most informative single argument to display inline.
+
+        Args:
+            func_name: Tool name. Names containing "__" get a generic
+                "key=value, ..." rendering capped at 60 characters.
+            arguments: Tool-call arguments. None (or an empty mapping) is
+                accepted. Individual values may be None or non-strings: None
+                is treated as missing and anything else is passed through
+                str() before it is sliced or joined.
+
+        Returns:
+            A short one-line summary for the tool call, or "" when the tool
+            has no dedicated summary.
+        """
+        arguments = arguments or {}
+
+        def text(*keys: str, default: str = "") -> str:
+            # First non-None value among `keys`, coerced to str. Coercing here
+            # keeps the slicing/joining below from raising on null or
+            # non-string values.
+            for key in keys:
+                value = arguments.get(key)
+                if value is not None:
+                    return str(value)
+            return default
+
+        if "__" in func_name:
+            return ", ".join(f"{k}={v}" for k, v in arguments.items())[:60]
+        if func_name in ("read_file", "write_file", "delete_file", "edit_file"):
+            path = text("filepath", "path")
+            # Append line range for read_file when a range was requested
+            if func_name == "read_file":
+                s = arguments.get("start_line")
+                e = arguments.get("end_line")
+                if s or e:
+                    path = f"{path}:{s or ''}–{e or ''}"
+            return path
+        if func_name == "run_command":
+            cmd = text("command")
+            if not cmd:
+                # Fall back to the batch form: a list of commands.
+                cmds = arguments.get("commands")
+                if cmds and isinstance(cmds, list):
+                    cmd = "; ".join(str(c) for c in cmds)
+            display = cmd[:80] + ("…" if len(cmd) > 80 else "")
+            dir_p = arguments.get("dir_path", "")
+            if dir_p:
+                display += f"  [{dir_p}]"
+            return display
+        if func_name == "list_directory":
+            return text("path", default=".")
+        if func_name == "move_file":
+            src = text("source", "src")
+            dst = text("destination", "dst")
+            return f"{src} → {dst}"
+        if func_name == "web_search":
+            return text("prompt", "query")
+        if func_name == "generate_image":
+            return text("prompt")[:40]
+        if func_name == "plan_project":
+            return f"{text('plan_part', default='general')} - {text('prompt')[:30]}"
+        if func_name == "query_codebase":
+            return text("query")[:40]
+        if func_name == "manage_todos":
+            # A batch of operations takes precedence over the single-action form.
+            ops = arguments.get("operations", [])
+            if ops:
+                return f"{len(ops)} operations"
+            action = text("action")
+            if action == "add":
+                return f"Add: {text('description')[:30]}"
+            if action in ("mark_done", "mark_pending", "delete"):
+                return f"{action}: {text('task_id')}"
+            return action
+        if func_name == "get_background_output":
+            return f"process #{text('process_id', default='?')}"
+        if func_name == "send_background_input":
+            return f"to #{text('process_id', default='?')}: {text('input_text')[:30]}"
+        return ""
+    def _render_result(self, func_name: str, arguments: Dict, result: str, color: str, user_confirmed: bool = False) -> None:
+        """Render the tool result inside a styled panel appropriate to the tool type."""
+        width = min(self.console.width - 2, 120)
+        display_name = self.TOOL_DISPLAY_NAME.get(func_name, func_name)
+        # ── When the user already saw and approved a diff dialog, skip the
+        # ── verbose diff body — it was already shown in the confirm panel.
+        if user_confirmed and func_name in ("edit_file", "write_file", "delete_file", "move_file"):
+            icon = self.TOOL_ICON.get(func_name, "●")
+            display_arg = self._get_display_arg(func_name, arguments)
+            line = Text()
+            line.append(f"  ✓  ", style=f"bold {color}")
+            line.append(display_name, style=f"{color}")
+            if display_arg:
+                line.append(f"  {display_arg}", style="white")
+            self.console.print(line)
+            return
+        # ── Header line: icon  ToolName  path/arg ─────────────────────────────
+        display_arg = self._get_display_arg(func_name, arguments)
+        icon = self.TOOL_ICON.get(func_name, "●")
+        header = Text()
+        header.append(f"✓  ", style="bold white")
+        header.append(f"{display_name}", style=f"bold {color}")
+        if display_arg:
+            header.append(f"  {display_arg}", style="white")
+        # ── Body: tool-specific formatting ─────────────────────────────────────
+        if func_name == "edit_file":
+            old_str = arguments.get("old_str", "")
+            new_str = arguments.get("new_str", "")
+            old_lines = old_str.splitlines()
+            new_lines = new_str.splitlines()
+            removed = len(old_lines)
+            added   = len(new_lines)
+            body = Text()
+            # Show up to 4 removed lines then up to 4 added lines
+            for line in old_lines[:4]:
+                body.append(f"- {line}\n", style="bold red")
+            if removed > 4:
+                body.append(f"  … ({removed - 4} more lines)\n", style="dim red")
+            for line in new_lines[:4]:
+                body.append(f"+ {line}\n", style="bold green")
+            if added > 4:
+                body.append(f"  … ({added - 4} more lines)\n", style="dim green")
+            # Stat footer
+            body.append("\n")
+            body.append(f" -{removed} lines", style="bold red")
+            body.append("   ", style="dim")
+            body.append(f"+{added} lines", style="bold green")
+            if "…" in body.plain or "diff truncated" in body.plain:
+                header.append("  (Ctrl+O to expand)", style="dim italic")
+            self.console.print(Panel(
+                body,
+                title=header,
+                title_align="left",
+                border_style=color,
+                padding=(0, 1),
+                width=width,
+            ))
+        elif func_name == "write_file":
+            old_content = arguments.get("_old_content") or ""
+            new_content = arguments.get("content", "")
+            old_lines = old_content.splitlines(keepends=True)
+            new_lines = new_content.splitlines(keepends=True)
+            diff_lines = list(difflib.unified_diff(old_lines, new_lines, lineterm=""))
+            body = Text()
+            removed_count = 0
+            added_count   = 0
+            if not diff_lines:
+                # No change (same content written again)
+                body.append("  (no changes)", style="dim")
+            else:
+                # Skip the --- / +++ header lines, show the hunks
+                shown = 0
+                for dl in diff_lines:
+                    if dl.startswith("---") or dl.startswith("+++"):
+                        continue
+                    if shown >= 30:
+                        remaining = sum(1 for d in diff_lines
+                                        if d.startswith("+") and not d.startswith("+++ ")
+                                        or d.startswith("-") and not d.startswith("--- "))
+                        body.append(f"  … (diff truncated)\n", style="dim")
+                        break
+                    if dl.startswith("+"):
+                        body.append(f"{dl}\n", style="bold green")
+                        added_count += 1
+                        shown += 1
+                    elif dl.startswith("-"):
+                        body.append(f"{dl}\n", style="bold red")
+                        removed_count += 1
+                        shown += 1
+                    else:
+                        body.append(f"{dl}\n", style="dim white")
+                        shown += 1
+                # Stat footer
+                if removed_count or added_count:
+                    body.append("\n")
+                    if removed_count:
+                        body.append(f" -{removed_count} lines", style="bold red")
+                        body.append("   ", style="dim")
+                    body.append(f"+{added_count} lines", style="bold green")
+                elif not old_content:
+                    total = len(new_lines)
+                    body.append(f"\n +{total} lines (new file)", style="bold green")
+            if "…" in body.plain or "diff truncated" in body.plain:
+                header.append("  (Ctrl+O to expand)", style="dim italic")
+            self.console.print(Panel(
+                body,
+                title=header,
+                title_align="left",
+                border_style=color,
+                padding=(0, 1),
+                width=width,
+            ))
+        elif func_name == "run_command":
+            # Parse structured result: [exit_code: N] / [stdout] / [stderr] sections
+            raw_output = result.strip()
+            body = Text()
+            exit_code_val: Optional[str] = None
+            stdout_section = ""
+            stderr_section = ""
+            current_section = None
+            for line in raw_output.splitlines():
+                if line.startswith("[exit_code:"):
+                    exit_code_val = line.strip().lstrip("[").rstrip("]").split(":", 1)[1].strip()
+                elif line == "[stdout]":
+                    current_section = "stdout"
+                elif line == "[stderr]":
+                    current_section = "stderr"
+                else:
+                    if current_section == "stdout":
+                        stdout_section += line + "\n"
+                    elif current_section == "stderr":
+                        stderr_section += line + "\n"
+            # Exit code badge
+            if exit_code_val is not None:
+                code_int = int(exit_code_val) if exit_code_val.lstrip("-").isdigit() else None
+                code_style = "bold red" if (code_int is not None and code_int != 0) else "bold green"
+                body.append(f"exit {exit_code_val}\n", style=code_style)
+            # Stdout block
+            if stdout_section.strip():
+                stdout_lines = stdout_section.splitlines()
+                if len(stdout_lines) > 20:
+                    shown_block = "\n".join(stdout_lines[:20])
+                    tail = f"\n\u2026 ({len(stdout_lines) - 20} more lines)"
+                else:
+                    shown_block = "\n".join(stdout_lines)
+                    tail = ""
+                body.append(shown_block, style="dim white")
+                if tail:
+                    body.append(tail, style="dim")
+                body.append("\n")
+            # Stderr block (yellow to distinguish from stdout)
+            if stderr_section.strip():
+                body.append("\n[stderr]\n", style="bold yellow")
+                stderr_lines = stderr_section.splitlines()
+                if len(stderr_lines) > 10:
+                    shown_err = "\n".join(stderr_lines[:10])
+                    err_tail = f"\n\u2026 ({len(stderr_lines) - 10} more lines)"
+                else:
+                    shown_err = "\n".join(stderr_lines)
+                    err_tail = ""
+                body.append(shown_err, style="dim #f9e2af")
+                if err_tail:
+                    body.append(err_tail, style="dim")
+            if not stdout_section.strip() and not stderr_section.strip():
+                body.append("(no output)", style="dim")
+            if "…" in body.plain or "diff truncated" in body.plain:
+                header.append("  (Ctrl+O to expand)", style="dim italic")
+            self.console.print(Panel(
+                body,
+                title=header,
+                title_align="left",
+                border_style=color,
+                padding=(0, 1),
+                width=width,
+            ))
+        elif func_name == "list_directory":
+            output = result.strip()
+            lines = output.splitlines()
+            body = Text()
+            # Skip the "Contents of X:" header line — it's already in the panel title
+            items = lines[1:] if lines and lines[0].startswith("Contents") else lines
+            for item in items[:30]:
+                body.append(f"  {item}\n", style="dim white")
+            if len(items) > 30:
+                body.append(f"  … ({len(items) - 30} more items)", style="dim")
+            if "…" in body.plain or "diff truncated" in body.plain:
+                header.append("  (Ctrl+O to expand)", style="dim italic")
+            self.console.print(Panel(
+                body,
+                title=header,
+                title_align="left",
+                border_style=color,
+                padding=(0, 1),
+                width=width,
+            ))
+        elif func_name == "read_file":
+            # First line of result is the metadata header [File: ... | Lines ...]
+            all_lines = result.splitlines()
+            meta = all_lines[0] if all_lines and all_lines[0].startswith("[") else ""
+            content_lines = all_lines[1:] if meta else all_lines
+            preview_lines = content_lines[:15]
+            body = Text()
+            if meta:
+                body.append(meta + "\n", style="dim #585b70")
+            for line in preview_lines:
+                body.append(line + "\n", style="dim white")
+            if len(content_lines) > 15:
+                body.append(f"… ({len(content_lines) - 15} more lines in this chunk)", style="dim")
+            if "…" in body.plain or "diff truncated" in body.plain:
+                header.append("  (Ctrl+O to expand)", style="dim italic")
+            self.console.print(Panel(
+                body,
+                title=header,
+                title_align="left",
+                border_style=color,
+                padding=(0, 1),
+                width=width,
+            ))
+        elif func_name == "manage_todos":
+            self.console.print(f" {icon}  [bold {color}]{display_name}[/bold {color}]  [dim italic](Ctrl+O to expand)[/dim italic]")
+            # plain text print for todos without panel
+            self.console.print(Text(result.strip(), style="dim white"))
+            self.console.print()
+        else:
+            # Generic: show the result as plain text
+            summary = result.strip()
+            if len(summary) > 300:
+                summary = summary[:300] + "…"
+            if "…" in summary:
+                header.append("  (Ctrl+O to expand)", style="dim italic")
+            self.console.print(Panel(
+                Text(summary, style="dim white"),
+                title=header,
+                title_align="left",
+                border_style=color,
+                padding=(0, 1),
+                width=width,
+            ))
+    # Tool execution
+    def _execute_tool_timed(self, tool_call: Dict) -> str:
+        """Forward *tool_call* to ``_execute_tool`` and return whatever it returns.
+        Despite the name, nothing is timed here: the call is passed through as-is.
+        """
+        outcome = self._execute_tool(tool_call)
+        return outcome
+    def _execute_tool(self, tool_call: Dict) -> str:
+        """Execute a single tool call and render a prominent panel indicator.
+        Steps, in order: parse the JSON arguments (falling back to an empty dict),
+        repair a function name that carries XML-style attributes, write the cleaned
+        name and arguments back into ``tool_call`` (the dict is mutated), dispatch
+        ``server__tool`` names to the MCP manager when that server has a session,
+        capture file contents for rewind tracking, ask for confirmation before
+        file-modifying tools and run_command, call the function registered in
+        ``TOOL_FUNCTIONS``, then render and record the result.
+        Returns the tool result (stringified for local tools). Unknown tools, user
+        rejections and exceptions raised by a tool are reported as strings too.
+        """
+        func_name = tool_call["function"]["name"]
+        # Clean corrupted func_name (e.g. from buggy OpenRouter proxy XML to tool-call translations)
+        # E.g. 'read_file filepath=".utim/UTIM.md" />'
+        arguments = {}
+        raw_args = tool_call["function"].get("arguments", "{}")
+        if raw_args:
+            try:
+                arguments = json.loads(raw_args)
+                if not isinstance(arguments, dict):
+                    arguments = {}
+            except Exception:
+                # Unparseable arguments are treated as "no arguments".
+                pass
+        func_name_clean = func_name.strip("<> ")
+        if func_name_clean:
+            # First whitespace-separated token is the real tool name; anything after
+            # it is parsed as key=value attributes and merged into the arguments.
+            parts = func_name_clean.split(None, 1)
+            actual_name = parts[0]
+            if len(parts) > 1:
+                attr_string = parts[1].rstrip("/> ")
+                import re
+                # Accepts key="v", key='v' and bare key=v forms.
+                attrs = re.findall(r'(\w+)\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([^\s>]+))', attr_string)
+                for key, val1, val2, val3 in attrs:
+                    val = val1 or val2 or val3 or ""
+                    arguments[key] = val
+            func_name = actual_name
+        # Update the tool_call dict back with the cleaned values
+        tool_call["function"]["name"] = func_name
+        tool_call["function"]["arguments"] = json.dumps(arguments)
+        color = TOOL_COLOR.get(func_name, "#888888")
+        icon  = self.TOOL_ICON.get(func_name, "●")
+        # The JSON arguments are now guaranteed to be clean/valid
+        arguments = json.loads(tool_call["function"]["arguments"])
+        # Check if it's an MCP tool (names of the form "<server>__<tool>")
+        if "__" in func_name:
+            server_name, actual_tool_name = func_name.split("__", 1)
+            try:
+                from utim_cli.mcp_client import mcp_manager
+                # When the server has no session, nothing is returned here and the
+                # name falls through to the TOOL_FUNCTIONS lookup below.
+                if server_name in mcp_manager.sessions:
+                    color = "#cba6f7"  # purple accent for MCP
+                    icon = "🔌"
+                    display_name = f"{server_name} ➔ {actual_tool_name}"
+                    display_arg = self._get_display_arg(func_name, arguments)
+                    # Print running indicator
+                    self.console.print(f"  {icon}  Calling MCP tool {display_name}...", style=f"dim {color}")
+                    # Call tool synchronously
+                    result = mcp_manager.call_tool(server_name, actual_tool_name, arguments)
+                    # Temporarily register metadata for render
+                    # NOTE(review): these entries are written to shared lookup tables
+                    # and are not removed again within this method.
+                    self.TOOL_DISPLAY_NAME[func_name] = display_name
+                    self.TOOL_ICON[func_name] = icon
+                    TOOL_COLOR[func_name] = color
+                    self._render_result(func_name, arguments, result, color)
+                    return result
+            except Exception as e:
+                self.console.print(Panel(
+                    Text(f"Error executing MCP tool {func_name}: {str(e)}", style="red"),
+                    border_style="red", padding=(0, 1),
+                ))
+                return f"Error executing MCP tool {func_name}: {str(e)}"
+        if func_name not in TOOL_FUNCTIONS:
+            self.console.print(Panel(
+                Text(f"Unknown tool: {func_name}", style="red"),
+                border_style="red", padding=(0, 1),
+            ))
+            return f"Unknown tool: {func_name}"
+        display_arg = self._get_display_arg(func_name, arguments)
+        # ── Capture before-state for /rewind tracking ─────────────────────────
+        # _rewind_entry stays None for tools that do not modify files.
+        _rewind_entry: Optional[Dict[str, Any]] = None
+        _modifying = ("write_file", "edit_file", "delete_file", "move_file")
+        if func_name in _modifying:
+            # Path lookup order: filepath, path, dst, destination.
+            path = arguments.get("filepath", arguments.get("path",
+                   arguments.get("dst", arguments.get("destination", ""))))
+            if func_name == "write_file":
+                # "before" is filled in below once the old content has been read.
+                _rewind_entry = {"action": func_name, "path": path, "before": None, "after": None}
+            elif func_name in ("edit_file", "delete_file"):
+                # An unreadable or missing file is recorded as "" (not None).
+                before = ""
+                try:
+                    with open(path, "r", encoding="utf-8") as _rf:
+                        before = _rf.read()
+                except Exception:
+                    pass
+                _rewind_entry = {"action": func_name, "path": path, "before": before, "after": None}
+            elif func_name == "move_file":
+                src = arguments.get("src", arguments.get("source", ""))
+                dst = arguments.get("dst", arguments.get("destination", ""))
+                # Capture source content before the move, and note whether src existed
+                src_content = None
+                src_existed = os.path.exists(src)
+                if src_existed:
+                    try:
+                        with open(src, "r", encoding="utf-8") as _sf:
+                            src_content = _sf.read()
+                    except Exception:
+                        # A source that cannot be read as UTF-8 text is recorded as
+                        # if it had not existed.
+                        src_existed = False
+                _rewind_entry = {
+                    "action": "move_file",
+                    "path": dst,            # destination path (where file will end up)
+                    "before_path": src,     # original source path
+                    "before": src_content,  # content of source before move (if existed)
+                    "before_existed": src_existed,
+                    "after": None,
+                }
+        # For write_file: read old content before overwriting so we can diff later
+        if func_name == "write_file":
+            filepath = arguments.get("filepath", arguments.get("path", ""))
+            try:
+                with open(filepath, "r", encoding="utf-8") as _f:
+                    arguments["_old_content"] = _f.read()
+            except Exception:
+                arguments["_old_content"] = None  # File didn't exist before (will trigger deletion on rewind)
+            if _rewind_entry:
+                _rewind_entry["before"] = arguments["_old_content"]
+        # write_file doesn't accept _old_content — strip private keys before calling
+        call_args = {k: v for k, v in arguments.items() if not k.startswith("_")}
+        # ── Manual-mode confirmation ──────────────────────────────────────────
+        # _user_confirmed is only set on the dialog path (a confirm function exists);
+        # the stdin fallback below leaves it False.
+        _user_confirmed = False
+        _CONFIRM_TOOLS = ("write_file", "edit_file", "delete_file", "move_file", "run_command")
+        if func_name in _CONFIRM_TOOLS:
+            _confirm_fn = self._get_confirm_fn()
+            if _confirm_fn is not None:
+                # Build compact diff lines for the dialog preview
+                _diff_preview: list = []
+                if func_name == "edit_file":
+                    repls = arguments.get("replacements")
+                    if repls and isinstance(repls, list):
+                        # Preview at most 3 replacements, 2 lines of each side.
+                        for r_idx, r in enumerate(repls[:3]):
+                            o_str = r.get("old_str", "") or ""
+                            n_str = r.get("new_str", "") or ""
+                            _diff_preview.append(f"--- Replacement #{r_idx+1} ---")
+                            for _l in o_str.splitlines()[:2]:
+                                _diff_preview.append(f"- {_l}")
+                            for _l in n_str.splitlines()[:2]:
+                                _diff_preview.append(f"+ {_l}")
+                        if len(repls) > 3:
+                            _diff_preview.append(f"... and {len(repls) - 3} more replacements")
+                    else:
+                        # Single old_str/new_str edit: up to 5 lines of each side.
+                        old_str = arguments.get("old_str", "") or ""
+                        new_str = arguments.get("new_str", "") or ""
+                        for _l in old_str.splitlines()[:5]:
+                            _diff_preview.append(f"- {_l}")
+                        for _l in new_str.splitlines()[:5]:
+                            _diff_preview.append(f"+ {_l}")
+                elif func_name == "write_file":
+                    import difflib as _dl
+                    old_c = arguments.get("_old_content") or ""
+                    new_c = arguments.get("content", "")
+                    # First 15 unified-diff lines, minus the ---/+++ file header lines.
+                    _diff_preview = [
+                        ln for ln in list(_dl.unified_diff(
+                            old_c.splitlines(), new_c.splitlines(), lineterm="",
+                        ))[:15]
+                        if not ln.startswith("---") and not ln.startswith("+++")
+                    ]
+                _decision = _confirm_fn(func_name, arguments, _diff_preview)
+                if _decision == "reject":
+                    return f"[User rejected {func_name}. Do NOT retry this action — ask the user what they want instead.]"
+                # 'allow' or 'allow_session' → user saw and approved the diff
+                _user_confirmed = True
+            else:
+                # Fallback to standard CLI stdin/stdout prompt if in interactive shell
+                # When stdin is not a TTY no prompt is shown and the tool simply runs.
+                import sys
+                from rich.prompt import Confirm
+                if sys.stdin.isatty():
+                    self.console.print(f"\n[bold yellow]⬡ Approval Required for {func_name}:[/bold yellow]")
+                    if func_name == "run_command":
+                        cmd = arguments.get("command") or arguments.get("commands")
+                        self.console.print(f"  Command: [bold white]{cmd}[/bold white]")
+                    elif func_name in ("write_file", "edit_file", "delete_file", "move_file"):
+                        filepath = arguments.get("filepath") or arguments.get("src") or arguments.get("dst")
+                        self.console.print(f"  File Action: [bold white]{func_name} on {filepath}[/bold white]")
+                        import difflib as _dl
+                        old_c = arguments.get("_old_content") or ""
+                        new_c = arguments.get("content", "")
+                        if func_name == "edit_file":
+                            repls = arguments.get("replacements")
+                            if repls and isinstance(repls, list):
+                                for r in repls[:2]:
+                                    self.console.print(f"    - Replace: [red]{repr(r.get('old_str'))}[/red] with [green]{repr(r.get('new_str'))}[/green]")
+                            else:
+                                old_c = arguments.get("old_str", "") or ""
+                                new_c = arguments.get("new_str", "") or ""
+                        # A diff is printed for write_file and for single-replacement edits.
+                        if func_name == "write_file" or (func_name == "edit_file" and not arguments.get("replacements")):
+                            diff_lines = list(_dl.unified_diff(
+                                old_c.splitlines(), new_c.splitlines(), lineterm=""
+                            ))[:10]
+                            for dl in diff_lines:
+                                if dl.startswith("+"):
+                                    self.console.print(f"    [green]{dl}[/green]")
+                                elif dl.startswith("-"):
+                                    self.console.print(f"    [red]{dl}[/red]")
+                                else:
+                                    self.console.print(f"    {dl}")
+                    if not Confirm.ask("Do you want to proceed?"):
+                        self.console.print("[bold red]✗ Execution cancelled by user.[/bold red]")
+                        return f"[User rejected {func_name}. Do NOT retry this action — ask the user what they want instead.]"
+        # ── Silent tools: skip all visual output ─────────────────────────────
+        # Their results are still recorded in self.tool_results.
+        _SILENT_TOOLS = {"manage_memory", "recall_experience", "store_experience"}
+        if func_name in _SILENT_TOOLS:
+            try:
+                result = TOOL_FUNCTIONS[func_name](**call_args)
+            except Exception as exc:
+                result = f"Error executing {func_name}: {exc}"
+            self.tool_results.append({
+                "func_name": func_name,
+                "arguments": arguments,
+                "result": str(result),
+                "color": color
+            })
+            return str(result)
+        # Print a single static "running" line so the user knows which tool
+        # is executing. We intentionally avoid Rich Live/Spinner here because
+        # it animates at 12 fps and conflicts with prompt_toolkit's own redraws,
+        # causing the double-spinner glitch and constant screen flicker.
+        _pre = Text()
+        _pre.append(f" {icon} ", style=color)
+        _pre.append(func_name, style=f"bold {color}")
+        if display_arg:
+            _pre.append(f"  {display_arg}", style="dim white")
+        _pre.append("  …", style="dim")
+        self.console.print(_pre)
+        try:
+            # Dynamically update the thinking indicator so it shows what tool is running
+            # NOTE(review): original_topic is assigned here but never read again in
+            # this method.
+            original_topic = "Thinking..."
+            try:
+                from utim_cli.utim import STATE
+                import os
+                original_topic = STATE.get("thinking_topic", "Thinking...")
+                if func_name == "run_command":
+                    cmd = arguments.get("command", display_arg)
+                    if len(cmd) > 30: cmd = cmd[:27] + "..."
+                    STATE["thinking_topic"] = f"Running: {cmd}"
+                elif func_name == "plan_project":
+                    STATE["thinking_topic"] = f"Architecting {arguments.get('plan_part', 'project')}..."
+                elif func_name == "search_web":
+                    q = arguments.get("query", "")
+                    if len(q) > 25: q = q[:22] + "..."
+                    STATE["thinking_topic"] = f"Searching web for '{q}'..."
+                elif func_name == "read_file":
+                    STATE["thinking_topic"] = f"Reading {os.path.basename(arguments.get('filepath', 'file'))}..."
+                elif func_name == "write_file":
+                    STATE["thinking_topic"] = f"Writing to {os.path.basename(arguments.get('filepath', 'file'))}..."
+                elif func_name in ("edit_file", "multi_replace_file_content"):
+                    STATE["thinking_topic"] = f"Editing {os.path.basename(arguments.get('filepath', 'file'))}..."
+                else:
+                    STATE["thinking_topic"] = f"Executing {func_name}..."
+            except Exception:
+                # Indicator updates are best-effort; a failure never blocks the tool.
+                pass
+            result = TOOL_FUNCTIONS[func_name](**call_args)
+            # Restore the indicator to evaluating logic
+            # If the STATE import above failed, the resulting NameError is swallowed
+            # here. This line is skipped entirely when the tool call raises.
+            try:
+                STATE["thinking_topic"] = "Evaluating tool results..."
+            except Exception:
+                pass
+        except Exception as exc:
+            # A failing tool is shown in a red panel and reported as a string; no
+            # rewind entry and no tool_results record is written for it.
+            self.console.print(Panel(
+                Text(str(exc), style="red"),
+                title=Text(f"✗  {func_name}", style=f"bold red"),
+                title_align="left",
+                border_style="red",
+                padding=(0, 1),
+            ))
+            return f"Error executing {func_name}: {exc}"
+        # Record after-state for rewind tracking
+        if _rewind_entry:
+            if func_name == "delete_file":
+                _rewind_entry["after"] = None  # file no longer exists
+            elif func_name == "move_file":
+                # After move: destination exists with content, source is gone
+                try:
+                    with open(_rewind_entry["path"], "r", encoding="utf-8") as _af:
+                        _rewind_entry["after"] = _af.read()
+                except Exception:
+                    _rewind_entry["after"] = None
+                # Note: we don't need to track source's after state because it's gone
+            else:
+                # write_file / edit_file: re-read the file to capture the new content.
+                try:
+                    with open(_rewind_entry["path"], "r", encoding="utf-8") as _af:
+                        _rewind_entry["after"] = _af.read()
+                except Exception:
+                    _rewind_entry["after"] = None
+            self._turn_changes.append(_rewind_entry)
+        # Render the result panel (compact if user already approved via dialog)
+        self._render_result(func_name, arguments, str(result), color, user_confirmed=_user_confirmed)
+        self.tool_results.append({
+            "func_name": func_name,
+            "arguments": arguments,
+            "result": str(result),
+            "color": color
+        })
+        return str(result)
+    def _execute_tools_parallel(self, tool_calls: List[Dict]) -> List[Tuple[Dict, str]]:
+        """Run a batch of tool calls, overlapping the ones marked parallel-safe.
+        Calls whose function name is in the parallel-safe set are submitted to a
+        thread pool (at most 8 workers) and are all finished before anything else
+        starts. Every other call (write_file, edit_file, delete_file, run_command,
+        move_file, compress_context, or any unlisted name) then runs one at a time
+        in input order, or is answered with "[Aborted by user]" once
+        ``self.cancel_event`` is set.
+        Returns one (tool_call, result) tuple per input call, in input order.
+        """
+        # Names treated as safe to run concurrently; membership here is the only
+        # thing that decides between the pooled path and the serial path.
+        parallel_safe = {"read_file", "list_directory", "query_codebase", "web_search",
+                         "project_res", "plan_project", "manage_todos", "manage_memory",
+                         "analyze_image", "analyze_blast_radius", "generate_image"}
+        pooled: list = []
+        serial: list = []
+        for position, call in enumerate(tool_calls):
+            name = call.get("function", {}).get("name", "")
+            bucket = pooled if name in parallel_safe else serial
+            bucket.append((position, call))
+        # One slot per input call so results land at their original positions.
+        outcomes = [None] * len(tool_calls)
+        if pooled:
+            self.console.print()
+            self.console.print(f"[dim cyan]⊘ Executing {len(pooled)} tool(s) in parallel...[/dim cyan]")
+            pool_size = min(len(pooled), 8)
+            with ThreadPoolExecutor(max_workers=pool_size) as pool:
+                pending = {}
+                for position, call in pooled:
+                    pending[pool.submit(self._execute_tool_timed, call)] = (position, call)
+                for finished in as_completed(pending):
+                    position, call = pending[finished]
+                    try:
+                        outcomes[position] = (call, finished.result())
+                    except Exception as e:
+                        name = call.get("function", {}).get("name", "unknown")
+                        outcomes[position] = (call, f"Error executing {name}: {e}")
+        for position, call in serial:
+            if self.cancel_event.is_set():
+                outcomes[position] = (call, "[Aborted by user]")
+            else:
+                # Publish the cancel event on the tools module before each serial call.
+                _tools_module._cancel_event = self.cancel_event
+                output = self._execute_tool_timed(call)
+                self._current_line_len = 0
+                outcomes[position] = (call, output)
+        return outcomes
+    # ── Rewind support ────────────────────────────────────────────────────────
+    @staticmethod
+    def _change_stats(changes: List[Dict]) -> str:
+        """Summarise *changes* as '<N> file(s) changed  +A  -D' Rich markup.
+        A line counts as added when it occurs in ``after`` but nowhere in
+        ``before``, and as removed in the opposite case. This is a set-membership
+        heuristic, not a real diff. Missing or None contents count as empty text.
+        Returns 'No files changed' when there is nothing to report.
+        """
+        added = 0
+        removed = 0
+        for change in changes:
+            old_lines = (change.get("before") or "").splitlines()
+            new_lines = (change.get("after") or "").splitlines()
+            old_seen = set(old_lines)
+            new_seen = set(new_lines)
+            added += sum(1 for line in new_lines if line not in old_seen)
+            removed += sum(1 for line in old_lines if line not in new_seen)
+        # Several changes to the same path count as one file.
+        file_count = len({change["path"] for change in changes})
+        summary = []
+        if file_count:
+            summary.append(f"{file_count} file{'s' if file_count != 1 else ''} changed")
+        if added:
+            summary.append(f"[bold green]+{added}[/bold green]")
+        if removed:
+            summary.append(f"[bold red]-{removed}[/bold red]")
+        if not summary:
+            return "No files changed"
+        return "  ".join(summary)
+    def rewind_single_turn(self, turn_idx: int, revert_code: bool = True,
+                           revert_msgs: bool = True) -> Dict[str, Any]:
+        """Rewind only a single turn (not all subsequent turns).
+        Args:
+            turn_idx: Zero-based position in ``self.turn_history``. Indices outside
+                ``0 <= turn_idx < len(turn_history)`` (negatives included) are
+                ignored and an empty result is returned.
+            revert_code: Undo the turn's recorded file changes, newest first.
+            revert_msgs: Cut the turn's messages out of ``self.messages``, shift the
+                message offsets of later turns, and move the turn from
+                ``turn_history`` onto ``redo_history``.
+        Returns:
+            A dict with ``reverted`` (paths restored), ``errors`` ("path: reason"
+            strings) and, when messages were removed, ``msgs_removed``.
+        """
+        res: Dict[str, Any] = {"reverted": [], "errors": []}
+        # A negative index would pick a turn counted from the end while the offset
+        # fix-up below starts at turn_idx + 1 <= 0, shifting turns that precede the
+        # removed one. Treat every out-of-range index as "nothing to do".
+        if not 0 <= turn_idx < len(self.turn_history):
+            return res
+        turn = self.turn_history[turn_idx]
+        if revert_code:
+            # Revert code changes for this turn only, newest change first
+            for ch in reversed(turn["changes"]):
+                path = ch["path"]
+                try:
+                    if ch["action"] == "move_file":
+                        # Move the file back from its destination to the old source.
+                        src = ch["before_path"]
+                        if os.path.exists(path):
+                            os.makedirs(os.path.dirname(os.path.abspath(src)), exist_ok=True)
+                            shutil.move(path, src)
+                        res["reverted"].append(f"{path} → {src}")
+                    elif ch.get("before") is None:
+                        # File was created (or didn't exist) — delete it if it exists now
+                        if os.path.exists(path):
+                            os.remove(path)
+                        res["reverted"].append(path)
+                    else:
+                        # Restore the recorded pre-change content.
+                        os.makedirs(
+                            os.path.dirname(os.path.abspath(path)), exist_ok=True
+                        )
+                        with open(path, "w", encoding="utf-8") as wf:
+                            wf.write(ch["before"])
+                        res["reverted"].append(path)
+                except Exception as e:
+                    # Keep going: one failed path must not block the others.
+                    res["errors"].append(f"{path}: {e}")
+        if revert_msgs:
+            # Remove messages for this turn only
+            msg_start = turn["msg_start"]
+            msg_end = turn["msg_end"]
+            msgs_removed = msg_end - msg_start
+            res["msgs_removed"] = msgs_removed
+            self.messages = self.messages[:msg_start] + self.messages[msg_end:]
+            # Later turns now sit msgs_removed positions earlier in self.messages.
+            for later in self.turn_history[turn_idx + 1:]:
+                later["msg_start"] -= msgs_removed
+                later["msg_end"] -= msgs_removed
+            # Remove this turn from history and add it to redo history
+            undone_turn = self.turn_history.pop(turn_idx)
+            if not hasattr(self, "redo_history"):
+                self.redo_history = []
+            self.redo_history.append(undone_turn)
+        return res
+    def rewind_to_turn(self, turn_idx: int, revert_code: bool = True,
+                       revert_msgs: bool = True) -> Dict[str, Any]:
+        """Revert everything from turn_idx onward.
+        Args:
+            turn_idx: First turn to rewind; it and every later turn are affected.
+            revert_code: Undo the recorded file changes of those turns.
+            revert_msgs: Truncate ``self.messages`` at the first rewound turn's
+                ``msg_start`` and move the rewound turns onto ``redo_history``.
+        Returns:
+            A dict with ``reverted`` (each restored path listed once), ``errors``
+            ("path: reason" strings) and, when messages were removed,
+            ``msgs_removed``.
+        """
+        turns = self.turn_history[turn_idx:]
+        res: Dict[str, Any] = {"reverted": [], "errors": []}
+        if not turns:
+            return res
+        if revert_code:
+            # Undo every change, newest first. Skipping older changes to a path that
+            # was already handled would leave a file touched several times at the
+            # state before its LAST change instead of the state before turn_idx.
+            reported: set = set()
+            for turn in reversed(turns):
+                for ch in reversed(turn["changes"]):
+                    path = ch["path"]
+                    try:
+                        if ch["action"] == "move_file":
+                            # Move the file back from its destination to the old source.
+                            src = ch["before_path"]
+                            if os.path.exists(path):
+                                os.makedirs(os.path.dirname(os.path.abspath(src)), exist_ok=True)
+                                shutil.move(path, src)
+                            label = f"{path} → {src}"
+                        elif ch.get("before") is None:
+                            # File was created (or didn't exist) — delete it if it exists now
+                            if os.path.exists(path):
+                                os.remove(path)
+                            label = path
+                        else:
+                            # Restore the recorded pre-change content.
+                            os.makedirs(
+                                os.path.dirname(os.path.abspath(path)), exist_ok=True
+                            )
+                            with open(path, "w", encoding="utf-8") as wf:
+                                wf.write(ch["before"])
+                            label = path
+                    except Exception as e:
+                        # Keep going: one failed path must not block the others.
+                        res["errors"].append(f"{path}: {e}")
+                        continue
+                    # Report each restored path once even if it was reverted in steps.
+                    if label not in reported:
+                        reported.add(label)
+                        res["reverted"].append(label)
+        if revert_msgs:
+            target = turns[0]["msg_start"]
+            res["msgs_removed"] = len(self.messages) - target
+            self.messages = self.messages[:target]
+            # Push all popped turns onto redo_history in reverse order (so popping redos in original forward order!)
+            if not hasattr(self, "redo_history"):
+                self.redo_history = []
+            for t in reversed(turns):
+                self.redo_history.append(t)
+            self.turn_history = self.turn_history[:turn_idx]
+        return res
+    def undo_last_turn(self) -> Dict[str, Any]:
+        """Roll back the most recent turn, both its messages and its file changes.
+        Delegates to ``rewind_single_turn`` for the last entry of
+        ``self.turn_history`` and then persists the message list. With an empty
+        history nothing is persisted and an error entry is returned instead.
+        """
+        if self.turn_history:
+            newest = len(self.turn_history) - 1
+            outcome = self.rewind_single_turn(newest, revert_code=True, revert_msgs=True)
+            self._persist_messages()
+            return outcome
+        return {"reverted": [], "errors": ["No turns to undo."]}
+    def redo_last_undone_turn(self) -> Dict[str, Any]:
+        """Redo the most recently undone turn.
+        Pops the last entry of ``self.redo_history``, re-applies its recorded file
+        changes in their original order, appends the turn's stored ``messages`` to
+        ``self.messages``, puts the turn back on ``self.turn_history`` with fresh
+        message offsets, and persists.
+        Returns:
+            A dict with ``redone_code`` (what was re-applied) and ``errors``
+            ("path: reason" strings). When there is nothing to redo, ``redone_code``
+            is empty, ``errors`` explains why, and the legacy ``reverted`` key is
+            still present for callers that read it.
+        """
+        if not getattr(self, "redo_history", None):
+            # Same keys as the success path, so callers can read "redone_code"
+            # without first checking which path was taken.
+            return {"redone_code": [], "reverted": [], "errors": ["No undone turns to redo."]}
+        turn = self.redo_history.pop()
+        res: Dict[str, Any] = {"redone_code": [], "errors": []}
+        # Redo code changes
+        for ch in turn.get("changes", []):
+            path = ch["path"]
+            action = ch["action"]
+            try:
+                if action == "move_file":
+                    # Move source to destination again if the source is present.
+                    src = ch["before_path"]
+                    if os.path.exists(src):
+                        os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)
+                        shutil.move(src, path)
+                    res["redone_code"].append(f"{src} → {path}")
+                elif ch.get("after") is None:
+                    # File was deleted
+                    if os.path.exists(path):
+                        os.remove(path)
+                    res["redone_code"].append(f"deleted {path}")
+                else:
+                    # File was written/edited
+                    os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)
+                    with open(path, "w", encoding="utf-8") as wf:
+                        wf.write(ch["after"])
+                    res["redone_code"].append(path)
+            except Exception as e:
+                # Keep going: one failed path must not block the others.
+                res["errors"].append(f"{path}: {e}")
+        # Redo messages: append them back
+        msg_start = len(self.messages)
+        self.messages.extend(turn.get("messages", []))
+        # Reconstruct the turn entry and append back to turn_history
+        turn["msg_start"] = msg_start
+        turn["msg_end"] = len(self.messages)
+        self.turn_history.append(turn)
+        # Persist messages and redo history to DB
+        self._persist_messages()
+        return res
+    def redo_up_to_turn(self, redo_idx: int) -> Dict[str, Any]:
+        """Redo all undone turns from index 0 up to redo_idx (inclusive).
+        The selected entries are removed from ``self.redo_history`` and processed in
+        list order: each turn's recorded file changes are re-applied, its stored
+        ``messages`` are appended to ``self.messages``, and the turn is put back on
+        ``self.turn_history`` with fresh message offsets. Messages are persisted
+        once at the end.
+        Returns:
+            A dict with ``redone_code`` (what was re-applied) and ``errors``
+            ("path: reason" strings). When there is nothing to redo or ``redo_idx``
+            is past the end, ``redone_code`` is empty, ``errors`` explains why, and
+            the legacy ``reverted`` key is still present for callers that read it.
+        """
+        pending = getattr(self, "redo_history", None)
+        if not pending or redo_idx >= len(pending):
+            # Same keys as the success path, so callers can read "redone_code"
+            # without first checking which path was taken.
+            return {"redone_code": [], "reverted": [], "errors": ["No undone turns to redo."]}
+        # Get the slice of turns to redo
+        turns_to_redo = pending[:redo_idx + 1]
+        # Keep the remaining undone turns
+        self.redo_history = pending[redo_idx + 1:]
+        res: Dict[str, Any] = {"redone_code": [], "errors": []}
+        # Redo them in order
+        for turn in turns_to_redo:
+            # Redo code changes
+            for ch in turn.get("changes", []):
+                path = ch["path"]
+                action = ch["action"]
+                try:
+                    if action == "move_file":
+                        # Move source to destination again if the source is present.
+                        src = ch["before_path"]
+                        if os.path.exists(src):
+                            os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)
+                            shutil.move(src, path)
+                        res["redone_code"].append(f"{src} → {path}")
+                    elif ch.get("after") is None:
+                        # File was deleted by the original change
+                        if os.path.exists(path):
+                            os.remove(path)
+                        res["redone_code"].append(f"deleted {path}")
+                    else:
+                        # File was written/edited
+                        os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)
+                        with open(path, "w", encoding="utf-8") as wf:
+                            wf.write(ch["after"])
+                        res["redone_code"].append(path)
+                except Exception as e:
+                    # Keep going: one failed path must not block the others.
+                    res["errors"].append(f"{path}: {e}")
+            # Redo messages: append them back
+            msg_start = len(self.messages)
+            self.messages.extend(turn.get("messages", []))
+            # Reconstruct the turn entry and append back to turn_history
+            turn["msg_start"] = msg_start
+            turn["msg_end"] = len(self.messages)
+            self.turn_history.append(turn)
+        self._persist_messages()
+        return res
+    # ── Context compression ──────────────────────────────────────────────────
+    @staticmethod
+    def _estimate_tokens(obj) -> int:
+        """Rough token count: 1 token ≈ 4 chars of serialised JSON."""
+        try:
+            return len(json.dumps(obj, ensure_ascii=False)) // 4
+        except Exception:
+            return len(str(obj)) // 4
+    def _get_dynamic_threshold(self) -> int:
+        """Get dynamic compression threshold based on current model's context window."""
+        try:
+            from .server.models import get_model
+            model_entry = get_model(self.model_id)
+            return _get_compression_threshold(self.model_id, model_entry.context_window)
+        except Exception:
+            # Fallback to safe default if model registry unavailable
+            return 65_000
+    def _update_model_threshold(self, new_model_id: str) -> None:
+        """Update compression threshold when model changes."""
+        self.model_id = new_model_id
+        self._compression_threshold = self._get_dynamic_threshold()
+    def _trigger_bg_summarization(self) -> None:
+        """Background thread to compress turns older than KEEP_FULL_TURNS into a rolling LLM summary.
+        Summary state (``_llm_summary``, ``_summarized_turns``, ``_summarizing``)
+        is created lazily on first call.  The method returns immediately when a
+        summarization is already flagged as running, or when the number of
+        not-yet-summarized turns does not exceed KEEP_FULL_TURNS.  Otherwise it
+        starts a daemon thread that merges the oldest unsummarized turns into
+        ``self._llm_summary`` and advances ``self._summarized_turns``.
+        Failures are reported on stderr and leave the existing summary untouched.
+        """
+        # Lazily create the summary state the first time this is called.
+        if not hasattr(self, "_llm_summary"):
+            self._llm_summary = ""
+            self._summarized_turns = 0
+            self._summarizing = False
+        # A previous summarization is still flagged as running: do not start another.
+        if self._summarizing:
+            return
+        completed = self.turn_history
+        unsummarized = len(completed) - self._summarized_turns
+        # Only summarize if there are turns falling OUTSIDE the KEEP_FULL_TURNS window
+        if unsummarized > KEEP_FULL_TURNS:
+            turns_to_summarize = unsummarized - KEEP_FULL_TURNS
+            turns_slice = completed[self._summarized_turns : self._summarized_turns + turns_to_summarize]
+            # Capture the summary as it is now; the worker merges into this value.
+            current_summary = self._llm_summary
+            self._summarizing = True
+            def _summarize_task():
+                try:
+                    text_parts = []
+                    for t in turns_slice:
+                        req = t.get("user_msg", "").strip()
+                        c_str = self._change_stats(t.get("changes", []))
+                        # BUG 6 FIX: Read from the stored per-turn message snapshot
+                        # instead of slicing self.messages with potentially stale
+                        # absolute indices (which shift whenever _compress_intra_turn
+                        # rewrites self.messages).
+                        conclusion = ""
+                        stored_msgs = t.get("messages", [])
+                        if stored_msgs:
+                            # Use the snapshot saved at turn-end
+                            source_msgs = stored_msgs
+                        else:
+                            # Fallback: try live slice under lock
+                            with self._messages_lock:
+                                source_msgs = list(self.messages[t["msg_start"]: t["msg_end"]])
+                        # The loop keeps overwriting, so the LAST assistant message
+                        # with content becomes the turn's conclusion.
+                        for m in source_msgs:
+                            if m.get("role") == "assistant" and m.get("content"):
+                                conclusion = m["content"].strip()
+                        text_parts.append(f"User: {req}\nChanges: {c_str}\nAssistant: {conclusion}\n---")
+                    raw_turns = "\n".join(text_parts)
+                    sys_prompt = "You are a highly analytical AI core memory compressor. Your job is to compress conversational history into a dense, highly technical narrative paragraph. Retain all factual details, architectural decisions, file paths, and current project state. Do not use conversational filler."
+                    # Merge into the existing summary when there is one, otherwise start fresh.
+                    if current_summary:
+                        user_prompt = f"Existing Memory Summary:\n{current_summary}\n\nNew Interactions to Merge:\n{raw_turns}\n\nUpdate the memory summary to incorporate these new interactions seamlessly. Return ONLY the new summary."
+                    else:
+                        user_prompt = f"New Interactions:\n{raw_turns}\n\nCreate a dense memory summary of these interactions. Return ONLY the summary."
+                    # Use fallback model system from context_pruner
+                    from utim_cli.context_pruner import _call_compression_model_with_fallback
+                    new_summary = _call_compression_model_with_fallback(
+                        messages=[
+                            {"role": "system", "content": sys_prompt},
+                            {"role": "user", "content": user_prompt}
+                        ],
+                        llm_key=self._local_api_key,
+                        max_tokens=2000,
+                        primary_model=self.model_id
+                    )
+                    # Only advance the summarized-turn counter when a summary was
+                    # actually produced, so failed turns are retried on a later call.
+                    if new_summary:
+                        self._llm_summary = new_summary
+                        self._summarized_turns += len(turns_slice)
+                    else:
+                        print(f"[WARNING] Context summarization returned None - all fallback models failed", file=sys.stderr)
+                except Exception as e:
+                    print(f"[ERROR] Context summarization failed: {e}", file=sys.stderr)
+                finally:
+                    # Always clear the in-progress flag so later calls can retry.
+                    self._summarizing = False
+            import threading
+            # Daemon thread: it will not keep the process alive at exit.
+            threading.Thread(target=_summarize_task, daemon=True).start()
+    def _get_send_messages(self, turn_msg_start: Optional[int] = None) -> List[Dict]:
+        """Return the context payload, injecting the rolling LLM summary (passive memory)
+        and a dynamic active context checklist.
+        Args:
+            turn_msg_start: The absolute index in self.messages where the current
+                user turn begins.  Passing this explicitly avoids relying on the
+                stale self._current_turn_start class attribute, which can point to
+                the wrong position after _compress_intra_turn rewrites self.messages.
+        """
+        # BUG 1 FIX: Prefer the caller-supplied index; fall back to the cached
+        # attribute only when called from paths that haven't been updated yet.
+        effective_turn_start = turn_msg_start if turn_msg_start is not None else self._current_turn_start
+        with self._messages_lock:
+            messages_snapshot = list(self.messages)
+        system_msg = dict(messages_snapshot[0])
+        # Extract current user prompt to perform dynamic keyword-based RAG search
+        user_prompt = ""
+        if effective_turn_start is not None and effective_turn_start < len(messages_snapshot):
+            for m in messages_snapshot[effective_turn_start:]:
+                if m.get("role") == "user":
+                    user_prompt = m.get("content", "")
+                    break
+        # Reconstruct system prompt with prompt-relevant experiences
+        task_elapsed = int(time.time() - getattr(self, "task_start_time", time.time()))
+        task_iter = getattr(self, "current_iteration", 0)
+        try:
+            system_msg["content"] = get_system_prompt(user_prompt, task_iter, task_elapsed, self.turn_history)
+        except Exception:
+            try:
+                system_msg["content"] = get_system_prompt(user_prompt, turn_history=self.turn_history)
+            except Exception:
+                pass
+        # Inject current timestamp so the model lives in the present
+        from datetime import datetime
+        current_ts = datetime.now().strftime("%A, %B %d, %Y at %I:%M %p")
+        system_msg["content"] = f"Current date/time: {current_ts}\n\n" + system_msg["content"]
+        # Exclude duration, temporal logs, consciousness state, and milestone reflections from message payload to keep it as a pure coding agent.
+        try:
+            from utim_cli.utim import STATE
+            if STATE.get("planning_mode", True):
+                system_msg["content"] += (
+                    "\n\n### PLANNING MODE ACTIVE (User Review Required):\n"
+                    "Before implementing any architectural, complex, or multi-file changes, you MUST:\n"
+                    "1. Formulate a detailed technical plan (e.g. using `plan_project` or outlining it in text).\n"
+                    "2. Present this plan to the user clearly and ask for explicit approval, modification, or rejection.\n"
+                    "3. DO NOT modify the codebase or run mutating terminal commands until the user approves the plan."
+                )
+            else:
+                system_msg["content"] += (
+                    "\n\n### AUTONOMOUS MODE ACTIVE (Direct Execution):\n"
+                    "You are operating in fully autonomous mode.\n"
+                    "1. Formulate a technical plan internally (using `plan_project` or by tracking subtasks).\n"
+                    "2. Proceed directly to implement the code and execute actions without presenting the plan or waiting for user approval."
+                )
+        except Exception:
+            pass
+        # ── 1. ACTIVE CONTEXT CHECKLIST (Checklist-based Focus) ────────────────
+        active_context = "\n\n### ACTIVE CONTEXT CHECKLIST:"
+        # A. Find active file path dynamically from recent messages
+        active_file = ""
+        for msg in reversed(messages_snapshot):
+            if msg.get("role") == "assistant" and msg.get("tool_calls"):
+                for tc in msg["tool_calls"]:
+                    func = tc.get("function", {})
+                    if func.get("name") in ("write_file", "edit_file", "read_file"):
+                        try:
+                            args = json.loads(func.get("arguments", "{}"))
+                            active_file = args.get("filepath", args.get("path", ""))
+                            if active_file:
+                                break
+                        except Exception:
+                            pass
+                if active_file:
+                    break
+        if active_file:
+            active_context += f"\n- **Current File**: {active_file}"
+        # B. Get latest command status from the most recent run_command output
+        last_command_output = ""
+        for msg in reversed(messages_snapshot):
+            if msg.get("role") == "tool" and msg.get("name") == "run_command":
+                content = msg.get("content", "")
+                lines = [l.strip() for l in content.splitlines() if l.strip()]
+                if lines:
+                    exit_code_line = next((l for l in lines if "exit_code:" in l), "")
+                    err_lines = [l for l in lines if "error" in l.lower() or "failed" in l.lower() or "exception" in l.lower()]
+                    last_command_output = f"Command exit: {exit_code_line or 'unknown'}"
+                    if err_lines:
+                        last_command_output += f" | Errors: {'; '.join(err_lines[:2])}"
+                    else:
+                        last_command_output += f" | Output: {'; '.join(lines[:2])}"
+                    break
+        if last_command_output:
+            active_context += f"\n- **Latest Command Status**: {last_command_output}"
+        # C. Read active todos from todos.json
+        active_todo_checklist = ""
+        todo_file = ".utim_tmp/todos.json"
+        if os.path.exists(todo_file):
+            try:
+                with open(todo_file, "r", encoding="utf-8") as f:
+                    todos = json.load(f)
+                if todos:
+                    active_todo_checklist = "\n### ACTIVE TASK CHECKLIST:\n"
+                    for tid, t in todos.items():
+                        status_mark = "[x]" if t.get("status") == "done" else "[ ]"
+                        active_todo_checklist += f"{status_mark} {t.get('description', '')}\n"
+            except Exception:
+                pass
+        if active_todo_checklist:
+            active_context += active_todo_checklist
+        else:
+            # BUG 7 FIX: Guard against effective_turn_start being out-of-bounds
+            # after _compress_intra_turn shortens self.messages.  Without this
+            # guard the IndexError is silently swallowed and the model gets no
+            # active objective in its system prompt for the rest of the turn.
+            if 0 < effective_turn_start < len(messages_snapshot):
+                obj = (messages_snapshot[effective_turn_start].get("content") or "")[:200]
+                active_context += f"\n- **Active Objective**: {obj}..."
+        system_msg["content"] += active_context
+        # ── 2. PASSIVE MEMORY SUMMARY (Whole memory rollup) ───────────────────
+        if getattr(self, "_llm_summary", ""):
+            system_msg["content"] += "\n\n### PASSIVE MEMORY SUMMARY (Older events):\n" + self._llm_summary
+        completed = self.turn_history
+        n_full = max(0, len(completed) - getattr(self, "_summarized_turns", 0))
+        recent = completed[-n_full:] if n_full > 0 else []
+        if recent:
+            rec_slice = messages_snapshot[recent[0]["msg_start"]: effective_turn_start]
+        else:
+            rec_slice = messages_snapshot[1: effective_turn_start]
+        cur_msgs = messages_snapshot[effective_turn_start:]
+        return [system_msg] + rec_slice + cur_msgs
+    # ── Intra-turn context compression ──────────────────────────────────────
+    def _compress_intra_turn(self, turn_msg_start: int, instruction: str = "") -> None:
+        """Stable, synchronous compression of the current turn's tool calls if it gets too long or if requested.
+        Uses importance-weighted pruning (score >= 0.75 = preserved verbatim) so that
+        high-signal context such as file reads and error messages is carried forward
+        in full, while low-signal chatter is condensed by a compression model.
+        sanitize_message_sequence() is applied on the rebuilt list to keep
+        assistant/tool-call pairs structurally intact.
+        """
+        current_turn_msgs = self.messages[turn_msg_start:]
+        # We need at least 5 messages to justify compression
+        if len(current_turn_msgs) < 5:
+            return
+        # If no explicit instruction is given, check multiple conditions for proactive compression:
+        # 1. Token estimate > dynamic threshold (based on model's context window)
+        # 2. Message count > dynamic limit (scaled based on model's context window)
+        threshold = getattr(self, "_compression_threshold", 65_000)
+        if not instruction:
+            est_tokens = self._estimate_tokens(self.messages)
+            msg_count = len(self.messages)
+            # Determine dynamic message count limit
+            try:
+                from .server.models import get_model
+                model_entry = get_model(self.model_id)
+                context_window = model_entry.context_window
+            except Exception:
+                context_window = 128_000
+            if context_window <= 64_000:
+                msg_count_limit = 35
+            elif context_window <= 300_000:
+                msg_count_limit = 100
+            else:
+                msg_count_limit = 200
+            if est_tokens < threshold and msg_count < msg_count_limit:
+                return
+        tail_keep = 8
+        # Build a compact state anchor so compression never drops the active objective.
+        latest_user = ""
+        latest_assistant = ""
+        latest_tool_plan = ""
+        for m in reversed(current_turn_msgs):
+            if not latest_user and m.get("role") == "user":
+                latest_user = (m.get("content", "") or "")[:1200]
+            if m.get("role") == "assistant":
+                if not latest_assistant and (m.get("content", "") or "").strip():
+                    latest_assistant = (m.get("content", "") or "")[:1200]
+                if not latest_tool_plan and m.get("tool_calls"):
+                    tc_names = [tc.get("function", {}).get("name", "") for tc in m.get("tool_calls", [])]
+                    latest_tool_plan = ", ".join([n for n in tc_names if n])[:500]
+            if latest_user and latest_assistant and latest_tool_plan:
+                break
+        state_anchor = {
+            "role": "user",
+            "content": (
+                "### SYSTEM NOTE: TASK STATE ANCHOR (MUST PRESERVE)\n"
+                f"Current user objective (latest):\n{latest_user or '[not found]'}\n\n"
+                f"Most recent assistant intent/progress:\n{latest_assistant or '[not found]'}\n\n"
+                f"Most recent pending/attempted tool actions:\n{latest_tool_plan or '[none]'}\n\n"
+                "Continue from this exact objective. Do not restart completed steps."
+            ),
+        }
+        # ── Unified importance-weighted compression ──────────────────────────
+        # Threshold 0.75: messages scoring at or above are kept verbatim so that
+        # file-read payloads, error traces, and key facts survive into context.
+        # Everything below goes to the compression model for condensation.
+        try:
+            from utim_cli.context_pruner import score_message_importance, sanitize_message_sequence
+            candidate_msgs = current_turn_msgs[1:-tail_keep]
+            scored_msgs = [(score_message_importance(m), m) for m in candidate_msgs]
+            # Split into verbatim-keep (high) and compress (low) pools
+            preserved_messages = [m for score, m in scored_msgs if score >= 0.75]
+            to_summarize      = [m for score, m in scored_msgs if score <  0.75]
+            # Cap verbatim-keep to 10 to prevent bloat; extras go to compress pool
+            if len(preserved_messages) > 10:
+                sorted_by_val = sorted(enumerate(scored_msgs), key=lambda x: (x[1][0], x[0]), reverse=True)
+                keep_indices  = {idx for idx, _ in sorted_by_val[:10]}
+                new_preserved, extra = [], []
+                for idx, (score, m) in enumerate(scored_msgs):
+                    (new_preserved if idx in keep_indices else extra).append(m)
+                preserved_messages = new_preserved
+                to_summarize.extend(extra)
+            if to_summarize:
+                text_parts = []
+                import re as _re
+                for m in to_summarize:
+                    role = m.get("role", "")
+                    if role == "assistant":
+                        content = _re.sub(
+                            r"<think(?:ing)?>.*?</think(?:ing)?>",
+                            "[thought process]",
+                            (m.get("content", "") or ""),
+                            flags=_re.DOTALL,
+                        ).strip()
+                        tcs    = m.get("tool_calls", [])
+                        tc_str = ", ".join(
+                            f"{tc['function']['name']}({tc['function'].get('arguments', '')})"
+                            for tc in tcs
+                        )
+                        if content or tc_str:
+                            text_parts.append(f"Action: {content}\nTools Called: {tc_str}")
+                    elif role == "tool":
+                        name    = m.get("name", "tool")
+                        content = m.get("content", "")
+                        # FIX #3: Intelligent truncation - preserve critical parts
+                        # Detect if content has critical markers that should never be truncated
+                        critical_patterns = [
+                            r"error", r"exception", r"failed", r"failure",
+                            r"traceback", r"undefined", r"not found",
+                            r"def \w+", r"class \w+", r"import ",
+                            r'"[^"]*":\s*', r"'[^']*':\s*",  # Key-value pairs
+                        ]
+                        # Check if this is critical content that needs full preservation
+                        is_critical = any(re.search(p, content, re.I) for p in critical_patterns)
+                        # Higher char limit for critical content, but still respect reasonable bounds
+                        if len(content) > 1500:
+                            if is_critical:
+                                # For critical content, try to find and preserve the key part
+                                # Look for error lines, function definitions, etc.
+                                lines = content.split('\n')
+                                critical_lines = []
+                                for line in lines:
+                                    if any(re.search(p, line, re.I) for p in critical_patterns):
+                                        critical_lines.append(line)
+                                if critical_lines:
+                                    # Preserve the critical lines plus context
+                                    content = (content[:800] +
+                                              "\n... [critical excerpts preserved] ...\n" +
+                                              "\n".join(critical_lines[:20]))
+                                else:
+                                    content = content[:1500] + "... [truncated]"
+                            else:
+                                content = content[:1200] + "... [truncated]"
+                        text_parts.append(f"Result of {name}: {content}")
+                if text_parts:
+                    if instruction:
+                        self.console.print("\n[dim magenta]⊘ Agent requested context compression: condensing intermediate tool logs...[/dim magenta]")
+                    else:
+                        est_tokens = self._estimate_tokens(self.messages)
+                        msg_count  = len(self.messages)
+                        triggers   = []
+                        if est_tokens >= threshold:
+                            triggers.append(f"tokens ~{est_tokens}")
+                        if msg_count >= 50:
+                            triggers.append(f"msg count {msg_count}")
+                        trigger_str = ", ".join(triggers) if triggers else "context"
+                        self.console.print(f"\n[dim magenta]⊘ Proactive compression (high {trigger_str}): condensing intermediate tool logs...[/dim magenta]")
+                    raw_log = "\n---\n".join(text_parts)
+                    sys_prompt = (
+                        "You are an internal context stabilizer for an autonomous AI agent.\n"
+                        "The agent has been running tool calls in a loop. Summarize intermediate steps "
+                        "while preserving strict technical continuity.\n"
+                        "Required sections:\n"
+                        "1) GOAL\n2) COMPLETED\n3) IN_PROGRESS\n4) BLOCKERS/FAILURES\n5) NEXT_ACTION\n"
+                        "CRITICAL: Preserve ALL specific file paths, line numbers, variable names, "
+                        "error messages, and facts learned from file reads verbatim. No filler.\n\n"
+                        "HALLUCINATION PREVENTION RULES:\n"
+                        "- Do NOT add facts not present in the source logs\n"
+                        "- Do NOT make up file paths, variable names, or error messages\n"
+                        "- Do NOT invent technical details not explicitly stated\n"
+                        "- When in doubt, use verbatim quotes from the source\n"
+                        "- If you cannot determine a fact, state 'not specified' rather than guessing"
+                    )
+                    if instruction:
+                        sys_prompt += (
+                            f"\n\nCRITICAL PRESERVATION RULES FROM THE AGENT:\n{instruction}\n"
+                            "You MUST strictly preserve these facts, constraints, and code snippets."
+                        )
+                    from utim_cli.context_pruner import _call_compression_model_with_fallback
+                    # Pass raw_log for deduplication tracking
+                    summary = _call_compression_model_with_fallback(
+                        messages=[
+                            {"role": "system", "content": sys_prompt},
+                            {"role": "user",   "content": f"Intermediate Logs to Compress:\n{raw_log}"},
+                        ],
+                        llm_key=self._local_api_key,
+                        max_tokens=1500,
+                        content_hint=raw_log[:1000],  # Use first 1000 chars for dedup hash
+                        primary_model=self.model_id
+                    )
+                    if summary:
+                        summary_msg = {
+                            "role": "user",
+                            "content": (
+                                "### SYSTEM NOTE: INTERMEDIATE STEPS COMPRESSED\n"
+                                "The following earlier steps in this task were compressed to save memory:\n"
+                                f"{summary}\n\n"
+                                "Continue from IN_PROGRESS/NEXT_ACTION and finish unresolved work."
+                            ),
+                        }
+                        recent_tail = current_turn_msgs[-tail_keep:]
+                        # BUG 3 FIX: Use object identity (id()) instead of value
+                        # equality (==) for deduplication.  Dict value-equality
+                        # was silently dropping preserved_messages entries whose
+                        # content happened to match a message in recent_tail
+                        # (e.g. repeated read_file of the same file).
+                        merged_tail = []
+                        seen_ids: set = set()
+                        for msg in preserved_messages + recent_tail:
+                            if id(msg) not in seen_ids:
+                                seen_ids.add(id(msg))
+                                merged_tail.append(msg)
+                        with self._messages_lock:
+                            new_messages = (
+                                self.messages[:turn_msg_start]
+                                + [current_turn_msgs[0], state_anchor, summary_msg]
+                                + merged_tail
+                            )
+                            self.messages = sanitize_message_sequence(new_messages)
+                            # BUG 1 FIX: After rewriting self.messages the
+                            # turn_msg_start boundary is still valid (we only
+                            # shrank the current-turn slice, not the prefix).
+                            # Refresh _current_turn_start so _get_send_messages
+                            # slices at the correct position on the next call.
+                            self._current_turn_start = turn_msg_start
+                        return
+                    else:
+                        self.console.print(
+                            "\n[dim red]⊘ Warning: Context compression failed "
+                            "(no response from fallback models). Continuing with full context.[/dim red]"
+                        )
+            else:
+                # Nothing to compress — just sanitize the existing list
+                self.messages = sanitize_message_sequence(self.messages)
+                return
+        except Exception as e:
+            self.console.print(
+                f"\n[dim red]⊘ Warning: Importance-weighted compression failed ({e}). "
+                "Continuing with full context.[/dim red]"
+            )
+    # ── Cleanup utilities ───────────────────────────────────────────────────
+    def _cleanup_tmp_folder(self, keep_current_session: bool = True) -> int:
+        """Clean up the .utim_tmp folder to remove files from previous runs.
+        Args:
+            keep_current_session: If True, preserve files from the current session.
+        Returns:
+            Number of files removed.
+        """
+        import glob
+        tmp_dir = ".utim_tmp"
+        if not os.path.exists(tmp_dir):
+            return 0
+        removed_count = 0
+        errors = []
+        # Define cleanup rules - files/patterns to remove
+        cleanup_patterns = [
+            # Research files older than 1 day
+            (os.path.join(tmp_dir, "research"), "dir"),
+            # Plan files are kept for /rewind functionality
+            # But we can clean up very old ones
+        ]
+        # Remove old research directory contents
+        research_dir = os.path.join(tmp_dir, "research")
+        if os.path.exists(research_dir):
+            try:
+                # Remove files older than 1 day
+                now = time.time()
+                for root, dirs, files in os.walk(research_dir):
+                    for f in files:
+                        fp = os.path.join(root, f)
+                        try:
+                            if os.path.getmtime(fp) < now - 86400:  # 1 day old
+                                os.remove(fp)
+                                removed_count += 1
+                        except OSError as e:
+                            errors.append(str(e))
+            except Exception as e:
+                errors.append(str(e))
+        # Clean up old reflection files (keep last 10)
+        reflection_file = os.path.join(tmp_dir, "task_reflections.json")
+        if os.path.exists(reflection_file):
+            try:
+                import json
+                with open(reflection_file, 'r') as f:
+                    reflections = json.load(f)
+                if len(reflections) > 50:
+                    # Keep only the most recent 50
+                    reflections = reflections[-50:]
+                    with open(reflection_file, 'w') as f:
+                        json.dump(reflections, f)
+                    removed_count += len(reflections) - 50
+            except Exception:
+                pass
+        return removed_count
+    def _detect_and_run_tests(self) -> Optional[str]:
+        import subprocess
+        import os
+        import json
+        # 1. Check for Python/pytest
+        if os.path.exists("pytest.ini") or os.path.exists("conftest.py") or os.path.isdir("tests"):
+            try:
+                res = subprocess.run(["pytest"], capture_output=True, text=True, timeout=60)
+                if res.returncode != 0 and res.returncode != 5:
+                    return f"pytest failed:\n{res.stdout}\n{res.stderr}"
+                return None
+            except subprocess.TimeoutExpired:
+                return "pytest timed out (took longer than 60 seconds)"
+            except Exception:
+                pass
+        # 2. Check for package.json / npm test
+        if os.path.exists("package.json"):
+            try:
+                with open("package.json", "r", encoding="utf-8") as f:
+                    pkg = json.load(f)
+                if "scripts" in pkg and "test" in pkg["scripts"]:
+                    res = subprocess.run(["npm", "test"], capture_output=True, text=True, timeout=60, shell=True)
+                    if res.returncode != 0:
+                        return f"npm test failed:\n{res.stdout}\n{res.stderr}"
+                    return None
+            except subprocess.TimeoutExpired:
+                return "npm test timed out"
+            except Exception:
+                pass
+        # 3. Check for tox.ini
+        if os.path.exists("tox.ini"):
+            try:
+                res = subprocess.run(["tox"], capture_output=True, text=True, timeout=90)
+                if res.returncode != 0:
+                    return f"tox failed:\n{res.stdout}\n{res.stderr}"
+                return None
+            except Exception:
+                pass
+        # 4. Check for Cargo.toml
+        if os.path.exists("Cargo.toml"):
+            try:
+                res = subprocess.run(["cargo", "test"], capture_output=True, text=True, timeout=60)
+                if res.returncode != 0:
+                    return f"cargo test failed:\n{res.stdout}\n{res.stderr}"
+                return None
+            except Exception:
+                pass
+        # 5. Check for go.mod
+        if os.path.exists("go.mod"):
+            try:
+                res = subprocess.run(["go", "test", "./..."], capture_output=True, text=True, timeout=60)
+                if res.returncode != 0:
+                    return f"go test failed:\n{res.stdout}\n{res.stderr}"
+                return None
+            except Exception:
+                pass
+        return None
+    def run_task(self, user_message: str, max_iterations: int = 500) -> None:
+        """Append user_message to history and run the full ReAct loop until the
+        model stops issuing tool calls or we hit max_iterations.
+        """
+        self.turn_step_timings = []
+        # Refresh console width at start of task
+        try:
+            import shutil
+            width = shutil.get_terminal_size().columns
+            if width > 0:
+                self.console.width = width
+        except:
+            pass
+        self.cancel_event.clear()
+        self.pre_prompt_text = ""
+        try:
+            pre_prompt_file = ".utim/pre_prompt_thoughts.json"
+            if os.path.exists(pre_prompt_file):
+                os.remove(pre_prompt_file)
+        except Exception:
+            pass
+        try:
+            from utim_cli.utim import STATE
+            STATE["thinking_topic"] = ""
+        except Exception:
+            pass
+        turn_msg_start = len(self.messages)  # snapshot before user msg is appended
+        self._current_turn_start = turn_msg_start  # used by _get_send_messages()
+        self._turn_changes = []
+        # Analyze previous turn feedback and user sentiment
+        prev_assistant_content = ""
+        prev_iteration_count = 0
+        prev_elapsed_time = 0
+        if self.turn_history:
+            prev_turn = self.turn_history[-1]
+            prev_iteration_count = prev_turn.get("iteration_count", 0)
+            prev_elapsed_time = prev_turn.get("elapsed_time", 0)
+        if self.messages:
+            for msg in reversed(self.messages):
+                if msg.get("role") == "assistant" and msg.get("content"):
+                    prev_assistant_content = msg["content"]
+                    break
+        # Inject secret guidance hint if cached in global CLI state
+        try:
+            from utim_cli.utim import STATE
+            hint = STATE.pop("hint", None)
+            if hint:
+                user_message = f"[Secret Hint Guidance: {hint}]\n{user_message}"
+        except Exception:
+            pass
+        self.messages.append({"role": "user", "content": user_message})
+        self.redo_history = []  # Clear redo history on new user action
+        self._persist_messages(in_progress_turn={
+            "user_msg": user_message,
+            "msg_start": turn_msg_start,
+            "msg_end": len(self.messages),
+            "messages": list(self.messages[turn_msg_start:]),
+            "changes": [],
+        })
+        task_start_time = time.time()
+        self.task_start_time = task_start_time
+        self._test_run_attempts = 0
+        _empty_response_streak = 0  # tracks consecutive empty (no content, no tools) responses
+        turn_iteration = 0
+        for iteration in range(max_iterations):
+            self.current_iteration = iteration
+            turn_iteration = iteration + 1
+            # Check for cancellation before each LLM call
+            if self.cancel_event.is_set():
+                self.console.print("\n[dim yellow]⊘  Aborted.[/dim yellow]\n")
+                self.messages.pop()  # Roll back the unsent user message if first iter
+                break
+            # ── Resilient LLM call with per-iteration retry ────────────────────
+            # We allow up to 3 transient-error retries per iteration before
+            # giving up for real.  This prevents a single network blip from
+            # silently killing a long-running task.
+            _llm_retries = 0
+            _llm_max_retries = 3
+            msg = None
+            while _llm_retries <= _llm_max_retries:
+                try:
+                    # Make the thinking indicator interactive before TTFT
+                    from utim_cli.utim import STATE
+                    if iteration == 0:
+                        if is_casual_message(user_message):
+                            STATE["thinking_topic"] = "Formulating greeting..."
+                        else:
+                            STATE["thinking_topic"] = "Formulating response..."
+                        draft_text = getattr(self, "_pre_computation_text", "").strip()
+                        actual_text = user_message.strip()
+                        def get_similarity(s1, s2):
+                            s1_clean = "".join(c for c in s1.lower() if c.isalnum() or c.isspace()).strip()
+                            s2_clean = "".join(c for c in s2.lower() if c.isalnum() or c.isspace()).strip()
+                            s1_words = s1_clean.split()
+                            s2_words = s2_clean.split()
+                            if not s1_words or not s2_words:
+                                return 0.0
+                            w1 = set(s1_words)
+                            w2 = set(s2_words)
+                            intersection = w1.intersection(w2)
+                            union = w1.union(w2)
+                            return len(intersection) / len(union)
+                        is_match = False
+                        match_reason = ""
+                        if draft_text:
+                            if draft_text == actual_text:
+                                is_match = True
+                                match_reason = "exact match"
+                            elif actual_text.startswith(draft_text) and len(actual_text) - len(draft_text) < 20:
+                                is_match = True
+                                match_reason = "prefix match"
+                            else:
+                                similarity = get_similarity(draft_text, actual_text)
+                                if similarity >= 0.80:
+                                    is_match = True
+                                    match_reason = f"fuzzy match ({similarity:.1%} similarity)"
+                        if is_match:
+                            if (self._pre_computation_thread and
+                                    self._pre_computation_thread.is_alive() and
+                                    not self._pre_computation_done):
+                                STATE["thinking_topic"] = "Anticipating response (finishing background reasoning)..."
+                                self._pre_computation_thread.join(timeout=30)
+                            if self._pre_computation_done and self._pre_computation_result:
+                                self.console.print(f"[bold green]⚡ Anticipatory Cache HIT: Reused background reasoning ({match_reason}).[/bold green]")
+                                msg = self._pre_computation_result
+                                was_streamed = True
+                                clean_content = msg.get("content") or ""
+                                if clean_content:
+                                    self.console.print()
+                                    self.console.print(Markdown(clean_content))
+                                    self.console.print()
+                                self.turn_step_timings.append({
+                                    "step": turn_iteration,
+                                    "reasoning_time": 0.0,
+                                    "tool_time": 0.0,
+                                    "tools": []
+                                })
+                                break
+                    else:
+                        STATE["thinking_topic"] = "Evaluating tool results & logic..."
+                    # BUG 1 FIX: Pass the live turn_msg_start so _get_send_messages
+                    # always slices at the correct boundary, even after compression
+                    # has rewritten self.messages and potentially shifted indices.
+                    send_msgs = self._get_send_messages(turn_msg_start)
+                    STATE["thinking_topic"] = "Synthesizing response..."
+                    t_llm_start = time.time()
+                    msg, was_streamed = self._call_llm(send_msgs)
+                    reasoning_duration = time.time() - t_llm_start
+                    break  # success
+                except _ServerUnavailableError as exc:
+                    if _llm_retries < _llm_max_retries:
+                        _llm_retries += 1
+                        wait_s = 5 * _llm_retries
+                        self.console.print(
+                            f"\n[bold yellow]⚠  All models unreachable (attempt {_llm_retries}/{_llm_max_retries}). "
+                            f"Retrying in {wait_s}s...[/bold yellow]"
+                        )
+                        time.sleep(wait_s)
+                        continue
+                    # All retries exhausted — show error and abort turn
+                    self.console.print()
+                    self.console.print(Panel(
+                        Text.from_markup(
+                            f"[bold #FFE066]⚠  UTIM Server Unavailable[/bold #FFE066]\n\n"
+                            f"[white]{exc}[/white]\n\n"
+                            "[dim]All retry attempts failed. The task has been paused.\n"
+                            "Type your message again when the connection is restored.[/dim]"
+                        ),
+                        border_style="#FFE066",
+                        padding=(0, 2),
+                        expand=False,
+                        width=min(70, self.console.width - 4),
+                    ))
+                    self.console.print()
+                    del self.messages[turn_msg_start:]
+                    return
+                except Exception as exc:
+                    if _llm_retries < _llm_max_retries:
+                        _llm_retries += 1
+                        wait_s = 3 * _llm_retries
+                        self.console.print(
+                            f"\n[dim yellow]⟳  Transient error on iteration {iteration+1} "
+                            f"(attempt {_llm_retries}/{_llm_max_retries}): {exc}. "
+                            f"Retrying in {wait_s}s...[/dim yellow]"
+                        )
+                        time.sleep(wait_s)
+                        continue
+                    # All retries exhausted — log and abort turn cleanly
+                    self.console.print(f"\n[bold red]Error (all retries failed):[/bold red] {exc}\n")
+                    del self.messages[turn_msg_start:]
+                    return
+            if msg is None or msg.get("aborted") or self.cancel_event.is_set():
+                self.console.print("\n[dim yellow]⊘  Aborted.[/dim yellow]\n")
+                del self.messages[turn_msg_start:]
+                return
+            content: str = msg.get("content") or ""
+            tool_calls: List[Dict] = msg.get("tool_calls") or []
+            # Print content that wasn't already streamed live
+            if not was_streamed and content and content.strip():
+                self.console.print()
+                self.console.print(Markdown(content))
+                self._current_line_len = 0
+                if not tool_calls:
+                    self.console.print()
+            elif was_streamed and content:
+                # We finished streaming. The cursor is at some position on the current line.
+                # No extra newline here - let the next block handle it
+                pass
+            # Parse text-based tool calls fallback if native tool calls are empty
+            if not tool_calls and content:
+                parsed_calls = []
+                try:
+                    from utim_cli.tools import TOOL_FUNCTIONS
+                    tool_names = set(TOOL_FUNCTIONS.keys())
+                    import json
+                    decoder = json.JSONDecoder()
+                    pos = 0
+                    while pos < len(content):
+                        start = content.find('{', pos)
+                        if start == -1:
+                            break
+                        try:
+                            obj, end_idx = decoder.raw_decode(content[start:])
+                            extracted = []
+                            # 1. Standard OpenAI format
+                            if "function" in obj and isinstance(obj["function"], dict):
+                                func_obj = obj["function"]
+                                name = func_obj.get("name")
+                                if name in tool_names:
+                                    args = func_obj.get("arguments", "{}")
+                                    if isinstance(args, dict):
+                                        args = json.dumps(args)
+                                    extracted = [{
+                                        "id": obj.get("id", f"call_parsed_{iteration}"),
+                                        "type": "function",
+                                        "function": {"name": name, "arguments": args}
+                                    }]
+                            # 2. Simplified formats
+                            if not extracted:
+                                name_keys = ["name", "tool", "function", "action", "tool_name"]
+                                name = None
+                                for k in name_keys:
+                                    if k in obj and isinstance(obj[k], str) and obj[k] in tool_names:
+                                        name = obj[k]
+                                        break
+                                if name:
+                                    args_obj = {}
+                                    args_keys = ["arguments", "args", "parameters", "params"]
+                                    for k in args_keys:
+                                        if k in obj and isinstance(obj[k], dict):
+                                            args_obj = obj[k]
+                                            break
+                                    else:
+                                        args_obj = {k: v for k, v in obj.items() if k not in name_keys}
+                                    extracted = [{
+                                        "id": f"call_parsed_{iteration}",
+                                        "type": "function",
+                                        "function": {"name": name, "arguments": json.dumps(args_obj)}
+                                    }]
+                            if extracted:
+                                parsed_calls.extend(extracted)
+                            pos = start + end_idx
+                        except json.JSONDecodeError:
+                            pos = start + 1
+                except Exception:
+                    pass
+                if parsed_calls:
+                    tool_calls = parsed_calls
+                    self.console.print(f"\n[bold yellow]🔧 Parsed {len(tool_calls)} tool call(s) from assistant text response.[/bold yellow]")
+            # If the model response was cut off mid-turn due to length/token limits, nudge it to continue
+            if msg.get("was_cut_off"):
+                self.console.print("\n[bold yellow]⚠ Response truncated by token limits. Continuing response...[/bold yellow]\n")
+                self.messages.append(
+                    {
+                        "role": "assistant",
+                        "content": content or None,
+                        "tool_calls": tool_calls if tool_calls else None,
+                    }
+                )
+                self.messages.append(
+                    {
+                        "role": "user",
+                        "content": "You were cut off mid-response (token limit reached). Please continue your response exactly where you left off. Do not repeat yourself; just resume writing from the cutoff point."
+                    }
+                )
+                continue
+            # No tool calls → potentially done
+            if not tool_calls:
+                # Gather recent tool names from this turn to build context-aware nudges
+                _recent_tool_names = []
+                for _prev_msg in reversed(self.messages[turn_msg_start:]):
+                    if _prev_msg.get("role") == "tool":
+                        _tname = _prev_msg.get("name", "")
+                        if _tname and _tname not in {"recall_experience", "store_experience", "manage_memory"}:
+                            _recent_tool_names.append(_tname)
+                    elif _prev_msg.get("role") == "user":
+                        break  # don't look past the user's message
+                _had_tools_this_turn = bool(_recent_tool_names)
+                # If the model stopped with empty OR trivially short response after running tools
+                _is_empty = not content.strip()
+                _is_lazy_transition = False
+                if _had_tools_this_turn and not _is_empty and iteration > 0:
+                    _lower_content = content.strip().lower()
+                    # Strong indicators that the model forgot to output a tool call
+                    _ends_with_cliffhanger = content.strip().endswith(":") or content.strip().endswith("...")
+                    _has_lazy_phrases = any(phrase in _lower_content for phrase in [
+                        "i will now", "let's run", "next, i'll", "running the",
+                        "i am going to", "i'll now", "let me check", "let me run",
+                        "continuing", "my bad", "apologies", "proceeding to",
+                        "now i will", "next i will", "moving on to", "let's proceed",
+                        "i will execute", "let's execute", "i will use", "executing the"
+                    ])
+                    _is_lazy_transition = (
+                        (len(_lower_content) < 400 and _has_lazy_phrases) or
+                        _ends_with_cliffhanger
+                    )
+                    if len(_lower_content) < 50:
+                        _is_lazy_transition = True
+                if (_is_empty or _is_lazy_transition) and iteration < max_iterations - 1:
+                    _empty_response_streak += 1
+                    if _empty_response_streak >= 4:
+                        # Model stuck in a loop — give up
+                        self.console.print(
+                            f"\n[bold yellow]⚠  The model got stuck providing lazy or empty responses ({_empty_response_streak} times). "
+                            "It has been paused. Try nudging it manually or switching models.[/bold yellow]\n"
+                        )
+                        break
+                    # Build a context-aware continuation nudge
+                    if _had_tools_this_turn:
+                        tool_list = ", ".join(dict.fromkeys(reversed(_recent_tool_names)))  # dedupe, preserve order
+                        nudge = (
+                            f"You just executed tool(s) [{tool_list}]. You then wrote a short response without "
+                            f"calling any further tools. If the task is incomplete, you MUST output the required JSON tool calls. "
+                            f"Do NOT just tell me what you are going to do — actually DO IT by calling the tool. "
+                            f"If the task is truly complete, provide a comprehensive final summary."
+                        )
+                    else:
+                        nudge = (
+                            "You stopped without writing anything or taking action. The user's request was: "
+                            f"\"{user_message[:200]}\". Please provide a substantive response or take action."
+                        )
+                    self.console.print(f"\n[dim yellow]⚠ Model provided lazy response without tools. Auto-nudging (attempt {_empty_response_streak}/3)...[/dim yellow]")
+                    self.messages.append({"role": "assistant", "content": content or " "})
+                    self.messages.append({"role": "user", "content": nudge})
+                    continue
+                # Got a valid response — reset streak counter
+                _empty_response_streak = 0
+                # ── Automated Regression Testing Loop ───────────────────────
+                import utim_cli.tools as _t
+                if self._turn_changes and not _t._DRY_RUN and getattr(self, "_test_run_attempts", 0) < 3:
+                    self.console.print("\n[bold yellow]🔍 Running automated regression tests to verify changes...[/bold yellow]")
+                    test_error = self._detect_and_run_tests()
+                    if test_error:
+                        self._test_run_attempts = getattr(self, "_test_run_attempts", 0) + 1
+                        self.console.print(f"[bold red]❌ Automated tests failed (Attempt {self._test_run_attempts}/3). Nudging agent to self-heal...[/bold red]")
+                        self.messages.append({"role": "assistant", "content": content})
+                        self.messages.append({
+                            "role": "user",
+                            "content": f"Automated regression testing failed after your changes. Please fix the failing test(s) or compilation error(s) shown below:\n\n{test_error}"
+                        })
+                        continue
+                    else:
+                        self.console.print("[bold green]✓ All automated tests passed successfully![/bold green]\n")
+                self.messages.append({"role": "assistant", "content": content})
+                self.turn_step_timings.append({
+                    "step": turn_iteration,
+                    "reasoning_time": reasoning_duration,
+                    "tool_time": 0.0,
+                    "tools": []
+                })
+                break
+            # Append assistant message (with tool_calls) to history
+            self.messages.append(
+                {
+                    "role": "assistant",
+                    "content": content or None,
+                    "tool_calls": tool_calls,
+                }
+            )
+            # Real-time persistence: save the assistant response & tool calls immediately
+            self._persist_messages(in_progress_turn={
+                "user_msg": user_message,
+                "msg_start": turn_msg_start,
+                "msg_end": len(self.messages),
+                "messages": list(self.messages[turn_msg_start:]),
+                "changes": list(self._turn_changes),
+            })
+            # Execute tools - use parallel execution for better performance
+            t_tool_start = time.time()
+            compression_instruction = ""
+            # Extract compression instruction before parallel execution
+            for tc in tool_calls:
+                func_name = tc.get("function", {}).get("name", "")
+                if func_name == "compress_context":
+                    try:
+                        args = json.loads(tc["function"].get("arguments", "{}"))
+                        compression_instruction = args.get("preservation_rules", "Keep critical facts and architecture decisions.")
+                    except:
+                        pass
+            # ── Two-phase execution: Knowledge-first gate ─────────────────
+            # When recall_experience is called alongside MUTATING tools
+            # (run_command, write_file, edit_file, etc.), the model has already
+            # decided on those tool arguments BEFORE seeing the recall results.
+            # This creates a race condition where knowledge arrives too late.
+            #
+            # Fix: execute ONLY recall_experience first, inject its results,
+            # DROP the remaining planned calls, and force a re-plan so the
+            # model can use the recalled knowledge to make better decisions.
+            MUTATING_TOOLS = {"run_command", "write_file", "edit_file", "delete_file", "move_file"}
+            KNOWLEDGE_TOOLS = {"recall_experience"}
+            knowledge_calls = [tc for tc in tool_calls
+                               if tc.get("function", {}).get("name", "") in KNOWLEDGE_TOOLS]
+            mutating_calls = [tc for tc in tool_calls
+                              if tc.get("function", {}).get("name", "") in MUTATING_TOOLS]
+            if knowledge_calls and mutating_calls:
+                # Phase 1: Execute ONLY the knowledge tools (silently)
+                for ktc in knowledge_calls:
+                    result = self._execute_tool_timed(ktc)
+                    tc_id = ktc.get("id") or str(ktc.get("index", "0"))
+                    func_name = ktc.get("function", {}).get("name", "")
+                    self.messages.append({
+                        "role": "tool",
+                        "tool_call_id": tc_id,
+                        "name": func_name,
+                        "content": result,
+                    })
+                # Phase 2: Tell the model the remaining calls were NOT executed
+                # and ask it to re-plan with the new knowledge
+                dropped_names = [tc.get("function", {}).get("name", "?") for tc in mutating_calls]
+                non_knowledge_non_mutating = [tc for tc in tool_calls
+                                              if tc.get("function", {}).get("name", "") not in KNOWLEDGE_TOOLS
+                                              and tc.get("function", {}).get("name", "") not in MUTATING_TOOLS]
+                # Execute non-mutating, non-knowledge tools normally (they're safe)
+                for safe_tc in non_knowledge_non_mutating:
+                    result = self._execute_tool_timed(safe_tc)
+                    tc_id = safe_tc.get("id") or str(safe_tc.get("index", "0"))
+                    func_name = safe_tc.get("function", {}).get("name", "")
+                    self.messages.append({
+                        "role": "tool",
+                        "tool_call_id": tc_id,
+                        "name": func_name,
+                        "content": result,
+                    })
+                # Insert placeholder results for dropped mutating calls so the API
+                # doesn't complain about missing tool_call_id responses
+                for mtc in mutating_calls:
+                    tc_id = mtc.get("id") or str(mtc.get("index", "0"))
+                    func_name = mtc.get("function", {}).get("name", "?")
+                    self.messages.append({
+                        "role": "tool",
+                        "tool_call_id": tc_id,
+                        "name": func_name,
+                        "content": f"[NOT EXECUTED] This {func_name} call was held back. "
+                                   f"Review the recall_experience results above — they may "
+                                   f"contain constraints that affect how you should call this tool. "
+                                   f"Please re-plan and re-issue the call with any necessary adjustments.",
+                    })
+                # Force the model to re-plan by continuing the loop
+                tool_duration = time.time() - t_tool_start
+                self.turn_step_timings.append({
+                    "step": turn_iteration,
+                    "reasoning_time": reasoning_duration,
+                    "tool_time": tool_duration,
+                    "tools": [tc.get("function", {}).get("name", "") for tc in tool_calls]
+                })
+                continue
+            # Execute tools in parallel when beneficial
+            if len(tool_calls) > 1:
+                # Use parallel execution for multiple tools
+                parallel_results = self._execute_tools_parallel(tool_calls)
+                for slot in parallel_results:
+                    if self.cancel_event.is_set():
+                        break
+                    # Guard against None slots (defensive: shouldn't happen but prevents crash)
+                    if slot is None:
+                        continue
+                    tc, result = slot
+                    tc_id = tc.get("id") or str(tc.get("index", "0"))
+                    func_name = tc.get("function", {}).get("name", "")
+                    self.messages.append(
+                        {
+                            "role": "tool",
+                            "tool_call_id": tc_id,
+                            "name": func_name,
+                            "content": result,
+                        }
+                    )
+            else:
+                # Single tool - execute directly
+                for tc in tool_calls:
+                    if self.cancel_event.is_set():
+                        break
+                    func_name = tc.get("function", {}).get("name", "")
+                    _tools_module._cancel_event = self.cancel_event
+                    result = self._execute_tool_timed(tc)
+                    self._current_line_len = 0
+                    tc_id = tc.get("id") or str(tc.get("index", "0"))
+                    self.messages.append(
+                        {
+                            "role": "tool",
+                            "tool_call_id": tc_id,
+                            "name": func_name,
+                            "content": result,
+                        }
+                    )
+            # Attempt to compress context if requested or if token limit is breached.
+            # turn_msg_start is passed so that after compression rewrites
+            # self.messages the method can refresh _current_turn_start.
+            self._compress_intra_turn(turn_msg_start, compression_instruction)
+            # Real-time persistence: save tool results and file diffs immediately
+            self._persist_messages(in_progress_turn={
+                "user_msg": user_message,
+                "msg_start": turn_msg_start,
+                "msg_end": len(self.messages),
+                "messages": list(self.messages[turn_msg_start:]),
+                "changes": list(self._turn_changes),
+            })
+            tool_duration = time.time() - t_tool_start
+            self.turn_step_timings.append({
+                "step": turn_iteration,
+                "reasoning_time": reasoning_duration,
+                "tool_time": tool_duration,
+                "tools": [tc.get("function", {}).get("name", "") for tc in tool_calls]
+            })
+        else:
+            if not self.cancel_event.is_set():
+                self.console.print(f"\n[bold yellow]⚠ Agent paused after reaching maximum iterations ({max_iterations}).[/bold yellow]")
+                self.console.print("[dim]You can type 'continue' to resume the task.[/dim]\n")
+        elapsed = int(time.time() - task_start_time)
+        elapsed_str = (
+            f"{elapsed // 60}m {elapsed % 60}s" if elapsed >= 60 else f"{elapsed}s"
+        )
+        self.console.print(Rule(f"[dim]⚙  {elapsed_str}[/dim]"))
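+        # Save turn snapshot for /rewind.
+        # NOTE(review): the guard below skips the snapshot when the turn was cancelled, so partial work from a cancelled turn is not recorded here — confirm whether that is intended.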
+        # ALWAYS save the turn, even if there are no code changes, so the user can rewind the conversation
+        if not self.cancel_event.is_set():
+            turn_entry = {
+                "user_msg": user_message,
+                "msg_start": turn_msg_start,
+                "msg_end": len(self.messages),
+                "messages": list(self.messages[turn_msg_start:]),  # Save messages slice!
+                "changes": list(self._turn_changes),
+                "iteration_count": turn_iteration,
+                "elapsed_time": elapsed,
+                "step_timings": list(self.turn_step_timings),
+            }
+            self.turn_history.append(turn_entry)
+            # Persist messages to server for /resume (background, non-blocking)
+            self._persist_messages()
+            self._trigger_bg_summarization()
+            # Automated Reflection Phase powered by Hugging Face Vector DB
+            try:
+                from utim_cli.reflection import run_reflection_phase
+                run_reflection_phase(
+                    user_message=user_message or "",
+                    assistant_content=final_answer or "",
+                    tool_results=self._turn_changes or [],
+                    elapsed_seconds=int(elapsed),
+                    iterations=turn_iteration
+                )
+            except Exception:
+                pass
+            # ── Cleanup old tmp files ─────────────────────────────────────────────
+            # Remove stale files to prevent accumulation across sessions
+            try:
+                removed = self._cleanup_tmp_folder()
+                if removed > 0:
+                    self.console.print(f"[dim]⊘ Cleaned up {removed} stale file(s) from .utim_tmp[/dim]")
+            except Exception:
+                pass  # Cleanup failures should be silent
+        self._turn_changes = []