utim-cli 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- utim_cli/__init__.py +40 -0
- utim_cli/agent.py +359 -0
- utim_cli/auth.py +208 -0
- utim_cli/backup.py +101 -0
- utim_cli/billing.py +40 -0
- utim_cli/blender_agent.py +1018 -0
- utim_cli/bootstrap.py +324 -0
- utim_cli/client_utils.py +135 -0
- utim_cli/config.py +194 -0
- utim_cli/context_pruner.py +504 -0
- utim_cli/doctor.py +118 -0
- utim_cli/knowledge_graph.py +462 -0
- utim_cli/logger.py +121 -0
- utim_cli/mcp_clean_wrapper.py +55 -0
- utim_cli/mcp_client.py +198 -0
- utim_cli/mcp_registry.json +1102 -0
- utim_cli/orchestrator.py +3209 -0
- utim_cli/reflection.py +200 -0
- utim_cli/report.py +100 -0
- utim_cli/scrapy_search.py +229 -0
- utim_cli/share.py +320 -0
- utim_cli/share_tui.py +554 -0
- utim_cli/situational_scoring.py +269 -0
- utim_cli/state.py +15 -0
- utim_cli/tools.py +3381 -0
- utim_cli/utim.py +4051 -0
- utim_cli/vector_memory.py +629 -0
- utim_cli/workspace.py +33 -0
- utim_cli-1.0.0.dist-info/METADATA +134 -0
- utim_cli-1.0.0.dist-info/RECORD +34 -0
- utim_cli-1.0.0.dist-info/WHEEL +5 -0
- utim_cli-1.0.0.dist-info/entry_points.txt +2 -0
- utim_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
- utim_cli-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,504 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Importance-Weighted Context Pruning — LLM-driven scoring for intelligent context summarization.
|
|
3
|
+
|
|
4
|
+
This module implements a scoring pass that distinguishes between high-signal technical content
|
|
5
|
+
and low-signal conversational filler before summarization.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
import re
|
|
10
|
+
from typing import List, Dict, Tuple, Optional, Set
|
|
11
|
+
import requests
|
|
12
|
+
import json
|
|
13
|
+
import hashlib
|
|
14
|
+
|
|
15
|
+
# Fallback models for compression operations - ordered by reliability and quality.
# _call_compression_model_with_fallback tries these in list order, after the
# caller-supplied primary model (if any).
COMPRESSION_FALLBACK_MODELS = [
    "openai/gpt-oss-20b:free",
    "poolside/laguna-xs.2:free",
    "cohere/north-mini-code:free",
    "openrouter/free"
]

# Track what content has already been summarized to prevent duplicate summarization.
# Holds the short fingerprints produced by _compute_content_hash; it is module-level
# state, so it lives for the lifetime of the process.
_summarized_content_hashes: Set[str] = set()
|
|
25
|
+
|
|
26
|
+
def _compute_content_hash(content: str) -> str:
    """Return a short fingerprint used to recognise repeated content.

    Only the first 500 characters of ``content`` are hashed, and only the
    first 16 hex digits of the MD5 digest are kept, so texts sharing the same
    500-character prefix produce the same fingerprint.
    """
    prefix = content[:500]
    digest = hashlib.md5(prefix.encode())
    return digest.hexdigest()[:16]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# Patterns that indicate high-importance content.
# score_message_importance adds 0.1 to the score for every pattern in this
# list that matches anywhere in the message content (case-insensitive).
HIGH_SIGNAL_PATTERNS = [
    r"(?i)(must|should|requirement|constraint|rule)",
    r"(?i)(architecture|design|pattern|structure)",
    r"(?i)(file|path|module|class|function|method)",
    r"(?i)(import|export|dependency|reference)",
    r"(?i)(config|setting|environment|variable)",
    r"(?i)(api|endpoint|route|handler)",
    r"(?i)(test|spec|verify|assert)",
    r"(?i)(error|exception|failure|bug)",
    r"(?i)(security|auth|permission|access)",
    r"(?i)(performance|optimization|scale)",
    r"(?i)(convention|standard|style|format)",
    r"(?i)(preference|always|prefer|use)",
    r"(?i)(plan|implement|create|build|modify)",
    r"(?i)(important|critical|vital|essential)",
]

# Patterns that indicate low-importance content.
# These are matched against the whitespace-stripped content; most are anchored
# with ^...$ so they only fire when the whole message is filler. Each match
# subtracts 0.15 from the score.
LOW_SIGNAL_PATTERNS = [
    r"(?i)^(ok|okay|yes|no|got it|understood|thanks)$",
    r"(?i)^(i see|i understand|right|correct)$",
    r"(?i)^(here's|here is|this is|that's|that is)",
    r"(?i)^(you're welcome|no problem|np)$",
    r"^[^a-zA-Z0-9]*$",  # Pure whitespace/punctuation
]

# Continuity markers that should be preserved to avoid task-looping after compression.
# Each matching pattern adds 0.25 to the score.
CONTINUITY_PATTERNS = [
    r"(?i)(next step|next action|in progress|pending|remaining|blocker)",
    r"(?i)(todo|to-do|plan|objective|goal|continue|resume|finish)",
    r"(?i)(tried|attempted|worked|failed|error|fix|patch)",
]
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def sanitize_message_sequence(messages: List[Dict]) -> List[Dict]:
    """
    Repair a chat transcript so tool calls and tool results stay paired.

    - An assistant ``tool_calls`` entry survives only if some tool message
      carries its id as ``tool_call_id``.
    - An assistant message left with no surviving tool calls is kept only if it
      has text content (with its ``tool_calls`` key removed); otherwise it is
      dropped.
    - A tool message survives only if a surviving assistant tool call has its
      ``tool_call_id``.

    Input dicts are never mutated; altered assistant messages are shallow
    copies. A falsy ``messages`` value is returned unchanged.
    """
    if not messages:
        return messages

    # Ids for which a tool result actually exists in the transcript.
    answered_ids = set()
    for msg in messages:
        if msg.get("role") != "tool":
            continue
        call_id = msg.get("tool_call_id")
        if call_id:
            answered_ids.add(call_id)

    # Trim assistant tool calls down to the answered ones, remembering which
    # ids are still referenced by a kept assistant message.
    staged = []
    live_ids = set()
    for msg in messages:
        if not (msg.get("role") == "assistant" and msg.get("tool_calls")):
            staged.append(msg)
            continue

        surviving = [
            call for call in msg.get("tool_calls", [])
            if call.get("id") in answered_ids
        ]
        if surviving:
            patched = dict(msg)
            patched["tool_calls"] = surviving
            staged.append(patched)
            live_ids.update(call.get("id") for call in surviving)
        elif msg.get("content"):
            # No answered tool calls left: keep the text, drop the calls.
            patched = dict(msg)
            patched.pop("tool_calls", None)
            staged.append(patched)
        # An assistant message with neither valid calls nor text is dropped.

    # Finally discard tool results whose originating call did not survive.
    return [
        msg for msg in staged
        if msg.get("role") != "tool" or msg.get("tool_call_id") in live_ids
    ]
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def score_message_importance(message: Dict, llm_key: str = None) -> float:
    """
    Heuristically rate how strongly a message should be preserved.

    A role-dependent base rating is adjusted by keyword and regex signals
    found in the content, then clamped to ``[floor, 1.0]`` where the floor
    itself depends on the role and content. ``llm_key`` is accepted but not
    consulted by this function.

    Returns a score between 0.0 (discard) and 1.0 (must preserve); user
    messages always come out at 1.0 because their floor is 1.0.
    """
    role = message.get("role", "")
    text = message.get("content", "") or ""
    if not isinstance(text, str):
        text = str(text)

    is_assistant = role == "assistant"
    is_tool = role == "tool"

    def mentions(*needles):
        # True when any of the given substrings occurs in the raw content.
        return any(needle in text for needle in needles)

    # Role-dependent starting point.
    if role == "user":
        rating = 0.85
    elif is_assistant:
        rating = 0.85 if message.get("tool_calls") else 0.6
    elif is_tool:
        rating = 0.65
    else:
        rating = 0.5

    if is_tool:
        # Tool output gets an in-depth look (no early return).
        lowered = text.lower()
        size = len(text)
        multiline = "\n" in text

        def lowered_has(words):
            return any(word in lowered for word in words)

        # Short but critical output: failure indicators lift the rating to >= 0.9.
        if lowered_has(("error", "exception", "failed", "failure", "traceback",
                        "conflict", "permission denied", "not found", "undefined")):
            rating = max(rating, 0.9)

        # Code reading/inspection results are valued so files are not re-read in a loop.
        if message.get("name", "") in ("read_file", "view_file", "grep_search",
                                       "query_codebase", "view_file_outline"):
            rating += 0.2

        # File edits or changes.
        if lowered_has(("success", "created", "modified", "deleted", "replaced", "written")):
            rating += 0.2

        # Errors or warnings matter for debugging loops.
        if lowered_has(("error", "exception", "failed", "failure", "traceback",
                        "warning", "conflict")):
            rating += 0.2

        # Source-like content (grep or file-view output).
        if mentions("def ", "class ", "import ", "function "):
            rating += 0.25
        if mentions("```", "LineNumber", "LineContent"):
            rating += 0.2

        # Short outputs with technical content must not be scored low.
        if 0 < size < 200:
            if multiline or re.search(r"[{};()=\[\]]", text):
                rating = max(rating, 0.8)
            elif re.search(r":[a-zA-Z_]\w*\s*=", text):  # Key-value pairs
                rating = max(rating, 0.75)

        # Longer code-like output (e.g. a file read).
        if size > 100 and (multiline or re.search(r"[{};()=+\-\[\]]", text)):
            rating += 0.15
    elif is_assistant and mentions("def ", "class ", "import ", "```"):
        # Assistant replies carrying code or architecture details.
        rating += 0.2

    # Pattern-based adjustments for every role. The additions stay one per
    # match, in list order, so floating-point results are unchanged.
    for high in HIGH_SIGNAL_PATTERNS:
        if re.search(high, text):
            rating += 0.1

    stripped = text.strip()
    for low in LOW_SIGNAL_PATTERNS:
        if re.search(low, stripped):
            rating -= 0.15

    # Plans, next steps and todos are preserved aggressively.
    for marker in CONTINUITY_PATTERNS:
        if re.search(marker, text):
            rating += 0.25

    # General code-snippet check.
    if "```" in text or re.search(r"\w+\s*\(\s*\w*", text):
        rating += 0.15

    # File paths are high signal.
    if re.search(r"[\w/\\-]+\.\w+", text):
        rating += 0.15

    # Role-dependent floor so critical short content is not penalised.
    if role == "user":
        # User instructions must never fall below the compression threshold
        # (0.75): LOW_SIGNAL_PATTERNS can subtract 0.15 from a genuine
        # instruction, so the floor is 1.0 and user messages survive verbatim.
        floor = 1.0
    elif is_assistant:
        # Tool calls are kept so the message schema is not broken.
        floor = 0.75 if message.get("tool_calls") else 0.45
    elif is_tool:
        if mentions("def ", "class ", "import ", "LineNumber"):
            floor = 0.8  # Critical files/code should not be pruned
        elif 0 < len(text) < 300 and (re.search(r"[{}();\[\]]", text) or "\n" in text):
            floor = 0.7  # Short technical content preserved
        else:
            floor = 0.5
    else:
        floor = 0.2

    return max(floor, min(1.0, rating))
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def _call_compression_model_with_fallback(messages: List[Dict], llm_key: str, max_tokens: int = 1500, content_hint: str = None, primary_model: str = None) -> Optional[str]:
    """Ask a compression model for a completion, falling back across models.

    ``primary_model`` (when given) is tried first, followed by
    ``COMPRESSION_FALLBACK_MODELS`` in order. Each model gets up to three
    attempts, retried only on HTTP 429 with a 5s/10s back-off; any other
    failure moves on to the next model.

    Args:
        messages: Chat messages sent as the request's ``messages`` payload.
        llm_key: Accepted for interface compatibility; not used here (the
            request is sent through ``proxy_openrouter_request``).
        max_tokens: ``max_tokens`` value for the request.
        content_hint: Optional source text. When given it drives both the
            duplicate check and the hallucination check.
        primary_model: Optional model id to try before the fallbacks.

    Returns:
        The model output with ``<think>``/``<thinking>`` sections removed and
        surrounding whitespace stripped; the literal
        ``"[Content previously summarized - skipped]"`` when ``content_hint``
        was already summarized; or ``None`` when every model failed.
    """
    import sys
    import time

    # Deduplication: skip content that already produced a summary. The hash is
    # only recorded once a summary has actually been returned, so a failed
    # attempt does not poison later retries for the same content.
    content_hash = None
    if content_hint:
        content_hash = _compute_content_hash(content_hint)
        if content_hash in _summarized_content_hashes:
            return "[Content previously summarized - skipped]"

    # Prioritize primary model if provided, followed by fallback models.
    models_to_try = [m for m in COMPRESSION_FALLBACK_MODELS if m != primary_model]
    if primary_model:
        models_to_try.insert(0, primary_model)

    # Hallucination detection: reject summaries with suspicious claims that
    # the source text does not contain.
    hallucination_patterns = [
        r"\[.*added.*\]",  # Made-up additions like "[feature added]"
        r"\[.*created.*\]",  # Made-up creations
        r"I have (?:created|modified|deleted|implemented)",  # False claims of action
        r"Successfully (?:added|removed|integrated)",  # False success claims
    ]
    # (claim pattern, relaxed pattern used to look for the same claim in the source)
    claim_checks = [
        (p, p.replace(r"\[", r"[^[]*").replace(r"\]", r"[^]]*"))
        for p in hallucination_patterns
    ]

    model_retries = 2
    last_error = None
    for model in models_to_try:
        for attempt in range(model_retries + 1):
            try:
                from utim_cli.client_utils import proxy_openrouter_request
                resp = proxy_openrouter_request(
                    json_data={
                        "model": model,
                        "messages": messages,
                        "max_tokens": max_tokens
                    },
                    stream=False,
                    timeout=30
                )
                if resp.status_code == 429 and attempt < model_retries:
                    time.sleep(5 * (attempt + 1))
                    continue
                resp.raise_for_status()

                if resp.status_code != 200:
                    # Not 200 and not a retryable 429: try the next model.
                    last_error = f"Model {model} returned status {resp.status_code}"
                    break

                result = resp.json()["choices"][0]["message"]["content"]
                # Strip thinking tags
                result = re.sub(r"<think(?:ing)?>.*?</think(?:ing)?>", "", result, flags=re.DOTALL)
                result = result.strip()
            except requests.exceptions.HTTPError as e:
                code = e.response.status_code if e.response is not None else 0
                if code == 429 and attempt < model_retries:
                    time.sleep(5 * (attempt + 1))
                    continue
                last_error = e
                break
            except Exception as e:
                # Best-effort: any other failure (network, malformed payload,
                # missing proxy helper) just moves on to the next model.
                last_error = e
                break

            # Check if the summary makes claims that are not in the source.
            unsupported = None
            if content_hint:
                for claim_pattern, source_pattern in claim_checks:
                    if re.search(claim_pattern, result, re.I) and not re.search(source_pattern, content_hint, re.I):
                        unsupported = claim_pattern
                        break
            if unsupported is not None:
                print(f"[HALLUCINATION PREVENTED] Pattern '{unsupported}' detected in summary but not in source", file=sys.stderr)
                last_error = f"Model {model} produced an unsupported claim matching '{unsupported}'"
                # Try next model instead of returning a bad summary.
                break

            if content_hash is not None and result:
                _summarized_content_hashes.add(content_hash)
            return result

    # Log the failure for debugging
    print(f"[COMPRESSION FALLBACK FAILURE] All models failed. Last error: {last_error}", file=sys.stderr)
    return None
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def llm_score_messages(messages: List[Dict], llm_key: str, primary_model: str = None) -> List[Tuple[int, float]]:
    """
    Use LLM to score message importance for more nuanced evaluation.

    Only the first 20 messages (each truncated to 200 characters) are sent to
    the model to limit cost. Every message the model did not score validly
    falls back to ``score_message_importance``; if there is no ``llm_key``, the
    model call fails, or its reply cannot be parsed, all scores are heuristic.

    Returns list of (index, score) tuples, one per message in index order,
    with LLM-provided scores clamped to 0.0-1.0.
    """
    heuristic_scores = [(i, score_message_importance(m)) for i, m in enumerate(messages)]
    if not llm_key or not messages:
        return heuristic_scores

    max_scored = 20  # Limit to 20 for cost

    # Prepare messages for scoring
    msg_texts = []
    for i, m in enumerate(messages[:max_scored]):
        role = m.get("role", "")
        content = m.get("content", "") or ""
        if not isinstance(content, str):
            # Structured (non-text) content cannot be sliced and concatenated.
            content = str(content)
        if len(content) > 200:
            content = content[:200] + "..."
        msg_texts.append(f"[{i}] {role}: {content}")

    prompt = (
        "Score each message for importance on a scale of 0-100 based on technical value:\n"
        "- High scores for: code snippets, file paths, architectural decisions, user constraints, errors, implementations\n"
        "- Low scores for: acknowledgments, small talk, filler phrases, generic confirmations\n"
        "\n"
        "Messages to score:\n"
        + "\n".join(msg_texts)
        + "\n\n"
        'Return only JSON array like: [{"idx": 0, "score": 85}, {"idx": 1, "score": 30}]'
    )

    llm_messages = [
        {"role": "system", "content": "You are a context analyzer. Score messages for technical importance."},
        {"role": "user", "content": prompt}
    ]

    result = _call_compression_model_with_fallback(llm_messages, llm_key, primary_model=primary_model)

    llm_scores = {}
    if result:
        # Models often wrap the array in prose or a code fence; take the
        # outermost [...] span when there is one.
        array_match = re.search(r"\[.*\]", result, flags=re.DOTALL)
        try:
            data = json.loads(array_match.group(0) if array_match else result)
            for entry in data:
                idx = int(entry["idx"])
                if 0 <= idx < len(messages):
                    llm_scores[idx] = max(0.0, min(1.0, float(entry["score"]) / 100.0))
        except (ValueError, TypeError, KeyError):
            # Unusable reply: discard it entirely and rely on heuristics.
            llm_scores = {}

    return [(i, llm_scores.get(i, fallback)) for i, fallback in heuristic_scores]
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def prune_context(messages: List[Dict], keep_threshold: float = 0.6,
                  llm_key: str = None, primary_model: str = None) -> List[Dict]:
    """
    Prune messages based on importance scores.

    Messages are scored (via the LLM when ``llm_key`` is set and there are more
    than five messages, otherwise heuristically). The first and last messages
    are always kept. Pruning only happens when the below-threshold messages
    outnumber the kept ones: they are then replaced by a single ``user``
    message holding an LLM summary, or a placeholder when no summary is
    available. Otherwise nothing is removed and the conversation is returned
    in its original order.

    The result is always passed through ``sanitize_message_sequence``.

    NOTE(review): the summary message is appended after the last kept message
    rather than at the position of the pruned messages - confirm this placement
    is what downstream consumers expect.
    """
    if not messages:
        return messages

    # Get scores
    if llm_key and len(messages) > 5:
        scores = dict(llm_score_messages(messages, llm_key, primary_model=primary_model))
    else:
        scores = {i: score_message_importance(m) for i, m in enumerate(messages)}

    # Split into keep and summarize groups
    last_index = len(messages) - 1
    to_keep = []
    to_summarize = []
    for i, msg in enumerate(messages):
        # Never prune first/last anchors; they often carry objective and latest state.
        if i == 0 or i == last_index or scores.get(i, 0.5) >= keep_threshold:
            to_keep.append(msg)
        else:
            to_summarize.append(msg)

    if len(to_summarize) <= len(to_keep):
        # Nothing is pruned. Return the messages in their ORIGINAL order:
        # concatenating the two groups would move low-scored messages to the
        # end and separate tool results from the assistant calls they answer.
        return sanitize_message_sequence(list(messages))

    # More messages need pruning than are being kept: generate a real LLM
    # summary instead of a bare placeholder, so the model still knows what was
    # dropped and can continue the task coherently.
    role_names = ", ".join(sorted({str(m.get("role", "?")) for m in to_summarize}))
    if llm_key:
        # Build a text digest of the low-importance messages
        text_parts = []
        for m in to_summarize:
            role = m.get("role", "")
            content = (m.get("content") or "")[:800]
            if role == "assistant" and m.get("tool_calls"):
                tc_names = ", ".join(
                    tc.get("function", {}).get("name", "") for tc in m.get("tool_calls", [])
                )
                text_parts.append(f"assistant (tools: {tc_names}): {content}")
            else:
                text_parts.append(f"{role}: {content}")
        raw = "\n---\n".join(text_parts)
        llm_msgs = [
            {
                "role": "system",
                "content": (
                    "You are a context compressor for an AI agent. "
                    "Summarise the following conversation excerpts into a dense, "
                    "technically precise paragraph. Preserve all file paths, error "
                    "messages, variable names, and decisions. No filler."
                ),
            },
            {"role": "user", "content": f"Excerpts to compress:\n{raw}"},
        ]
        summary_text = _call_compression_model_with_fallback(
            llm_msgs, llm_key, max_tokens=800, primary_model=primary_model
        )
        if summary_text:
            summary_content = (
                "[CONTEXT SUMMARY — earlier pruned messages]\n" + summary_text
            )
        else:
            # Compression call failed; fall back to a richer placeholder
            summary_content = (
                f"[Context pruned: {len(to_summarize)} messages ({role_names}) removed "
                "— compression model unavailable. Some context may be missing.]"
            )
    else:
        summary_content = (
            f"[Context pruned: {len(to_summarize)} messages ({role_names}) removed.]"
        )

    return sanitize_message_sequence(
        to_keep + [{"role": "user", "content": summary_content}]
    )
|
|
459
|
+
|
|
460
|
+
|
|
461
|
+
def adaptive_compression(messages: List[Dict], target_tokens: int = 50000, primary_model: str = None) -> List[Dict]:
    """
    Intelligently compress messages to stay under token budget.

    If the estimated size is already within ``target_tokens`` the input list is
    returned unchanged (same object). Otherwise the lowest-scored message that
    is neither first nor last is removed repeatedly until the estimate fits or
    only two messages remain, and the result is passed through
    ``sanitize_message_sequence``.

    Scoring uses the LLM when an API key is found (``OPENROUTER_API_KEY`` from
    the environment / the working directory's ``.env``, else the ``api_key``
    config entry) and the heuristic scorer otherwise.

    NOTE(review): scores are recomputed on every loop iteration, so with an API
    key each removed message costs one LLM scoring call.
    """
    def _estimate_tokens(msgs):
        # Rough estimate: ~4 characters per token over the JSON form.
        # NOTE(review): content is truncated to 500 characters per message, so
        # long messages are under-counted - confirm this is intended.
        try:
            return len(json.dumps([{"role": m.get("role"), "content": str(m.get("content", ""))[:500]} for m in msgs])) // 4
        except Exception:
            # E.g. entries that are not dicts or roles that are not
            # JSON-serialisable; fall back to the repr length.
            return len(str(msgs)) // 4

    if _estimate_tokens(messages) <= target_tokens:
        return messages

    # Load .env from the current working directory explicitly so adaptive_compression
    # uses the same folder-local key that Orchestrator resolved, not a stale key
    # inherited from a different utim installation elsewhere on the PATH.
    cwd_env = os.path.join(os.getcwd(), ".env")
    try:
        from dotenv import load_dotenv as _load_dotenv
        if os.path.isfile(cwd_env):
            _load_dotenv(cwd_env, override=True)
    except Exception:
        # Deliberately best-effort: dotenv may be missing or the file unreadable.
        pass
    from utim_cli.config import config
    llm_key = os.getenv("OPENROUTER_API_KEY") or config.get("api_key")

    # Iteratively remove lowest importance until under budget
    pruned = list(messages)
    while _estimate_tokens(pruned) > target_tokens and len(pruned) > 2:
        # Re-score current list each iteration to avoid stale index mappings after pops.
        if llm_key:
            scores = llm_score_messages(pruned, llm_key, primary_model=primary_model)
        else:
            scores = [(i, score_message_importance(m)) for i, m in enumerate(pruned)]
        score_dict = dict(scores)
        # Protect boundaries to preserve task continuity. With more than two
        # messages there is always at least one interior candidate.
        worst_idx = min(range(1, len(pruned) - 1), key=lambda i: score_dict.get(i, 0.5))
        pruned.pop(worst_idx)

    return sanitize_message_sequence(pruned)
|
utim_cli/doctor.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import os
|
|
3
|
+
import shutil
|
|
4
|
+
import json
|
|
5
|
+
import urllib.request
|
|
6
|
+
from rich.console import Console
|
|
7
|
+
|
|
8
|
+
def run_diagnostics(console: Console = None):
    """Run environmental, dependency, connectivity, and database diagnostics.

    Prints a report covering: the Python runtime, core and optional package
    availability, the presence of ``.utim`` workspace files in the current
    directory, reachability of the UTIM server and OpenRouter, and whether the
    commands configured in ``.utim/mcp.json`` can be found.

    Args:
        console: Rich console to print to; a new ``Console()`` is created when
            omitted.
    """
    import importlib.util

    if console is None:
        console = Console()

    # Distribution names whose import name cannot be derived by swapping
    # "-" for "_" (Pillow installs the ``PIL`` package).
    import_names = {"pillow": "PIL"}

    def is_installed(dep):
        # Locate the package without importing it: avoids loading heavy
        # optional packages and any failures raised while they initialise.
        module_name = import_names.get(dep, dep.replace("-", "_"))
        try:
            return importlib.util.find_spec(module_name) is not None
        except (ImportError, ValueError):
            return False

    def presence(flag):
        return '[green]Exists[/green]' if flag else '[yellow]Missing[/yellow]'

    console.print("\n[bold #42bcf5][+] UTIM Doctor - System Diagnostics[/bold #42bcf5]\n")

    # 1. Environment & Python
    py_ver = sys.version.split()[0]
    console.print(f" [bold]Python Version:[/bold] {py_ver}")
    console.print(f" [bold]Platform:[/bold] {sys.platform}")
    console.print(f" [bold]Working Directory:[/bold] {os.getcwd()}")

    # 2. Dependency Check
    console.print("\n [bold #42bcf5]Core Dependencies:[/bold #42bcf5]")
    core_dependencies = [
        "openai", "requests", "aiohttp", "typer", "rich",
        "prompt_toolkit", "tree_sitter", "mcp", "nest_asyncio"
    ]
    for dep in core_dependencies:
        if is_installed(dep):
            console.print(f" [+] {dep:<24} [green]Installed[/green]")
        else:
            console.print(f" [-] {dep:<24} [red]Not Installed (Required)[/red]")

    console.print("\n [bold #42bcf5]Optional Feature Dependencies:[/bold #42bcf5]")
    optional_dependencies = {
        "chromadb": "RAG Vector DB feature",
        "sentence_transformers": "Dynamic embedding extraction",
        "scrapy": "Scrapy search crawler",
        "scrapy-playwright": "Playwright JS crawler",
        "pillow": "Image utilities (generate_image)",
    }
    for dep, feat in optional_dependencies.items():
        if is_installed(dep):
            console.print(f" [+] {dep:<24} [green]Installed[/green] [dim]({feat})[/dim]")
        else:
            console.print(f" [ ] {dep:<24} [yellow]Not Installed[/yellow] [dim]({feat})[/dim]")

    # 3. Local Workspace State (paths are relative to the working directory)
    console.print("\n [bold #42bcf5]Workspace Configurations:[/bold #42bcf5]")
    local_utim = os.path.exists(".utim")
    console.print(f" * .utim/ directory: {presence(local_utim)}")

    config_exists = os.path.exists(".utim/config.json")
    console.print(f" * config.json: {presence(config_exists)}")

    db_exists = os.path.exists(".utim/utim_local.db")
    console.print(f" * Local SQLite DB: {presence(db_exists)}")

    # 4. Model & Connectivity check
    console.print("\n [bold #42bcf5]Connectivity & API Health:[/bold #42bcf5]")

    # 4a. Check UTIM Server Connection
    from utim_cli.auth import SERVER_URL
    console.print(f" * Server Target URL: {SERVER_URL}")
    try:
        req = urllib.request.Request(
            f"{SERVER_URL}/plans",
            headers={'User-Agent': 'Mozilla/5.0'}
        )
        with urllib.request.urlopen(req, timeout=5) as r:
            status = r.getcode()
        if status == 200:
            console.print(" [+] UTIM Server API: [green]Healthy & Reachable (HTTP 200)[/green]")
        else:
            console.print(f" [-] UTIM Server API: [red]Status Code {status}[/red]")
    except Exception as e:
        # Diagnostics must keep going whatever the network does.
        console.print(f" [-] UTIM Server API: [red]Unreachable[/red] ({e})")

    # 4b. Check OpenRouter Connection
    try:
        req = urllib.request.Request(
            "https://openrouter.ai",
            headers={'User-Agent': 'Mozilla/5.0'}
        )
        # Use the response as a context manager so the connection is closed.
        with urllib.request.urlopen(req, timeout=5):
            pass
        console.print(" [+] OpenRouter connection: [green]Successful[/green]")
    except Exception as e:
        console.print(f" [-] OpenRouter connection: [red]Failed[/red] ({e})")

    # 5. MCP Server Health
    console.print("\n [bold #42bcf5]Model Context Protocol (MCP) Health:[/bold #42bcf5]")
    mcp_config_path = os.path.join(".utim", "mcp.json")
    if os.path.exists(mcp_config_path):
        try:
            with open(mcp_config_path, "r", encoding="utf-8") as f:
                mcp_data = json.load(f)
            servers = mcp_data.get("mcpServers", {})
            if not servers:
                console.print(" * No MCP servers configured.")
            for name, cfg in servers.items():
                cmd = cfg.get("command", "")
                console.print(f" * [bold]{name}[/bold] (command: {cmd}):")
                # The command counts as found when it is on PATH or is the
                # running interpreter itself.
                if shutil.which(cmd) or cmd == sys.executable:
                    console.print(" [+] Command path: [green]Found[/green]")
                else:
                    console.print(f" [-] Command path: [red]Not Found ('{cmd}')[/red]")
        except Exception as e:
            # Covers unreadable files, invalid JSON and unexpected structure.
            console.print(f" [-] Error reading mcp.json: {e}")
    else:
        console.print(" * No local mcp.json configuration.")

    console.print("\n[bold #42bcf5][+] Diagnostics Complete.[/bold #42bcf5]\n")
|
|
116
|
+
|
|
117
|
+
# Run the diagnostics with a default console when this module is executed as a script.
if __name__ == "__main__":
    run_diagnostics()
|