utim-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
utim_cli/reflection.py ADDED
@@ -0,0 +1,200 @@
1
+ """
2
+ Automated Task Reflection & Experience System — Powered by Hugging Face Vector Embeddings.
3
+
4
+ This module captures learnings, architecture rules, user preferences, and failure corrections
5
+ at the end of agent tasks, indexing them into ChromaDB using Hugging Face model embeddings.
6
+ """
7
+
8
+ import os
9
+ import json
10
+ import time
11
+ import uuid
12
+ import sqlite3
13
+ from typing import Dict, List, Optional
14
+ from datetime import datetime
15
+ import requests
16
+
17
+
18
# JSON file holding the rolling history of per-task reflections; the path is
# relative to the current working directory.
MEMORY_FILE = ".utim_tmp/task_reflections.json"

# Markdown file name, joined onto the project directory, that accumulates the
# conventions, rules and preferences learned across tasks.
CONVENTIONS_FILE = ".utim_conventions.md"
20
+
21
+
22
def _parse_reflection_json(content: str) -> Dict:
    """Return the JSON object embedded in an LLM reply, or {} if the reply is not an object.

    Raises ValueError (json.JSONDecodeError) when the cleaned text is not valid JSON.
    """
    import re

    # Reasoning models may prepend <think>/<thinking> blocks; drop them first.
    content = re.sub(r"<think(?:ing)?>.*?</think(?:ing)?>", "", content, flags=re.DOTALL).strip()
    if content.startswith("```"):
        # Drop the opening fence line (including any language tag) ...
        lines = content.splitlines()[1:]
        # ... and the closing fence, if the reply was not cut off before it.
        if lines and lines[-1].startswith("```"):
            lines = lines[:-1]
        content = "\n".join(lines).strip()
    if content.startswith("json"):
        content = content[4:].strip()

    parsed = json.loads(content)
    # Callers use dict methods on the result, so a bare list/string is useless.
    return parsed if isinstance(parsed, dict) else {}


def extract_learnings(user_message: str, assistant_content: str,
                      tool_results: List[Dict], llm_key: str, elapsed_seconds: int = 0, iterations: int = 0) -> Dict:
    """
    Ask an LLM to distil a completed interaction into short, reusable learnings.

    Args:
        user_message: The task text the user submitted.
        assistant_content: The assistant's final answer; only the first 600
            characters are sent.
        tool_results: Tool call records (dicts with "func_name"/"name" and
            "result"); at most the first 10 are summarised. None is accepted.
        llm_key: Bearer token for the chat-completions endpoint. When empty,
            no request is made.
        elapsed_seconds: Task duration, included in the prompt so the model can
            ground its time-management reflection.
        iterations: Number of agent iterations, included for the same reason.

    Returns:
        The parsed JSON object (keys requested from the model: preferences,
        conventions, rules, corrections, time_reflection, subagent_learnings),
        or {} when there is no key, the request fails, the status is not 200,
        or the reply is not a JSON object. Never raises.
    """
    if not llm_key:
        return {}

    # Prepare context for reflection
    tool_summary = []
    for record in (tool_results or [])[:10]:  # Limit for context
        if not isinstance(record, dict):
            continue
        name = record.get("func_name", "") or record.get("name", "")
        result = str(record.get("result", ""))[:100]
        tool_summary.append(f"- {name}: {result}...")

    tool_text = "\n".join(tool_summary)
    assistant_excerpt = (assistant_content or "")[:600]

    prompt = f"""Analyze this completed interaction and extract ULTRA-CONCISE actionable rules (each strictly under 12 words, no long prose):
1. User preferences (e.g., "always use pytest")
2. Project conventions discovered (naming/structure rules)
3. Architectural rules learned
4. Failure corrections or reasoning lessons learned
5. Sub-agent specific rules (for `project_res`, `plan_project`, `web_search`)
6. Time-management reflection (single rule under 80 chars)

Task:
User: {user_message}

A: {assistant_excerpt}

Tools used:
{tool_text}

Timing: {elapsed_seconds}s elapsed over {iterations} iterations

Return ONLY a JSON object with the following structure:
{{
  "preferences": ["..."],
  "conventions": ["..."],
  "rules": ["..."],
  "corrections": ["..."],
  "time_reflection": "...",
  "subagent_learnings": {{
    "project_res": {{ "rules": ["..."], "experiences": ["..."] }},
    "plan_project": {{ "rules": ["..."], "experiences": ["..."] }},
    "web_search": {{ "rules": ["..."], "experiences": ["..."] }}
  }}
}}"""

    try:
        # Prefer direct API endpoint or router
        api_url = os.environ.get("ROUTER_API_URL", "https://openrouter.ai/api/v1/chat/completions")
        resp = requests.post(
            api_url,
            json={
                "model": "cohere/north-mini-code:free",
                "messages": [
                    {"role": "system", "content": "You are a reflection engine. Extract technical learnings, logical corrections, and time adaptations from completed work."},
                    {"role": "user", "content": prompt}
                ]
            },
            headers={"Authorization": f"Bearer {llm_key}"},
            timeout=15
        )
        if resp.status_code != 200:
            return {}
        content = resp.json()["choices"][0]["message"]["content"]
        return _parse_reflection_json(content)
    except Exception as exc:
        # Reflection is best-effort and must never break the task that just
        # finished, so failures are recorded at debug level and swallowed.
        import logging
        logging.getLogger(__name__).debug("Reflection extraction failed: %s", exc)
        return {}
104
+
105
+
106
def save_learnings(learnings: Dict, project_dir: str = ".", user_message: str = "", assistant_content: str = "", elapsed_seconds: int = 0, iterations: int = 0):
    """
    Persist learnings to the reflections history, the conventions file and the vector store.

    Three independent, best-effort steps:
      1. Append an entry to MEMORY_FILE (only the newest 100 entries are kept).
      2. Append conventions / rules / preferences to
         <project_dir>/CONVENTIONS_FILE.
      3. Store each learning, plus a task summary, through
         utim_cli.vector_memory.store_reflection.

    Args:
        learnings: Dict as returned by extract_learnings; anything that is not
            a dict is treated as empty.
        project_dir: Directory that holds the conventions file.
        user_message: Original task text (first 200 chars go into the history).
        assistant_content: Final assistant answer, used for the task summary.
        elapsed_seconds: Accepted for interface compatibility; not used here.
        iterations: Accepted for interface compatibility; not used here.
    """
    learnings = learnings if isinstance(learnings, dict) else {}
    user_message = user_message or ""

    def _items(key: str) -> List[str]:
        """Return learnings[key] as a list of strings, tolerating malformed LLM output."""
        value = learnings.get(key)
        if isinstance(value, str):
            # A bare string would otherwise be iterated character by character.
            value = [value]
        if not isinstance(value, list):
            return []
        return [str(item) for item in value if item]

    memory_dir = os.path.dirname(MEMORY_FILE)
    if memory_dir:
        os.makedirs(memory_dir, exist_ok=True)

    # 1. Update task reflections history
    tracked_keys = ["conventions", "rules", "preferences", "corrections", "subagent_learnings", "time_reflection"]
    if any(learnings.get(k) for k in tracked_keys):
        reflections = []
        if os.path.exists(MEMORY_FILE):
            try:
                with open(MEMORY_FILE, "r", encoding="utf-8") as f:
                    loaded = json.load(f)
                # A hand-edited or corrupted file may hold something other than a list.
                if isinstance(loaded, list):
                    reflections = loaded
            except (OSError, ValueError):
                reflections = []

        reflections.append({
            "timestamp": datetime.now().isoformat(),
            "user_task": user_message[:200],
            "learnings": learnings
        })
        reflections = reflections[-100:]

        # Serialise before opening the file so a serialisation error cannot
        # leave a truncated history behind.
        payload = json.dumps(reflections, indent=2)
        with open(MEMORY_FILE, "w", encoding="utf-8") as f:
            f.write(payload)

    # 2. Update project conventions file
    conventions_path = os.path.join(project_dir, CONVENTIONS_FILE)
    conventions = _items("conventions")
    rules = _items("rules")
    preferences = _items("preferences")

    sections = []
    if conventions:
        sections.append(f"\n\n## Conventions (added {datetime.now().strftime('%Y-%m-%d %H:%M')})\n")
        sections.extend(f"- {c}\n" for c in conventions)
    if rules:
        sections.append("\n### Architectural Rules\n")
        sections.extend(f"- {r}\n" for r in rules)
    if preferences:
        sections.append("\n### Preferences\n")
        sections.extend(f"- {p}\n" for p in preferences)
    new_section = "".join(sections)

    if new_section:
        try:
            # Append rather than read-then-rewrite: a failed read can then
            # never cause the existing conventions to be overwritten.
            with open(conventions_path, "a", encoding="utf-8") as f:
                f.write(new_section)
        except OSError:
            pass  # best-effort: an unwritable project dir must not fail the task

    # 3. Save to Vector Memory DB using Hugging Face model
    try:
        from utim_cli.vector_memory import store_reflection

        categories = (
            ("preferences", "user_preference"),
            ("rules", "architectural_rule"),
            ("conventions", "project_convention"),
            ("corrections", "failure_correction"),
        )
        for key, category in categories:
            for item in _items(key):
                store_reflection(content=item, category=category, task_prompt=user_message)

        if user_message and assistant_content:
            summary = f"Task: {user_message[:300]}\nResolution: {assistant_content[:400]}"
            store_reflection(content=summary, category="task_experience", task_prompt=user_message)
    except Exception as exc:
        # The vector store is optional (it may be missing or fail to
        # initialise); record the reason instead of failing the task.
        import logging
        logging.getLogger(__name__).debug("Vector reflection store skipped: %s", exc)
185
+
186
+
187
def run_reflection_phase(user_message: str, assistant_content: str,
                         tool_results: List[Dict], elapsed_seconds: int = 0, iterations: int = 0) -> Dict:
    """
    Run the post-task reflection step and return whatever was learned.

    The API key is read from OPENROUTER_API_KEY, then UTIM_API_KEY. Without a
    real key the LLM extraction is skipped, but save_learnings is still called
    with an empty dict.
    """
    placeholder = "mock_key"
    api_key = os.getenv("OPENROUTER_API_KEY") or os.getenv("UTIM_API_KEY") or placeholder

    if api_key == placeholder:
        # No usable credentials: nothing to extract.
        learnings = {}
    else:
        learnings = extract_learnings(
            user_message, assistant_content, tool_results, api_key, elapsed_seconds, iterations
        )

    save_learnings(
        learnings,
        user_message=user_message,
        assistant_content=assistant_content,
        elapsed_seconds=elapsed_seconds,
        iterations=iterations,
    )
    return learnings
utim_cli/report.py ADDED
@@ -0,0 +1,100 @@
1
+ import os
2
+ import re
3
+ import sys
4
+ import shutil
5
+ import zipfile
6
+ import io
7
+ from utim_cli.logger import redact_text, LOG_FILE
8
+ from utim_cli.doctor import run_diagnostics
9
+ from rich.console import Console
10
+
11
+ # ── Unicode → ASCII symbol map ────────────────────────────────────────────────
12
+ _UNICODE_TO_ASCII: list = [
13
+ # Rich / doctor symbols
14
+ ("\u2713", "[OK]"), # ✓
15
+ ("\u2717", "[FAIL]"), # ✗
16
+ ("\u2022", "-"), # •
17
+ ("\u2b21", "#"), # ⬡
18
+ ("\u2026", "..."), # …
19
+ ("\u2192", "->"), # →
20
+ ("\u2714", "[OK]"), # ✔
21
+ ("\u2718", "[FAIL]"), # ✘
22
+ ("\u25b6", ">"), # ▶
23
+ ("\u25cf", "*"), # ●
24
+ # Emoji used in doctor / report
25
+ ("\U0001f4c4", "[file]"),
26
+ ("\u270f", "[edit]"),
27
+ ("\U0001f5d1", "[del]"),
28
+ ("\U0001f4e6", "[pkg]"),
29
+ ("\u26a1", "[run]"),
30
+ ("\U0001f4c1", "[dir]"),
31
+ ("\U0001f50d", "[search]"),
32
+ ("\U0001f9e0", "[ai]"),
33
+ ]
34
+
35
+ _ANSI_RE = re.compile(r"\x1b\[[0-9;]*[mKHFABCDJsu]")
36
+
37
+
38
+ def _to_ascii(text: str) -> str:
39
+ """Strip ANSI escape codes and replace Unicode symbols with ASCII equivalents.
40
+
41
+ Any remaining non-ASCII character (e.g. user-supplied filenames or log
42
+ entries) is replaced with '?' so the output is always 7-bit clean and safe
43
+ to print on any Windows code page.
44
+ """
45
+ # 1. Remove ANSI colour/cursor escape sequences
46
+ text = _ANSI_RE.sub("", text)
47
+ # 2. Map known symbols to ASCII stand-ins
48
+ for uni, asc in _UNICODE_TO_ASCII:
49
+ text = text.replace(uni, asc)
50
+ # 3. Encode to ASCII, replacing anything still non-ASCII
51
+ return text.encode("ascii", errors="replace").decode("ascii")
52
+
53
+
54
def create_report_bundle() -> str:
    """Create a support report zip bundle with sensitive data redacted.

    The bundle holds a single entry, support_report.txt, containing the
    diagnostics output followed by the debug log. Both are passed through
    redact_text and then _to_ascii, so the text is ASCII-only and can be
    opened and printed on any Windows console regardless of the active code
    page. The report is assembled in memory and written directly into the
    zip, so no plain-text copy is left on disk if something fails midway.

    Returns:
        Path of the written zip file (.utim_tmp/report_bundle.zip).

    Raises:
        RuntimeError: if diagnostics, log reading or zip creation fails; the
            original exception is attached as __cause__.
    """
    report_dir = ".utim_tmp"
    os.makedirs(report_dir, exist_ok=True)
    bundle_zip_path = os.path.join(report_dir, "report_bundle.zip")

    try:
        # Capture Rich diagnostics into a StringIO buffer (no terminal needed)
        buf = io.StringIO()
        run_diagnostics(Console(file=buf, force_terminal=False, width=100))
        diagnostics_text = buf.getvalue()

        # The timestamp is the debug log's modification time (epoch seconds).
        ts = os.path.getmtime(LOG_FILE) if os.path.exists(LOG_FILE) else "unknown"

        parts = [
            "=== UTIM SUPPORT REPORT ===\n",
            f"Timestamp: {ts}\n\n",
            "=== DIAGNOSTICS ===\n",
            # Redact first, then ASCII-ify so redaction markers stay readable
            _to_ascii(redact_text(diagnostics_text)) + "\n\n",
            "=== REDACTED DEBUG LOG ===\n",
        ]

        if os.path.exists(LOG_FILE):
            # errors="replace": one undecodable byte in the log must not abort
            # the whole bundle; _to_ascii turns the replacement char into '?'.
            with open(LOG_FILE, "r", encoding="utf-8", errors="replace") as lf:
                parts.append(_to_ascii(redact_text(lf.read())))
        else:
            parts.append("(no debug log found)\n")

        with zipfile.ZipFile(bundle_zip_path, "w", zipfile.ZIP_DEFLATED) as z:
            z.writestr("support_report.txt", "".join(parts))

        return bundle_zip_path
    except Exception as e:
        raise RuntimeError(f"Failed to create support bundle: {e}") from e
@@ -0,0 +1,229 @@
1
+ """
2
+ Scrapy-based web scraping enhancement for the web_search tool.
3
+ Provides robust, async-capable scraping with proper HTTP semantics.
4
+ """
5
+
6
+ import asyncio
7
+ import os
8
+ from typing import Dict, List, Optional
9
+ from dataclasses import dataclass
10
+
11
+
12
@dataclass
class ScrapedContent:
    """Record describing the outcome of scraping a single page.

    Only ``error`` is optional; it defaults to None.
    """
    # Three required string fields, in positional order.
    url: str
    title: str
    text: str
    # Optional error description; None unless the creator sets it.
    error: Optional[str] = None
19
+
20
+
21
async def scrape_urls_with_playwright(urls: List[str], use_js: bool = False, timeout: int = 10) -> Dict[str, str]:
    """
    Scrape URLs, rendering JavaScript with Playwright when requested.

    This function only dispatches. It deliberately imports nothing from
    Scrapy itself, so the Playwright path keeps working when Scrapy is not
    installed.

    Args:
        urls: List of URLs to scrape
        use_js: Whether to use Playwright for JavaScript-heavy sites
        timeout: Request timeout in seconds

    Returns:
        Dictionary mapping URL to scraped text content
    """
    if use_js and urls:
        # Use Playwright for JS rendering
        return await _scrape_with_playwright(urls, timeout)
    # Use the static (no JavaScript) scraper
    return await _scrape_static(urls, timeout)
46
+
47
+
48
async def _scrape_with_playwright(urls: List[str], timeout: int) -> Dict[str, str]:
    """Scrape URLs using Playwright for JavaScript rendering.

    Each URL is loaded in headless Chromium until the network is idle, then
    reduced to visible text (at most 6000 characters). A failure on one URL is
    recorded as "Error: ..." for that URL and does not stop the others.

    Falls back to static scraping when Playwright or BeautifulSoup is not
    installed. A failure to launch the browser itself propagates to the caller.
    """
    try:
        from playwright.async_api import async_playwright
        from bs4 import BeautifulSoup
    except ImportError:
        # Fallback to static scraping if Playwright (or bs4) is not available
        return await _scrape_static(urls, timeout)

    results = {}

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            context = await browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36"
            )

            for url in urls:
                page = None
                try:
                    page = await context.new_page()
                    await page.goto(url, wait_until="networkidle", timeout=timeout * 1000)
                    content = await page.content()

                    # Extract text content
                    soup = BeautifulSoup(content, 'html.parser')

                    # Remove script, style and page-chrome elements
                    for element in soup(["script", "style", "nav", "header", "footer", "aside"]):
                        element.decompose()

                    text = soup.get_text(separator='\n', strip=True)
                    # Clean up whitespace
                    lines = [line.strip() for line in text.splitlines() if line.strip()]
                    results[url] = '\n'.join(lines)[:6000]
                except Exception as e:
                    results[url] = f"Error: {str(e)}"
                finally:
                    # Close the page even when navigation failed, so failed
                    # URLs do not accumulate open tabs.
                    if page is not None:
                        try:
                            await page.close()
                        except Exception:
                            # A close failure must not overwrite the result
                            # already stored; browser.close() reclaims the page.
                            pass
        finally:
            # Closing the browser also closes its contexts and pages.
            await browser.close()

    return results
91
+
92
+
93
+ async def _scrape_static(urls: List[str], timeout: int) -> Dict[str, str]:
94
+ """Scrape URLs using standard Scrapy (no JavaScript)."""
95
+ import shutil
96
+
97
+ # Check if scrapy-playwright is available
98
+ use_playwright = shutil.which('playwright') is not None
99
+
100
+ # Create a temporary Scrapy project settings
101
+ from scrapy.settings import Settings
102
+
103
+ settings = Settings()
104
+ settings.set('USER_AGENT', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36')
105
+ settings.set('ROBOTSTXT_OBEY', True)
106
+ settings.set('DOWNLOAD_TIMEOUT', timeout)
107
+ settings.set('CONCURRENT_REQUESTS', min(len(urls), 8))
108
+ settings.set('CONCURRENT_REQUESTS_PER_DOMAIN', 2)
109
+ settings.set('RETRY_TIMES', 2)
110
+ settings.set('LOG_LEVEL', 'ERROR')
111
+
112
+ # Configure Playwright if available
113
+ if use_playwright:
114
+ settings.set('PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT', timeout * 1000)
115
+
116
+ # Import here to avoid issues if not installed
117
+ try:
118
+ from scrapy.crawler import CrawlerRunner
119
+ from twisted.internet import asyncioreactor
120
+ asyncioreactor.install()
121
+ except ImportError:
122
+ # Fall back to simple requests if Scrapy has issues
123
+ return _fallback_requests_scraping(urls, timeout)
124
+
125
+ # Create inline spider
126
+ from scrapy import Spider
127
+ from itemadapter import ItemAdapter
128
+
129
+ class InlineSpider(Spider):
130
+ name = 'inline_spider'
131
+
132
+ def __init__(self, *args, **kwargs):
133
+ super().__init__(*args, **kwargs)
134
+ self.results = {}
135
+ self.urls_to_scrape = urls
136
+
137
+ def start_requests(self):
138
+ for url in self.urls_to_scrape:
139
+ yield Request(url=url, callback=self.parse, errback=self.errback)
140
+
141
+ def parse(self, response):
142
+ # Extract text with BeautifulSoup
143
+ try:
144
+ from bs4 import BeautifulSoup
145
+ soup = BeautifulSoup(response.text, 'html.parser')
146
+
147
+ # Remove unwanted elements
148
+ for elem in soup(["script", "style", "nav", "header", "footer", "aside"]):
149
+ elem.decompose()
150
+
151
+ text = soup.get_text(separator='\n', strip=True)
152
+ lines = [line.strip() for line in text.splitlines() if line.strip()]
153
+ self.results[response.url] = '\n'.join(lines)[:6000]
154
+ except Exception as e:
155
+ self.results[response.url] = f"Parse error: {str(e)}"
156
+
157
+ yield {'url': response.url}
158
+
159
+ def errback(self, failure):
160
+ self.results[failure.request.url] = f"Error: {str(failure.value)}"
161
+
162
+ # Run the spider
163
+ from twisted.internet import reactor
164
+ from scrapy.crawler import CrawlerRunner
165
+
166
+ runner = CrawlerRunner(settings)
167
+ spider = InlineSpider()
168
+
169
+ try:
170
+ d = runner.crawl(spider)
171
+ d.addBoth(lambda _: reactor.stop())
172
+ reactor.run()
173
+ except Exception:
174
+ # Fallback if reactor issues
175
+ return _fallback_requests_scraping(urls, timeout)
176
+
177
+ return spider.results or _fallback_requests_scraping(urls, timeout)
178
+
179
+
180
+ def _fallback_requests_scraping(urls: List[str], timeout: int) -> Dict[str, str]:
181
+ """Fallback to requests-based scraping if Scrapy fails."""
182
+ import requests
183
+ import re
184
+ import html as html_lib
185
+
186
+ results = {}
187
+
188
+ for url in urls:
189
+ try:
190
+ headers = {
191
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
192
+ }
193
+ r = requests.get(url, headers=headers, timeout=timeout)
194
+ if r.status_code == 200:
195
+ html_content = r.text
196
+ # Remove scripts, styles
197
+ html_content = re.sub(r'<(script|style).*?>.*?</\1>', '', html_content, flags=re.DOTALL | re.IGNORECASE)
198
+ # Remove html tags
199
+ text = re.sub(r'<.*?>', ' ', html_content)
200
+ text = html_lib.unescape(text)
201
+ # Format whitespace
202
+ lines = [l.strip() for l in text.splitlines() if l.strip()]
203
+ results[url] = '\n'.join(lines)[:6000]
204
+ except Exception as e:
205
+ results[url] = f"Error: {str(e)}"
206
+
207
+ return results
208
+
209
+
210
def enhanced_scrape_urls(urls: List[str], use_js: bool = False, timeout: int = 10) -> Dict[str, str]:
    """
    Synchronous wrapper for scrape_urls_with_playwright.

    This is the main entry point used by web_search.

    When no event loop is running in the calling thread, the coroutine is
    driven with asyncio.run(). When a loop is already running (for example
    when called from async code or a notebook), asyncio.run() would raise, so
    the coroutine runs on its own loop in a helper thread while this call
    blocks for the result. Any failure falls back to the plain requests-based
    scraper.

    Args:
        urls: List of URLs to scrape
        use_js: Whether to use Playwright for JavaScript-heavy sites
        timeout: Request timeout in seconds

    Returns:
        Dictionary mapping URL to scraped text content
    """
    try:
        asyncio.get_running_loop()
        loop_running = True
    except RuntimeError:
        loop_running = False

    try:
        if not loop_running:
            return asyncio.run(scrape_urls_with_playwright(urls, use_js, timeout))

        from concurrent.futures import ThreadPoolExecutor

        def _run_in_fresh_loop() -> Dict[str, str]:
            # Create the coroutine inside the worker thread so it is always
            # awaited by the loop that asyncio.run() creates there.
            return asyncio.run(scrape_urls_with_playwright(urls, use_js, timeout))

        with ThreadPoolExecutor(max_workers=1) as pool:
            return pool.submit(_run_in_fresh_loop).result()
    except Exception:
        # Fallback to sync requests
        return _fallback_requests_scraping(urls, timeout)