utim-cli 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- utim_cli/__init__.py +40 -0
- utim_cli/agent.py +359 -0
- utim_cli/auth.py +208 -0
- utim_cli/backup.py +101 -0
- utim_cli/billing.py +40 -0
- utim_cli/blender_agent.py +1018 -0
- utim_cli/bootstrap.py +324 -0
- utim_cli/client_utils.py +135 -0
- utim_cli/config.py +194 -0
- utim_cli/context_pruner.py +504 -0
- utim_cli/doctor.py +118 -0
- utim_cli/knowledge_graph.py +462 -0
- utim_cli/logger.py +121 -0
- utim_cli/mcp_clean_wrapper.py +55 -0
- utim_cli/mcp_client.py +198 -0
- utim_cli/mcp_registry.json +1102 -0
- utim_cli/orchestrator.py +3209 -0
- utim_cli/reflection.py +200 -0
- utim_cli/report.py +100 -0
- utim_cli/scrapy_search.py +229 -0
- utim_cli/share.py +320 -0
- utim_cli/share_tui.py +554 -0
- utim_cli/situational_scoring.py +269 -0
- utim_cli/state.py +15 -0
- utim_cli/tools.py +3381 -0
- utim_cli/utim.py +4051 -0
- utim_cli/vector_memory.py +629 -0
- utim_cli/workspace.py +33 -0
- utim_cli-1.0.0.dist-info/METADATA +134 -0
- utim_cli-1.0.0.dist-info/RECORD +34 -0
- utim_cli-1.0.0.dist-info/WHEEL +5 -0
- utim_cli-1.0.0.dist-info/entry_points.txt +2 -0
- utim_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
- utim_cli-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,462 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Codebase Knowledge Graph — AST-based dependency and call graph analysis using Tree-sitter.
|
|
3
|
+
|
|
4
|
+
This module parses the codebase into a knowledge graph of imports, function calls,
|
|
5
|
+
and class relationships, enabling blast-radius analysis for code changes.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
import json
|
|
10
|
+
from typing import Dict, List, Optional, Set, Tuple
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
|
|
13
|
+
# Tree-sitter imports
|
|
14
|
+
try:
|
|
15
|
+
from tree_sitter import Language, Parser
|
|
16
|
+
import tree_sitter_python as ts_python
|
|
17
|
+
import tree_sitter_javascript as ts_javascript
|
|
18
|
+
import tree_sitter_typescript as ts_typescript
|
|
19
|
+
TREE_SITTER_AVAILABLE = True
|
|
20
|
+
except ImportError:
|
|
21
|
+
TREE_SITTER_AVAILABLE = False
|
|
22
|
+
|
|
23
|
+
# Graph storage
|
|
24
|
+
GRAPH_FILE = ".utim_tmp/knowledge_graph.json"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class CodeEntity:
    """Record describing one named element found in a source file.

    The class is a plain data holder: it defines fields and defaults only.
    """

    # Key under which the entity is stored in the graph.
    id: str
    # Kind of element: function, class, method, import, variable.
    type: str
    name: str
    filepath: str
    # Line range of the element; both default to 0 when not supplied.
    line_start: int = 0
    line_end: int = 0
    # Either "public" or "private".
    visibility: str = "public"
    # Free-form extras; each instance gets its own fresh dict.
    metadata: Dict = field(default_factory=dict)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass
class CodeRelation:
    """Directed link between two entities, identified by their ids."""

    # Id of the entity the relation starts from.
    from_id: str
    # Id of the entity the relation points to.
    to_id: str
    # Kind of link: calls, imports, inherits, implements, references.
    relation_type: str
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class KnowledgeGraph:
|
|
49
|
+
"""
|
|
50
|
+
Builds and maintains a knowledge graph from codebase AST analysis.
|
|
51
|
+
"""
|
|
52
|
+
|
|
53
|
+
def __init__(self):
    """Start with empty lookup tables, then register the available parsers."""
    # Entity id -> entity record.
    self.entities: Dict[str, CodeEntity] = dict()
    self.relations: List[CodeRelation] = list()
    # File path -> ids of the entities found in that file.
    self.file_entities: Dict[str, List[str]] = dict()
    # Entity name -> ids of the entities carrying that name.
    self.reference_index: Dict[str, List[str]] = dict()

    # File extension -> tree-sitter parser; filled in by _init_parsers().
    self.parsers: Dict[str, Parser] = dict()
    self._init_parsers()
|
|
62
|
+
|
|
63
|
+
def _init_parsers(self):
    """Register a tree-sitter parser for each supported file extension.

    Does nothing when the tree-sitter imports failed. Each grammar is loaded
    independently; a grammar that cannot be loaded is skipped so the others
    still register.
    """
    if not TREE_SITTER_AVAILABLE:
        return

    def typescript_grammar():
        # The tree_sitter_typescript binding publishes language_typescript()
        # and language_tsx(). The previous call to language() raised an
        # AttributeError that was swallowed, leaving ".ts" without a parser.
        # Keep language() as a fallback for builds that do expose it.
        loader = getattr(ts_typescript, "language_typescript", None)
        if loader is None:
            loader = ts_typescript.language
        return loader()

    # Loaders are invoked inside the try below so that a missing attribute
    # on one binding cannot prevent the remaining grammars from loading.
    grammar_loaders = (
        (".py", lambda: ts_python.language()),
        (".js", lambda: ts_javascript.language()),
        (".ts", typescript_grammar),
        (".tsx", lambda: ts_typescript.language_tsx()),
    )

    for extension, load_grammar in grammar_loaders:
        try:
            self.parsers[extension] = Parser(Language(load_grammar()))
        except Exception:
            # Best effort: files with this extension simply yield no entities.
            continue
|
|
91
|
+
|
|
92
|
+
def _get_parser(self, filepath: str) -> Optional[Parser]:
|
|
93
|
+
"""Get appropriate parser for file extension."""
|
|
94
|
+
ext = os.path.splitext(filepath)[1].lower()
|
|
95
|
+
return self.parsers.get(ext)
|
|
96
|
+
|
|
97
|
+
def _make_entity_id(self, filepath: str, name: str, line: int = 0) -> str:
|
|
98
|
+
"""Generate unique entity ID."""
|
|
99
|
+
return f"{filepath}:{name}:{line}"
|
|
100
|
+
|
|
101
|
+
def parse_python_file(self, filepath: str, content: bytes) -> List[CodeEntity]:
    """Extract function, method, class and import entities from Python source.

    Args:
        filepath: Path recorded on every entity and used to build its id.
        content: Raw bytes of the file, passed to the ".py" parser.

    Returns:
        Entities in the order they are encountered. The list is empty when
        no ".py" parser is registered or when parsing raises.
    """
    entities: List[CodeEntity] = []

    parser = self.parsers.get(".py")
    if parser is None:
        return entities

    try:
        root = parser.parse(content).root_node
    except Exception:
        return entities

    def identifier_of(node):
        """Return the text of the first direct `identifier` child, or None."""
        for child in node.children:
            if child.type == "identifier":
                return child.text.decode()
        return None

    def walk(node, in_class: str = None):
        kind = node.type

        if kind == "function_definition":
            name = identifier_of(node)
            if name is not None:
                first_line = node.start_point[0] + 1
                entities.append(CodeEntity(
                    id=self._make_entity_id(filepath, name, first_line),
                    type="method" if in_class else "function",
                    name=name,
                    filepath=filepath,
                    line_start=first_line,
                    line_end=node.end_point[0] + 1,
                    metadata={"class": in_class} if in_class else {},
                ))

        elif kind == "class_definition":
            name = identifier_of(node)
            if name is not None:
                first_line = node.start_point[0] + 1
                entities.append(CodeEntity(
                    id=self._make_entity_id(filepath, name, first_line),
                    type="class",
                    name=name,
                    filepath=filepath,
                    line_start=first_line,
                    line_end=node.end_point[0] + 1,
                ))

                for child in node.children:
                    if child.type == "block":
                        walk(child, in_class=name)
                # Stop here: the generic recursion below would visit the same
                # body a second time with the outer context, re-adding every
                # method under the same id as a plain "function".
                return

        elif kind in ("import_statement", "import_from_statement"):
            # "module_name" is a field name in the grammar, not a node type,
            # so it has to be looked up as a field; comparing child.type
            # against it never matched and left the module empty.
            module_node = node.child_by_field_name("module_name")
            module = module_node.text.decode() if module_node is not None else ""

            names = [
                child.text.decode()
                for child in node.children
                if child.type in ("dotted_name", "identifier")
            ]
            first_line = node.start_point[0] + 1
            for name in names:
                entities.append(CodeEntity(
                    id=self._make_entity_id(filepath, f"import:{name}", first_line),
                    type="import",
                    name=name,
                    filepath=filepath,
                    line_start=first_line,
                    line_end=node.end_point[0] + 1,
                    metadata={"module": module},
                ))

        # Generic descent, keeping the current class context.
        for child in node.children:
            walk(child, in_class)

    walk(root)
    return entities
|
|
192
|
+
|
|
193
|
+
def parse_javascript_file(self, filepath: str, content: bytes) -> List[CodeEntity]:
    """Extract function, method and class entities from JavaScript/TypeScript.

    The parser is chosen by the file's extension. Unnamed functions and
    classes are recorded under the name "anonymous".

    Args:
        filepath: Path recorded on every entity and used to build its id.
        content: Raw bytes of the file, passed to the selected parser.

    Returns:
        Entities in the order they are encountered. The list is empty when
        no parser is registered for the extension or when parsing raises.
    """
    entities: List[CodeEntity] = []

    extension = os.path.splitext(filepath)[1].lower()
    parser = self.parsers.get(extension)
    if parser is None:
        return entities

    try:
        root = parser.parse(content).root_node
    except Exception:
        return entities

    # Node kinds that carry a name, mapped to the child types holding it.
    # Arrow functions have no name child and therefore stay "anonymous".
    # Class methods are `method_definition` nodes named by a
    # `property_identifier`; they were previously never recorded.
    function_name_kinds = {
        "function_declaration": ("identifier",),
        "function_expression": ("identifier",),
        "arrow_function": (),
        "method_definition": ("property_identifier",),
    }
    # The TypeScript grammar names classes with `type_identifier`.
    class_name_kinds = ("identifier", "type_identifier")

    def child_text(node, wanted):
        """Return the text of the first child whose type is in `wanted`."""
        for child in node.children:
            if child.type in wanted:
                return child.text.decode()
        return "anonymous"

    def record(node, entity_type, name):
        first_line = node.start_point[0] + 1
        entities.append(CodeEntity(
            id=self._make_entity_id(filepath, name, first_line),
            type=entity_type,
            name=name,
            filepath=filepath,
            line_start=first_line,
            line_end=node.end_point[0] + 1,
        ))

    def walk(node, in_class: str = None):
        kind = node.type

        if kind in function_name_kinds:
            name = child_text(node, function_name_kinds[kind])
            record(node, "method" if in_class else "function", name)

        elif kind == "class_declaration":
            name = child_text(node, class_name_kinds)
            record(node, "class", name)

            for child in node.children:
                if child.type in ("class_body", "block"):
                    walk(child, in_class=name)
            # Stop here: falling through to the generic recursion would
            # walk the class body a second time without the class context
            # and re-add its members as plain functions.
            return

        # Generic descent, keeping the current class context.
        for child in node.children:
            walk(child, in_class)

    walk(root)
    return entities
|
|
256
|
+
|
|
257
|
+
def build_graph(self, paths: List[str] = None, exclude_dirs: Set[str] = None) -> int:
    """Rebuild the graph from source files and save it to disk.

    All lookup tables are emptied first, so the result reflects only the
    files parsed by this call.

    Args:
        paths: Specific files to parse. When empty or None, the current
            directory is walked for .py, .js, .ts and .tsx files.
        exclude_dirs: Directory names to skip while walking. Defaults to a
            built-in set of VCS, dependency, build and cache directories.

    Returns:
        Number of entities in the graph after the rebuild.
    """
    if exclude_dirs is None:
        exclude_dirs = {".git", "node_modules", "dist", "build", "__pycache__", ".venv", "venv", ".utim_tmp"}

    self.entities.clear()
    self.relations.clear()
    self.file_entities.clear()
    # The name index has to be reset with the other tables; otherwise each
    # rebuild appends to the previous one and keeps ids of removed entities.
    self.reference_index.clear()

    source_extensions = (".py", ".js", ".ts", ".tsx")

    if paths:
        files_to_parse = [p for p in paths if os.path.exists(p)]
    else:
        files_to_parse = []
        for root, dirs, files in os.walk("."):
            # Prune in place so os.walk does not descend into excluded dirs.
            dirs[:] = [d for d in dirs if d not in exclude_dirs]
            files_to_parse.extend(
                os.path.join(root, f)
                for f in files
                if os.path.splitext(f)[1].lower() in source_extensions
            )

    for filepath in files_to_parse:
        try:
            with open(filepath, "rb") as handle:
                content = handle.read()

            ext = os.path.splitext(filepath)[1].lower()
            if ext == ".py":
                parsed = self.parse_python_file(filepath, content)
            elif ext in (".js", ".ts", ".tsx"):
                parsed = self.parse_javascript_file(filepath, content)
            else:
                parsed = []
        except Exception:
            # Best effort: an unreadable or unparsable file is skipped.
            continue

        for entity in parsed:
            self.entities[entity.id] = entity
            self.file_entities.setdefault(filepath, []).append(entity.id)
            self.reference_index.setdefault(entity.name, []).append(entity.id)

    self._save_graph()
    return len(self.entities)
|
|
318
|
+
|
|
319
|
+
def find_dependents(self, entity_name: str, filepath: str = None) -> List[Dict]:
|
|
320
|
+
"""
|
|
321
|
+
Find all files that depend on a given entity (call, import, etc.).
|
|
322
|
+
|
|
323
|
+
Args:
|
|
324
|
+
entity_name: Name of the function/class to find callers for
|
|
325
|
+
filepath: Optional specific file to search in
|
|
326
|
+
|
|
327
|
+
Returns:
|
|
328
|
+
List of dependent file paths with relationship info.
|
|
329
|
+
"""
|
|
330
|
+
dependents = []
|
|
331
|
+
|
|
332
|
+
# Find the entity
|
|
333
|
+
matching_ids = []
|
|
334
|
+
for eid, entity in self.entities.items():
|
|
335
|
+
if entity.name == entity_name:
|
|
336
|
+
if filepath is None or entity.filepath == filepath:
|
|
337
|
+
matching_ids.append(eid)
|
|
338
|
+
|
|
339
|
+
# For each matching entity, find references
|
|
340
|
+
for target_id in matching_ids:
|
|
341
|
+
target_entity = self.entities.get(target_id)
|
|
342
|
+
if not target_entity:
|
|
343
|
+
continue
|
|
344
|
+
|
|
345
|
+
# Check for functions/methods that might call this
|
|
346
|
+
for eid, entity in self.entities.items():
|
|
347
|
+
if entity.type in ("function", "method"):
|
|
348
|
+
# Simple heuristic: same project, different file
|
|
349
|
+
if entity.filepath != target_entity.filepath:
|
|
350
|
+
dependents.append({
|
|
351
|
+
"filepath": entity.filepath,
|
|
352
|
+
"line": entity.line_start,
|
|
353
|
+
"type": "potential_caller",
|
|
354
|
+
"entity": entity.name
|
|
355
|
+
})
|
|
356
|
+
|
|
357
|
+
return list(set(d.get("filepath") for d in dependents))
|
|
358
|
+
|
|
359
|
+
def get_blast_radius(self, filepath: str) -> List[str]:
|
|
360
|
+
"""
|
|
361
|
+
Estimate files that might be affected by changes to a file.
|
|
362
|
+
|
|
363
|
+
Args:
|
|
364
|
+
filepath: File to analyze
|
|
365
|
+
|
|
366
|
+
Returns:
|
|
367
|
+
List of potentially affected file paths.
|
|
368
|
+
"""
|
|
369
|
+
affected = set()
|
|
370
|
+
|
|
371
|
+
# Get entities in the file
|
|
372
|
+
file_entity_ids = self.file_entities.get(filepath, [])
|
|
373
|
+
|
|
374
|
+
for eid in file_entity_ids:
|
|
375
|
+
entity = self.entities.get(eid)
|
|
376
|
+
if not entity:
|
|
377
|
+
continue
|
|
378
|
+
|
|
379
|
+
# Find dependents for each entity
|
|
380
|
+
deps = self.find_dependents(entity.name, entity.filepath)
|
|
381
|
+
affected.update(deps)
|
|
382
|
+
|
|
383
|
+
return list(affected)
|
|
384
|
+
|
|
385
|
+
def get_stats(self) -> Dict:
|
|
386
|
+
"""Get knowledge graph statistics."""
|
|
387
|
+
return {
|
|
388
|
+
"total_entities": len(self.entities),
|
|
389
|
+
"total_files": len(self.file_entities),
|
|
390
|
+
"entity_types": {
|
|
391
|
+
t: sum(1 for e in self.entities.values() if e.type == t)
|
|
392
|
+
for t in ["function", "method", "class", "import"]
|
|
393
|
+
}
|
|
394
|
+
}
|
|
395
|
+
|
|
396
|
+
def _save_graph(self):
    """Write the entities and the per-file index to GRAPH_FILE as JSON.

    Only id, type, name, filepath and the line range of each entity are
    written; visibility and metadata are not part of the saved file.
    """
    os.makedirs(".utim_tmp", exist_ok=True)

    serialised = []
    for entity in self.entities.values():
        serialised.append({
            "id": entity.id,
            "type": entity.type,
            "name": entity.name,
            "filepath": entity.filepath,
            "line_start": entity.line_start,
            "line_end": entity.line_end,
        })

    payload = {"entities": serialised, "file_entities": self.file_entities}

    with open(GRAPH_FILE, "w", encoding="utf-8") as handle:
        json.dump(payload, handle, indent=2)
|
|
417
|
+
|
|
418
|
+
def load_graph(self):
    """Load a previously saved graph from GRAPH_FILE.

    The file is decoded completely before the instance is touched, so a
    missing key or malformed file cannot leave the graph partly populated.
    Loaded entities are added to those already present, the per-file index
    is replaced by the saved one, and the name index is rebuilt to match.

    Returns:
        True when the file existed and was loaded, False otherwise.
    """
    if not os.path.exists(GRAPH_FILE):
        return False

    try:
        with open(GRAPH_FILE, "r", encoding="utf-8") as handle:
            data = json.load(handle)

        loaded = {}
        for record in data.get("entities", []):
            entity = CodeEntity(
                id=record["id"],
                type=record["type"],
                name=record["name"],
                filepath=record["filepath"],
                line_start=record.get("line_start", 0),
                line_end=record.get("line_end", 0),
            )
            loaded[entity.id] = entity

        file_entities = data.get("file_entities", {})
    except Exception:
        # Best effort: an unreadable or malformed file counts as "no graph".
        return False

    self.entities.update(loaded)
    self.file_entities = file_entities

    # The saved file does not contain the name index, so derive it from the
    # entities now in memory; previously it stayed empty after a load.
    name_index = {}
    for entity_id, entity in self.entities.items():
        name_index.setdefault(entity.name, []).append(entity_id)
    self.reference_index = name_index

    return True
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
# Global instance
|
|
445
|
+
_knowledge_graph: Optional[KnowledgeGraph] = None
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
def get_knowledge_graph() -> Optional[KnowledgeGraph]:
    """Return the shared graph, creating and loading it on first use.

    Returns:
        The module-wide instance, or None when no instance exists yet and
        tree-sitter is not available.
    """
    global _knowledge_graph

    already_resolved = _knowledge_graph is not None or not TREE_SITTER_AVAILABLE
    if already_resolved:
        return _knowledge_graph

    # Publish the instance first, then try to populate it from disk.
    _knowledge_graph = KnowledgeGraph()
    _knowledge_graph.load_graph()
    return _knowledge_graph
|
|
455
|
+
|
|
456
|
+
|
|
457
|
+
def build_knowledge_graph(paths: List[str] = None) -> int:
    """Rebuild the shared graph and return its entity count.

    Returns 0 when no graph instance is available.
    """
    graph = get_knowledge_graph()
    return graph.build_graph(paths) if graph else 0
|
utim_cli/logger.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import re
|
|
3
|
+
import time
|
|
4
|
+
import traceback
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
# Global logging path
LOG_FILE = os.path.join(".utim", "utim_debug.log")

# Sensitive word list for log redaction
SENSITIVE_KEYWORDS = {
    "girlfriend", "gf", "wife", "spouse", "partner", "relationship",
    "secret", "password", "code", "private", "personal", "anushka", "puchkuli"
}

# Whole-word, case-insensitive match for any of the sensitive keywords
_REDACT_RE = re.compile(
    r"\b(" + "|".join(re.escape(w) for w in SENSITIVE_KEYWORDS) + r")\b",
    re.IGNORECASE
)

# API key patterns (OpenAI, Anthropic, OpenRouter, etc.)
_API_KEY_RE = re.compile(
    r"\b(?:sk-|sk-or-v1-|xai-|ai-)[a-zA-Z0-9\-]{20,}\b",
    re.IGNORECASE
)

# Bearer token patterns
_BEARER_TOKEN_RE = re.compile(
    r"\bbearer\s+[a-zA-Z0-9\-._~+/]+=*\b",
    re.IGNORECASE
)

# Email address patterns
_EMAIL_RE = re.compile(
    r"\b[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+\b",
    re.IGNORECASE
)


def redact_text(text: str) -> str:
    """Redact secrets and personal details from a log message.

    Replaces, in this order: values of secret-looking environment
    variables, API keys, bearer tokens, email addresses, the current
    working directory, the system username, and the sensitive keywords.

    Args:
        text: Message to sanitise. Falsy values are returned unchanged.

    Returns:
        The message with every match replaced by a bracketed placeholder.
    """
    if not text:
        return text

    # 1. Redact values of secret-looking environment variables. This has to
    #    run before any other substitution: a later step that rewrites part
    #    of a secret (a keyword or the username inside it) would stop the
    #    exact-value match and leave the remainder in the log. Longest
    #    values go first so a secret that contains a shorter one is replaced
    #    whole.
    secret_markers = ("KEY", "SECRET", "PASSWORD", "TOKEN", "AUTH")
    env_secrets = [
        (name, value)
        for name, value in os.environ.items()
        if value and len(value) > 4
        and any(marker in name.upper() for marker in secret_markers)
    ]
    env_secrets.sort(key=lambda item: len(item[1]), reverse=True)
    for name, value in env_secrets:
        text = text.replace(value, f"[REDACTED_{name}]")

    # 2. Redact API keys and bearer tokens
    text = _API_KEY_RE.sub("[REDACTED_API_KEY]", text)
    text = _BEARER_TOKEN_RE.sub("[REDACTED_TOKEN]", text)

    # 3. Redact emails
    text = _EMAIL_RE.sub("[REDACTED_EMAIL]", text)

    # 4. Redact current working directory
    try:
        cwd = os.getcwd()
        if cwd and len(cwd) > 3:
            text = text.replace(cwd, "[WORKSPACE_DIR]")
            # Also redact with forward slashes if paths are converted
            cwd_f = cwd.replace('\\', '/')
            text = text.replace(cwd_f, "[WORKSPACE_DIR]")
    except Exception:
        pass

    # 5. Redact system username dynamically
    try:
        import getpass
        user = getpass.getuser()
        if user and len(user) > 2:
            user_re = re.compile(re.escape(user), re.IGNORECASE)
            text = user_re.sub("[USER]", text)
    except Exception:
        pass

    # 6. Redact general sensitive keywords
    text = _REDACT_RE.sub("[REDACTED]", text)

    return text
|
|
83
|
+
|
|
84
|
+
def log_event(level: str, module: str, message: str, error: Exception = None):
    """Append one redacted, timestamped line to the debug log file.

    Any failure while logging is swallowed so that logging can never crash
    the caller.

    Args:
        level: Severity label written into the line (INFO, WARNING, ERROR,
            DEBUG).
        module: Name of the component emitting the event.
        message: Free text; passed through redact_text() before writing.
        error: Optional exception whose traceback is appended, redacted.
    """
    try:
        os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
        timestamp = time.strftime("%Y-%m-%dT%H:%M:%S")

        # Redact secrets
        safe_msg = redact_text(message)

        log_line = f"[{timestamp}] [{level}] [{module}] {safe_msg}"
        if error is not None:
            # Format the exception that was passed in. format_exc() reports
            # whatever exception is currently being handled, which is
            # "NoneType: None" outside an except block and may be an
            # unrelated exception inside one.
            trace = "".join(
                traceback.format_exception(type(error), error, error.__traceback__)
            )
            log_line += f"\nTraceback:\n{redact_text(trace)}"

        with open(LOG_FILE, "a", encoding="utf-8") as f:
            f.write(log_line + "\n")

        # If debug mode is active in the environment, we print it to console/debug stream
        from utim_cli.config import config
        if config.debug_mode:
            print(f"[DEBUG-LOG] {log_line}")
    except Exception:
        pass  # Logging failures should never crash the main application
|
|
110
|
+
|
|
111
|
+
def log_info(module: str, message: str):
    """Log `message` for `module` at INFO level."""
    log_event(level="INFO", module=module, message=message)
|
|
113
|
+
|
|
114
|
+
def log_warning(module: str, message: str, error: Exception = None):
    """Log `message` for `module` at WARNING level, with an optional exception."""
    log_event(level="WARNING", module=module, message=message, error=error)
|
|
116
|
+
|
|
117
|
+
def log_error(module: str, message: str, error: Exception = None):
    """Log `message` for `module` at ERROR level, with an optional exception."""
    log_event(level="ERROR", module=module, message=message, error=error)
|
|
119
|
+
|
|
120
|
+
def log_debug(module: str, message: str):
    """Log `message` for `module` at DEBUG level."""
    log_event(level="DEBUG", module=module, message=message)
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import subprocess
|
|
3
|
+
import threading
|
|
4
|
+
|
|
5
|
+
def forward_stream(source, dest, filter_json=False):
    """Copy lines from `source` to `dest`, flushing after every line.

    With `filter_json` set, only lines whose first non-blank byte is `{`
    go to `dest`; every other line is written to stderr with a
    "[stdout-log] " prefix instead. Any exception ends the copy silently.

    Args:
        source: Iterable of byte lines to read from.
        dest: Binary stream offering write() and flush().
        filter_json: Whether to divert non-JSON-looking lines to stderr.
    """
    try:
        for chunk in source:
            # strip() is only evaluated when filtering is requested.
            if not filter_json or chunk.strip().startswith(b'{'):
                sink = dest
                payload = chunk
            else:
                # Redirect plain text logging to stderr
                sink = sys.stderr.buffer
                payload = b"[stdout-log] " + chunk
            sink.write(payload)
            sink.flush()
    except Exception:
        pass
|
|
23
|
+
|
|
24
|
+
def main():
    """Run the command given on the command line behind stream filters.

    The child's stdin is fed from this process's stdin, its stdout is
    forwarded through the JSON filter, and its stderr is passed through
    unchanged. Exits with status 1 when no command is given, otherwise with
    the child's return code.
    """
    if len(sys.argv) < 2:
        sys.exit(1)

    cmd = sys.argv[1:]

    # On Windows, using shell=False is safer when commands are resolved to absolute paths,
    # as it prevents argument double-quoting bugs with cmd.exe command line construction.
    use_shell = False

    proc = subprocess.Popen(
        cmd,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        bufsize=0,
        shell=use_shell
    )

    def pump_stdin():
        # Close the child's stdin once our own input ends, so the child
        # sees end-of-file instead of waiting for input that never comes.
        forward_stream(sys.stdin.buffer, proc.stdin)
        try:
            proc.stdin.close()
        except OSError:
            pass  # The child has already gone away.

    t_in = threading.Thread(target=pump_stdin, daemon=True)
    t_out = threading.Thread(target=forward_stream, args=(proc.stdout, sys.stdout.buffer, True), daemon=True)
    t_err = threading.Thread(target=forward_stream, args=(proc.stderr, sys.stderr.buffer), daemon=True)

    t_in.start()
    t_out.start()
    t_err.start()

    proc.wait()

    # Let the output pumps drain what the child wrote just before exiting;
    # daemon threads are otherwise abandoned at exit and that tail is lost.
    # The timeout covers pipes kept open by a grandchild process.
    t_out.join(timeout=5)
    t_err.join(timeout=5)

    sys.exit(proc.returncode)
|
|
53
|
+
|
|
54
|
+
if __name__ == '__main__':
|
|
55
|
+
main()
|