utim-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,462 @@
1
+ """
2
+ Codebase Knowledge Graph — AST-based dependency and call graph analysis using Tree-sitter.
3
+
4
+ This module parses the codebase into a knowledge graph of imports, function calls,
5
+ and class relationships, enabling blast-radius analysis for code changes.
6
+ """
7
+
8
+ import os
9
+ import json
10
+ from typing import Dict, List, Optional, Set, Tuple
11
+ from dataclasses import dataclass, field
12
+
13
+ # Tree-sitter imports
14
+ try:
15
+ from tree_sitter import Language, Parser
16
+ import tree_sitter_python as ts_python
17
+ import tree_sitter_javascript as ts_javascript
18
+ import tree_sitter_typescript as ts_typescript
19
+ TREE_SITTER_AVAILABLE = True
20
+ except ImportError:
21
+ TREE_SITTER_AVAILABLE = False
22
+
23
+ # Graph storage
24
+ GRAPH_FILE = ".utim_tmp/knowledge_graph.json"
25
+
26
+
27
@dataclass
class CodeEntity:
    """A single named element extracted from a source file.

    Instances are created by the parse routines of ``KnowledgeGraph`` and
    stored in its ``entities`` mapping keyed by ``id``.
    """

    # Unique identifier; the graph builds it as "<filepath>:<name>:<line>".
    id: str
    # Kind of entity: function, class, method, import, variable.
    type: str
    # Identifier as written in the source.
    name: str
    # Path of the file the entity was found in.
    filepath: str
    # 1-based first and last line of the entity (0 when unknown).
    line_start: int = 0
    line_end: int = 0
    # Either "public" or "private".
    visibility: str = "public"
    # Free-form extra data (e.g. owning class or imported module); each
    # instance gets its own dict.
    metadata: Dict = field(default_factory=dict)
38
+
39
+
40
@dataclass
class CodeRelation:
    """A directed edge between two code entities, identified by entity id."""

    # Id of the entity the relation starts from.
    from_id: str
    # Id of the entity the relation points to.
    to_id: str
    # Kind of edge: calls, imports, inherits, implements, references.
    relation_type: str
46
+
47
+
48
class KnowledgeGraph:
    """
    Builds and maintains a knowledge graph from codebase AST analysis.

    Source files are parsed with tree-sitter; every function, method, class
    and import found becomes a ``CodeEntity``.  Entities are indexed three
    ways: by id (``entities``), by file (``file_entities``) and by name
    (``reference_index``).  The graph is persisted as JSON at ``GRAPH_FILE``.
    """

    def __init__(self):
        self.entities: Dict[str, CodeEntity] = {}
        self.relations: List[CodeRelation] = []
        self.file_entities: Dict[str, List[str]] = {}  # filepath -> entity ids
        self.reference_index: Dict[str, List[str]] = {}  # name -> entity ids

        # Language parsers, keyed by lower-case file extension.
        self.parsers: Dict[str, "Parser"] = {}
        self._init_parsers()

    def _init_parsers(self):
        """Initialize tree-sitter parsers for supported languages.

        Each grammar is loaded independently so that one missing or
        incompatible grammar package does not disable the others.
        """
        if not TREE_SITTER_AVAILABLE:
            return

        def typescript_grammar():
            # The tree-sitter-typescript bindings expose `language_typescript`
            # and `language_tsx`; a plain `language` attribute is only tried
            # as a fallback for bindings that might provide it.
            loader = getattr(ts_typescript, "language_typescript", None)
            if loader is None:
                loader = ts_typescript.language
            return loader()

        grammar_loaders = {
            ".py": lambda: ts_python.language(),
            ".js": lambda: ts_javascript.language(),
            ".ts": typescript_grammar,
            ".tsx": lambda: ts_typescript.language_tsx(),
        }

        for ext, load_grammar in grammar_loaders.items():
            try:
                self.parsers[ext] = Parser(Language(load_grammar()))
            except Exception:
                # Best effort: files with this extension are simply skipped.
                continue

    # The return annotation is a string on purpose: `Parser` is undefined when
    # the optional tree-sitter import failed, and an unquoted annotation would
    # raise NameError while this class is being created.
    def _get_parser(self, filepath: str) -> Optional["Parser"]:
        """Get appropriate parser for file extension."""
        ext = os.path.splitext(filepath)[1].lower()
        return self.parsers.get(ext)

    def _make_entity_id(self, filepath: str, name: str, line: int = 0) -> str:
        """Generate unique entity ID of the form ``filepath:name:line``."""
        return f"{filepath}:{name}:{line}"

    @staticmethod
    def _first_child_text(node, child_types) -> Optional[str]:
        """Return the decoded text of the first direct child whose type is in *child_types*."""
        for child in node.children:
            if child.type in child_types:
                return child.text.decode()
        return None

    def parse_python_file(self, filepath: str, content: bytes) -> List[CodeEntity]:
        """Parse Python file for functions, classes and imports.

        Args:
            filepath: Path recorded on every entity (the file is not opened here).
            content: Raw bytes of the file.

        Returns:
            Entities in traversal order; empty when no Python parser is
            available or parsing fails.
        """
        entities: List[CodeEntity] = []

        parser = self.parsers.get(".py")
        if parser is None:
            return entities

        try:
            root = parser.parse(content).root_node
        except Exception:
            return entities

        def walk(node, in_class: str = None):
            kind = node.type
            first_line = node.start_point[0] + 1
            last_line = node.end_point[0] + 1

            if kind == "function_definition":
                name = self._first_child_text(node, ("identifier",))
                if name is not None:
                    entities.append(CodeEntity(
                        id=self._make_entity_id(filepath, name, first_line),
                        type="method" if in_class else "function",
                        name=name,
                        filepath=filepath,
                        line_start=first_line,
                        line_end=last_line,
                        metadata={"class": in_class} if in_class else {},
                    ))

            elif kind == "class_definition":
                name = self._first_child_text(node, ("identifier",))
                if name is not None:
                    entities.append(CodeEntity(
                        id=self._make_entity_id(filepath, name, first_line),
                        type="class",
                        name=name,
                        filepath=filepath,
                        line_start=first_line,
                        line_end=last_line,
                    ))
                    # Visit every child exactly once: the body with this class
                    # as owner, everything else with the enclosing owner.
                    # Falling through to the generic loop below would walk the
                    # body a second time and re-emit each method as a plain
                    # function under the same id.
                    for child in node.children:
                        walk(child, name if child.type == "block" else in_class)
                    return

            elif kind in ("import_statement", "import_from_statement"):
                module = ""
                names = []
                # In the tree-sitter Python grammar `module_name` is a field,
                # not a node type.  For `from X import ...` the module is the
                # dotted_name / relative_import that precedes the `import`
                # keyword token.
                before_import_kw = kind == "import_from_statement"
                for child in node.children:
                    if child.type == "import":
                        before_import_kw = False
                    elif before_import_kw:
                        if child.type in ("dotted_name", "relative_import"):
                            module = child.text.decode()
                    elif child.type in ("dotted_name", "identifier"):
                        names.append(child.text.decode())
                    elif child.type == "wildcard_import":
                        names.append("*")

                for name in names:
                    entities.append(CodeEntity(
                        id=self._make_entity_id(filepath, f"import:{name}", first_line),
                        type="import",
                        name=name,
                        filepath=filepath,
                        line_start=first_line,
                        line_end=last_line,
                        metadata={"module": module},
                    ))

            for child in node.children:
                walk(child, in_class)

        walk(root)
        return entities

    def parse_javascript_file(self, filepath: str, content: bytes) -> List[CodeEntity]:
        """Parse JavaScript/TypeScript file for functions and classes.

        Args:
            filepath: Path recorded on every entity; its extension selects the parser.
            content: Raw bytes of the file.

        Returns:
            Entities in traversal order; empty when no parser is registered
            for the extension or parsing fails.
        """
        entities: List[CodeEntity] = []

        parser = self._get_parser(filepath)
        if parser is None:
            return entities

        try:
            root = parser.parse(content).root_node
        except Exception:
            return entities

        def walk(node, in_class: str = None):
            kind = node.type
            first_line = node.start_point[0] + 1
            last_line = node.end_point[0] + 1

            if kind in ("function_declaration", "function_expression", "arrow_function"):
                name = None
                if kind != "arrow_function":
                    name = self._first_child_text(node, ("identifier",))
                if name is None:
                    name = "anonymous"

                entities.append(CodeEntity(
                    id=self._make_entity_id(filepath, name, first_line),
                    type="method" if in_class else "function",
                    name=name,
                    filepath=filepath,
                    line_start=first_line,
                    line_end=last_line,
                ))

            elif kind == "class_declaration":
                # NOTE(review): the TypeScript grammar appears to name classes
                # with `type_identifier` rather than `identifier`; both are
                # accepted here.
                name = self._first_child_text(node, ("identifier", "type_identifier"))
                if name is None:
                    name = "anonymous"

                entities.append(CodeEntity(
                    id=self._make_entity_id(filepath, name, first_line),
                    type="class",
                    name=name,
                    filepath=filepath,
                    line_start=first_line,
                    line_end=last_line,
                ))

                # Single pass over the children (see parse_python_file): the
                # body is walked with this class as owner and never re-walked.
                for child in node.children:
                    is_body = child.type in ("class_body", "block")
                    walk(child, name if is_body else in_class)
                return

            for child in node.children:
                walk(child, in_class)

        walk(root)
        return entities

    def build_graph(self, paths: List[str] = None, exclude_dirs: Set[str] = None) -> int:
        """
        Build knowledge graph from codebase files.

        All indexes are reset first, so calling this repeatedly rebuilds the
        graph rather than appending to it.  The result is written to disk.

        Args:
            paths: Specific files to parse. If None, walks directory.
            exclude_dirs: Directories to exclude.

        Returns:
            Number of entities found.
        """
        if exclude_dirs is None:
            exclude_dirs = {".git", "node_modules", "dist", "build", "__pycache__", ".venv", "venv", ".utim_tmp"}

        self.entities.clear()
        self.relations.clear()
        self.file_entities.clear()
        # Without this, ids from previous builds pile up under each name.
        self.reference_index.clear()

        js_exts = (".js", ".ts", ".tsx")

        if paths:
            files_to_parse = [p for p in paths if os.path.isfile(p)]
        else:
            files_to_parse = []
            for root, dirs, files in os.walk("."):
                # Prune in place so os.walk does not descend into excluded dirs.
                dirs[:] = [d for d in dirs if d not in exclude_dirs]
                for filename in files:
                    ext = os.path.splitext(filename)[1].lower()
                    if ext == ".py" or ext in js_exts:
                        files_to_parse.append(os.path.join(root, filename))

        for filepath in files_to_parse:
            try:
                with open(filepath, "rb") as f:
                    content = f.read()

                ext = os.path.splitext(filepath)[1].lower()
                if ext == ".py":
                    found = self.parse_python_file(filepath, content)
                elif ext in js_exts:
                    found = self.parse_javascript_file(filepath, content)
                else:
                    found = []
            except Exception:
                # Best effort: an unreadable or unparsable file is skipped so
                # that it cannot abort the whole build.
                continue

            for entity in found:
                if entity.id in self.entities:
                    # Same id seen twice (e.g. a file listed twice, or two
                    # anonymous functions on one line): keep the indexes
                    # free of duplicate ids.
                    continue
                self.entities[entity.id] = entity
                self.file_entities.setdefault(filepath, []).append(entity.id)
                self.reference_index.setdefault(entity.name, []).append(entity.id)

        self._save_graph()
        return len(self.entities)

    def find_dependents(self, entity_name: str, filepath: str = None) -> List[str]:
        """
        Find files that might depend on a given entity.

        This is a coarse heuristic, not a call graph: once an entity with the
        given name exists, every file that defines a function or method and
        differs from the entity's own file is reported.

        Args:
            entity_name: Name of the function/class to find callers for
            filepath: Optional file the entity must be defined in

        Returns:
            Sorted list of potentially dependent file paths (no duplicates).
        """
        target_files = {
            entity.filepath
            for entity in self.entities.values()
            if entity.name == entity_name
            and (filepath is None or entity.filepath == filepath)
        }
        if not target_files:
            return []

        dependents = set()
        for entity in self.entities.values():
            if entity.type not in ("function", "method"):
                continue
            # Simple heuristic: same project, different file.
            if any(entity.filepath != target for target in target_files):
                dependents.add(entity.filepath)

        return sorted(dependents)

    def get_blast_radius(self, filepath: str) -> List[str]:
        """
        Estimate files that might be affected by changes to a file.

        Args:
            filepath: File to analyze

        Returns:
            Sorted list of potentially affected file paths.
        """
        # Distinct names only: repeated names would repeat the same lookup.
        names = {
            self.entities[eid].name
            for eid in self.file_entities.get(filepath, [])
            if eid in self.entities
        }

        affected = set()
        for name in names:
            affected.update(self.find_dependents(name, filepath))

        return sorted(affected)

    def get_stats(self) -> Dict:
        """Get knowledge graph statistics (entity totals and per-type counts)."""
        type_counts = {t: 0 for t in ("function", "method", "class", "import")}
        for entity in self.entities.values():
            if entity.type in type_counts:
                type_counts[entity.type] += 1

        return {
            "total_entities": len(self.entities),
            "total_files": len(self.file_entities),
            "entity_types": type_counts,
        }

    def _save_graph(self):
        """Save graph to disk as JSON at ``GRAPH_FILE``.

        Raises:
            OSError: If the directory or file cannot be written.
        """
        os.makedirs(os.path.dirname(GRAPH_FILE) or ".", exist_ok=True)

        data = {
            "entities": [
                {
                    "id": e.id,
                    "type": e.type,
                    "name": e.name,
                    "filepath": e.filepath,
                    "line_start": e.line_start,
                    "line_end": e.line_end,
                    # Keeps the method -> class and import -> module links
                    # across a save/load round trip.
                    "metadata": e.metadata,
                }
                for e in self.entities.values()
            ],
            "file_entities": self.file_entities,
        }

        with open(GRAPH_FILE, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)

    def load_graph(self) -> bool:
        """Load graph from disk if exists.

        The file is decoded into temporary structures first, so a corrupt or
        truncated file leaves the current graph untouched.

        Returns:
            True when a graph was loaded, False when the file is missing or
            could not be read.
        """
        if not os.path.exists(GRAPH_FILE):
            return False

        try:
            with open(GRAPH_FILE, "r", encoding="utf-8") as f:
                data = json.load(f)

            entities: Dict[str, CodeEntity] = {}
            reference_index: Dict[str, List[str]] = {}
            for raw in data.get("entities", []):
                entity = CodeEntity(
                    id=raw["id"],
                    type=raw["type"],
                    name=raw["name"],
                    filepath=raw["filepath"],
                    line_start=raw.get("line_start", 0),
                    line_end=raw.get("line_end", 0),
                    # Absent in files written by older versions.
                    metadata=raw.get("metadata") or {},
                )
                entities[entity.id] = entity
                reference_index.setdefault(entity.name, []).append(entity.id)

            file_entities = data.get("file_entities", {})
        except (OSError, ValueError, KeyError, TypeError, AttributeError):
            return False

        self.entities = entities
        self.file_entities = file_entities
        self.reference_index = reference_index
        return True
442
+
443
+
444
+ # Global instance
445
+ _knowledge_graph: Optional[KnowledgeGraph] = None
446
+
447
+
448
def get_knowledge_graph() -> Optional[KnowledgeGraph]:
    """Return the shared knowledge graph, creating it on first use.

    The instance is created (and loaded from disk, if a saved graph exists)
    only when tree-sitter is available; otherwise the current value of the
    module-level singleton is returned unchanged.
    """
    global _knowledge_graph
    if _knowledge_graph is not None or not TREE_SITTER_AVAILABLE:
        return _knowledge_graph

    _knowledge_graph = KnowledgeGraph()
    _knowledge_graph.load_graph()
    return _knowledge_graph
455
+
456
+
457
def build_knowledge_graph(paths: List[str] = None) -> int:
    """Build or rebuild the shared knowledge graph.

    Args:
        paths: Specific files to parse; passed straight to ``build_graph``.

    Returns:
        Number of entities found, or 0 when no graph instance is available.
    """
    graph = get_knowledge_graph()
    return graph.build_graph(paths) if graph else 0
utim_cli/logger.py ADDED
@@ -0,0 +1,121 @@
1
+ import os
2
+ import re
3
+ import time
4
+ import traceback
5
+ from typing import Any
6
+
7
+ # Global logging path
8
+ LOG_FILE = os.path.join(".utim", "utim_debug.log")
9
+
10
# Sensitive word list for log redaction
SENSITIVE_KEYWORDS = {
    "girlfriend", "gf", "wife", "spouse", "partner", "relationship",
    "secret", "password", "code", "private", "personal", "anushka", "puchkuli"
}

# Compile regex to match sensitive words case-insensitively.  The words are
# sorted only so that the compiled pattern is the same on every run.
_REDACT_RE = re.compile(
    r"\b(" + "|".join(re.escape(w) for w in sorted(SENSITIVE_KEYWORDS)) + r")\b",
    re.IGNORECASE
)

# API key patterns (OpenAI, Anthropic, OpenRouter, etc.)
_API_KEY_RE = re.compile(
    r"\b(?:sk-|sk-or-v1-|xai-|ai-)[a-zA-Z0-9\-]{20,}\b",
    re.IGNORECASE
)

# Bearer token patterns
_BEARER_TOKEN_RE = re.compile(
    r"\bbearer\s+[a-zA-Z0-9\-._~+/]+=*\b",
    re.IGNORECASE
)

# Email address patterns
_EMAIL_RE = re.compile(
    r"\b[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+\b",
    re.IGNORECASE
)

# An environment variable whose name contains one of these is treated as secret.
_SECRET_ENV_MARKERS = ("KEY", "SECRET", "PASSWORD", "TOKEN", "AUTH")


def redact_text(text: str) -> str:
    """Redact sensitive keywords, API keys, tokens, emails, working directories, and usernames.

    Args:
        text: Text about to be logged.

    Returns:
        The text with sensitive spans replaced by bracketed placeholders.
        Falsy input (empty string, None) is returned unchanged.
    """
    if not text:
        return text

    # 1. Redact values of secret-looking environment variables.  This must run
    #    before every pattern-based step: once a keyword, email or username
    #    inside a secret has been rewritten, the exact-match replace no longer
    #    finds the secret and the remaining fragments would be logged.
    #    Longest values go first so that a short value which is a substring
    #    of a longer one cannot break the longer match.
    env_secrets = [
        (name, value)
        for name, value in os.environ.items()
        if value and len(value) > 4
        and any(marker in name.upper() for marker in _SECRET_ENV_MARKERS)
    ]
    env_secrets.sort(key=lambda item: len(item[1]), reverse=True)
    for name, value in env_secrets:
        text = text.replace(value, f"[REDACTED_{name}]")

    # 2. Redact API keys and bearer tokens
    text = _API_KEY_RE.sub("[REDACTED_API_KEY]", text)
    text = _BEARER_TOKEN_RE.sub("[REDACTED_TOKEN]", text)

    # 3. Redact emails
    text = _EMAIL_RE.sub("[REDACTED_EMAIL]", text)

    # 4. Redact current working directory
    try:
        cwd = os.getcwd()
        if cwd and len(cwd) > 3:
            text = text.replace(cwd, "[WORKSPACE_DIR]")
            # Also redact with forward slashes if paths are converted
            text = text.replace(cwd.replace('\\', '/'), "[WORKSPACE_DIR]")
    except Exception:
        # Best effort: the working directory may be unavailable.
        pass

    # 5. Redact system username dynamically
    try:
        import getpass
        user = getpass.getuser()
        if user and len(user) > 2:
            text = re.sub(re.escape(user), "[USER]", text, flags=re.IGNORECASE)
    except Exception:
        # Best effort: the user name cannot always be determined.
        pass

    # 6. Redact general sensitive keywords
    text = _REDACT_RE.sub("[REDACTED]", text)

    return text
83
+
84
def log_event(level: str, module: str, message: str, error: Exception = None):
    """Log structured events to .utim/utim_debug.log.

    Levels: INFO, WARNING, ERROR, DEBUG

    Args:
        level: Severity label written into the log line.
        module: Name of the component emitting the event.
        message: Event text; redacted before it is written.
        error: Optional exception whose traceback is appended (redacted).

    Any failure while logging is swallowed: logging must never crash the
    main application.
    """
    try:
        os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
        timestamp = time.strftime("%Y-%m-%dT%H:%M:%S")

        # Redact secrets
        safe_msg = redact_text(message)

        log_line = f"[{timestamp}] [{level}] [{module}] {safe_msg}"
        if error is not None:
            # Format the exception that was passed in.  traceback.format_exc()
            # reports whatever exception is currently being handled, which is
            # "NoneType: None" outside an except block and possibly a
            # different exception inside one.
            tb_text = "".join(
                traceback.format_exception(type(error), error, error.__traceback__)
            )
            log_line += f"\nTraceback:\n{redact_text(tb_text)}"

        with open(LOG_FILE, "a", encoding="utf-8") as f:
            f.write(log_line + "\n")

        # If debug mode is active in the environment, we print it to console/debug stream.
        # Imported here rather than at module level; failures are swallowed below.
        from utim_cli.config import config
        if config.debug_mode:
            print(f"[DEBUG-LOG] {log_line}")
    except Exception:
        pass  # Logging failures should never crash the main application
110
+
111
def log_info(module: str, message: str):
    """Record *message* for *module* at INFO level."""
    log_event(level="INFO", module=module, message=message)
113
+
114
def log_warning(module: str, message: str, error: Exception = None):
    """Record *message* for *module* at WARNING level, with an optional exception."""
    log_event(level="WARNING", module=module, message=message, error=error)
116
+
117
def log_error(module: str, message: str, error: Exception = None):
    """Record *message* for *module* at ERROR level, with an optional exception."""
    log_event(level="ERROR", module=module, message=message, error=error)
119
+
120
def log_debug(module: str, message: str):
    """Record *message* for *module* at DEBUG level."""
    log_event(level="DEBUG", module=module, message=message)
@@ -0,0 +1,55 @@
1
+ import sys
2
+ import subprocess
3
+ import threading
4
+
5
def forward_stream(source, dest, filter_json=False):
    """Copy *source* to *dest* line by line, flushing after every line.

    Args:
        source: Iterable of byte lines (e.g. a binary pipe).
        dest: Binary writable receiving the forwarded lines.
        filter_json: When true, only lines whose first non-whitespace byte is
            ``{`` go to *dest*; every other line is written to stderr with a
            ``[stdout-log] `` prefix.

    Any exception ends the forwarding silently.
    """
    try:
        for chunk in source:
            if filter_json and not chunk.strip().startswith(b'{'):
                # Redirect plain text logging to stderr
                err = sys.stderr.buffer
                err.write(b"[stdout-log] " + chunk)
                err.flush()
                continue
            dest.write(chunk)
            dest.flush()
    except Exception:
        pass
23
+
24
def main():
    """Run the command given on the command line and relay its stdio.

    stdin and stderr are forwarded unchanged.  On stdout only lines that look
    like JSON objects are passed through; everything else is diverted to
    stderr by ``forward_stream``.  The process exits with the child's return
    code, or with 1 when no command was given or it could not be started.
    """
    if len(sys.argv) < 2:
        sys.stderr.write(f"usage: {sys.argv[0]} <command> [args...]\n")
        sys.exit(1)

    cmd = sys.argv[1:]

    try:
        # On Windows, using shell=False is safer when commands are resolved to absolute paths,
        # as it prevents argument double-quoting bugs with cmd.exe command line construction.
        proc = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            bufsize=0,
            shell=False
        )
    except OSError as exc:
        sys.stderr.write(f"failed to start {cmd[0]!r}: {exc}\n")
        sys.exit(1)

    def pump_stdin():
        # Close the child's stdin once ours is exhausted; otherwise a child
        # that reads until EOF would wait forever.
        try:
            forward_stream(sys.stdin.buffer, proc.stdin)
        finally:
            try:
                proc.stdin.close()
            except OSError:
                pass

    t_in = threading.Thread(target=pump_stdin, daemon=True)
    t_out = threading.Thread(target=forward_stream, args=(proc.stdout, sys.stdout.buffer, True), daemon=True)
    t_err = threading.Thread(target=forward_stream, args=(proc.stderr, sys.stderr.buffer), daemon=True)

    t_in.start()
    t_out.start()
    t_err.start()

    proc.wait()

    # Let the output threads drain what is still buffered in the pipes before
    # exiting; as daemon threads they would otherwise be killed mid-copy.
    # The timeout guards against a grandchild that keeps the pipes open.
    drain_timeout = 5.0
    t_out.join(timeout=drain_timeout)
    t_err.join(timeout=drain_timeout)

    sys.exit(proc.returncode)
53
+
54
+ if __name__ == '__main__':
55
+ main()