tarang 4.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tarang/__init__.py +23 -0
- tarang/cli.py +1168 -0
- tarang/client/__init__.py +19 -0
- tarang/client/api_client.py +701 -0
- tarang/client/auth.py +178 -0
- tarang/context/__init__.py +41 -0
- tarang/context/bm25.py +218 -0
- tarang/context/chunker.py +984 -0
- tarang/context/graph.py +464 -0
- tarang/context/indexer.py +514 -0
- tarang/context/retriever.py +270 -0
- tarang/context/skeleton.py +282 -0
- tarang/context_collector.py +449 -0
- tarang/executor/__init__.py +6 -0
- tarang/executor/diff_apply.py +246 -0
- tarang/executor/linter.py +184 -0
- tarang/stream.py +1346 -0
- tarang/ui/__init__.py +7 -0
- tarang/ui/console.py +407 -0
- tarang/ui/diff_viewer.py +146 -0
- tarang/ui/formatter.py +1151 -0
- tarang/ui/keyboard.py +197 -0
- tarang/ws/__init__.py +14 -0
- tarang/ws/client.py +464 -0
- tarang/ws/executor.py +638 -0
- tarang/ws/handlers.py +590 -0
- tarang-4.4.0.dist-info/METADATA +102 -0
- tarang-4.4.0.dist-info/RECORD +31 -0
- tarang-4.4.0.dist-info/WHEEL +5 -0
- tarang-4.4.0.dist-info/entry_points.txt +2 -0
- tarang-4.4.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Context Retriever - Unified interface for BM25 + KG retrieval.
|
|
3
|
+
|
|
4
|
+
Combines BM25 keyword search with Symbol Graph expansion to
|
|
5
|
+
provide rich, connected context for LLM queries.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any, Dict, List, Optional
|
|
12
|
+
|
|
13
|
+
from .bm25 import BM25Index, SearchResult
|
|
14
|
+
from .chunker import Chunk
|
|
15
|
+
from .graph import SymbolGraph, SymbolNode
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class RetrievalResult:
    """Bundle of context produced by one retrieval call."""
    chunks: List[Chunk]  # Direct matches, carried with their full code
    signatures: List[str]  # Signature strings of graph-connected symbols
    graph_context: Dict[str, Any]  # Relationship summary from the symbol graph
    stats: Dict[str, Any] = field(default_factory=dict)  # Optional counters

    def to_context_dict(self) -> Dict:
        """Build the plain-dict form used as the API payload.

        Each chunk is reduced to a fixed set of attributes; ``stats`` is not
        part of the payload.
        """
        exported_attrs = (
            "id",
            "file",
            "type",
            "name",
            "signature",
            "content",
            "line_start",
            "line_end",
        )
        chunk_rows = []
        for chunk in self.chunks:
            chunk_rows.append({attr: getattr(chunk, attr) for attr in exported_attrs})

        payload = {"chunks": chunk_rows}
        payload["signatures"] = self.signatures
        payload["graph"] = self.graph_context
        return payload

    @property
    def total_lines(self) -> int:
        """Sum of the inclusive line spans of every chunk."""
        line_total = 0
        for chunk in self.chunks:
            line_total += chunk.line_end - chunk.line_start + 1
        return line_total

    @property
    def is_empty(self) -> bool:
        """True when no chunks were retrieved."""
        return not self.chunks
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class ContextRetriever:
    """
    Unified context retrieval using BM25 + Knowledge Graph.

    Workflow:
    1. BM25 search finds relevant chunks
    2. KG expansion adds connected symbols (signatures only)
    3. Returns combined context for LLM
    """

    def __init__(
        self,
        bm25_index: BM25Index,
        symbol_graph: SymbolGraph,
    ):
        self.bm25 = bm25_index
        self.graph = symbol_graph

    @property
    def is_ready(self) -> bool:
        """Check if retriever has indexed data (the BM25 index is non-empty)."""
        return not self.bm25.is_empty

    def _expand_signatures(
        self,
        symbol_ids: List[str],
        hops: int,
        max_signatures: Optional[int] = None,
    ):
        """
        Collect signatures of graph neighbours of the given symbols.

        Shared by all ``retrieve*`` methods so the expansion rules live in
        one place.

        Args:
            symbol_ids: IDs of the directly matched chunks (the seeds)
            hops: Passed through to ``graph.get_neighbors``; values <= 0
                disable expansion
            max_signatures: Stop once this many signatures were collected;
                ``None`` means no limit

        Returns:
            ``(signatures, expanded_ids)`` as two parallel lists in discovery
            order. Seeds and already-seen neighbours are never repeated.
        """
        signatures: List[str] = []
        expanded_ids: List[str] = []

        if hops <= 0 or self.graph.is_empty:
            return signatures, expanded_ids
        if max_signatures is not None and max_signatures <= 0:
            return signatures, expanded_ids

        # One set covers both "is a seed" and "already expanded" checks, so
        # each membership test is O(1) instead of a scan over the seed list.
        seen = set(symbol_ids)

        for sid in symbol_ids:
            for neighbor in self.graph.get_neighbors(sid, hops=hops):
                if neighbor.id in seen:
                    continue
                seen.add(neighbor.id)
                expanded_ids.append(neighbor.id)
                signatures.append(neighbor.signature)

                if max_signatures is not None and len(signatures) >= max_signatures:
                    return signatures, expanded_ids

        return signatures, expanded_ids

    def retrieve(
        self,
        query: str,
        hops: int = 1,
        max_chunks: int = 10,
        max_signatures: int = 20,
    ) -> RetrievalResult:
        """
        Retrieve relevant context for a query.

        Args:
            query: User instruction or search query
            hops: KG expansion hops (0=none, 1=direct, 2=2-level)
            max_chunks: Maximum code chunks to return
            max_signatures: Maximum connected signatures

        Returns:
            RetrievalResult with chunks, signatures, and graph context
        """
        # Step 1: BM25 search
        search_results = self.bm25.search(query, k=max_chunks)

        if not search_results:
            return RetrievalResult(
                chunks=[],
                signatures=[],
                graph_context={},
                stats={"bm25_hits": 0, "expanded_symbols": 0},
            )

        chunks = [r.chunk for r in search_results]
        symbol_ids = [c.id for c in chunks]

        # Step 2: KG expansion (bounded by max_signatures)
        signatures, expanded_ids = self._expand_signatures(
            symbol_ids, hops, max_signatures
        )

        # Step 3: Graph context over the seeds plus everything expanded
        graph_context = self.graph.get_graph_context(symbol_ids + expanded_ids)

        return RetrievalResult(
            chunks=chunks,
            signatures=signatures,
            graph_context=graph_context,
            stats={
                "bm25_hits": len(search_results),
                "expanded_symbols": len(expanded_ids),
                "total_chunks": len(chunks),
                "total_signatures": len(signatures),
            },
        )

    def retrieve_for_file(
        self,
        file_path: str,
        hops: int = 1,
    ) -> RetrievalResult:
        """
        Retrieve all context for a specific file.

        Useful when user mentions a file explicitly.

        Args:
            file_path: Path handed to ``bm25.get_chunks_for_file``
            hops: KG expansion hops (0 disables expansion)

        Returns:
            RetrievalResult holding every chunk of the file plus the
            signatures of connected symbols (unbounded); empty when the
            index has no chunks for the file.
        """
        chunks = self.bm25.get_chunks_for_file(file_path)

        if not chunks:
            return RetrievalResult(
                chunks=[],
                signatures=[],
                graph_context={},
            )

        symbol_ids = [c.id for c in chunks]
        signatures, expanded_ids = self._expand_signatures(symbol_ids, hops)
        graph_context = self.graph.get_graph_context(symbol_ids + expanded_ids)

        return RetrievalResult(
            chunks=chunks,
            signatures=signatures,
            graph_context=graph_context,
            stats={
                "expanded_symbols": len(expanded_ids),
                "total_chunks": len(chunks),
                "total_signatures": len(signatures),
            },
        )

    def retrieve_symbol(
        self,
        symbol_name: str,
        hops: int = 1,
    ) -> RetrievalResult:
        """
        Retrieve context for a specific symbol by name.

        Runs a BM25 search (top 5) and keeps the hits whose chunk name equals
        ``symbol_name`` case-insensitively. When no hit matches exactly, the
        top 3 search hits are used instead.

        Args:
            symbol_name: Name of the symbol to look up
            hops: KG expansion hops (0 disables expansion)

        Returns:
            RetrievalResult for the selected chunks; empty when the search
            returns nothing.
        """
        results = self.bm25.search(symbol_name, k=5)

        wanted = symbol_name.lower()
        # `or ""` guards against chunks without a name.
        # NOTE(review): whether Chunk.name can be None is not visible here.
        matches = [r for r in results if (r.chunk.name or "").lower() == wanted]

        if not matches:
            # Fall back to the best partial matches
            matches = results[:3]

        if not matches:
            return RetrievalResult(chunks=[], signatures=[], graph_context={})

        chunks = [r.chunk for r in matches]
        symbol_ids = [c.id for c in chunks]
        signatures, expanded_ids = self._expand_signatures(symbol_ids, hops)
        graph_context = self.graph.get_graph_context(symbol_ids + expanded_ids)

        return RetrievalResult(
            chunks=chunks,
            signatures=signatures,
            graph_context=graph_context,
            stats={
                "bm25_hits": len(results),
                "expanded_symbols": len(expanded_ids),
                "total_chunks": len(chunks),
                "total_signatures": len(signatures),
            },
        )

    def get_callers(self, symbol_id: str) -> List[SymbolNode]:
        """Get all symbols that call this symbol (delegates to the graph)."""
        return self.graph.get_callers(symbol_id)

    def get_callees(self, symbol_id: str) -> List[SymbolNode]:
        """Get all symbols that this symbol calls (delegates to the graph)."""
        return self.graph.get_callees(symbol_id)
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def create_retriever(index_path: Path) -> Optional[ContextRetriever]:
    """
    Build a ContextRetriever from index files stored on disk.

    Reads ``bm25.pkl`` and ``graph.json`` from ``index_path``.

    Args:
        index_path: Path to .tarang/index/ directory

    Returns:
        A ContextRetriever when the BM25 index loads (its ``load`` returns a
        truthy value), otherwise None. The result of loading the graph is
        not checked, so the retriever is returned either way.
    """
    # NOTE(review): the .pkl suffix suggests a pickle file; confirm that
    # BM25Index.load is only ever pointed at trusted index directories.
    keyword_index_file = index_path / "bm25.pkl"
    symbol_graph_file = index_path / "graph.json"

    keyword_index, symbol_graph = BM25Index(), SymbolGraph()

    if keyword_index.load(keyword_index_file):
        # The graph is optional: its load result is deliberately ignored.
        symbol_graph.load(symbol_graph_file)
        return ContextRetriever(keyword_index, symbol_graph)

    return None
|
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Project Skeleton Generator - Lightweight project context for backend.
|
|
3
|
+
|
|
4
|
+
Generates file tree and symbol information to send to the Orchestrator.
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import subprocess
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any, Dict, List, Optional
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class SymbolDefinition:
    """A symbol (function, class, method) in the project."""
    # Identifier of the symbol as it appears in the source.
    name: str
    # Category of the symbol, e.g. function, class, method, variable.
    kind: str  # function, class, method, variable
    # Path of the file that defines the symbol.
    file: str
    # Line number of the definition.
    line: int
    # Signature text when the extractor provides one; None otherwise.
    signature: Optional[str] = None
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class ProjectSkeleton:
    """Compact description of a project: tree, symbols, imports, totals."""
    file_tree: str
    symbols: List[SymbolDefinition] = field(default_factory=list)
    dependencies: Dict[str, List[str]] = field(default_factory=dict)
    total_files: int = 0
    total_lines: int = 0

    def to_dict(self) -> Dict[str, Any]:
        """Serialise the skeleton into a plain dict for the API.

        Only the first 100 symbols and the first 50 dependency entries are
        included; the tree and the totals are passed through unchanged.
        """
        symbol_rows = []
        for sym in self.symbols[:100]:  # Limit symbols
            row = {"name": sym.name, "kind": sym.kind, "file": sym.file}
            row["line"] = sym.line
            row["signature"] = sym.signature
            symbol_rows.append(row)

        trimmed_deps = {}
        for position, (path, imports) in enumerate(self.dependencies.items()):
            if position >= 50:
                break
            trimmed_deps[path] = imports

        payload = {"file_tree": self.file_tree}
        payload["symbols"] = symbol_rows
        payload["dependencies"] = trimmed_deps
        payload["total_files"] = self.total_files
        payload["total_lines"] = self.total_lines
        return payload
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class SkeletonGenerator:
    """
    Generate lightweight project skeleton for backend context.

    Extracts file tree and symbol definitions without sending full code.
    """

    # Plain entries are compared against whole path components (directory or
    # file names). Entries starting with "*" are suffix patterns compared
    # against the file name only.
    IGNORE_PATTERNS = [
        "node_modules", ".git", "__pycache__", ".venv",
        "venv", "dist", "build", ".next", "target",
        ".tarang", ".pytest_cache", ".mypy_cache",
        "*.pyc", "*.pyo", ".DS_Store",
    ]

    def __init__(self, project_root: Path):
        self.project_root = project_root

    def generate(self, max_depth: int = 4) -> ProjectSkeleton:
        """
        Generate project skeleton.

        Args:
            max_depth: Maximum directory depth for tree

        Returns:
            ProjectSkeleton with tree and symbols
        """
        file_tree = self._generate_tree(max_depth)
        symbols = self._extract_symbols()
        dependencies = self._analyze_dependencies()
        total_files, total_lines = self._count_stats()

        return ProjectSkeleton(
            file_tree=file_tree,
            symbols=symbols,
            dependencies=dependencies,
            total_files=total_files,
            total_lines=total_lines,
        )

    def _should_ignore(self, path: Path) -> bool:
        """
        Check if path should be ignored.

        Only the part of the path below the project root is inspected, and
        plain patterns must equal a whole component. A substring test on the
        full path would hide ``rebuild.py`` or ``.gitignore`` and would
        ignore every file of a project that lives under e.g.
        ``/srv/target_app``.
        """
        try:
            parts = path.relative_to(self.project_root).parts
        except ValueError:
            # Path is not below the project root; judge all of its components.
            parts = path.parts

        name = path.name
        for pattern in self.IGNORE_PATTERNS:
            if pattern.startswith("*"):
                if name.endswith(pattern[1:]):
                    return True
            elif pattern in parts:
                return True
        return False

    def _relative_path(self, raw_path: str) -> str:
        """Return raw_path relative to the project root when it lies below it."""
        try:
            return str(Path(raw_path).relative_to(self.project_root))
        except ValueError:
            return raw_path

    def _iter_files(self, suffix: Optional[str] = None):
        """
        Yield project files in a deterministic (name-sorted, depth-first) order.

        Ignored directories are pruned instead of being descended into, and
        symlinked directories are skipped so link cycles cannot loop forever.

        Args:
            suffix: When given, only files whose name ends with it are yielded
        """
        stack = [self.project_root]
        while stack:
            current = stack.pop()
            try:
                entries = sorted(current.iterdir(), key=lambda p: p.name)
            except OSError:
                continue

            subdirs = []
            for entry in entries:
                if self._should_ignore(entry):
                    continue
                if entry.is_dir():
                    if not entry.is_symlink():
                        subdirs.append(entry)
                elif entry.is_file():
                    if suffix is None or entry.name.endswith(suffix):
                        yield entry
            # Reversed so the alphabetically first directory is popped next.
            stack.extend(reversed(subdirs))

    def _generate_tree(self, max_depth: int) -> str:
        """
        Generate ASCII file tree.

        Directories are listed before files, at most 30 entries are shown per
        directory and the whole tree is capped at 200 lines.
        """
        max_items = 30
        max_lines = 200
        lines = [f"{self.project_root.name}/"]

        def walk(path: Path, prefix: str = "", depth: int = 0):
            if depth > max_depth or len(lines) >= max_lines:
                return

            try:
                items = sorted(path.iterdir(), key=lambda x: (x.is_file(), x.name.lower()))
            except OSError:
                # Unreadable or vanished directory: leave it out of the tree.
                return

            # Filter first, then cut, so "last" refers to the last line that
            # is actually printed for this directory.
            visible = [i for i in items if not self._should_ignore(i)][:max_items]
            last_index = len(visible) - 1

            for i, item in enumerate(visible):
                if len(lines) >= max_lines:
                    return
                is_last = i == last_index
                connector = "└── " if is_last else "├── "

                if item.is_dir():
                    lines.append(f"{prefix}{connector}{item.name}/")
                    # Same width as the connector keeps nested levels aligned.
                    extension = "    " if is_last else "│   "
                    walk(item, prefix + extension, depth + 1)
                else:
                    lines.append(f"{prefix}{connector}{item.name}")

        walk(self.project_root)
        return "\n".join(lines[:max_lines])  # Limit total lines

    def _extract_symbols(self) -> List[SymbolDefinition]:
        """Extract symbols using ctags if available, else the Python regex fallback."""
        if self._has_ctags():
            symbols = self._extract_with_ctags()
            if symbols:
                return symbols

        # Fallback: Simple regex extraction for Python
        return self._extract_python_symbols()

    def _has_ctags(self) -> bool:
        """Check if a ``ctags`` executable runs and accepts ``--version``."""
        try:
            probe = subprocess.run(
                ["ctags", "--version"],
                capture_output=True,
                timeout=5,
            )
        except (OSError, subprocess.TimeoutExpired):
            return False
        # A ctags that rejects --version exits non-zero; treat it as absent.
        return probe.returncode == 0

    def _extract_with_ctags(self) -> List[SymbolDefinition]:
        """
        Extract symbols using universal-ctags.

        Returns at most 500 symbols, or an empty list when ctags cannot be
        run or produces no parsable JSON lines.
        """
        import json

        symbols: List[SymbolDefinition] = []

        try:
            result = subprocess.run(
                [
                    "ctags", "-R", "--output-format=json",
                    # Line numbers (n) and signatures (S) are not emitted
                    # unless these fields are enabled explicitly.
                    "--fields=+nS",
                    "--languages=Python,JavaScript,TypeScript,Go,Rust",
                    "--exclude=node_modules", "--exclude=.git",
                    "--exclude=__pycache__", "--exclude=venv",
                    "-f", "-", str(self.project_root),
                ],
                capture_output=True,
                timeout=30,
            )
        except (OSError, subprocess.TimeoutExpired):
            return symbols

        for line in result.stdout.decode("utf-8", errors="replace").splitlines():
            line = line.strip()
            if not line:
                continue
            try:
                tag = json.loads(line)
            except json.JSONDecodeError:
                continue
            # Skip anything that is not a regular tag record.
            if not isinstance(tag, dict) or tag.get("_type", "tag") != "tag":
                continue

            raw_path = tag.get("path", "")
            # Apply the full ignore list, not just the ctags --exclude subset.
            if raw_path and self._should_ignore(Path(raw_path)):
                continue

            symbols.append(SymbolDefinition(
                name=tag.get("name", ""),
                kind=tag.get("kind", "unknown"),
                # Relative paths, matching what the regex fallback reports.
                file=self._relative_path(raw_path),
                line=tag.get("line", 0),
                signature=tag.get("signature"),
            ))
            if len(symbols) >= 500:
                break

        return symbols

    def _extract_python_symbols(self) -> List[SymbolDefinition]:
        """
        Fallback: Extract Python symbols with regex.

        Finds top-level ``class`` statements and every ``def`` / ``async def``.
        An indented ``def`` is reported as "method", an unindented one as
        "function". Returns at most 500 symbols.
        """
        import re

        symbols: List[SymbolDefinition] = []

        # [ \t]* instead of \s*: \s also matches newlines, which let the
        # indent group swallow preceding blank lines, turning top-level
        # functions into "methods" and shifting their line numbers.
        func_pattern = re.compile(
            r'^([ \t]*)(?:async[ \t]+)?def\s+(\w+)\s*\(([^)]*)\)', re.MULTILINE
        )
        class_pattern = re.compile(r'^class\s+(\w+)', re.MULTILINE)

        for py_file in self._iter_files(".py"):
            try:
                content = py_file.read_text(encoding="utf-8", errors="replace")
            except OSError:
                continue
            rel_path = self._relative_path(str(py_file))

            # Extract classes
            for match in class_pattern.finditer(content):
                symbols.append(SymbolDefinition(
                    name=match.group(1),
                    kind="class",
                    file=rel_path,
                    line=content.count("\n", 0, match.start()) + 1,
                ))

            # Extract functions
            for match in func_pattern.finditer(content):
                indent, name, args = match.group(1), match.group(2), match.group(3)
                symbols.append(SymbolDefinition(
                    name=name,
                    kind="method" if indent else "function",
                    file=rel_path,
                    line=content.count("\n", 0, match.start()) + 1,
                    signature=f"({args})",
                ))

            if len(symbols) >= 500:
                break

        return symbols[:500]

    def _analyze_dependencies(self) -> Dict[str, List[str]]:
        """
        Build import dependency graph for Python files.

        Maps each file's project-relative path to the import statements found
        in its first 100 lines (at most 20 per file). Files without imports
        are omitted.
        """
        deps: Dict[str, List[str]] = {}

        for py_file in self._iter_files(".py"):
            imports = []
            try:
                with py_file.open(encoding="utf-8", errors="replace") as handle:
                    for line_index, raw_line in enumerate(handle):
                        if line_index >= 100:  # Only scan first 100 lines
                            break
                        stripped = raw_line.strip()
                        if stripped.startswith(("import ", "from ")):
                            imports.append(stripped)
            except OSError:
                continue

            if imports:
                deps[self._relative_path(str(py_file))] = imports[:20]

        return deps

    def _count_stats(self) -> tuple:
        """
        Count total files and lines.

        Returns:
            ``(total_files, total_lines)`` over all non-ignored files. Files
            that cannot be read still count as files. A trailing newline does
            not add an extra line and an empty file has zero lines.
        """
        total_files = 0
        total_lines = 0

        for f in self._iter_files():
            total_files += 1
            try:
                text = f.read_text(encoding="utf-8", errors="replace")
            except OSError:
                continue
            total_lines += text.count("\n")
            if text and not text.endswith("\n"):
                # Last line without a terminating newline.
                total_lines += 1

        return total_files, total_lines
|