srcodex 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- srcodex/__init__.py +0 -0
- srcodex/backend/__init__.py +0 -0
- srcodex/backend/chat.py +79 -0
- srcodex/backend/main.py +98 -0
- srcodex/backend/services/__init__.py +0 -0
- srcodex/backend/services/claude_service.py +754 -0
- srcodex/backend/services/config_loader.py +113 -0
- srcodex/backend/services/file_access_tools.py +279 -0
- srcodex/backend/services/file_tree.py +480 -0
- srcodex/backend/services/graph_tools.py +874 -0
- srcodex/backend/services/logger_setup.py +91 -0
- srcodex/backend/services/session_manager.py +81 -0
- srcodex/backend/services/status_tracker.py +91 -0
- srcodex/cli.py +255 -0
- srcodex/core/__init__.py +0 -0
- srcodex/core/config.py +113 -0
- srcodex/core/logger.py +23 -0
- srcodex/indexer/__init__.py +0 -0
- srcodex/indexer/cscope_client.py +183 -0
- srcodex/indexer/ctags_compat.py +223 -0
- srcodex/indexer/ctags_parser.py +456 -0
- srcodex/indexer/explorer.py +135 -0
- srcodex/indexer/field_access_analyzer.py +436 -0
- srcodex/indexer/indexer.py +664 -0
- srcodex/indexer/reference_ingestor.py +293 -0
- srcodex/indexer/reference_resolver.py +544 -0
- srcodex/tui/__init__.py +0 -0
- srcodex/tui/app.py +103 -0
- srcodex/tui/app.tcss +24 -0
- srcodex/tui/components/__init__.py +0 -0
- srcodex/tui/components/bars/__init__.py +0 -0
- srcodex/tui/components/bars/chat_header.py +48 -0
- srcodex/tui/components/bars/code_tab_bar.py +157 -0
- srcodex/tui/components/bars/footer_bar.py +128 -0
- srcodex/tui/components/bars/left_tab.py +54 -0
- srcodex/tui/components/logger.py +57 -0
- srcodex/tui/components/panels/__init__.py +0 -0
- srcodex/tui/components/panels/chat_panel.py +523 -0
- srcodex/tui/components/panels/code_panel.py +229 -0
- srcodex/tui/components/panels/side_panel.py +128 -0
- srcodex/tui/components/views/__init__.py +0 -0
- srcodex/tui/components/views/explorer_view.py +20 -0
- srcodex/tui/components/views/search_view.py +148 -0
- srcodex/tui/components/widgets/__init__.py +0 -0
- srcodex/tui/components/widgets/file_browser.py +16 -0
- srcodex/tui/components/widgets/find_box.py +85 -0
- srcodex-0.2.0.dist-info/METADATA +170 -0
- srcodex-0.2.0.dist-info/RECORD +52 -0
- srcodex-0.2.0.dist-info/WHEEL +5 -0
- srcodex-0.2.0.dist-info/entry_points.txt +2 -0
- srcodex-0.2.0.dist-info/licenses/LICENSE +21 -0
- srcodex-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,456 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
SRC Code Explorer - CTags Parser
|
|
4
|
+
Wrapper around Universal CTags to extract symbols from C code
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import subprocess
|
|
8
|
+
import json
|
|
9
|
+
import os
|
|
10
|
+
import tempfile
|
|
11
|
+
from typing import List, Dict, Optional
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from tqdm import tqdm
|
|
14
|
+
from .ctags_compat import verify_ctags_compatibility
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class CTagsParser:
    """Parse C source code using Universal CTags."""

    # Options shared by every ctags invocation in this class:
    #   --output-format=json  one JSON object per tag
    #   --fields=+nKSz        extra fields (line numbers, kind, signature, ...;
    #                         see ctags(1) --fields for the exact letters)
    #   --kinds-C=+p          include function prototypes
    #   -f -                  write the tags to stdout
    _CTAGS_OPTIONS = (
        "--output-format=json",
        "--fields=+nKSz",
        "--kinds-C=+p",
        "-f", "-",
    )

    # typeref prefixes marking a typedef of a struct/union/enum
    _AGGREGATE_PREFIXES = ('struct:', 'union:', 'enum:')

    # Map ctags kinds to our NORMALIZED types.
    # NOTE: both 'prototype' and 'function' map to 'function'; the raw kind is
    # kept separately in 'kind_raw' so callers can still distinguish them.
    _TYPE_MAP = {
        'function': 'function',
        'prototype': 'function',
        'variable': 'variable',
        'struct': 'struct',
        'union': 'union',
        'enum': 'enum',
        'enumerator': 'enumerator',
        'typedef': 'typedef',
        'macro': 'macro',
        'member': 'member',
        'header': 'header',
    }

    def __init__(self, ctags_bin: str = "ctags"):
        """
        Initialize CTags parser

        Args:
            ctags_bin: Path to ctags binary (default: "ctags")
        """
        self.ctags_bin = ctags_bin
        self._verify_ctags()

    def _verify_ctags(self):
        """Verify that ctags is installed and compatible"""
        verify_ctags_compatibility(self.ctags_bin)

    @staticmethod
    def _iter_tags(lines):
        """Yield the decoded JSON object of every tag line in ``lines``.

        Blank lines, lines starting with '!', lines that are not valid JSON
        and JSON values that are not objects are skipped.
        """
        for line in lines:
            line = line.strip()
            if not line or line.startswith('!'):
                continue
            try:
                tag = json.loads(line)
            except json.JSONDecodeError:
                continue
            if isinstance(tag, dict):
                yield tag

    def _anon_typedef_entry(self, tag: Dict):
        """Return ``(anon_name, typedef_name)`` for a typedef of an anonymous aggregate.

        A tag qualifies when its kind is 'typedef' and its typeref looks like
        "struct:__anondd0b9e6c0108" (or union:/enum:). Returns None otherwise.
        """
        if tag.get('kind') != 'typedef':
            return None
        typeref = tag.get('typeref') or ''
        if not typeref.startswith(self._AGGREGATE_PREFIXES):
            return None
        anon_name = typeref.split(':', 1)[1]
        typedef_name = tag.get('name')
        if anon_name.startswith('__anon') and typedef_name:
            return anon_name, typedef_name
        return None

    def _run_ctags_on_files(self, file_list) -> Optional[List[Dict]]:
        """Run ctags ONCE over ``file_list`` and return the raw tag dictionaries.

        The file names are handed to ctags through a temporary list file (-L)
        to avoid "Argument list too long" errors on large codebases.

        Returns:
            List of raw tag dictionaries, or None if ctags could not be run or
            exited with a non-zero status (a warning is printed in that case).
        """
        filelist_path = None
        try:
            with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.filelist') as f:
                filelist_path = f.name
                f.writelines(f"{file_path}\n" for file_path in file_list)

            cmd = [self.ctags_bin, *self._CTAGS_OPTIONS, "-L", filelist_path]

            raw_tags = []
            # stderr goes to a temporary file rather than a pipe: nobody reads
            # stderr while stdout is being streamed, so a pipe could fill up
            # and block ctags forever.
            with tempfile.TemporaryFile() as stderr_file:
                with subprocess.Popen(
                    cmd,
                    stdout=subprocess.PIPE,
                    stderr=stderr_file,
                    text=True
                ) as process:
                    try:
                        # Stream ctags output with progress bar
                        with tqdm(desc="Running ctags", unit=" symbols") as pbar:
                            for tag in self._iter_tags(process.stdout):
                                raw_tags.append(tag)
                                pbar.update(1)
                    except BaseException:
                        # Do not leave a running ctags behind on failure/interrupt
                        process.kill()
                        raise

                    # Wait for process to complete
                    process.wait()
                    if process.returncode != 0:
                        stderr_file.seek(0)
                        stderr = stderr_file.read().decode(errors='replace')
                        print(f"Warning: ctags failed with code {process.returncode}: {stderr}")
                        return None

            print(f"✓ CTags complete: {len(raw_tags)} symbols extracted")
            return raw_tags

        except Exception as e:
            # Best effort: indexing continues without symbols
            print(f"Warning: ctags failed: {e}")
            return None
        finally:
            # Clean up temp file
            if filelist_path is not None:
                try:
                    os.unlink(filelist_path)
                except OSError:
                    pass

    def parse_root(self, root_dir: str, extensions: List[str] = None, source_root: Optional[str] = None) -> Dict[str, List[Dict]]:
        """
        Parse entire directory tree with SINGLE ctags invocation (efficient for large codebases).
        This is the RECOMMENDED method for production indexing. Runs ctags once on all files,
        vastly faster than per-file invocation (n files: 1 invocation vs n invocations).

        Args:
            root_dir: Root directory to scan
            extensions: File extensions to include (default: ['.c', '.h'])
            source_root: Root directory for canonical path computation (default: root_dir)
                All returned paths will be relative to this directory in POSIX format.

        Returns:
            Dictionary mapping CANONICAL file paths (rel_posix from source_root) to symbol lists:
            {
                'path/to/file.c': [symbol1, symbol2, ...],
                'path/to/file.h': [symbol3, ...],
            }
            An empty dictionary is returned when no file matches or ctags fails.

        Raises:
            FileNotFoundError: If root_dir does not exist.
            ValueError: If a tagged file resolves to a location outside source_root.
        """
        if extensions is None:
            extensions = ['.c', '.h']

        root_path = Path(root_dir).resolve()
        if not root_path.exists():
            raise FileNotFoundError(f"Directory not found: {root_dir}")

        # Determine source_root for canonical path computation
        source_root_path = root_path if source_root is None else Path(source_root).resolve()

        # Find all matching files (directories that happen to match are skipped)
        file_list = []
        for ext in extensions:
            file_list.extend(p for p in root_path.rglob(f'*{ext}') if p.is_file())

        if not file_list:
            return {}

        raw_tags = self._run_ctags_on_files(file_list)
        if raw_tags is None:
            return {}

        # Every tag of a file carries the same 'path', so resolve each distinct
        # path once instead of once per tag.
        canonical_cache = {}

        def canonical(tag_path):
            cached = canonical_cache.get(tag_path)
            if cached is None:
                cached = Path(tag_path).resolve().relative_to(source_root_path).as_posix()
                canonical_cache[tag_path] = cached
            return cached

        # Two-pass symbol processing:
        # Pass 1: Build typedef mappings for anonymous struct/union/enum resolution
        # Pass 2: Parse all symbols with resolved typedef names
        # Note: Pass 1 must complete before Pass 2 since member symbols may reference
        # typedefs defined later in the file.
        # CRITICAL: the mapping is kept separately for EACH file to avoid cross-file
        # pollution (same __anonXXX token can appear in multiple files with
        # different typedef names).

        # Pass 1: Collect typedef mappings per file
        anon_to_typedef_by_file = {}
        for tag in raw_tags:
            file_path = tag.get('path')
            if not file_path:
                continue
            entry = self._anon_typedef_entry(tag)
            if entry is None:
                continue
            anon_name, typedef_name = entry
            anon_to_typedef_by_file.setdefault(canonical(file_path), {})[anon_name] = typedef_name

        # Pass 2: Parse all symbols with typedef resolution
        results = {}
        with tqdm(total=len(raw_tags), desc="Parsing symbols", unit=" tags") as pbar:
            for tag in raw_tags:
                file_path = tag.get('path')
                if file_path:
                    # Normalize key: canonical rel_posix
                    file_path_canonical = canonical(file_path)

                    # Use ONLY this file's anon mapping
                    file_anon_map = anon_to_typedef_by_file.get(file_path_canonical, {})
                    symbol = self._parse_tag(tag, file_path, file_anon_map)
                    if symbol:
                        results.setdefault(file_path_canonical, []).append(symbol)

                pbar.update(1)

        return results

    def parse_file(self, file_path: str) -> List[Dict]:
        """
        Parse a single file and extract symbols.

        This method is INEFFICIENT for bulk indexing (runs ctags once per file).
        Use parse_root() for production indexing of directories.
        This method is kept for:
        - Debugging individual files
        - Incremental updates of single files
        - Testing

        Args:
            file_path: Path to C source file

        Returns:
            List of symbol dictionaries with keys:
            - name: Symbol name
            - type: Symbol type (function, variable, struct, etc.)
            - line: Line number
            - signature: Full signature (if available)
            - scope: Scope (global, static, etc.)
            - scope_kind: Parent scope kind (struct, union, enum)
            - scope_name: Parent scope name (PowerState, Dummy, etc.)
            An empty list is returned (after printing a warning) if ctags
            exits with a non-zero status.

        Raises:
            FileNotFoundError: If file_path does not exist.
        """
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")

        # Run ctags with JSON output
        cmd = [self.ctags_bin, *self._CTAGS_OPTIONS, file_path]

        try:
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                check=True
            )
        except subprocess.CalledProcessError as e:
            print(f"Warning: ctags failed on {file_path}: {e}")
            return []

        # Parse JSON output - TWO PASS approach:
        # Pass 1: Build mapping of anonymous structs to typedef names
        # Pass 2: Parse all tags and resolve anonymous struct references
        raw_tags = list(self._iter_tags(result.stdout.splitlines()))

        anon_to_typedef = {}  # Maps __anonXXX -> typedef name
        for tag in raw_tags:
            entry = self._anon_typedef_entry(tag)
            if entry is not None:
                anon_to_typedef[entry[0]] = entry[1]

        # Pass 2: Parse all tags with resolved scope names
        symbols = []
        for tag in raw_tags:
            symbol = self._parse_tag(tag, file_path, anon_to_typedef)
            if symbol:
                symbols.append(symbol)

        return symbols

    def _parse_tag(self, tag: Dict, file_path: str, anon_to_typedef: Dict[str, str] = None) -> Optional[Dict]:
        """
        Parse a ctags tag into our symbol format

        Args:
            tag: Raw ctags tag dictionary
            file_path: Source file path (stored unchanged in the result)
            anon_to_typedef: Mapping from anonymous struct names to typedef names

        Returns:
            Symbol dictionary, or None if the tag has no name or kind, or if
            its name is an anonymous (__anon...) placeholder
        """
        if anon_to_typedef is None:
            anon_to_typedef = {}

        # Extract basic info
        name = tag.get('name')
        kind = tag.get('kind')
        line = tag.get('line', 0)

        if not name or not kind:
            return None

        if name.startswith('__anon'):
            return None

        # Raw typeref and signature from ctags.
        # Stay None if not provided - DO NOT invent values
        raw_typeref = tag.get('typeref')
        raw_signature = tag.get('signature')

        # Handle typedef structs/unions/enums - treat them as struct/union/enum
        # with the typedef name
        if kind == 'typedef' and raw_typeref:
            if raw_typeref.startswith('struct:'):
                symbol_type = 'struct'
            elif raw_typeref.startswith('union:'):
                symbol_type = 'union'
            elif raw_typeref.startswith('enum:'):
                symbol_type = 'enum'
            else:
                symbol_type = 'typedef'
        else:
            # Unknown kinds pass through unchanged
            symbol_type = self._TYPE_MAP.get(kind, kind)

        # Extract parent scope (struct/union/enum/class)
        scope_kind = None
        scope_name = None
        if 'scopeKind' in tag and 'scope' in tag:
            parent_scope_name = tag['scope']

            # Resolve anonymous struct names to their typedef names
            if parent_scope_name.startswith('__anon') and parent_scope_name in anon_to_typedef:
                parent_scope_name = anon_to_typedef[parent_scope_name]

            # Store scope info (skip only if still anonymous after resolution)
            if not parent_scope_name.startswith('__anon'):
                scope_kind = tag['scopeKind']
                scope_name = parent_scope_name

        # Detect file-local scope (static in C)
        # ctags provides this via the 'file' boolean field or 'fileScope' in extras
        is_file_scope = None  # None = unknown

        if 'file' in tag:
            # The 'file' boolean field takes precedence
            is_file_scope = 1 if tag['file'] else 0
        elif 'extras' in tag:
            # Fallback: 'extras' can be a list or a comma-separated string
            # depending on ctags version
            extras = tag.get('extras')
            if isinstance(extras, str):
                extras = [e.strip() for e in extras.split(',') if e.strip()]
            elif not isinstance(extras, list):
                # Unknown format, treat as empty
                extras = []
            is_file_scope = 1 if 'fileScope' in extras else 0

        # Keep old 'scope' field for backwards compatibility (deprecated)
        scope = 'static' if is_file_scope == 1 else 'global'

        return {
            'name': name,
            'type': symbol_type,         # Normalized type (prototype -> function)
            'kind_raw': kind,            # Raw ctags kind (prototype, function, etc.)
            'line': line,
            'signature': raw_signature,  # Raw from ctags, None if not available
            'typeref': raw_typeref,      # Raw from ctags, None if not available
            'scope': scope,              # Deprecated: kept for backwards compatibility
            'scope_kind': scope_kind,
            'scope_name': scope_name,
            'is_file_scope': is_file_scope,
            'file_path': file_path
        }

    def parse_directory(self, dir_path: str, extensions: List[str] = None) -> Dict[str, List[Dict]]:
        """
        Parse all files in a directory recursively.

        DEPRECATED: Use parse_root() instead for better performance.
        This method now delegates to parse_root().

        Args:
            dir_path: Directory to scan
            extensions: File extensions to include (default: ['.c', '.h'])

        Returns:
            Dictionary mapping file paths to symbol lists
        """
        return self.parse_root(dir_path, extensions)
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
# Simple test
|
|
407
|
+
if __name__ == "__main__":
|
|
408
|
+
import sys
|
|
409
|
+
|
|
410
|
+
if len(sys.argv) < 2:
|
|
411
|
+
print("Usage: python ctags_parser.py <file_or_directory>")
|
|
412
|
+
sys.exit(1)
|
|
413
|
+
|
|
414
|
+
parser = CTagsParser()
|
|
415
|
+
path = sys.argv[1]
|
|
416
|
+
|
|
417
|
+
if os.path.isfile(path):
|
|
418
|
+
symbols = parser.parse_file(path)
|
|
419
|
+
print(f"Found {len(symbols)} symbols in {path}:")
|
|
420
|
+
for sym in symbols: # Show all symbols
|
|
421
|
+
# Build qualified name if it has a parent scope
|
|
422
|
+
if sym.get('scope_kind') and sym.get('scope_name'):
|
|
423
|
+
qualified = f"{sym['scope_name']}.{sym['name']}"
|
|
424
|
+
scope_info = f" ({sym['scope_kind']}:{sym['scope_name']})"
|
|
425
|
+
else:
|
|
426
|
+
qualified = sym['name']
|
|
427
|
+
scope_info = ""
|
|
428
|
+
|
|
429
|
+
# Add file-scope indicator
|
|
430
|
+
file_scope_indicator = ""
|
|
431
|
+
if sym.get('is_file_scope') == 1:
|
|
432
|
+
file_scope_indicator = " [file-local]"
|
|
433
|
+
elif sym.get('is_file_scope') == 0:
|
|
434
|
+
file_scope_indicator = " [global]"
|
|
435
|
+
|
|
436
|
+
# Add signature for functions (including return type from typeref)
|
|
437
|
+
sig_display = ""
|
|
438
|
+
if sym['type'] == 'function':
|
|
439
|
+
# Build full signature: "return_type name(params)"
|
|
440
|
+
return_type = ""
|
|
441
|
+
if sym.get('typeref'):
|
|
442
|
+
# typeref is like "typename:void" or "typename:int"
|
|
443
|
+
return_type = sym['typeref'].replace('typename:', '') + ' '
|
|
444
|
+
|
|
445
|
+
params = sym.get('signature', '()')
|
|
446
|
+
sig_display = f"{return_type}{params}"
|
|
447
|
+
|
|
448
|
+
print(f" {sym['type']:12} {qualified:30}{sig_display:40} @ line {sym['line']}{scope_info}{file_scope_indicator}")
|
|
449
|
+
else:
|
|
450
|
+
results = parser.parse_directory(path)
|
|
451
|
+
total = sum(len(syms) for syms in results.values())
|
|
452
|
+
print(f"Found {total} symbols in {len(results)} files")
|
|
453
|
+
for file_path, symbols in list(results.items())[:5]: # Show first 5 files
|
|
454
|
+
print(f"\n{file_path}: {len(symbols)} symbols")
|
|
455
|
+
for sym in symbols[:5]:
|
|
456
|
+
print(f" {sym['type']:12} {sym['name']:30} @ line {sym['line']}")
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Explorer - Unified File Discovery Module
|
|
4
|
+
Used by both indexer and cscope to ensure consistent file sets
|
|
5
|
+
|
|
6
|
+
This module provides FileDiscovery class for finding source files
|
|
7
|
+
with consistent filtering rules across all tools.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import List, Set
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Directory names FileDiscovery skips when no explicit ignore set is supplied.
DEFAULT_IGNORE_DIRS = {
    # Version control and tool caches
    '.git',
    '__pycache__',
    '.pytest_cache',
    # Build / packaging output
    'out',
    'build',
    'dist',
    # Third-party dependencies and virtual environments
    'node_modules',
    '.venv',
    'venv',
}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class FileDiscovery:
    """
    Discovers source files in a directory with consistent filtering

    CRITICAL: Both indexer and cscope MUST use this same discovery logic
    to ensure they index the exact same set of files.
    """

    def __init__(
        self,
        source_root: str,
        extensions: List[str] = None,
        ignore_dirs: Set[str] = None
    ):
        """
        Args:
            source_root: Root directory to scan
            extensions: File extensions to include (default: the C, C++, Python,
                Makefile, Java and Rust extensions listed below). An empty list
                also selects the default.
            ignore_dirs: Directory names to skip (default: DEFAULT_IGNORE_DIRS).
                An empty set also selects the default.

        Raises:
            FileNotFoundError: If source_root does not exist.
            NotADirectoryError: If source_root is not a directory.
        """
        self.source_root = Path(source_root).resolve()
        self.extensions = extensions or [
            '.c', '.h',                             # C
            '.cpp', '.cc', '.cxx', '.hpp', '.hxx',  # C++
            '.py',                                  # Python
            '.mk',                                  # Makefiles
            '.java',                                # Java
            '.rs',                                  # Rust
        ]
        self.ignore_dirs = ignore_dirs or DEFAULT_IGNORE_DIRS

        if not self.source_root.exists():
            raise FileNotFoundError(f"Directory not found: {source_root}")

        if not self.source_root.is_dir():
            raise NotADirectoryError(f"Not a directory: {source_root}")

    def _iter_matching_files(self):
        """Yield each non-ignored file under source_root matching an extension, once."""
        seen = set()
        for ext in self.extensions:
            for file_path in self.source_root.rglob(f'*{ext}'):
                if file_path in seen:
                    # Overlapping or repeated extension patterns
                    continue
                seen.add(file_path)
                if self._should_ignore(file_path) or not file_path.is_file():
                    continue
                yield file_path

    def discover_files(self) -> List[str]:
        """
        Find all files matching extensions, with ignore filters

        Returns:
            Sorted list of POSIX-formatted relative paths from source_root
            Example: ['drivers/thermal.c', 'include/power.h', 'power.c']
        """
        return sorted(
            file_path.relative_to(self.source_root).as_posix()
            for file_path in self._iter_matching_files()
        )

    def _should_ignore(self, file_path: Path) -> bool:
        """
        True if file should be ignored, False otherwise

        Only the path components BELOW source_root are compared against
        ignore_dirs; otherwise a project that itself lives under a directory
        named e.g. 'build' or 'out' would have every file ignored. A path that
        is not located under source_root is checked component by component
        as given.
        """
        try:
            parts = file_path.relative_to(self.source_root).parts
        except ValueError:
            parts = file_path.parts
        return any(part in self.ignore_dirs for part in parts)

    def discover_files_absolute(self) -> List[Path]:
        """
        Find all files matching extensions, with ignore filters

        Returns:
            Sorted list of absolute Path objects
        """
        return sorted(self._iter_matching_files())

    def get_stats(self) -> dict:
        """
        Returns:
            Dictionary with the total file count ('total_files'), the count per
            file suffix ('extensions') and the scanned root ('source_root')
        """
        files = self.discover_files()

        # Count by extension
        ext_counts = {}
        for file_path in files:
            ext = Path(file_path).suffix
            ext_counts[ext] = ext_counts.get(ext, 0) + 1

        return {
            'total_files': len(files),
            'extensions': ext_counts,
            'source_root': str(self.source_root)
        }
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
# Convenience function for quick usage
|
|
133
|
+
def discover_files(source_root: str, extensions: List[str] = None) -> List[str]:
    """Return the files found by a FileDiscovery built from the given arguments.

    Args:
        source_root: Root directory to scan
        extensions: File extensions to include (passed through to FileDiscovery)

    Returns:
        Whatever FileDiscovery.discover_files() returns for that root
    """
    return FileDiscovery(source_root, extensions).discover_files()
|