srcodex 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. srcodex/__init__.py +0 -0
  2. srcodex/backend/__init__.py +0 -0
  3. srcodex/backend/chat.py +79 -0
  4. srcodex/backend/main.py +98 -0
  5. srcodex/backend/services/__init__.py +0 -0
  6. srcodex/backend/services/claude_service.py +754 -0
  7. srcodex/backend/services/config_loader.py +113 -0
  8. srcodex/backend/services/file_access_tools.py +279 -0
  9. srcodex/backend/services/file_tree.py +480 -0
  10. srcodex/backend/services/graph_tools.py +874 -0
  11. srcodex/backend/services/logger_setup.py +91 -0
  12. srcodex/backend/services/session_manager.py +81 -0
  13. srcodex/backend/services/status_tracker.py +91 -0
  14. srcodex/cli.py +255 -0
  15. srcodex/core/__init__.py +0 -0
  16. srcodex/core/config.py +113 -0
  17. srcodex/core/logger.py +23 -0
  18. srcodex/indexer/__init__.py +0 -0
  19. srcodex/indexer/cscope_client.py +183 -0
  20. srcodex/indexer/ctags_compat.py +223 -0
  21. srcodex/indexer/ctags_parser.py +456 -0
  22. srcodex/indexer/explorer.py +135 -0
  23. srcodex/indexer/field_access_analyzer.py +436 -0
  24. srcodex/indexer/indexer.py +664 -0
  25. srcodex/indexer/reference_ingestor.py +293 -0
  26. srcodex/indexer/reference_resolver.py +544 -0
  27. srcodex/tui/__init__.py +0 -0
  28. srcodex/tui/app.py +103 -0
  29. srcodex/tui/app.tcss +24 -0
  30. srcodex/tui/components/__init__.py +0 -0
  31. srcodex/tui/components/bars/__init__.py +0 -0
  32. srcodex/tui/components/bars/chat_header.py +48 -0
  33. srcodex/tui/components/bars/code_tab_bar.py +157 -0
  34. srcodex/tui/components/bars/footer_bar.py +128 -0
  35. srcodex/tui/components/bars/left_tab.py +54 -0
  36. srcodex/tui/components/logger.py +57 -0
  37. srcodex/tui/components/panels/__init__.py +0 -0
  38. srcodex/tui/components/panels/chat_panel.py +523 -0
  39. srcodex/tui/components/panels/code_panel.py +229 -0
  40. srcodex/tui/components/panels/side_panel.py +128 -0
  41. srcodex/tui/components/views/__init__.py +0 -0
  42. srcodex/tui/components/views/explorer_view.py +20 -0
  43. srcodex/tui/components/views/search_view.py +148 -0
  44. srcodex/tui/components/widgets/__init__.py +0 -0
  45. srcodex/tui/components/widgets/file_browser.py +16 -0
  46. srcodex/tui/components/widgets/find_box.py +85 -0
  47. srcodex-0.2.0.dist-info/METADATA +170 -0
  48. srcodex-0.2.0.dist-info/RECORD +52 -0
  49. srcodex-0.2.0.dist-info/WHEEL +5 -0
  50. srcodex-0.2.0.dist-info/entry_points.txt +2 -0
  51. srcodex-0.2.0.dist-info/licenses/LICENSE +21 -0
  52. srcodex-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,456 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ SRC Code Explorer - CTags Parser
4
+ Wrapper around Universal CTags to extract symbols from C code
5
+ """
6
+
7
+ import subprocess
8
+ import json
9
+ import os
10
+ import tempfile
11
+ from typing import List, Dict, Optional
12
+ from pathlib import Path
13
+ from tqdm import tqdm
14
+ from .ctags_compat import verify_ctags_compatibility
15
+
16
+
17
class CTagsParser:
    """Parse C source code using Universal CTags.

    The parser shells out to the ``ctags`` binary with JSON output enabled and
    converts every emitted tag into a plain symbol dictionary (see
    ``_parse_tag`` for the exact keys).
    """

    # Prefixes of a ctags ``typeref`` value that mark a typedef of an aggregate.
    _AGGREGATE_TYPEREF_PREFIXES = ('struct:', 'union:', 'enum:')

    # Raw ctags kind -> normalized symbol type. Kinds that are not listed are
    # passed through unchanged. Both 'prototype' and 'function' normalize to
    # 'function'; the raw kind is preserved separately as ``kind_raw``.
    _KIND_TO_TYPE = {
        'function': 'function',
        'prototype': 'function',  # Declaration in .h
        'variable': 'variable',
        'struct': 'struct',
        'union': 'union',
        'enum': 'enum',
        'enumerator': 'enumerator',
        'typedef': 'typedef',
        'macro': 'macro',
        'member': 'member',
        'header': 'header',
    }

    def __init__(self, ctags_bin: str = "ctags"):
        """
        Initialize CTags parser

        Args:
            ctags_bin: Path to ctags binary (default: "ctags")
        """
        self.ctags_bin = ctags_bin
        self._verify_ctags()

    def _verify_ctags(self):
        """Verify that ctags is installed and compatible"""
        verify_ctags_compatibility(self.ctags_bin)

    def parse_root(self, root_dir: str, extensions: List[str] = None, source_root: Optional[str] = None) -> Dict[str, List[Dict]]:
        """
        Parse an entire directory tree with a SINGLE ctags invocation.

        This is the recommended method for bulk indexing: ctags runs once over
        all matching files instead of once per file.

        Args:
            root_dir: Root directory to scan
            extensions: File extensions to include (default: ['.c', '.h'])
            source_root: Root directory for canonical path computation (default: root_dir).
                         Result keys are relative to this directory in POSIX format.

        Returns:
            Dictionary mapping canonical file paths to symbol lists:
            {
                'path/to/file.c': [symbol1, symbol2, ...],
                'path/to/file.h': [symbol3, ...],
            }
            An empty dictionary is returned when no file matches or when ctags
            fails (a warning is printed in the latter case). A file that cannot
            be expressed relative to source_root is keyed by its absolute POSIX
            path instead of aborting the whole run.

        Raises:
            FileNotFoundError: If root_dir does not exist
        """
        if extensions is None:
            extensions = ['.c', '.h']

        root_path = Path(root_dir).resolve()
        if not root_path.exists():
            raise FileNotFoundError(f"Directory not found: {root_dir}")

        # Determine source_root for canonical path computation
        if source_root is None:
            source_root_path = root_path
        else:
            source_root_path = Path(source_root).resolve()

        # Find all matching files. dict.fromkeys() drops duplicates (repeated
        # extensions) while keeping discovery order; is_file() drops
        # directories whose name happens to end with a source extension.
        file_list = list(dict.fromkeys(
            candidate
            for ext in extensions
            for candidate in root_path.rglob(f'*{ext}')
            if candidate.is_file()
        ))

        if not file_list:
            return {}

        raw_tags = self._collect_raw_tags(file_list)
        if raw_tags is None:
            return {}

        # Resolving a path touches the filesystem, so do it once per distinct
        # ctags path instead of twice per tag.
        canonical_cache = {}

        # Two-pass symbol processing, PER FILE:
        # Pass 1 must complete before Pass 2 since member symbols may reference
        # typedefs defined later in the file. The mapping is kept per file
        # because the same __anonXXX token can appear in multiple files with
        # different typedef names.

        # Pass 1: Collect typedef mappings per file
        anon_to_typedef_by_file = {}
        for tag in raw_tags:
            pair = self._anon_typedef_pair(tag)
            if pair is None:
                continue
            file_path = tag.get('path')
            if not file_path:
                continue
            file_path_canonical = self._canonical_path(file_path, source_root_path, canonical_cache)
            anon_name, typedef_name = pair
            anon_to_typedef_by_file.setdefault(file_path_canonical, {})[anon_name] = typedef_name

        # Pass 2: Parse all symbols with typedef resolution
        results = {}
        with tqdm(total=len(raw_tags), desc="Parsing symbols", unit=" tags") as pbar:
            for tag in raw_tags:
                file_path = tag.get('path')
                if file_path:
                    # Normalize key: canonical rel_posix
                    file_path_canonical = self._canonical_path(file_path, source_root_path, canonical_cache)

                    # Use ONLY this file's anon mapping
                    file_anon_map = anon_to_typedef_by_file.get(file_path_canonical, {})
                    symbol = self._parse_tag(tag, file_path, file_anon_map)
                    if symbol:
                        results.setdefault(file_path_canonical, []).append(symbol)

                pbar.update(1)

        return results

    def _collect_raw_tags(self, file_list: List[Path]) -> Optional[List[Dict]]:
        """
        Write file_list to a temporary list file and run ctags over it.

        The list file is passed via -L so that large codebases do not hit
        "Argument list too long". It is removed afterwards, even when writing
        it or running ctags fails.

        Returns:
            List of raw tag dictionaries, or None if ctags failed
        """
        filelist_path = None
        try:
            with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.filelist') as f:
                filelist_path = f.name
                f.writelines(f"{file_path}\n" for file_path in file_list)
            return self._stream_ctags(filelist_path)
        finally:
            # Clean up temp file
            if filelist_path is not None:
                try:
                    os.unlink(filelist_path)
                except OSError:
                    pass

    def _stream_ctags(self, filelist_path: str) -> Optional[List[Dict]]:
        """
        Run ctags on the files named in filelist_path and decode its JSON lines.

        Returns:
            List of raw tag dictionaries, or None if ctags could not be run or
            exited with a non-zero status (a warning is printed).
        """
        cmd = [
            self.ctags_bin,
            "--output-format=json",
            "--fields=+nKSz",
            "--kinds-C=+p",
            "-f", "-",
            "-L", filelist_path
        ]

        raw_tags = []
        try:
            # stderr goes to a temporary file rather than a pipe: nothing reads
            # stderr while stdout is being streamed, so a pipe could fill up
            # and block ctags forever.
            with tempfile.TemporaryFile() as stderr_file:
                with subprocess.Popen(
                    cmd,
                    stdout=subprocess.PIPE,
                    stderr=stderr_file,
                    text=True,
                    errors='replace'  # undecodable bytes must not abort the run
                ) as process:
                    try:
                        # Stream ctags output with progress bar
                        with tqdm(desc="Running ctags", unit=" symbols") as pbar:
                            for line in process.stdout:
                                line = line.strip()
                                if not line or line.startswith('!'):
                                    continue
                                try:
                                    tag = json.loads(line)
                                except json.JSONDecodeError:
                                    continue
                                if not isinstance(tag, dict):
                                    continue
                                raw_tags.append(tag)
                                pbar.update(1)
                    except BaseException:
                        # We stopped reading; kill ctags so that leaving the
                        # Popen context does not wait on a blocked child.
                        process.kill()
                        raise
                    process.wait()

                if process.returncode != 0:
                    stderr_file.seek(0)
                    stderr = stderr_file.read().decode(errors='replace')
                    print(f"Warning: ctags failed with code {process.returncode}: {stderr}")
                    return None

            print(f"✓ CTags complete: {len(raw_tags)} symbols extracted")
            return raw_tags
        except Exception as e:
            print(f"Warning: ctags failed: {e}")
            return None

    @staticmethod
    def _canonical_path(raw_path: str, source_root_path: Path, cache: Dict[str, str]) -> str:
        """
        Return raw_path as a POSIX path relative to source_root_path.

        The symlink-resolved path is tried first, then the merely absolute
        path (covers a symlink inside the tree that points outside of it).
        If neither lies under source_root_path, the resolved absolute POSIX
        path is returned. Results are memoized in cache.
        """
        cached = cache.get(raw_path)
        if cached is not None:
            return cached

        resolved = Path(raw_path).resolve()
        canonical = None
        for candidate in (resolved, Path(os.path.abspath(raw_path))):
            try:
                canonical = candidate.relative_to(source_root_path).as_posix()
                break
            except ValueError:
                continue
        if canonical is None:
            canonical = resolved.as_posix()

        cache[raw_path] = canonical
        return canonical

    @classmethod
    def _anon_typedef_pair(cls, tag: Dict):
        """
        Return (anon_name, typedef_name) if tag is a typedef of an anonymous
        struct/union/enum (typeref like "struct:__anondd0b9e6c0108"), else None.
        """
        if tag.get('kind') != 'typedef':
            return None
        # 'or' also covers an explicit null typeref in the JSON
        typeref = tag.get('typeref') or ''
        if not typeref.startswith(cls._AGGREGATE_TYPEREF_PREFIXES):
            return None
        anon_name = typeref.split(':', 1)[1]
        typedef_name = tag.get('name')
        if anon_name.startswith('__anon') and typedef_name:
            return anon_name, typedef_name
        return None

    def parse_file(self, file_path: str) -> List[Dict]:
        """
        Parse a single file and extract symbols.

        This method runs ctags once per call, which is inefficient for bulk
        indexing; use parse_root() for whole directories. It is kept for
        debugging individual files, incremental single-file updates and tests.

        Args:
            file_path: Path to C source file

        Returns:
            List of symbol dictionaries (see _parse_tag for the keys).
            An empty list is returned if ctags exits with an error.

        Raises:
            FileNotFoundError: If file_path does not exist
        """
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")

        # Run ctags with JSON output
        cmd = [
            self.ctags_bin,
            "--output-format=json",
            # +n (line number), +K (kind as full name), +S (signature),
            # +z (emit the "kind:" key); scope fields are read from the
            # 'scope'/'scopeKind' keys of each tag.
            "--fields=+nKSz",
            "--kinds-C=+p",     # Include function prototypes
            "-f", "-",          # Output to stdout
            file_path
        ]

        try:
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                errors='replace',
                check=True
            )
        except subprocess.CalledProcessError as e:
            print(f"Warning: ctags failed on {file_path}: {e}")
            return []

        # TWO PASS approach:
        # Pass 1: decode tags and map anonymous aggregates to typedef names
        # Pass 2: build symbols with anonymous scope names resolved
        raw_tags = []
        anon_to_typedef = {}  # Maps __anonXXX -> typedef name

        for line in result.stdout.splitlines():
            line = line.strip()
            if not line or line.startswith('!'):
                continue
            try:
                tag = json.loads(line)
            except json.JSONDecodeError:
                continue
            if not isinstance(tag, dict):
                continue
            raw_tags.append(tag)

            pair = self._anon_typedef_pair(tag)
            if pair is not None:
                anon_to_typedef[pair[0]] = pair[1]

        # Pass 2: Parse all tags with resolved scope names
        symbols = []
        for tag in raw_tags:
            symbol = self._parse_tag(tag, file_path, anon_to_typedef)
            if symbol:
                symbols.append(symbol)

        return symbols

    def _parse_tag(self, tag: Dict, file_path: str, anon_to_typedef: Dict[str, str] = None) -> Optional[Dict]:
        """
        Parse a ctags tag into our symbol format

        Args:
            tag: Raw ctags tag dictionary
            file_path: Source file path (stored unchanged in the symbol)
            anon_to_typedef: Mapping from anonymous struct names to typedef names

        Returns:
            None if the tag has no name or kind, or if its name is anonymous
            (starts with '__anon'). Otherwise a dictionary with keys:
                - name: Symbol name
                - type: Normalized type (prototype -> function; typedef of a
                        struct/union/enum -> struct/union/enum)
                - kind_raw: Raw ctags kind
                - line: Line number (0 if ctags gave none)
                - signature: Raw ctags signature, or None
                - typeref: Raw ctags typeref, or None
                - scope: 'static' if file-local, else 'global' (deprecated)
                - scope_kind: Parent scope kind, or None
                - scope_name: Parent scope name, or None
                - is_file_scope: 1, 0, or None when ctags gave no information
                - file_path: The file_path argument
        """
        if anon_to_typedef is None:
            anon_to_typedef = {}

        # Extract basic info
        name = tag.get('name')
        kind = tag.get('kind')
        line = tag.get('line', 0)

        if not name or not kind:
            return None

        if name.startswith('__anon'):
            return None

        # Raw values from ctags; None if not provided - DO NOT invent values
        raw_typeref = tag.get('typeref')
        raw_signature = tag.get('signature')

        # Typedef'd structs/unions/enums are reported as struct/union/enum
        # under the typedef name; everything else goes through the kind map.
        if kind == 'typedef' and raw_typeref:
            symbol_type = 'typedef'
            for prefix in self._AGGREGATE_TYPEREF_PREFIXES:
                if raw_typeref.startswith(prefix):
                    symbol_type = prefix[:-1]  # 'struct:' -> 'struct'
                    break
        else:
            symbol_type = self._KIND_TO_TYPE.get(kind, kind)

        # Extract parent scope (struct/union/enum/class)
        scope_kind = None
        scope_name = None
        if 'scopeKind' in tag and 'scope' in tag:
            parent_scope_name = tag['scope']

            # Resolve anonymous struct names to their typedef names
            if parent_scope_name.startswith('__anon') and parent_scope_name in anon_to_typedef:
                parent_scope_name = anon_to_typedef[parent_scope_name]

            # Store scope info (skip only if still anonymous after resolution)
            if not parent_scope_name.startswith('__anon'):
                scope_kind = tag['scopeKind']
                scope_name = parent_scope_name

        # Detect file-local scope (static in C): prefer the 'file' boolean
        # field, fall back to a 'fileScope' entry in 'extras'.
        is_file_scope = None  # None = unknown
        if 'file' in tag:
            is_file_scope = 1 if tag['file'] else 0
        elif 'extras' in tag:
            # extras can be a list or a comma-separated string
            extras = tag.get('extras')
            if isinstance(extras, str):
                extras = [e.strip() for e in extras.split(',') if e.strip()]
            elif not isinstance(extras, list):
                # Unknown format, treat as empty
                extras = []
            is_file_scope = 1 if 'fileScope' in extras else 0

        # Keep old 'scope' field for backwards compatibility (deprecated)
        scope = 'static' if is_file_scope == 1 else 'global'

        return {
            'name': name,
            'type': symbol_type,
            'kind_raw': kind,
            'line': line,
            'signature': raw_signature,
            'typeref': raw_typeref,
            'scope': scope,
            'scope_kind': scope_kind,
            'scope_name': scope_name,
            'is_file_scope': is_file_scope,
            'file_path': file_path
        }

    def parse_directory(self, dir_path: str, extensions: List[str] = None) -> Dict[str, List[Dict]]:
        """
        Parse all files in a directory recursively.

        DEPRECATED: Use parse_root() instead; this method only delegates to it.

        Args:
            dir_path: Directory to scan
            extensions: File extensions to include (default: ['.c', '.h'])

        Returns:
            Dictionary mapping file paths to symbol lists
        """
        return self.parse_root(dir_path, extensions)
404
+
405
+
406
+ # Simple test
407
+ if __name__ == "__main__":
408
+ import sys
409
+
410
+ if len(sys.argv) < 2:
411
+ print("Usage: python ctags_parser.py <file_or_directory>")
412
+ sys.exit(1)
413
+
414
+ parser = CTagsParser()
415
+ path = sys.argv[1]
416
+
417
+ if os.path.isfile(path):
418
+ symbols = parser.parse_file(path)
419
+ print(f"Found {len(symbols)} symbols in {path}:")
420
+ for sym in symbols: # Show all symbols
421
+ # Build qualified name if it has a parent scope
422
+ if sym.get('scope_kind') and sym.get('scope_name'):
423
+ qualified = f"{sym['scope_name']}.{sym['name']}"
424
+ scope_info = f" ({sym['scope_kind']}:{sym['scope_name']})"
425
+ else:
426
+ qualified = sym['name']
427
+ scope_info = ""
428
+
429
+ # Add file-scope indicator
430
+ file_scope_indicator = ""
431
+ if sym.get('is_file_scope') == 1:
432
+ file_scope_indicator = " [file-local]"
433
+ elif sym.get('is_file_scope') == 0:
434
+ file_scope_indicator = " [global]"
435
+
436
+ # Add signature for functions (including return type from typeref)
437
+ sig_display = ""
438
+ if sym['type'] == 'function':
439
+ # Build full signature: "return_type name(params)"
440
+ return_type = ""
441
+ if sym.get('typeref'):
442
+ # typeref is like "typename:void" or "typename:int"
443
+ return_type = sym['typeref'].replace('typename:', '') + ' '
444
+
445
+ params = sym.get('signature', '()')
446
+ sig_display = f"{return_type}{params}"
447
+
448
+ print(f" {sym['type']:12} {qualified:30}{sig_display:40} @ line {sym['line']}{scope_info}{file_scope_indicator}")
449
+ else:
450
+ results = parser.parse_directory(path)
451
+ total = sum(len(syms) for syms in results.values())
452
+ print(f"Found {total} symbols in {len(results)} files")
453
+ for file_path, symbols in list(results.items())[:5]: # Show first 5 files
454
+ print(f"\n{file_path}: {len(symbols)} symbols")
455
+ for sym in symbols[:5]:
456
+ print(f" {sym['type']:12} {sym['name']:30} @ line {sym['line']}")
@@ -0,0 +1,135 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Explorer - Unified File Discovery Module
4
+ Used by both indexer and cscope to ensure consistent file sets
5
+
6
+ This module provides FileDiscovery class for finding source files
7
+ with consistent filtering rules across all tools.
8
+ """
9
+
10
+ from pathlib import Path
11
+ from typing import List, Set
12
+
13
+
14
# Directory names skipped by FileDiscovery when the caller supplies no
# ignore_dirs of its own. Matching is by exact path component name.
DEFAULT_IGNORE_DIRS = {
    '.git', '.pytest_cache', '__pycache__',
    '.venv', 'venv', 'node_modules',
    'build', 'dist', 'out',
}
25
+
26
+
27
class FileDiscovery:
    """
    Discovers source files in a directory with consistent filtering

    CRITICAL: Both indexer and cscope MUST use this same discovery logic
    to ensure they index the exact same set of files.
    """

    def __init__(
        self,
        source_root: str,
        extensions: List[str] = None,
        ignore_dirs: Set[str] = None
    ):
        """
        Args:
            source_root: Root directory to scan
            extensions: File extensions to include. When omitted (or empty),
                        defaults to C, C++, Python, Makefile (.mk), Java and
                        Rust extensions (see the list below).
            ignore_dirs: Directory names to skip. When omitted (or empty),
                         defaults to a copy of DEFAULT_IGNORE_DIRS.

        Raises:
            FileNotFoundError: If source_root does not exist
            NotADirectoryError: If source_root is not a directory
        """
        self.source_root = Path(source_root).resolve()
        self.extensions = extensions or [
            '.c', '.h',                                # C
            '.cpp', '.cc', '.cxx', '.hpp', '.hxx',     # C++
            '.py',                                     # Python
            '.mk',                                     # Makefiles
            '.java',                                   # Java
            '.rs',                                     # Rust
        ]
        # Copy the defaults so that mutating self.ignore_dirs can never
        # change the module-level constant shared by other instances.
        self.ignore_dirs = ignore_dirs if ignore_dirs else set(DEFAULT_IGNORE_DIRS)

        if not self.source_root.exists():
            raise FileNotFoundError(f"Directory not found: {source_root}")

        if not self.source_root.is_dir():
            raise NotADirectoryError(f"Not a directory: {source_root}")

    def discover_files(self) -> List[str]:
        """
        Find all files matching extensions, with ignore filters

        Returns:
            Sorted list of POSIX-formatted relative paths from source_root
            Example: ['drivers/thermal.c', 'include/power.h', 'power.c']
        """
        # Sorted again as strings: string order differs from Path order.
        return sorted(
            file_path.relative_to(self.source_root).as_posix()
            for file_path in self.discover_files_absolute()
        )

    def _should_ignore(self, file_path: Path) -> bool:
        """
        True if file should be ignored, False otherwise

        Only the components below source_root are tested, so a checkout that
        itself lives under a directory named e.g. 'build' is not ignored
        wholesale. A path outside source_root is tested on all components.
        """
        try:
            parts = file_path.relative_to(self.source_root).parts
        except ValueError:
            parts = file_path.parts

        return any(part in self.ignore_dirs for part in parts)

    def discover_files_absolute(self) -> List[Path]:
        """
        Find all files matching extensions, with ignore filters

        Returns:
            Sorted list of absolute Path objects, without duplicates
        """
        # A set removes duplicates caused by repeated extensions.
        found = set()

        for ext in self.extensions:
            for file_path in self.source_root.rglob(f'*{ext}'):
                # rglob also yields directories whose name ends with ext
                if not file_path.is_file():
                    continue
                if self._should_ignore(file_path):
                    continue
                found.add(file_path)

        return sorted(found)

    def get_stats(self) -> dict:
        """
        Returns:
            Dictionary with keys 'total_files' (int), 'extensions'
            (suffix -> file count) and 'source_root' (str)
        """
        files = self.discover_files()

        # Count by extension
        ext_counts = {}
        for file_path in files:
            ext = Path(file_path).suffix
            ext_counts[ext] = ext_counts.get(ext, 0) + 1

        return {
            'total_files': len(files),
            'extensions': ext_counts,
            'source_root': str(self.source_root)
        }
130
+
131
+
132
+ # Convenience function for quick usage
133
def discover_files(source_root: str, extensions: List[str] = None) -> List[str]:
    """
    Shortcut for FileDiscovery(source_root, extensions).discover_files().

    Args:
        source_root: Root directory to scan
        extensions: File extensions to include (FileDiscovery's defaults if omitted)

    Returns:
        Whatever FileDiscovery.discover_files() returns for these arguments
    """
    return FileDiscovery(source_root, extensions).discover_files()