srcodex 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. srcodex/__init__.py +0 -0
  2. srcodex/backend/__init__.py +0 -0
  3. srcodex/backend/chat.py +79 -0
  4. srcodex/backend/main.py +98 -0
  5. srcodex/backend/services/__init__.py +0 -0
  6. srcodex/backend/services/claude_service.py +754 -0
  7. srcodex/backend/services/config_loader.py +113 -0
  8. srcodex/backend/services/file_access_tools.py +279 -0
  9. srcodex/backend/services/file_tree.py +480 -0
  10. srcodex/backend/services/graph_tools.py +874 -0
  11. srcodex/backend/services/logger_setup.py +91 -0
  12. srcodex/backend/services/session_manager.py +81 -0
  13. srcodex/backend/services/status_tracker.py +91 -0
  14. srcodex/cli.py +255 -0
  15. srcodex/core/__init__.py +0 -0
  16. srcodex/core/config.py +113 -0
  17. srcodex/core/logger.py +23 -0
  18. srcodex/indexer/__init__.py +0 -0
  19. srcodex/indexer/cscope_client.py +183 -0
  20. srcodex/indexer/ctags_compat.py +223 -0
  21. srcodex/indexer/ctags_parser.py +456 -0
  22. srcodex/indexer/explorer.py +135 -0
  23. srcodex/indexer/field_access_analyzer.py +436 -0
  24. srcodex/indexer/indexer.py +664 -0
  25. srcodex/indexer/reference_ingestor.py +293 -0
  26. srcodex/indexer/reference_resolver.py +544 -0
  27. srcodex/tui/__init__.py +0 -0
  28. srcodex/tui/app.py +103 -0
  29. srcodex/tui/app.tcss +24 -0
  30. srcodex/tui/components/__init__.py +0 -0
  31. srcodex/tui/components/bars/__init__.py +0 -0
  32. srcodex/tui/components/bars/chat_header.py +48 -0
  33. srcodex/tui/components/bars/code_tab_bar.py +157 -0
  34. srcodex/tui/components/bars/footer_bar.py +128 -0
  35. srcodex/tui/components/bars/left_tab.py +54 -0
  36. srcodex/tui/components/logger.py +57 -0
  37. srcodex/tui/components/panels/__init__.py +0 -0
  38. srcodex/tui/components/panels/chat_panel.py +523 -0
  39. srcodex/tui/components/panels/code_panel.py +229 -0
  40. srcodex/tui/components/panels/side_panel.py +128 -0
  41. srcodex/tui/components/views/__init__.py +0 -0
  42. srcodex/tui/components/views/explorer_view.py +20 -0
  43. srcodex/tui/components/views/search_view.py +148 -0
  44. srcodex/tui/components/widgets/__init__.py +0 -0
  45. srcodex/tui/components/widgets/file_browser.py +16 -0
  46. srcodex/tui/components/widgets/find_box.py +85 -0
  47. srcodex-0.2.0.dist-info/METADATA +170 -0
  48. srcodex-0.2.0.dist-info/RECORD +52 -0
  49. srcodex-0.2.0.dist-info/WHEEL +5 -0
  50. srcodex-0.2.0.dist-info/entry_points.txt +2 -0
  51. srcodex-0.2.0.dist-info/licenses/LICENSE +21 -0
  52. srcodex-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,664 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Code Explorer - Main Indexer
4
+ Scans source code and builds a searchable database
5
+ """
6
+
7
+ import sqlite3
8
+ import os
9
+ import sys
10
+ import hashlib
11
+ import subprocess
12
+ import time
13
+ from pathlib import Path
14
+ from typing import List, Optional, Dict
15
+ from datetime import datetime
16
+ import click
17
+ from tqdm import tqdm
18
+
19
+ from .ctags_parser import CTagsParser
20
+ from .explorer import FileDiscovery
21
+ from .reference_ingestor import ReferenceIngestor
22
+ from .reference_resolver import ReferenceResolver
23
+ from .field_access_analyzer import FieldAccessAnalyzer
24
+
25
+
26
class Indexer:
    """
    Builds and maintains the SQLite code index for one source tree.

    Pipeline stages exposed as methods:
        1.   index_directory()         - ctags symbols + file metadata
        1.5  analyze_field_accesses()  - ACCESSES edges
        2a.  build_cscope_database()   - cscope cross-reference DB
        2b.  ingest_raw_references()   - cscope output -> raw_references
        3.   resolve_semantic_edges()  - raw_references -> typed edges

    All file paths stored in the database are relative to source_root and
    use POSIX separators.
    """

    # File extension -> language label stored in files.language
    _LANGUAGE_MAP = {
        '.c': 'c',
        '.h': 'c',
        '.cpp': 'cpp',
        '.cc': 'cpp',
        '.cxx': 'cpp',
        '.hpp': 'cpp',
        '.hxx': 'cpp',
        '.py': 'python',
        '.mk': 'makefile',
        '.java': 'java',
        '.rs': 'rust',
    }

    # Tables emptied by _clear_database(), in deletion order (edge tables
    # first so rows that point at symbols/files go before their targets).
    _CLEARABLE_TABLES = (
        'symbol_edges',
        'file_edges',
        'raw_references',
        'symbols',
        'files',
        'symbols_fts',
    )

    _INSERT_SYMBOL_SQL = """
        INSERT INTO symbols (name, type, kind_raw, file_path, line_number, signature, typeref, scope, scope_kind, scope_name, is_file_scope)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """

    def __init__(self, db_path: str, verbose: bool = False):
        """
        Args:
            db_path: Path to SQLite database
            verbose: If True, print per-step diagnostics and per-file errors
        """
        self.db_path = db_path
        self.verbose = verbose
        self.conn = None
        self.ctags = CTagsParser()
        self.source_root = None
        # Set by build_cscope_database() on success; declared here so the
        # attribute always exists.
        self.cscope_dir = None

    def connect_db(self):
        """Connect to database and initialize schema"""
        self.conn = sqlite3.connect(self.db_path)
        self.conn.row_factory = sqlite3.Row

        self.conn.execute("PRAGMA foreign_keys = ON")

        # Read and execute schema (expected next to this module)
        schema_path = Path(__file__).parent / "db_schema.sql"
        with open(schema_path, 'r', encoding='utf-8') as f:
            schema_sql = f.read()
        self.conn.executescript(schema_sql)

        self.conn.commit()

        if self.verbose:
            print(f"✓ Database initialized: {self.db_path}")

    def close_db(self):
        """Close database connection (safe to call more than once)"""
        if self.conn:
            self.conn.close()
            # Drop the handle so later calls cannot touch a closed connection
            self.conn = None

    def index_directory(self, source_dir: str, extensions: Optional[List[str]] = None, force_clear: bool = False):
        """
        Index all files in a directory

        Args:
            source_dir: Root directory to scan
            extensions: File extensions to index
                (default: ['.c', '.h', '.cpp', '.py'])
            force_clear: If True, clear database without prompting

        Raises:
            Exception: Anything raised outside the per-file handling is
                re-raised after the bulk transaction has been rolled back.
        """
        if extensions is None:
            extensions = ['.c', '.h', '.cpp', '.py']

        source_path = Path(source_dir).resolve()

        # Store source root for relative path computation
        self.source_root = source_path

        print(f"Scanning directory: {source_dir}")

        # Use unified FileDiscovery module
        discovery = FileDiscovery(source_dir, extensions)
        files_to_index = discovery.discover_files_absolute()

        print(f"Found {len(files_to_index)} files to index")

        if force_clear:
            self._clear_database()
            if self.verbose:
                print("Database cleared (--force)")
        elif click.confirm("Clear existing database?", default=True):
            self._clear_database()

        # Parse ALL files with SINGLE ctags invocation (with progress bar)
        file_to_symbols = self.ctags.parse_root(str(source_path), extensions, source_root=str(source_path))

        total_symbols = 0
        failed_files = []

        # Index each file (store metadata + symbols) in ONE transaction
        self.conn.execute("BEGIN")
        try:
            with tqdm(total=len(files_to_index), desc="Indexing", unit="file") as pbar:
                for file_path in files_to_index:
                    # A savepoint per file lets one bad file be undone
                    # without committing its half-written rows and without
                    # losing the files indexed before it.
                    self.conn.execute("SAVEPOINT index_file")
                    try:
                        # Normalize to canonical form: rel_posix (same as parse_root() keys)
                        file_path_canonical = Path(file_path).relative_to(source_path).as_posix()
                        symbols = file_to_symbols.get(file_path_canonical, [])
                        total_symbols += self._index_file_with_symbols(str(file_path), symbols)
                        pbar.set_postfix({"symbols": total_symbols})
                    except Exception as e:
                        self.conn.execute("ROLLBACK TO index_file")
                        failed_files.append(file_path)
                        if self.verbose:
                            print(f"\nError indexing {file_path}: {e}")
                    finally:
                        self.conn.execute("RELEASE index_file")
                        pbar.update(1)

            self.conn.commit()
        except Exception:
            self.conn.rollback()
            raise

        indexed_files = len(files_to_index) - len(failed_files)

        # Update metadata (only files that were actually stored are counted)
        self._update_metadata(total_symbols, indexed_files)

        print("\nIndexing complete!")
        print(f" Files indexed: {indexed_files}")
        print(f" Symbols found: {total_symbols}")
        if failed_files:
            # Always surface failures; details are printed only with verbose
            print(f" Files failed: {len(failed_files)}")

    def _clear_database(self):
        """Delete all rows from the index tables and commit."""
        cursor = self.conn.cursor()
        for table in self._CLEARABLE_TABLES:
            # Table names come from a fixed class constant, never user input
            cursor.execute(f"DELETE FROM {table}")
        self.conn.commit()

        if self.verbose:
            print("✓ Database cleared")

    @classmethod
    def _detect_language(cls, file_path: str) -> str:
        """Map a file's extension (case-insensitive) to a language label."""
        return cls._LANGUAGE_MAP.get(Path(file_path).suffix.lower(), 'unknown')

    def _store_file_rows(self, file_path_rel: str, file_path: str, content_bytes: bytes,
                         mtime: float, symbols: List[Dict]) -> int:
        """
        Replace the files row and all symbols rows for one file.

        Does not commit; the caller owns the transaction.

        Args:
            file_path_rel: Path stored in the database
            file_path: Path used to derive the language from the extension
            content_bytes: Raw file content (source of size and sha1)
            mtime: Last-modified timestamp to store
            symbols: Symbol dicts as produced by the ctags parser
        Returns:
            Number of symbols stored
        """
        cursor = self.conn.cursor()

        # Delete existing symbols for this file (per-file refresh)
        cursor.execute("DELETE FROM symbols WHERE file_path = ?", (file_path_rel,))

        # Store file METADATA only (not content)
        cursor.execute(
            """INSERT OR REPLACE INTO files (path, size, language, sha1, last_modified)
               VALUES (?, ?, ?, ?, ?)""",
            (
                file_path_rel,
                len(content_bytes),
                self._detect_language(file_path),
                hashlib.sha1(content_bytes).hexdigest(),
                mtime,
            )
        )

        symbol_rows = [
            (
                symbol['name'],
                symbol['type'],                 # Normalized type
                symbol.get('kind_raw'),         # Raw ctags kind
                file_path_rel,                  # RELATIVE path for portability
                symbol['line'],
                symbol.get('signature'),        # NULL if not available
                symbol.get('typeref'),          # NULL if not available
                symbol.get('scope', 'global'),
                symbol.get('scope_kind'),
                symbol.get('scope_name'),
                symbol.get('is_file_scope'),
            )
            for symbol in symbols
        ]

        if symbol_rows:
            cursor.executemany(self._INSERT_SYMBOL_SQL, symbol_rows)

        return len(symbol_rows)

    def _index_file_with_symbols(self, file_path: str, symbols: List[Dict]) -> int:
        """
        Index a single file with PRE-PARSED symbols (from batch ctags call).
        RECOMMENDED method - symbols already parsed by parse_root().

        Args:
            file_path: Path to source file (absolute, inside source_root)
            symbols: Pre-parsed symbols from ctags
        Returns:
            Number of symbols indexed
        Raises:
            ValueError: If file_path is not under source_root
            OSError: If the file cannot be read
        """
        # Compute relative path for storage (use POSIX for cross-platform)
        file_path_rel = Path(file_path).relative_to(self.source_root).as_posix()

        # Read file for metadata (sha1, mtime, size)
        with open(file_path, 'rb') as f:
            content_bytes = f.read()
        mtime = os.path.getmtime(file_path)

        return self._store_file_rows(file_path_rel, file_path, content_bytes, mtime, symbols)

    def _index_file(self, file_path: str) -> int:
        """
        Index a single file with per-file ctags invocation.

        DEPRECATED for bulk indexing - use _index_file_with_symbols() instead.
        Kept for:
        - Incremental updates of single files
        - Debugging

        Args:
            file_path: Path to source file (absolute)
        Returns:
            Number of symbols found (0 if the file could not be read)
        """
        # Convert to relative path for storage
        file_path_obj = Path(file_path)
        file_path_rel = file_path_obj.as_posix()
        if self.source_root:
            try:
                # POSIX format for cross-platform
                file_path_rel = file_path_obj.relative_to(self.source_root).as_posix()
            except ValueError:
                # File is outside source_root, keep absolute (POSIX)
                pass

        # Read file content (use absolute path for actual file access).
        # Binary mode: the bytes are only used for size and SHA1.
        try:
            with open(file_path, 'rb') as f:
                content_bytes = f.read()
            mtime = os.path.getmtime(file_path)
        except (OSError, ValueError) as e:
            if self.verbose:
                print(f"Warning: Could not read {file_path}: {e}")
            return 0

        # Parse symbols with ctags (use absolute path for ctags). Done before
        # any write so a parser failure leaves the existing rows untouched.
        symbols = self.ctags.parse_file(file_path)

        return self._store_file_rows(file_path_rel, file_path, content_bytes, mtime, symbols)

    def _update_metadata(self, total_symbols: int, total_files: int):
        """Update metadata table with indexing statistics"""
        entries = [
            ('total_symbols', str(total_symbols)),
            ('total_files', str(total_files)),
            ('indexed_at', datetime.now().isoformat()),
        ]
        # Store source root for path resolution
        if self.source_root:
            entries.append(('source_root', str(self.source_root)))

        cursor = self.conn.cursor()
        cursor.executemany(
            "INSERT OR REPLACE INTO metadata (key, value) VALUES (?, ?)",
            entries
        )
        self.conn.commit()

    @staticmethod
    def _format_cscope_path(path: str) -> str:
        """
        Format one path for a cscope name file.

        Names containing whitespace must be wrapped in double quotes, with
        embedded backslashes and double quotes escaped by a backslash.
        Names without whitespace are written unchanged.
        """
        if not any(ch.isspace() for ch in path):
            return path
        escaped = path.replace('\\', '\\\\').replace('"', '\\"')
        return f'"{escaped}"'

    def build_cscope_database(self, output_dir: Optional[str] = None):
        """
        Build cscope database for cross-reference queries.

        CRITICAL: Builds cscope with cwd=source_root and rel_posix paths to ensure
        cscope output paths match DB canonical paths exactly. All cscope files
        (cscope.out, cscope.files, etc.) are stored in output_dir.

        Errors (missing source_root/output_dir, no indexed files, cscope
        failure or absence) are reported on stdout; nothing is raised for them.

        Args:
            output_dir: Directory to store cscope files (default: None, must be provided)
        """
        print("\n[Stage 2a] Building cscope database...")

        if not self.source_root:
            print("Error: source_root not set. Cannot build cscope database.")
            return

        if output_dir is None:
            print("Error: output_dir must be provided (e.g., 'data/cscope')")
            return

        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        # Get all indexed files from database
        cursor = self.conn.cursor()
        cursor.execute("SELECT path FROM files")
        files = cursor.fetchall()

        if not files:
            print("Warning: No files found in database. Run indexing first.")
            return

        # Write cscope.files with RELATIVE paths (same as DB canonical format)
        cscope_files_path = output_dir / "cscope.files"
        with open(cscope_files_path, 'w', encoding='utf-8') as f:
            f.writelines(
                f"{self._format_cscope_path(file_row['path'])}\n"
                for file_row in files
            )

        print(f" Wrote {len(files)} files to {cscope_files_path}")

        # Run cscope with cwd=source_root to force rel_posix output paths
        # Use -f flag to specify output location in output_dir
        # Use absolute paths for -i and -f since cwd is source_root
        try:
            cscope_out = output_dir / "cscope.out"
            subprocess.run(
                ['cscope', '-b', '-q', '-k',
                 '-i', str(cscope_files_path.absolute()),
                 '-f', str(cscope_out.absolute())],
                cwd=self.source_root,  # KEY: Run from source_root for relative paths!
                capture_output=True,
                text=True,
                check=True
            )

            # Check that output files were created in output_dir
            if cscope_out.exists():
                size_mb = cscope_out.stat().st_size / (1024 * 1024)
                print(f"Cscope database built: {cscope_out} ({size_mb:.2f} MB)")

                # Store cscope_dir for later use
                self.cscope_dir = output_dir
            else:
                print(f"Warning: cscope.out not found at {cscope_out}")

        except subprocess.CalledProcessError as e:
            print(f"Error building cscope database: {e}")
            if e.stderr:
                print(f" stderr: {e.stderr}")
        except FileNotFoundError:
            print("Error: cscope command not found. Install with: sudo apt install cscope")

    def analyze_field_accesses(self):
        """
        Stage 1.5: Analyze field accesses in function bodies
        Creates ACCESSES edges in symbol_edges table

        Returns:
            Whatever FieldAccessAnalyzer.analyze_all_functions_parallel() returns
        """
        print("\n[Stage 1.5] Analyzing field accesses...")

        analyzer = FieldAccessAnalyzer(
            db_conn=self.conn,
            source_root=self.source_root
        )

        # Use parallel version for better performance
        return analyzer.analyze_all_functions_parallel(clear_existing=True)

    def ingest_raw_references(self, cscope_dir: Optional[str] = None):
        """
        Stage 2: Ingest raw cscope output into raw_references table

        Args:
            cscope_dir: Directory containing cscope.out (required, typically data/cscope/)
        """
        if cscope_dir is None:
            print("Error: cscope_dir must be provided (e.g., 'data/cscope')")
            return

        cscope_path = Path(cscope_dir)

        if not (cscope_path / "cscope.out").exists():
            print("Warning: Cscope database not found. Run with --cscope flag first.")
            return

        print("\n[Stage 2b] Ingesting raw references from cscope...")

        ingestor = ReferenceIngestor(
            db_conn=self.conn,
            source_root=self.source_root,
            cscope_dir=cscope_path
        )

        # Ingest all three types of references, in this order:
        # 2a. Callees (functions called by each function)
        callees_count = ingestor.ingest_callees(clear_existing=True)
        # 2b. Callers (functions that call each function)
        callers_count = ingestor.ingest_callers(clear_existing=True)
        # 2c. Includes (files that include each header)
        includes_count = ingestor.ingest_includes(clear_existing=True)

        total_refs = callees_count + callers_count + includes_count

        print(f"\nIngested {total_refs} total raw references:")
        print(f" - Callees: {callees_count}")
        print(f" - Callers: {callers_count}")
        print(f" - Includes: {includes_count}")

    def resolve_semantic_edges(self):
        """
        Stage 3: Resolve raw references into semantic graph edges
        Converts (file, function) names → symbol IDs and stores typed edges
        """
        print("\n[Stage 3] Resolving semantic edges...")

        resolver = ReferenceResolver(db_conn=self.conn)

        # 3a. Resolve callees → CALLS edges (symbol-to-symbol)
        callees_stats = resolver.resolve_callees(clear_existing=True)

        # 3b. Resolve includes → INCLUDES edges (file-to-file)
        includes_stats = resolver.resolve_includes(clear_existing=True)

        print(f"\nResolved {callees_stats['resolved_edges']} symbol edges + {includes_stats['resolved_edges']} file edges")

    def print_stats(self):
        """Print database statistics"""
        cursor = self.conn.cursor()

        def count_rows(table: str) -> int:
            # Table names are fixed literals below, never user input
            cursor.execute(f"SELECT COUNT(*) as count FROM {table}")
            return cursor.fetchone()['count']

        # Get counts
        symbol_count = count_rows("symbols")
        file_count = count_rows("files")
        raw_ref_count = count_rows("raw_references")
        symbol_edge_count = count_rows("symbol_edges")
        file_edge_count = count_rows("file_edges")

        # Get symbol type breakdown
        cursor.execute("SELECT type, COUNT(*) as count FROM symbols GROUP BY type ORDER BY count DESC")
        type_counts = cursor.fetchall()

        print("\nDatabase Statistics:")
        print(f" Files: {file_count}")
        print(f" Symbols: {symbol_count}")
        print(f" Raw refs: {raw_ref_count}")
        print(f" Symbol edges: {symbol_edge_count}")
        print(f" File edges: {file_edge_count}")
        print("\n Symbol Types:")
        for row in type_counts:
            print(f" {row['type']:15} {row['count']:6}")
524
+
525
@click.command()
@click.argument('source_dir', type=click.Path(exists=True))
@click.option('--db', default='data/pmfw.db', help='Database path')
@click.option('--extensions', default='.c,.h,.cpp,.cc,.py,.mk', help='File extensions (comma-separated)')
@click.option('--refs', is_flag=True, help='[PIPELINE] Build cscope + ingest + resolve (full reference pipeline)')
@click.option('--build-cscope', is_flag=True, help='[STAGE] Build cscope database only')
@click.option('--ingest-refs', is_flag=True, help='[STAGE] Ingest raw references only (requires existing cscope DB)')
@click.option('--resolve-refs', is_flag=True, help='[STAGE] Resolve semantic edges only (requires raw_references)')
@click.option('--force', '-f', is_flag=True, help='Force clear database without prompting')
@click.option('--verbose', '-v', is_flag=True, help='Verbose output')
def main(source_dir, db, extensions, refs, build_cscope, ingest_refs, resolve_refs, force, verbose):
    """
    Index source code and build semantic graph

    PIPELINE STAGES:
    1. Index symbols (always runs)
    2. Build cscope DB (optional, --build-cscope or --refs)
    3. Ingest raw refs (optional, --ingest-refs or --refs)
    4. Resolve edges (optional, --resolve-refs or --refs)

    Examples:
    # Symbols only (Stage 1):
    python indexer.py test_code --db data/test.db --force

    # Full pipeline (Stages 1-4):
    python indexer.py test_code --db data/test.db --force --refs

    # Debug Stage 2 only (requires existing cscope DB):
    python indexer.py test_code --db data/test.db --ingest-refs

    # Build pipeline piece by piece:
    python indexer.py test_code --db data/test.db --force
    python indexer.py test_code --db data/test.db --build-cscope
    python indexer.py test_code --db data/test.db --ingest-refs
    python indexer.py test_code --db data/test.db --resolve-refs
    """
    # Normalize each entry to ".ext"; skip empty entries (e.g. a trailing
    # comma) so they do not turn into a bogus "." extension.
    ext_list = []
    for raw_ext in extensions.split(','):
        bare_ext = raw_ext.strip().lstrip('.')
        if bare_ext:
            ext_list.append(f".{bare_ext}")
    if not ext_list:
        raise click.BadParameter("no file extensions given", param_hint='--extensions')

    if not db.strip():
        raise click.BadParameter("database path must not be empty", param_hint='--db')

    # Find project root (parent of the directory containing this module)
    script_dir = Path(__file__).parent.resolve()
    project_root = script_dir.parent

    # Resolve database path: always relative to project_root/data/
    db_path = Path(db)
    if not db_path.is_absolute():
        # If relative path given, resolve it relative to project_root/data/
        if db_path.parts and db_path.parts[0] == 'data':
            db_path = project_root / db_path
        else:
            db_path = project_root / "data" / db_path

    # Ensure parent directory exists
    db_path.parent.mkdir(parents=True, exist_ok=True)

    # Derive cscope directory from database path
    # If db is data/test.db, cscope_dir is data/cscope/
    # If db is data/pmfw.db, cscope_dir is data/cscope/
    cscope_dir = db_path.parent / "cscope"

    print(f" Source Code Explorer - Indexer")
    print(f" Source: {source_dir}")
    print(f" Database: {db_path}")
    print(f" Cscope: {cscope_dir}")
    print(f" Extensions: {', '.join(ext_list)}\n")

    # Create indexer with resolved absolute path
    indexer = Indexer(str(db_path), verbose=verbose)

    # Track timing for each stage
    stage_times = {}
    total_start = time.time()

    try:
        # Connect to database
        indexer.connect_db()

        # Determine pipeline stages to run
        run_build_cscope = refs or build_cscope
        run_ingest = refs or ingest_refs
        run_resolve = refs or resolve_refs

        # Stage 1 and 1.5 are skipped only for the stage-specific
        # --ingest-refs / --resolve-refs invocations.
        run_stage1 = not (ingest_refs or resolve_refs)

        if run_stage1:
            # Stage 1: Index files (CTags → symbols, files tables)
            stage1_start = time.time()
            indexer.index_directory(source_dir, ext_list, force_clear=force)
            stage_times['Stage 1 (Symbol Extraction)'] = time.time() - stage1_start

            # Stage 1.5: Analyze field accesses (only right after Stage 1)
            stage15_start = time.time()
            indexer.analyze_field_accesses()
            stage_times['Stage 1.5 (Field Access)'] = time.time() - stage15_start
        else:
            # If skipping Stage 1, still need to set source_root for Stage 2/3
            indexer.source_root = Path(source_dir).resolve()

        # Stage 2a: Build cscope database
        if run_build_cscope:
            stage2a_start = time.time()
            indexer.build_cscope_database(output_dir=str(cscope_dir))
            stage_times['Stage 2a (Build Cscope)'] = time.time() - stage2a_start

        # Stage 2b: Ingest raw references (cscope → raw_references table)
        if run_ingest:
            stage2b_start = time.time()
            indexer.ingest_raw_references(cscope_dir=str(cscope_dir))
            stage_times['Stage 2b (Ingest References)'] = time.time() - stage2b_start

        # Stage 3: Resolve semantic edges (raw_references → symbol_edges table)
        if run_resolve:
            stage3_start = time.time()
            indexer.resolve_semantic_edges()
            stage_times['Stage 3 (Resolve Edges)'] = time.time() - stage3_start

        # Print statistics
        indexer.print_stats()

    finally:
        indexer.close_db()

    total_time = time.time() - total_start

    # Print timing summary
    if stage_times:
        print("\nTiming:")
        for stage_name, duration in stage_times.items():
            print(f" {stage_name}: {duration:.2f}s")
        print(f" Total: {total_time:.2f}s")

    # Print database size
    db_size_mb = db_path.stat().st_size / (1024 * 1024)
    print(f"\nDatabase: {db_path} ({db_size_mb:.2f} MB)")

    print(f"\nDone!")


# Run the CLI when this module is executed as a script.
if __name__ == "__main__":
    main()