srcodex 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. srcodex/__init__.py +0 -0
  2. srcodex/backend/__init__.py +0 -0
  3. srcodex/backend/chat.py +79 -0
  4. srcodex/backend/main.py +98 -0
  5. srcodex/backend/services/__init__.py +0 -0
  6. srcodex/backend/services/claude_service.py +754 -0
  7. srcodex/backend/services/config_loader.py +113 -0
  8. srcodex/backend/services/file_access_tools.py +279 -0
  9. srcodex/backend/services/file_tree.py +480 -0
  10. srcodex/backend/services/graph_tools.py +874 -0
  11. srcodex/backend/services/logger_setup.py +91 -0
  12. srcodex/backend/services/session_manager.py +81 -0
  13. srcodex/backend/services/status_tracker.py +91 -0
  14. srcodex/cli.py +255 -0
  15. srcodex/core/__init__.py +0 -0
  16. srcodex/core/config.py +113 -0
  17. srcodex/core/logger.py +23 -0
  18. srcodex/indexer/__init__.py +0 -0
  19. srcodex/indexer/cscope_client.py +183 -0
  20. srcodex/indexer/ctags_compat.py +223 -0
  21. srcodex/indexer/ctags_parser.py +456 -0
  22. srcodex/indexer/explorer.py +135 -0
  23. srcodex/indexer/field_access_analyzer.py +436 -0
  24. srcodex/indexer/indexer.py +664 -0
  25. srcodex/indexer/reference_ingestor.py +293 -0
  26. srcodex/indexer/reference_resolver.py +544 -0
  27. srcodex/tui/__init__.py +0 -0
  28. srcodex/tui/app.py +103 -0
  29. srcodex/tui/app.tcss +24 -0
  30. srcodex/tui/components/__init__.py +0 -0
  31. srcodex/tui/components/bars/__init__.py +0 -0
  32. srcodex/tui/components/bars/chat_header.py +48 -0
  33. srcodex/tui/components/bars/code_tab_bar.py +157 -0
  34. srcodex/tui/components/bars/footer_bar.py +128 -0
  35. srcodex/tui/components/bars/left_tab.py +54 -0
  36. srcodex/tui/components/logger.py +57 -0
  37. srcodex/tui/components/panels/__init__.py +0 -0
  38. srcodex/tui/components/panels/chat_panel.py +523 -0
  39. srcodex/tui/components/panels/code_panel.py +229 -0
  40. srcodex/tui/components/panels/side_panel.py +128 -0
  41. srcodex/tui/components/views/__init__.py +0 -0
  42. srcodex/tui/components/views/explorer_view.py +20 -0
  43. srcodex/tui/components/views/search_view.py +148 -0
  44. srcodex/tui/components/widgets/__init__.py +0 -0
  45. srcodex/tui/components/widgets/file_browser.py +16 -0
  46. srcodex/tui/components/widgets/find_box.py +85 -0
  47. srcodex-0.2.0.dist-info/METADATA +170 -0
  48. srcodex-0.2.0.dist-info/RECORD +52 -0
  49. srcodex-0.2.0.dist-info/WHEEL +5 -0
  50. srcodex-0.2.0.dist-info/entry_points.txt +2 -0
  51. srcodex-0.2.0.dist-info/licenses/LICENSE +21 -0
  52. srcodex-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,436 @@
1
+ """
2
+ Field Access Analyzer
3
+ Analyzes function bodies to find field access patterns and creates ACCESSES edges
4
+
5
+ Similar to cscope (which finds CALLS), but focuses on field accesses:
6
+ function.field
7
+ pointer->field
8
+ struct.nested.field
9
+ Populates symbol_edges table with edge_type='ACCESSES'
10
+ """
11
+ import re
12
+ from pathlib import Path
13
+ from typing import List, Tuple, Optional
14
+ from tqdm import tqdm
15
+ from multiprocessing import Pool, cpu_count
16
+ from functools import partial
17
+
18
+
19
class FieldAccessAnalyzer:
    """Find field accesses inside function bodies and store them as ACCESSES edges.

    For every symbol of type 'function' the analyzer reads the function body
    from disk, looks for ``receiver->field`` and ``receiver.field`` patterns
    (including chains such as ``a->b.c``), resolves each field name against
    symbols of type 'member', and inserts one row per match into
    ``symbol_edges`` with ``edge_type='ACCESSES'``.

    The analysis is a line-based regex heuristic, not a parser: fields are
    resolved by name only, so a name shared by several structs produces one
    edge per matching member.
    """

    # Parallel resolver only: field names matching more member symbols than
    # this are dropped, because name-only resolution is too ambiguous there.
    MAX_AMBIGUITY = 100

    # Parallel resolver only: number of queued edges that triggers an insert + commit.
    EDGE_BATCH_SIZE = 5000

    # The field name sits in a lookahead so it is not consumed; that lets the
    # next match start at it, which is what makes chained accesses work.
    _ARROW_RE = re.compile(r'\b(\w+)\s*->\s*(?=(\w+))')
    _DOT_RE = re.compile(r'\b([a-zA-Z_]\w*)\s*\.\s*(?=(\w+))')

    # One left-to-right pass over string literals, char literals and comments.
    # Whichever construct starts first on the line wins, so "//" inside a
    # string is not mistaken for a comment and quotes inside a comment are
    # not mistaken for a literal.
    _NONCODE_RE = re.compile(
        r'"(?:\\.|[^"\\\n])*"'      # string literal, honouring backslash escapes
        r"|'(?:\\.|[^'\\\n])*'"     # character literal
        r'|//.*'                    # line comment
        r'|/\*.*?\*/'               # block comment closed on the same line
        r'|/\*.*'                   # block comment still open at end of line
    )

    # Words that can directly precede a '.' without being a variable
    # (e.g. ``return .5;``).
    _KEYWORDS = frozenset(
        {'return', 'break', 'continue', 'goto', 'if', 'while', 'for', 'switch'}
    )

    _INSERT_EDGE_SQL = """
        INSERT OR IGNORE INTO symbol_edges (
            edge_type, src_symbol_id, dst_symbol_id,
            source_file, line_number
        )
        VALUES (?, ?, ?, ?, ?)
    """

    def __init__(self, db_conn, source_root: Path):
        """
        Args:
            db_conn: SQLite database connection. Rows are read by column name
                (``row['id']``, ``dict(row)``), so the connection is expected
                to use a mapping-style row factory such as ``sqlite3.Row``.
            source_root: Root directory of source code (absolute path)
        """
        self.conn = db_conn
        self.source_root = Path(source_root)

        # Regex patterns for field access, exposed as instance attributes.
        self.arrow_pattern = self._ARROW_RE
        self.dot_pattern = self._DOT_RE

    # ------------------------------------------------------------------
    # Database helpers
    # ------------------------------------------------------------------

    def _get_functions(self) -> List[dict]:
        """Return every symbol of type 'function' as a dict, ordered by file and line."""
        cursor = self.conn.cursor()
        cursor.execute("""
            SELECT id, name, file_path, line_number
            FROM symbols
            WHERE type = 'function'
            ORDER BY file_path, line_number
        """)
        return [dict(row) for row in cursor.fetchall()]

    def _clear_access_edges(self) -> None:
        """Delete all existing ACCESSES edges and commit."""
        self.conn.execute("DELETE FROM symbol_edges WHERE edge_type = 'ACCESSES'")
        self.conn.commit()

    @classmethod
    def _insert_edges(cls, cursor, rows) -> int:
        """Insert edge rows, skipping duplicates, and return how many were stored.

        Args:
            cursor: Database cursor to execute on.
            rows: Sequence of ``(edge_type, src_id, dst_id, source_file, line)``.

        Returns:
            Number of rows actually inserted. Rows rejected by a constraint
            (``INSERT OR IGNORE``) are not counted.
        """
        if not rows:
            return 0
        cursor.executemany(cls._INSERT_EDGE_SQL, rows)
        inserted = cursor.rowcount
        # Fall back to the queued count if the driver cannot report a row count.
        if inserted is None or inserted < 0:
            return len(rows)
        return inserted

    # ------------------------------------------------------------------
    # Source scanning helpers (static so worker processes can use them)
    # ------------------------------------------------------------------

    @staticmethod
    def _strip_noncode(line: str, in_block_comment: bool = False) -> Tuple[str, bool]:
        """Blank out comments and string/char literals on one source line.

        Args:
            line: Raw source line.
            in_block_comment: True when a ``/* ... */`` comment opened on an
                earlier line is still open at the start of this line.

        Returns:
            ``(code, still_open)``: the line with every comment and literal
            replaced by a single space, and whether a block comment is still
            open at the end of the line.
        """
        if in_block_comment:
            end = line.find('*/')
            if end < 0:
                # The whole line is inside the comment.
                return '', True
            line = ' ' + line[end + 2:]

        still_open = False

        def _blank(match):
            nonlocal still_open
            text = match.group(0)
            # Only the "open at end of line" alternative yields text that
            # starts with '/*' without being a complete '/*...*/' span.
            if text.startswith('/*') and not (len(text) >= 4 and text.endswith('*/')):
                still_open = True
            # A space (not '') keeps neighbouring tokens from fusing together.
            return ' '

        code = FieldAccessAnalyzer._NONCODE_RE.sub(_blank, line)
        return code, still_open

    @staticmethod
    def _is_plausible_variable(var_name: str) -> bool:
        """Return False for receivers that cannot be a variable (numbers, keywords, '#...')."""
        if var_name.isdigit():
            return False
        if var_name in FieldAccessAnalyzer._KEYWORDS:
            return False
        return not var_name.startswith('#')

    @staticmethod
    def _load_body(full_path: Path, start_line: int) -> List[Tuple[int, str]]:
        """Read the brace-delimited body that starts at or after ``start_line``.

        Braces are counted on text with comments and literals blanked out, so
        ``'{'``, ``"}"`` or braces inside comments do not shift the boundaries.

        Args:
            full_path: Path of the source file to read.
            start_line: 1-based line where the function is declared. ``None``
                or values below 1 are treated as line 1.

        Returns:
            ``[(line_number, raw_line), ...]`` from the line holding the first
            ``{`` through the line where the brace depth returns to zero (or
            end of file if it never does). Empty if the file cannot be read
            or no ``{`` is found.
        """
        try:
            with open(full_path, 'r', encoding='utf-8', errors='replace') as f:
                lines = f.readlines()
        except (OSError, ValueError):
            # Missing/unreadable file or an invalid path: nothing to analyze.
            return []

        first = max(int(start_line or 1) - 1, 0)

        body_lines = []
        depth = 0
        in_function = False
        in_comment = False

        for index in range(first, len(lines)):
            raw = lines[index]
            code, in_comment = FieldAccessAnalyzer._strip_noncode(raw, in_comment)

            opened = code.count('{')
            if opened:
                in_function = True
            depth += opened - code.count('}')

            if in_function:
                body_lines.append((index + 1, raw))
                # Depth back at zero: this line closed the function.
                if depth == 0:
                    break

        return body_lines

    @staticmethod
    def _scan_accesses(body_lines: List[Tuple[int, str]]) -> List[Tuple[str, int]]:
        """Extract ``(field_name, line_number)`` pairs from consecutive body lines.

        Block-comment state is carried from one entry to the next, so the
        entries are expected to be consecutive source lines. Per line, arrow
        accesses are reported before dot accesses.
        """
        cls = FieldAccessAnalyzer
        accesses = []
        in_comment = False

        for line_num, line_text in body_lines:
            code, in_comment = cls._strip_noncode(line_text, in_comment)

            # ptr->field
            for match in cls._ARROW_RE.finditer(code):
                accesses.append((match.group(2), line_num))

            # var.field
            for match in cls._DOT_RE.finditer(code):
                if cls._is_plausible_variable(match.group(1)):
                    accesses.append((match.group(2), line_num))

        return accesses

    # ------------------------------------------------------------------
    # Instance-level wrappers
    # ------------------------------------------------------------------

    def _read_function_body(self, file_path: str, start_line: int) -> List[Tuple[int, str]]:
        """Read a function body from ``source_root / file_path``; see ``_load_body``."""
        return self._load_body(self.source_root / file_path, start_line)

    def _extract_field_accesses(self, body_lines: List[Tuple[int, str]]) -> List[Tuple[str, int]]:
        """Extract field access patterns from function body lines; see ``_scan_accesses``."""
        return self._scan_accesses(body_lines)

    def _clean_line(self, line: str) -> str:
        """Return ``line`` with comments and string/char literals replaced by spaces.

        Works on a single line in isolation: a block comment that is opened
        but not closed on this line is blanked to the end of the line.
        """
        return self._strip_noncode(line)[0]

    def _is_valid_field_access(self, var_name: str, field_name: str) -> bool:
        """Filter false positives: reject numeric, keyword and '#'-prefixed receivers.

        ``field_name`` is accepted for interface compatibility; only
        ``var_name`` is inspected.
        """
        return self._is_plausible_variable(var_name)

    def _resolve_and_create_edges(
        self, function_id: int, accesses: List[Tuple[str, int]], file_path: str
    ) -> Tuple[int, int]:
        """Resolve field names to member symbol IDs and insert ACCESSES edges.

        Args:
            function_id: ID of the accessing function (edge source).
            accesses: ``(field_name, line_number)`` pairs found in the function.
            file_path: Value stored in the edge's ``source_file`` column.

        Returns:
            ``(resolved, unresolved)``: number of edges actually inserted
            (duplicates ignored by the database are not counted) and number of
            accesses whose field name matched no member symbol. Nothing is
            committed here.
        """
        cursor = self.conn.cursor()
        ids_by_name = {}  # field_name -> [member id, ...], so each name is queried once
        rows = []
        unresolved = 0

        for field_name, line_num in accesses:
            if field_name not in ids_by_name:
                cursor.execute("""
                    SELECT id
                    FROM symbols
                    WHERE name = ? AND type = 'member'
                """, (field_name,))
                ids_by_name[field_name] = [row['id'] for row in cursor.fetchall()]

            field_ids = ids_by_name[field_name]
            if not field_ids:
                unresolved += 1
                continue

            # Name-only resolution: one edge per member carrying this name.
            rows.extend(
                ('ACCESSES', function_id, field_id, file_path, line_num)
                for field_id in field_ids
            )

        resolved = self._insert_edges(cursor, rows)
        return resolved, unresolved

    @staticmethod
    def _print_stats(stats: dict) -> None:
        """Print the end-of-run summary."""
        print("\nField access analysis complete:")
        print(f" Functions analyzed: {stats['total_functions']}")
        print(f" Field accesses found: {stats['total_accesses']}")
        print(f" ACCESSES edges created: {stats['resolved_edges']}")
        print(f" Unresolved accesses: {stats['unresolved_accesses']}")

    # ------------------------------------------------------------------
    # Parallel analysis
    # ------------------------------------------------------------------

    @staticmethod
    def _analyze_function_worker(func_data: dict, source_root: Path) -> Tuple[int, List[Tuple[str, int, str, int]]]:
        """Worker function for parallel processing - analyzes one function

        Touches only the filesystem, never the database.

        Args:
            func_data: Function row with keys 'id', 'file_path', 'line_number'.
            source_root: Root directory that 'file_path' is relative to.

        Returns:
            (total_accesses, [(field_name, line_num, file_path, function_id), ...])
        """
        func_id = func_data['id']
        file_path = func_data['file_path']

        body_lines = FieldAccessAnalyzer._load_body(
            Path(source_root) / file_path, func_data['line_number']
        )
        accesses = [
            (field_name, line_num, file_path, func_id)
            for field_name, line_num in FieldAccessAnalyzer._scan_accesses(body_lines)
        ]
        return len(accesses), accesses

    def analyze_all_functions_parallel(self, clear_existing: bool = False, num_workers: Optional[int] = None) -> dict:
        """Parallel version: Analyze all functions using multiprocessing

        Worker processes scan the source files; the parent process then
        resolves field names against an in-memory lookup and inserts the
        edges in batches.

        Args:
            clear_existing: Delete existing ACCESSES edges before starting
            num_workers: Number of worker processes (default: cpu_count())

        Returns:
            Dict with 'total_functions', 'total_accesses', 'resolved_edges'
            (edges actually inserted) and 'unresolved_accesses' (accesses with
            no usable member match, including names dropped as too ambiguous).
        """
        print("\n[Stage 1.5] Analyzing field accesses (parallel mode)...")

        if clear_existing:
            self._clear_access_edges()

        functions = self._get_functions()
        num_workers = num_workers or cpu_count()
        print(f" Found {len(functions)} functions to analyze")
        print(f" Using {num_workers} worker processes")

        worker_fn = partial(self._analyze_function_worker, source_root=self.source_root)

        all_accesses = []
        total_accesses_count = 0

        with Pool(processes=num_workers) as pool:
            with tqdm(total=len(functions), desc="Analyzing functions", unit="func") as pbar:
                for access_count, accesses in pool.imap_unordered(worker_fn, functions, chunksize=10):
                    total_accesses_count += access_count
                    all_accesses.extend(accesses)
                    pbar.update(1)

        print(f"\n Parallel analysis complete, resolving {len(all_accesses)} field accesses...")

        # Build field name -> IDs lookup so resolution needs no per-access query.
        print(" Building field lookup cache...")
        cursor = self.conn.cursor()
        cursor.execute("SELECT id, name FROM symbols WHERE type = 'member'")

        field_lookup = {}
        for row in cursor.fetchall():
            field_lookup.setdefault(row['name'], []).append(row['id'])

        # Drop names that match too many members to be meaningful.
        ambiguous_fields = [
            name for name, ids in field_lookup.items() if len(ids) > self.MAX_AMBIGUITY
        ]
        for name in ambiguous_fields:
            del field_lookup[name]

        print(f" Cached {len(field_lookup)} unique field names")
        print(f" Filtered out {len(ambiguous_fields)} overly-ambiguous fields (>{self.MAX_AMBIGUITY} structs)")

        resolved_edges = 0
        unresolved_accesses = 0
        edges_batch = []

        with tqdm(total=len(all_accesses), desc="Resolving edges", unit="access") as pbar:
            for field_name, line_num, file_path, func_id in all_accesses:
                field_ids = field_lookup.get(field_name)

                if field_ids:
                    edges_batch.extend(
                        ('ACCESSES', func_id, field_id, file_path, line_num)
                        for field_id in field_ids
                    )
                    if len(edges_batch) >= self.EDGE_BATCH_SIZE:
                        resolved_edges += self._insert_edges(cursor, edges_batch)
                        self.conn.commit()
                        edges_batch.clear()
                else:
                    unresolved_accesses += 1

                pbar.update(1)

        # Insert whatever is left over from the last partial batch.
        resolved_edges += self._insert_edges(cursor, edges_batch)
        self.conn.commit()

        stats = {
            'total_functions': len(functions),
            'total_accesses': total_accesses_count,
            'resolved_edges': resolved_edges,
            'unresolved_accesses': unresolved_accesses
        }
        self._print_stats(stats)
        return stats

    # ------------------------------------------------------------------
    # Sequential analysis
    # ------------------------------------------------------------------

    def analyze_all_functions(self, clear_existing: bool = False, batch_size: int = 100) -> dict:
        """Main entry point: Analyze all functions and create ACCESSES edges

        A failure while analyzing one function is printed and the run
        continues with the next function.

        Args:
            clear_existing: Delete existing ACCESSES edges before starting
            batch_size: Commit every N functions (default 100) for better
                performance; values below 1 are treated as 1

        Returns:
            Dict with 'total_functions', 'total_accesses', 'resolved_edges'
            and 'unresolved_accesses'.
        """
        print("\n[Stage 1.5] Analyzing field accesses...")

        if clear_existing:
            self._clear_access_edges()

        functions = self._get_functions()
        print(f" Found {len(functions)} functions to analyze")

        total_accesses = 0
        resolved_edges = 0
        unresolved_accesses = 0
        commit_every = max(1, batch_size)

        with tqdm(total=len(functions), desc="Analyzing functions", unit="func") as pbar:
            for count, func in enumerate(functions, start=1):
                try:
                    body_lines = self._read_function_body(func['file_path'], func['line_number'])
                    accesses = self._extract_field_accesses(body_lines)
                    total_accesses += len(accesses)

                    if accesses:
                        resolved, unresolved = self._resolve_and_create_edges(
                            func['id'], accesses, func['file_path']
                        )
                        resolved_edges += resolved
                        unresolved_accesses += unresolved

                except Exception as e:
                    # Best effort: report the failure and keep going.
                    print(f"\n Error analyzing {func['name']}: {e}")

                finally:
                    # Exactly one progress tick per function, whatever happened.
                    pbar.update(1)

                if count % commit_every == 0:
                    self.conn.commit()

        # Final commit for remaining functions
        self.conn.commit()

        stats = {
            'total_functions': len(functions),
            'total_accesses': total_accesses,
            'resolved_edges': resolved_edges,
            'unresolved_accesses': unresolved_accesses
        }
        self._print_stats(stats)
        return stats