specfact_cli-0.4.0-py3-none-any.whl


This version of specfact-cli has been flagged as potentially problematic.
Files changed (60)
  1. specfact_cli/__init__.py +14 -0
  2. specfact_cli/agents/__init__.py +23 -0
  3. specfact_cli/agents/analyze_agent.py +392 -0
  4. specfact_cli/agents/base.py +95 -0
  5. specfact_cli/agents/plan_agent.py +202 -0
  6. specfact_cli/agents/registry.py +176 -0
  7. specfact_cli/agents/sync_agent.py +133 -0
  8. specfact_cli/analyzers/__init__.py +10 -0
  9. specfact_cli/analyzers/code_analyzer.py +775 -0
  10. specfact_cli/cli.py +397 -0
  11. specfact_cli/commands/__init__.py +7 -0
  12. specfact_cli/commands/enforce.py +87 -0
  13. specfact_cli/commands/import_cmd.py +355 -0
  14. specfact_cli/commands/init.py +119 -0
  15. specfact_cli/commands/plan.py +1090 -0
  16. specfact_cli/commands/repro.py +172 -0
  17. specfact_cli/commands/sync.py +408 -0
  18. specfact_cli/common/__init__.py +24 -0
  19. specfact_cli/common/logger_setup.py +673 -0
  20. specfact_cli/common/logging_utils.py +41 -0
  21. specfact_cli/common/text_utils.py +52 -0
  22. specfact_cli/common/utils.py +48 -0
  23. specfact_cli/comparators/__init__.py +10 -0
  24. specfact_cli/comparators/plan_comparator.py +391 -0
  25. specfact_cli/generators/__init__.py +13 -0
  26. specfact_cli/generators/plan_generator.py +105 -0
  27. specfact_cli/generators/protocol_generator.py +115 -0
  28. specfact_cli/generators/report_generator.py +200 -0
  29. specfact_cli/generators/workflow_generator.py +111 -0
  30. specfact_cli/importers/__init__.py +6 -0
  31. specfact_cli/importers/speckit_converter.py +773 -0
  32. specfact_cli/importers/speckit_scanner.py +704 -0
  33. specfact_cli/models/__init__.py +32 -0
  34. specfact_cli/models/deviation.py +105 -0
  35. specfact_cli/models/enforcement.py +150 -0
  36. specfact_cli/models/plan.py +97 -0
  37. specfact_cli/models/protocol.py +28 -0
  38. specfact_cli/modes/__init__.py +18 -0
  39. specfact_cli/modes/detector.py +126 -0
  40. specfact_cli/modes/router.py +153 -0
  41. specfact_cli/sync/__init__.py +11 -0
  42. specfact_cli/sync/repository_sync.py +279 -0
  43. specfact_cli/sync/speckit_sync.py +388 -0
  44. specfact_cli/utils/__init__.py +57 -0
  45. specfact_cli/utils/console.py +69 -0
  46. specfact_cli/utils/feature_keys.py +213 -0
  47. specfact_cli/utils/git.py +241 -0
  48. specfact_cli/utils/ide_setup.py +381 -0
  49. specfact_cli/utils/prompts.py +179 -0
  50. specfact_cli/utils/structure.py +496 -0
  51. specfact_cli/utils/yaml_utils.py +200 -0
  52. specfact_cli/validators/__init__.py +19 -0
  53. specfact_cli/validators/fsm.py +260 -0
  54. specfact_cli/validators/repro_checker.py +320 -0
  55. specfact_cli/validators/schema.py +200 -0
  56. specfact_cli-0.4.0.dist-info/METADATA +332 -0
  57. specfact_cli-0.4.0.dist-info/RECORD +60 -0
  58. specfact_cli-0.4.0.dist-info/WHEEL +4 -0
  59. specfact_cli-0.4.0.dist-info/entry_points.txt +2 -0
  60. specfact_cli-0.4.0.dist-info/licenses/LICENSE.md +55 -0
specfact_cli/analyzers/code_analyzer.py
@@ -0,0 +1,775 @@
"""Code analyzer for extracting features from brownfield codebases."""

from __future__ import annotations

import ast
import re
from collections import defaultdict
from pathlib import Path

import networkx as nx
from beartype import beartype
from icontract import ensure, require

from specfact_cli.models.plan import Feature, Idea, Metadata, PlanBundle, Product, Story
from specfact_cli.utils.feature_keys import to_classname_key, to_sequential_key


class CodeAnalyzer:
    """
    Analyzes Python code to auto-derive plan bundles.

    Extracts features from classes and user stories from method patterns
    following Scrum/Agile practices.
    """

    # Fibonacci sequence for story points
    FIBONACCI = [1, 2, 3, 5, 8, 13, 21, 34, 55, 89]

    @beartype
    @require(lambda repo_path: repo_path is not None and isinstance(repo_path, Path), "Repo path must be Path")
    @require(lambda confidence_threshold: 0.0 <= confidence_threshold <= 1.0, "Confidence threshold must be 0.0-1.0")
    @require(lambda plan_name: plan_name is None or isinstance(plan_name, str), "Plan name must be None or str")
    def __init__(
        self,
        repo_path: Path,
        confidence_threshold: float = 0.5,
        key_format: str = "classname",
        plan_name: str | None = None,
    ) -> None:
        """
        Initialize code analyzer.

        Args:
            repo_path: Path to repository root
            confidence_threshold: Minimum confidence score (0.0-1.0)
            key_format: Feature key format ('classname' or 'sequential', default: 'classname')
            plan_name: Custom plan name (will be used for idea.title, optional)
        """
        self.repo_path = Path(repo_path)
        self.confidence_threshold = confidence_threshold
        self.key_format = key_format
        self.plan_name = plan_name
        self.features: list[Feature] = []
        self.themes: set[str] = set()
        self.dependency_graph: nx.DiGraph = nx.DiGraph()  # Module dependency graph
        self.type_hints: dict[str, dict[str, str]] = {}  # Module -> {function: type_hint}
        self.async_patterns: dict[str, list[str]] = {}  # Module -> [async_methods]
        self.commit_bounds: dict[str, tuple[str, str]] = {}  # Feature -> (first_commit, last_commit)

    @beartype
    @ensure(lambda result: isinstance(result, PlanBundle), "Must return PlanBundle")
    @ensure(lambda result: result.version == "1.0", "Plan bundle version must be 1.0")
    @ensure(lambda result: len(result.features) >= 0, "Features list must be non-negative length")
    def analyze(self) -> PlanBundle:
        """
        Analyze repository and generate plan bundle.

        Returns:
            Generated PlanBundle from code analysis
        """
        # Find all Python files
        python_files = list(self.repo_path.rglob("*.py"))

        # Build module dependency graph first
        self._build_dependency_graph(python_files)

        # Analyze each file
        for file_path in python_files:
            if self._should_skip_file(file_path):
                continue

            self._analyze_file(file_path)

        # Analyze commit history for feature boundaries
        self._analyze_commit_history()

        # Enhance features with dependency information
        self._enhance_features_with_dependencies()

        # If sequential format, update all keys now that we know the total count
        if self.key_format == "sequential":
            for idx, feature in enumerate(self.features, start=1):
                feature.key = to_sequential_key(feature.key, idx)

        # Generate plan bundle
        # Use plan_name if provided, otherwise use repo name, otherwise fallback
        if self.plan_name:
            # Use the plan name (already sanitized, but humanize for title)
            title = self.plan_name.replace("_", " ").replace("-", " ").title()
        else:
            repo_name = self.repo_path.name or "Unknown Project"
            title = self._humanize_name(repo_name)

        idea = Idea(
            title=title,
            narrative=f"Auto-derived plan from brownfield analysis of {title}",
            metrics=None,
        )

        product = Product(
            themes=sorted(self.themes) if self.themes else ["Core"],
            releases=[],
        )

        return PlanBundle(
            version="1.0",
            idea=idea,
            business=None,
            product=product,
            features=self.features,
            metadata=Metadata(stage="draft", promoted_at=None, promoted_by=None),
        )

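    # Illustrative sketch (hypothetical repo): analyzing a project checked out
    # at ./my-repo might yield a bundle shaped like
    #   bundle = CodeAnalyzer(Path("./my-repo")).analyze()
    #   bundle.version        -> "1.0"
    #   bundle.idea.title     -> "My Repo"
    #   bundle.product.themes -> ["API", "CLI"] (or ["Core"] if none detected)
    #   bundle.metadata.stage -> "draft"
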
    def _should_skip_file(self, file_path: Path) -> bool:
        """Check if file should be skipped."""
        skip_patterns = [
            "__pycache__",
            ".git",
            "venv",
            ".venv",
            "env",
            ".pytest_cache",
            "htmlcov",
            "dist",
            "build",
            ".eggs",
            "tests",  # Skip test files
        ]

        return any(pattern in str(file_path) for pattern in skip_patterns)

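    # Note: matching is a plain substring test over the full path, so the
    # "tests" pattern also skips, e.g., "src/contests/foo.py" (illustrative),
    # not only files under a tests/ directory.
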
    def _analyze_file(self, file_path: Path) -> None:
        """Analyze a single Python file."""
        try:
            content = file_path.read_text(encoding="utf-8")
            tree = ast.parse(content)

            # Extract module-level info
            self._extract_themes_from_imports(tree)

            # Extract type hints
            self._extract_type_hints(tree, file_path)

            # Detect async patterns
            self._detect_async_patterns(tree, file_path)

            # Extract classes as features
            for node in ast.walk(tree):
                if isinstance(node, ast.ClassDef):
                    feature = self._extract_feature_from_class(node, file_path)
                    if feature:
                        self.features.append(feature)

        except (SyntaxError, UnicodeDecodeError):
            # Skip files that can't be parsed
            pass

    def _extract_themes_from_imports(self, tree: ast.AST) -> None:
        """Extract themes from import statements."""
        theme_keywords = {
            "fastapi": "API",
            "flask": "API",
            "django": "Web",
            "redis": "Caching",
            "postgres": "Database",
            "mysql": "Database",
            "asyncio": "Async",
            "typer": "CLI",
            "click": "CLI",
            "pydantic": "Validation",
            "pytest": "Testing",
            "sqlalchemy": "ORM",
            "requests": "HTTP Client",
            "aiohttp": "Async HTTP",
        }

        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                for alias in node.names:
                    for keyword, theme in theme_keywords.items():
                        if keyword in alias.name.lower():
                            self.themes.add(theme)
            elif isinstance(node, ast.ImportFrom) and node.module:
                for keyword, theme in theme_keywords.items():
                    if keyword in node.module.lower():
                        self.themes.add(theme)

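    # Illustrative mapping: a module with "import fastapi" and
    # "from redis import Redis" contributes {"API", "Caching"} to self.themes.
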
    def _extract_feature_from_class(self, node: ast.ClassDef, file_path: Path) -> Feature | None:
        """Extract feature from class definition."""
        # Skip private classes and test classes
        if node.name.startswith("_") or node.name.startswith("Test"):
            return None

        # Generate feature key based on configured format
        if self.key_format == "sequential":
            # Use sequential numbering (will be updated after all features are collected)
            feature_key = f"FEATURE-{len(self.features) + 1:03d}"
        else:
            # Default: classname format
            feature_key = to_classname_key(node.name)

        # Extract docstring as outcome
        docstring = ast.get_docstring(node)
        outcomes = []
        if docstring:
            # Take first paragraph as primary outcome
            first_para = docstring.split("\n\n")[0].strip()
            outcomes.append(first_para)
        else:
            outcomes.append(f"Provides {self._humanize_name(node.name)} functionality")

        # Collect all methods (synchronous only; async methods are not gathered here)
        methods = [item for item in node.body if isinstance(item, ast.FunctionDef)]

        # Group methods into user stories
        stories = self._extract_stories_from_methods(methods, node.name)

        # Calculate confidence based on documentation and story quality
        confidence = self._calculate_feature_confidence(node, stories)

        if confidence < self.confidence_threshold:
            return None

        # Skip if no meaningful stories
        if not stories:
            return None

        return Feature(
            key=feature_key,
            title=self._humanize_name(node.name),
            outcomes=outcomes,
            acceptance=[f"{node.name} class provides documented functionality"],
            stories=stories,
            confidence=round(confidence, 2),
        )

    def _extract_stories_from_methods(self, methods: list[ast.FunctionDef], class_name: str) -> list[Story]:
        """
        Extract user stories from methods by grouping related functionality.

        Groups methods by:
        - CRUD operations (create, read, update, delete)
        - Common prefixes (get_, set_, validate_, process_)
        - Functionality patterns
        """
        # Group methods by pattern
        method_groups = self._group_methods_by_functionality(methods)

        stories = []
        story_counter = 1

        for group_name, group_methods in method_groups.items():
            if not group_methods:
                continue

            # Create a user story for this group
            story = self._create_story_from_method_group(group_name, group_methods, class_name, story_counter)

            if story:
                stories.append(story)
                story_counter += 1

        return stories

    def _group_methods_by_functionality(self, methods: list[ast.FunctionDef]) -> dict[str, list[ast.FunctionDef]]:
        """Group methods by their functionality patterns."""
        groups = defaultdict(list)

        # Filter out private methods (except __init__)
        public_methods = [m for m in methods if not m.name.startswith("_") or m.name == "__init__"]

        for method in public_methods:
            # CRUD operations
            if any(crud in method.name.lower() for crud in ["create", "add", "insert", "new"]):
                groups["Create Operations"].append(method)
            elif any(read in method.name.lower() for read in ["get", "read", "fetch", "find", "list", "retrieve"]):
                groups["Read Operations"].append(method)
            elif any(update in method.name.lower() for update in ["update", "modify", "edit", "change", "set"]):
                groups["Update Operations"].append(method)
            elif any(delete in method.name.lower() for delete in ["delete", "remove", "destroy"]):
                groups["Delete Operations"].append(method)

            # Validation
            elif any(val in method.name.lower() for val in ["validate", "check", "verify", "is_valid"]):
                groups["Validation"].append(method)

            # Processing/Computation
            elif any(
                proc in method.name.lower() for proc in ["process", "compute", "calculate", "transform", "convert"]
            ):
                groups["Processing"].append(method)

            # Analysis
            elif any(an in method.name.lower() for an in ["analyze", "parse", "extract", "detect"]):
                groups["Analysis"].append(method)

            # Generation
            elif any(gen in method.name.lower() for gen in ["generate", "build", "create", "make"]):
                groups["Generation"].append(method)

            # Comparison
            elif any(cmp in method.name.lower() for cmp in ["compare", "diff", "match"]):
                groups["Comparison"].append(method)

            # Setup/Configuration
            elif method.name == "__init__" or any(
                setup in method.name.lower() for setup in ["setup", "configure", "initialize"]
            ):
                groups["Configuration"].append(method)

            # Catch-all for other public methods
            else:
                groups["Core Functionality"].append(method)

        return dict(groups)

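    # Illustrative grouping: methods named create_user, get_user, and
    # validate_email land in "Create Operations", "Read Operations", and
    # "Validation" respectively. Matching is first-match-wins, so "create"
    # never reaches the later "Generation" branch.
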
    def _create_story_from_method_group(
        self, group_name: str, methods: list[ast.FunctionDef], class_name: str, story_number: int
    ) -> Story | None:
        """Create a user story from a group of related methods."""
        if not methods:
            return None

        # Generate story key
        story_key = f"STORY-{class_name.upper()}-{story_number:03d}"

        # Create user-centric title based on group
        title = self._generate_story_title(group_name, class_name)

        # Extract acceptance criteria from docstrings
        acceptance = []
        tasks = []

        for method in methods:
            # Add method as task
            tasks.append(f"{method.name}()")

            # Extract acceptance from docstring
            docstring = ast.get_docstring(method)
            if docstring:
                # Take first line as acceptance criterion
                first_line = docstring.split("\n")[0].strip()
                if first_line and first_line not in acceptance:
                    acceptance.append(first_line)

        # Add default acceptance if none found
        if not acceptance:
            acceptance.append(f"{group_name} functionality works as expected")

        # Calculate story points (complexity) based on number of methods and their size
        story_points = self._calculate_story_points(methods)

        # Calculate value points based on public API exposure
        value_points = self._calculate_value_points(methods, group_name)

        return Story(
            key=story_key,
            title=title,
            acceptance=acceptance,
            story_points=story_points,
            value_points=value_points,
            tasks=tasks,
            confidence=0.8 if len(methods) > 1 else 0.6,
        )

    def _generate_story_title(self, group_name: str, class_name: str) -> str:
        """Generate user-centric story title."""
        # Map group names to user-centric titles
        title_templates = {
            "Create Operations": f"As a user, I can create new {self._humanize_name(class_name)} records",
            "Read Operations": f"As a user, I can view {self._humanize_name(class_name)} data",
            "Update Operations": f"As a user, I can update {self._humanize_name(class_name)} records",
            "Delete Operations": f"As a user, I can delete {self._humanize_name(class_name)} records",
            "Validation": f"As a developer, I can validate {self._humanize_name(class_name)} data",
            "Processing": f"As a user, I can process data using {self._humanize_name(class_name)}",
            "Analysis": f"As a user, I can analyze data with {self._humanize_name(class_name)}",
            "Generation": f"As a user, I can generate outputs from {self._humanize_name(class_name)}",
            "Comparison": f"As a user, I can compare {self._humanize_name(class_name)} data",
            "Configuration": f"As a developer, I can configure {self._humanize_name(class_name)}",
            "Core Functionality": f"As a user, I can use {self._humanize_name(class_name)} features",
        }

        return title_templates.get(group_name, f"As a user, I can work with {self._humanize_name(class_name)}")

    def _calculate_story_points(self, methods: list[ast.FunctionDef]) -> int:
        """
        Calculate story points (complexity) using Fibonacci sequence.

        Based on:
        - Number of methods
        - Average method size
        - Complexity indicators (loops, conditionals)
        """
        # Base complexity on number of methods
        method_count = len(methods)

        # Count total lines across all methods
        total_lines = sum(len(ast.unparse(m).split("\n")) for m in methods)
        avg_lines = total_lines / method_count if method_count > 0 else 0

        # Simple heuristic: 1-2 methods = small, 3-5 = medium, 6+ = large
        if method_count <= 2 and avg_lines < 20:
            base_points = 2  # Small
        elif method_count <= 5 and avg_lines < 40:
            base_points = 5  # Medium
        elif method_count <= 8:
            base_points = 8  # Large
        else:
            base_points = 13  # Extra Large

        # Return nearest Fibonacci number
        return min(self.FIBONACCI, key=lambda x: abs(x - base_points))

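    # Worked example (illustrative): 3 methods averaging 25 lines fall in the
    # medium bucket (base_points = 5); 5 is already a Fibonacci number, so the
    # group is sized at 5 story points.
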
    def _calculate_value_points(self, methods: list[ast.FunctionDef], group_name: str) -> int:
        """
        Calculate value points (business value) using Fibonacci sequence.

        Based on:
        - Public API exposure
        - CRUD operations have high value
        - Validation has medium value
        """
        # CRUD operations are high value
        crud_groups = ["Create Operations", "Read Operations", "Update Operations", "Delete Operations"]
        if group_name in crud_groups:
            base_value = 8  # High business value

        # User-facing operations
        elif group_name in ["Processing", "Analysis", "Generation", "Comparison"]:
            base_value = 5  # Medium-high value

        # Developer/internal operations
        elif group_name in ["Validation", "Configuration"]:
            base_value = 3  # Medium value

        # Core functionality
        else:
            base_value = 3  # Default medium value

        # Adjust based on number of public methods (more = higher value)
        public_count = sum(1 for m in methods if not m.name.startswith("_"))
        if public_count >= 3:
            base_value = min(base_value + 2, 13)

        # Return nearest Fibonacci number
        return min(self.FIBONACCI, key=lambda x: abs(x - base_value))

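    # Worked example (illustrative): a "Read Operations" group with 4 public
    # methods starts at 8, is bumped to 10 by the public-method bonus, then
    # snaps back to 8, the nearest Fibonacci number (|8 - 10| < |13 - 10|).
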
    def _calculate_feature_confidence(self, node: ast.ClassDef, stories: list[Story]) -> float:
        """Calculate confidence score for a feature."""
        score = 0.3  # Base score

        # Has docstring
        if ast.get_docstring(node):
            score += 0.2

        # Has stories
        if stories:
            score += 0.2

        # Has multiple stories (better coverage)
        if len(stories) > 2:
            score += 0.2

        # Stories are well-documented
        documented_stories = sum(1 for s in stories if s.acceptance and len(s.acceptance) > 1)
        if stories and documented_stories > len(stories) / 2:
            score += 0.1

        return min(score, 1.0)

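    # Worked example (illustrative): a documented class with 3 stories, 2 of
    # which carry multiple acceptance criteria, scores
    # 0.3 + 0.2 + 0.2 + 0.2 + 0.1 = 1.0.
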
    def _humanize_name(self, name: str) -> str:
        """Convert snake_case or PascalCase to human-readable title."""
        # Handle PascalCase
        name = re.sub(r"([A-Z])", r" \1", name).strip()
        # Handle snake_case
        name = name.replace("_", " ").replace("-", " ")
        return name.title()

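    # Illustrative conversions: "PlanComparator" -> "Plan Comparator",
    # "speckit_scanner" -> "Speckit Scanner", "my-repo" -> "My Repo".
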
    def _build_dependency_graph(self, python_files: list[Path]) -> None:
        """
        Build module dependency graph using AST imports.

        Creates a directed graph where nodes are modules and edges represent imports.
        """
        # First pass: collect all modules as nodes
        modules: dict[str, Path] = {}
        for file_path in python_files:
            if self._should_skip_file(file_path):
                continue

            # Convert file path to module name
            module_name = self._path_to_module_name(file_path)
            modules[module_name] = file_path
            self.dependency_graph.add_node(module_name, path=file_path)

        # Second pass: add edges based on imports
        for module_name, file_path in modules.items():
            try:
                content = file_path.read_text(encoding="utf-8")
                tree = ast.parse(content)

                # Extract imports
                imports = self._extract_imports_from_ast(tree, file_path)
                for imported_module in imports:
                    # Only add edges for modules we know about (within repo)
                    # Try exact match first, then partial match
                    if imported_module in modules:
                        self.dependency_graph.add_edge(module_name, imported_module)
                    else:
                        # Try to find matching module (e.g., "module_a" matches "src.module_a")
                        matching_module = None
                        for known_module in modules:
                            # Check if imported name matches the module name (last part)
                            if imported_module == known_module.split(".")[-1]:
                                matching_module = known_module
                                break
                        if matching_module:
                            self.dependency_graph.add_edge(module_name, matching_module)
            except (SyntaxError, UnicodeDecodeError):
                # Skip files that can't be parsed
                continue

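    # Illustrative result (hypothetical layout): if app.py at the repo root
    # contains "import helpers" and helpers.py sits beside it, the graph gains
    # the edge "app" -> "helpers".
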
    def _path_to_module_name(self, file_path: Path) -> str:
        """Convert file path to module name (e.g., src/foo/bar.py -> src.foo.bar)."""
        # Get relative path from repo root
        try:
            relative_path = file_path.relative_to(self.repo_path)
        except ValueError:
            # File is outside repo, use full path
            relative_path = file_path

        # Convert to module name
        parts = list(relative_path.parts[:-1]) + [relative_path.stem]  # Remove .py extension
        return ".".join(parts)

    def _extract_imports_from_ast(self, tree: ast.AST, file_path: Path) -> list[str]:
        """
        Extract imported module names from AST.

        Returns:
            List of module names (relative to repo root if possible)
        """
        imports: set[str] = set()

        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                # Plain imports (e.g., import foo, import foo.bar as baz)
                for alias in node.names:
                    if "." in alias.name:
                        # Extract root module (e.g., foo.bar.baz -> foo)
                        root_module = alias.name.split(".")[0]
                        imports.add(root_module)
                    else:
                        imports.add(alias.name)

            elif isinstance(node, ast.ImportFrom) and node.module:
                # From imports (e.g., from foo.bar import baz)
                if "." in node.module:
                    # Extract root module
                    root_module = node.module.split(".")[0]
                    imports.add(root_module)
                else:
                    imports.add(node.module)

        # Modules to ignore when resolving (stdlib and other common names)
        stdlib_modules = {
            "sys", "os", "json", "yaml", "pathlib", "typing", "collections",
            "dataclasses", "enum", "abc", "asyncio", "functools", "itertools",
            "re", "datetime", "time", "logging", "hashlib", "base64", "urllib",
            "http", "socket", "threading", "multiprocessing",
        }

        # Try to resolve local imports (relative to current file)
        resolved_imports = []
        current_module = self._path_to_module_name(file_path)

        for imported in imports:
            # Skip stdlib imports (common patterns)
            if imported in stdlib_modules:
                continue

            # Try to resolve relative imports
            # If imported module matches a pattern from our repo, resolve it
            potential_module = self._resolve_local_import(imported, current_module)
            if potential_module:
                resolved_imports.append(potential_module)
            else:
                # Keep as external dependency
                resolved_imports.append(imported)

        return resolved_imports

    def _resolve_local_import(self, imported: str, current_module: str) -> str | None:
        """
        Try to resolve a local import relative to current module.

        Returns:
            Resolved module name if found in repo, None otherwise
        """
        # Check if it's already in our dependency graph
        if imported in self.dependency_graph:
            return imported

        # Try relative import resolution (e.g., from .foo import bar)
        # This is simplified - full resolution would need to handle package structure
        current_parts = current_module.split(".")
        if len(current_parts) > 1:
            # Try parent package
            parent_module = ".".join(current_parts[:-1])
            potential = f"{parent_module}.{imported}"
            if potential in self.dependency_graph:
                return potential

        return None

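    # Illustrative resolution (hypothetical names): with current_module
    # "pkg.sub.mod" and imported "utils", the parent-package probe checks
    # whether "pkg.sub.utils" is a known graph node and returns it if so.
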
    def _extract_type_hints(self, tree: ast.AST, file_path: Path) -> dict[str, str]:
        """
        Extract type hints from function/method signatures.

        Returns:
            Dictionary mapping function names to their return type hints
        """
        type_hints: dict[str, str] = {}
        module_name = self._path_to_module_name(file_path)

        for node in ast.walk(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                func_name = node.name
                return_type = "None"

                # Extract return type annotation
                if node.returns:
                    # Convert AST node to string representation
                    if isinstance(node.returns, ast.Name):
                        return_type = node.returns.id
                    elif isinstance(node.returns, ast.Subscript):
                        # Handle generics like List[str], Dict[str, int]
                        container = node.returns.value.id if isinstance(node.returns.value, ast.Name) else "Any"
                        return_type = str(container)  # Simplified representation

                type_hints[func_name] = return_type

        # Store per module
        if module_name not in self.type_hints:
            self.type_hints[module_name] = {}
        self.type_hints[module_name].update(type_hints)

        return type_hints

    def _detect_async_patterns(self, tree: ast.AST, file_path: Path) -> list[str]:
        """
        Detect async/await patterns in code.

        Returns:
            List of async method/function names
        """
        async_methods: list[str] = []
        module_name = self._path_to_module_name(file_path)

        for node in ast.walk(tree):
            # Check for async functions
            if isinstance(node, ast.AsyncFunctionDef):
                async_methods.append(node.name)

            # Check for await expressions (defensive: `await` outside an async
            # function does not parse, so this mostly re-confirms async defs)
            if isinstance(node, ast.Await):
                # Record every function whose body contains this await
                for parent in ast.walk(tree):
                    if isinstance(parent, (ast.FunctionDef, ast.AsyncFunctionDef)) and any(
                        child is node for child in ast.walk(parent)
                    ):
                        if parent.name not in async_methods:
                            async_methods.append(parent.name)

        # Store per module
        self.async_patterns[module_name] = async_methods

        return async_methods

    def _analyze_commit_history(self) -> None:
        """
        Mine commit history to identify feature boundaries.

        Uses GitPython to analyze commit messages and associate them with features.
        Limits analysis to recent commits to avoid performance issues.
        """
        try:
            from git import Repo

            if not (self.repo_path / ".git").exists():
                return

            repo = Repo(self.repo_path)
            # Limit to last 100 commits to avoid performance issues with large repositories
            max_commits = 100
            commits = list(repo.iter_commits(max_count=max_commits))

            # Map commits to files to features
            file_to_feature: dict[str, list[str]] = {}
            for feature in self.features:
                # Extract potential file paths from feature key
                # This is simplified - in reality we'd track which files contributed to which features
                pass

            # Analyze commit messages for feature references
            for commit in commits:
                try:
                    # Skip commits that can't be accessed (corrupted or too old)
                    # Use commit.message which is lazy-loaded but faster than full commit object
                    commit_message = commit.message
                    if isinstance(commit_message, bytes):
                        commit_message = commit_message.decode("utf-8", errors="ignore")
                    message = commit_message.lower()
                    # Look for feature patterns (e.g., FEATURE-001, feat:, feature:)
                    if "feat" in message or "feature" in message:
                        # Try to extract feature keys from commit message
                        feature_match = re.search(r"feature[-\s]?(\d+)", message, re.IGNORECASE)
                        if feature_match:
                            feature_num = feature_match.group(1)
                            # Associate commit with feature (simplified)
                except Exception:
                    # Skip individual commits that fail (corrupted, etc.)
                    continue

        except ImportError:
            # GitPython not available, skip
            pass
        except Exception:
            # Git operations failed, skip gracefully
            pass

    def _enhance_features_with_dependencies(self) -> None:
        """Enhance features with dependency graph information."""
        for feature in self.features:
            # Find dependencies for this feature's module
            # This is simplified - would need to track which module each feature comes from
            pass

    def _get_module_dependencies(self, module_name: str) -> list[str]:
        """Get list of modules that the given module depends on."""
        if module_name not in self.dependency_graph:
            return []

        return list(self.dependency_graph.successors(module_name))
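
A minimal usage sketch for orientation (hypothetical repo path; the import
location follows this wheel's RECORD, and the constructor and analyze()
signatures are as defined above):

    from pathlib import Path

    from specfact_cli.analyzers.code_analyzer import CodeAnalyzer

    # Derive a draft plan bundle from an existing codebase
    analyzer = CodeAnalyzer(Path("/path/to/repo"), confidence_threshold=0.6, key_format="sequential")
    bundle = analyzer.analyze()
    for feature in bundle.features:
        print(feature.key, feature.title, feature.confidence)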