specfact_cli-0.4.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. specfact_cli/__init__.py +14 -0
  2. specfact_cli/agents/__init__.py +24 -0
  3. specfact_cli/agents/analyze_agent.py +392 -0
  4. specfact_cli/agents/base.py +95 -0
  5. specfact_cli/agents/plan_agent.py +202 -0
  6. specfact_cli/agents/registry.py +176 -0
  7. specfact_cli/agents/sync_agent.py +133 -0
  8. specfact_cli/analyzers/__init__.py +11 -0
  9. specfact_cli/analyzers/code_analyzer.py +796 -0
  10. specfact_cli/cli.py +396 -0
  11. specfact_cli/commands/__init__.py +7 -0
  12. specfact_cli/commands/enforce.py +88 -0
  13. specfact_cli/commands/import_cmd.py +365 -0
  14. specfact_cli/commands/init.py +125 -0
  15. specfact_cli/commands/plan.py +1089 -0
  16. specfact_cli/commands/repro.py +192 -0
  17. specfact_cli/commands/sync.py +408 -0
  18. specfact_cli/common/__init__.py +25 -0
  19. specfact_cli/common/logger_setup.py +654 -0
  20. specfact_cli/common/logging_utils.py +41 -0
  21. specfact_cli/common/text_utils.py +52 -0
  22. specfact_cli/common/utils.py +48 -0
  23. specfact_cli/comparators/__init__.py +11 -0
  24. specfact_cli/comparators/plan_comparator.py +391 -0
  25. specfact_cli/generators/__init__.py +14 -0
  26. specfact_cli/generators/plan_generator.py +105 -0
  27. specfact_cli/generators/protocol_generator.py +115 -0
  28. specfact_cli/generators/report_generator.py +200 -0
  29. specfact_cli/generators/workflow_generator.py +120 -0
  30. specfact_cli/importers/__init__.py +7 -0
  31. specfact_cli/importers/speckit_converter.py +773 -0
  32. specfact_cli/importers/speckit_scanner.py +711 -0
  33. specfact_cli/models/__init__.py +33 -0
  34. specfact_cli/models/deviation.py +105 -0
  35. specfact_cli/models/enforcement.py +150 -0
  36. specfact_cli/models/plan.py +97 -0
  37. specfact_cli/models/protocol.py +28 -0
  38. specfact_cli/modes/__init__.py +19 -0
  39. specfact_cli/modes/detector.py +126 -0
  40. specfact_cli/modes/router.py +153 -0
  41. specfact_cli/resources/semgrep/async.yml +285 -0
  42. specfact_cli/sync/__init__.py +12 -0
  43. specfact_cli/sync/repository_sync.py +279 -0
  44. specfact_cli/sync/speckit_sync.py +388 -0
  45. specfact_cli/utils/__init__.py +58 -0
  46. specfact_cli/utils/console.py +70 -0
  47. specfact_cli/utils/feature_keys.py +212 -0
  48. specfact_cli/utils/git.py +241 -0
  49. specfact_cli/utils/github_annotations.py +399 -0
  50. specfact_cli/utils/ide_setup.py +382 -0
  51. specfact_cli/utils/prompts.py +180 -0
  52. specfact_cli/utils/structure.py +497 -0
  53. specfact_cli/utils/yaml_utils.py +200 -0
  54. specfact_cli/validators/__init__.py +20 -0
  55. specfact_cli/validators/fsm.py +262 -0
  56. specfact_cli/validators/repro_checker.py +759 -0
  57. specfact_cli/validators/schema.py +196 -0
  58. specfact_cli-0.4.2.dist-info/METADATA +370 -0
  59. specfact_cli-0.4.2.dist-info/RECORD +62 -0
  60. specfact_cli-0.4.2.dist-info/WHEEL +4 -0
  61. specfact_cli-0.4.2.dist-info/entry_points.txt +2 -0
  62. specfact_cli-0.4.2.dist-info/licenses/LICENSE.md +61 -0
specfact_cli/analyzers/code_analyzer.py
@@ -0,0 +1,796 @@
+"""Code analyzer for extracting features from brownfield codebases."""
+
+from __future__ import annotations
+
+import ast
+import re
+from collections import defaultdict
+from pathlib import Path
+
+import networkx as nx
+from beartype import beartype
+from icontract import ensure, require
+
+from specfact_cli.models.plan import Feature, Idea, Metadata, PlanBundle, Product, Story
+from specfact_cli.utils.feature_keys import to_classname_key, to_sequential_key
+
+
+class CodeAnalyzer:
+    """
+    Analyzes Python code to auto-derive plan bundles.
+
+    Extracts features from classes and user stories from method patterns
+    following Scrum/Agile practices.
+    """
+
+    # Fibonacci sequence for story points
+    FIBONACCI = [1, 2, 3, 5, 8, 13, 21, 34, 55, 89]
+
+    @beartype
+    @require(lambda repo_path: repo_path is not None and isinstance(repo_path, Path), "Repo path must be Path")
+    @require(lambda confidence_threshold: 0.0 <= confidence_threshold <= 1.0, "Confidence threshold must be 0.0-1.0")
+    @require(lambda plan_name: plan_name is None or isinstance(plan_name, str), "Plan name must be None or str")
+    def __init__(
+        self,
+        repo_path: Path,
+        confidence_threshold: float = 0.5,
+        key_format: str = "classname",
+        plan_name: str | None = None,
+    ) -> None:
+        """
+        Initialize code analyzer.
+
+        Args:
+            repo_path: Path to repository root
+            confidence_threshold: Minimum confidence score (0.0-1.0)
+            key_format: Feature key format ('classname' or 'sequential', default: 'classname')
+            plan_name: Custom plan name (will be used for idea.title, optional)
+        """
+        self.repo_path = Path(repo_path)
+        self.confidence_threshold = confidence_threshold
+        self.key_format = key_format
+        self.plan_name = plan_name
+        self.features: list[Feature] = []
+        self.themes: set[str] = set()
+        self.dependency_graph: nx.DiGraph[str] = nx.DiGraph()  # Module dependency graph
+        self.type_hints: dict[str, dict[str, str]] = {}  # Module -> {function: type_hint}
+        self.async_patterns: dict[str, list[str]] = {}  # Module -> [async_methods]
+        self.commit_bounds: dict[str, tuple[str, str]] = {}  # Feature -> (first_commit, last_commit)
+
+    @beartype
+    @ensure(lambda result: isinstance(result, PlanBundle), "Must return PlanBundle")
+    @ensure(
+        lambda result: isinstance(result, PlanBundle)
+        and hasattr(result, "version")
+        and hasattr(result, "features")
+        and result.version == "1.0"  # type: ignore[reportUnknownMemberType]
+        and len(result.features) >= 0,  # type: ignore[reportUnknownMemberType]
+        "Plan bundle must be valid",
+    )
+    def analyze(self) -> PlanBundle:
+        """
+        Analyze repository and generate plan bundle.
+
+        Returns:
+            Generated PlanBundle from code analysis
+        """
+        # Find all Python files
+        python_files = list(self.repo_path.rglob("*.py"))
+
+        # Build module dependency graph first
+        self._build_dependency_graph(python_files)
+
+        # Analyze each file
+        for file_path in python_files:
+            if self._should_skip_file(file_path):
+                continue
+
+            self._analyze_file(file_path)
+
+        # Analyze commit history for feature boundaries
+        self._analyze_commit_history()
+
+        # Enhance features with dependency information
+        self._enhance_features_with_dependencies()
+
+        # If sequential format, update all keys now that we know the total count
+        if self.key_format == "sequential":
+            for idx, feature in enumerate(self.features, start=1):
+                feature.key = to_sequential_key(feature.key, idx)
+
+        # Generate plan bundle
+        # Use plan_name if provided, otherwise use repo name, otherwise fallback
+        if self.plan_name:
+            # Use the plan name (already sanitized, but humanize for title)
+            title = self.plan_name.replace("_", " ").replace("-", " ").title()
+        else:
+            repo_name = self.repo_path.name or "Unknown Project"
+            title = self._humanize_name(repo_name)
+
+        idea = Idea(
+            title=title,
+            narrative=f"Auto-derived plan from brownfield analysis of {title}",
+            metrics=None,
+        )
+
+        product = Product(
+            themes=sorted(self.themes) if self.themes else ["Core"],
+            releases=[],
+        )
+
+        return PlanBundle(
+            version="1.0",
+            idea=idea,
+            business=None,
+            product=product,
+            features=self.features,
+            metadata=Metadata(stage="draft", promoted_at=None, promoted_by=None),
+        )
+
+    def _should_skip_file(self, file_path: Path) -> bool:
+        """Check if file should be skipped."""
+        skip_patterns = [
+            "__pycache__",
+            ".git",
+            "venv",
+            ".venv",
+            "env",
+            ".pytest_cache",
+            "htmlcov",
+            "dist",
+            "build",
+            ".eggs",
+            "tests",  # Skip test files
+        ]
+
+        return any(pattern in str(file_path) for pattern in skip_patterns)
+
+    def _analyze_file(self, file_path: Path) -> None:
+        """Analyze a single Python file."""
+        try:
+            content = file_path.read_text(encoding="utf-8")
+            tree = ast.parse(content)
+
+            # Extract module-level info
+            self._extract_themes_from_imports(tree)
+
+            # Extract type hints
+            self._extract_type_hints(tree, file_path)
+
+            # Detect async patterns
+            self._detect_async_patterns(tree, file_path)
+
+            # Extract classes as features
+            for node in ast.walk(tree):
+                if isinstance(node, ast.ClassDef):
+                    feature = self._extract_feature_from_class(node, file_path)
+                    if feature:
+                        self.features.append(feature)
+
+        except (SyntaxError, UnicodeDecodeError):
+            # Skip files that can't be parsed
+            pass
+
+    def _extract_themes_from_imports(self, tree: ast.AST) -> None:
+        """Extract themes from import statements."""
+        theme_keywords = {
+            "fastapi": "API",
+            "flask": "API",
+            "django": "Web",
+            "redis": "Caching",
+            "postgres": "Database",
+            "mysql": "Database",
+            "asyncio": "Async",
+            "typer": "CLI",
+            "click": "CLI",
+            "pydantic": "Validation",
+            "pytest": "Testing",
+            "sqlalchemy": "ORM",
+            "requests": "HTTP Client",
+            "aiohttp": "Async HTTP",
+        }
+
+        for node in ast.walk(tree):
+            if isinstance(node, (ast.Import, ast.ImportFrom)):
+                if isinstance(node, ast.Import):
+                    for alias in node.names:
+                        for keyword, theme in theme_keywords.items():
+                            if keyword in alias.name.lower():
+                                self.themes.add(theme)
+                elif isinstance(node, ast.ImportFrom) and node.module:
+                    for keyword, theme in theme_keywords.items():
+                        if keyword in node.module.lower():
+                            self.themes.add(theme)
+
+    def _extract_feature_from_class(self, node: ast.ClassDef, file_path: Path) -> Feature | None:
+        """Extract feature from class definition."""
+        # Skip private classes and test classes
+        if node.name.startswith("_") or node.name.startswith("Test"):
+            return None
+
+        # Generate feature key based on configured format
+        if self.key_format == "sequential":
+            # Use sequential numbering (will be updated after all features are collected)
+            feature_key = f"FEATURE-{len(self.features) + 1:03d}"
+        else:
+            # Default: classname format
+            feature_key = to_classname_key(node.name)
+
+        # Extract docstring as outcome
+        docstring = ast.get_docstring(node)
+        outcomes: list[str] = []
+        if docstring:
+            # Take first paragraph as primary outcome
+            first_para = docstring.split("\n\n")[0].strip()
+            outcomes.append(first_para)  # type: ignore[reportUnknownMemberType]
+        else:
+            outcomes.append(f"Provides {self._humanize_name(node.name)} functionality")  # type: ignore[reportUnknownMemberType]
+
+        # Collect all methods
+        methods = [item for item in node.body if isinstance(item, ast.FunctionDef)]
+
+        # Group methods into user stories
+        stories = self._extract_stories_from_methods(methods, node.name)
+
+        # Calculate confidence based on documentation and story quality
+        confidence = self._calculate_feature_confidence(node, stories)
+
+        if confidence < self.confidence_threshold:
+            return None
+
+        # Skip if no meaningful stories
+        if not stories:
+            return None
+
+        return Feature(
+            key=feature_key,
+            title=self._humanize_name(node.name),
+            outcomes=outcomes,
+            acceptance=[f"{node.name} class provides documented functionality"],
+            stories=stories,
+            confidence=round(confidence, 2),
+        )
+
+    def _extract_stories_from_methods(self, methods: list[ast.FunctionDef], class_name: str) -> list[Story]:
+        """
+        Extract user stories from methods by grouping related functionality.
+
+        Groups methods by:
+        - CRUD operations (create, read, update, delete)
+        - Common prefixes (get_, set_, validate_, process_)
+        - Functionality patterns
+        """
+        # Group methods by pattern
+        method_groups = self._group_methods_by_functionality(methods)
+
+        stories: list[Story] = []
+        story_counter = 1
+
+        for group_name, group_methods in method_groups.items():
+            if not group_methods:
+                continue
+
+            # Create a user story for this group
+            story = self._create_story_from_method_group(group_name, group_methods, class_name, story_counter)
+
+            if story:
+                stories.append(story)  # type: ignore[reportUnknownMemberType]
+                story_counter += 1
+
+        return stories
+
+    def _group_methods_by_functionality(self, methods: list[ast.FunctionDef]) -> dict[str, list[ast.FunctionDef]]:
+        """Group methods by their functionality patterns."""
+        groups: dict[str, list[ast.FunctionDef]] = defaultdict(list)
+
+        # Filter out private methods (except __init__)
+        public_methods = [m for m in methods if not m.name.startswith("_") or m.name == "__init__"]
+
+        for method in public_methods:
+            # CRUD operations
+            if any(crud in method.name.lower() for crud in ["create", "add", "insert", "new"]):
+                groups["Create Operations"].append(method)  # type: ignore[reportUnknownMemberType]
+            elif any(read in method.name.lower() for read in ["get", "read", "fetch", "find", "list", "retrieve"]):
+                groups["Read Operations"].append(method)  # type: ignore[reportUnknownMemberType]
+            elif any(update in method.name.lower() for update in ["update", "modify", "edit", "change", "set"]):
+                groups["Update Operations"].append(method)  # type: ignore[reportUnknownMemberType]
+            elif any(delete in method.name.lower() for delete in ["delete", "remove", "destroy"]):
+                groups["Delete Operations"].append(method)  # type: ignore[reportUnknownMemberType]
+
+            # Validation
+            elif any(val in method.name.lower() for val in ["validate", "check", "verify", "is_valid"]):
+                groups["Validation"].append(method)  # type: ignore[reportUnknownMemberType]
+
+            # Processing/Computation
+            elif any(
+                proc in method.name.lower() for proc in ["process", "compute", "calculate", "transform", "convert"]
+            ):
+                groups["Processing"].append(method)  # type: ignore[reportUnknownMemberType]
+
+            # Analysis
+            elif any(an in method.name.lower() for an in ["analyze", "parse", "extract", "detect"]):
+                groups["Analysis"].append(method)  # type: ignore[reportUnknownMemberType]
+
+            # Generation
+            elif any(gen in method.name.lower() for gen in ["generate", "build", "create", "make"]):
+                groups["Generation"].append(method)  # type: ignore[reportUnknownMemberType]
+
+            # Comparison
+            elif any(cmp in method.name.lower() for cmp in ["compare", "diff", "match"]):
+                groups["Comparison"].append(method)  # type: ignore[reportUnknownMemberType]
+
+            # Setup/Configuration
+            elif method.name == "__init__" or any(
+                setup in method.name.lower() for setup in ["setup", "configure", "initialize"]
+            ):
+                groups["Configuration"].append(method)  # type: ignore[reportUnknownMemberType]
+
+            # Catch-all for other public methods
+            else:
+                groups["Core Functionality"].append(method)  # type: ignore[reportUnknownMemberType]
+
+        return dict(groups)
+
+    def _create_story_from_method_group(
+        self, group_name: str, methods: list[ast.FunctionDef], class_name: str, story_number: int
+    ) -> Story | None:
+        """Create a user story from a group of related methods."""
+        if not methods:
+            return None
+
+        # Generate story key
+        story_key = f"STORY-{class_name.upper()}-{story_number:03d}"
+
+        # Create user-centric title based on group
+        title = self._generate_story_title(group_name, class_name)
+
+        # Extract acceptance criteria from docstrings
+        acceptance: list[str] = []
+        tasks: list[str] = []
+
+        for method in methods:
+            # Add method as task
+            tasks.append(f"{method.name}()")
+
+            # Extract acceptance from docstring
+            docstring = ast.get_docstring(method)
+            if docstring:
+                # Take first line as acceptance criterion
+                first_line = docstring.split("\n")[0].strip()
+                if first_line and first_line not in acceptance:
+                    acceptance.append(first_line)
+
+        # Add default acceptance if none found
+        if not acceptance:
+            acceptance.append(f"{group_name} functionality works as expected")
+
+        # Calculate story points (complexity) based on number of methods and their size
+        story_points = self._calculate_story_points(methods)
+
+        # Calculate value points based on public API exposure
+        value_points = self._calculate_value_points(methods, group_name)
+
+        return Story(
+            key=story_key,
+            title=title,
+            acceptance=acceptance,
+            story_points=story_points,
+            value_points=value_points,
+            tasks=tasks,
+            confidence=0.8 if len(methods) > 1 else 0.6,
+        )
+
+    def _generate_story_title(self, group_name: str, class_name: str) -> str:
+        """Generate user-centric story title."""
+        # Map group names to user-centric titles
+        title_templates = {
+            "Create Operations": f"As a user, I can create new {self._humanize_name(class_name)} records",
+            "Read Operations": f"As a user, I can view {self._humanize_name(class_name)} data",
+            "Update Operations": f"As a user, I can update {self._humanize_name(class_name)} records",
+            "Delete Operations": f"As a user, I can delete {self._humanize_name(class_name)} records",
+            "Validation": f"As a developer, I can validate {self._humanize_name(class_name)} data",
+            "Processing": f"As a user, I can process data using {self._humanize_name(class_name)}",
+            "Analysis": f"As a user, I can analyze data with {self._humanize_name(class_name)}",
+            "Generation": f"As a user, I can generate outputs from {self._humanize_name(class_name)}",
+            "Comparison": f"As a user, I can compare {self._humanize_name(class_name)} data",
+            "Configuration": f"As a developer, I can configure {self._humanize_name(class_name)}",
+            "Core Functionality": f"As a user, I can use {self._humanize_name(class_name)} features",
+        }
+
+        return title_templates.get(group_name, f"As a user, I can work with {self._humanize_name(class_name)}")
+
+    def _calculate_story_points(self, methods: list[ast.FunctionDef]) -> int:
+        """
+        Calculate story points (complexity) using Fibonacci sequence.
+
+        Based on:
+        - Number of methods
+        - Average method size
+        - Complexity indicators (loops, conditionals)
+        """
+        # Base complexity on number of methods
+        method_count = len(methods)
+
+        # Count total lines across all methods
+        total_lines = sum(len(ast.unparse(m).split("\n")) for m in methods)
+        avg_lines = total_lines / method_count if method_count > 0 else 0
+
+        # Simple heuristic: 1-2 methods = small, 3-5 = medium, 6+ = large
+        if method_count <= 2 and avg_lines < 20:
+            base_points = 2  # Small
+        elif method_count <= 5 and avg_lines < 40:
+            base_points = 5  # Medium
+        elif method_count <= 8:
+            base_points = 8  # Large
+        else:
+            base_points = 13  # Extra Large
+
+        # Return nearest Fibonacci number
+        return min(self.FIBONACCI, key=lambda x: abs(x - base_points))
+
+    def _calculate_value_points(self, methods: list[ast.FunctionDef], group_name: str) -> int:
+        """
+        Calculate value points (business value) using Fibonacci sequence.
+
+        Based on:
+        - Public API exposure
+        - CRUD operations have high value
+        - Validation has medium value
+        """
+        # CRUD operations are high value
+        crud_groups = ["Create Operations", "Read Operations", "Update Operations", "Delete Operations"]
+        if group_name in crud_groups:
+            base_value = 8  # High business value
+
+        # User-facing operations
+        elif group_name in ["Processing", "Analysis", "Generation", "Comparison"]:
+            base_value = 5  # Medium-high value
+
+        # Developer/internal operations
+        elif group_name in ["Validation", "Configuration"]:
+            base_value = 3  # Medium value
+
+        # Core functionality
+        else:
+            base_value = 3  # Default medium value
+
+        # Adjust based on number of public methods (more = higher value)
+        public_count = sum(1 for m in methods if not m.name.startswith("_"))
+        if public_count >= 3:
+            base_value = min(base_value + 2, 13)
+
+        # Return nearest Fibonacci number
+        return min(self.FIBONACCI, key=lambda x: abs(x - base_value))
+
+    def _calculate_feature_confidence(self, node: ast.ClassDef, stories: list[Story]) -> float:
+        """Calculate confidence score for a feature."""
+        score = 0.3  # Base score
+
+        # Has docstring
+        if ast.get_docstring(node):
+            score += 0.2
+
+        # Has stories
+        if stories:
+            score += 0.2
+
+        # Has multiple stories (better coverage)
+        if len(stories) > 2:
+            score += 0.2
+
+        # Stories are well-documented
+        documented_stories = sum(1 for s in stories if s.acceptance and len(s.acceptance) > 1)
+        if stories and documented_stories > len(stories) / 2:
+            score += 0.1
+
+        return min(score, 1.0)
+
+    def _humanize_name(self, name: str) -> str:
+        """Convert snake_case or PascalCase to human-readable title."""
+        # Handle PascalCase
+        name = re.sub(r"([A-Z])", r" \1", name).strip()
+        # Handle snake_case
+        name = name.replace("_", " ").replace("-", " ")
+        return name.title()
+
+    def _build_dependency_graph(self, python_files: list[Path]) -> None:
+        """
+        Build module dependency graph using AST imports.
+
+        Creates a directed graph where nodes are modules and edges represent imports.
+        """
+        # First pass: collect all modules as nodes
+        modules: dict[str, Path] = {}
+        for file_path in python_files:
+            if self._should_skip_file(file_path):
+                continue
+
+            # Convert file path to module name
+            module_name = self._path_to_module_name(file_path)
+            modules[module_name] = file_path
+            self.dependency_graph.add_node(module_name, path=file_path)
+
+        # Second pass: add edges based on imports
+        for module_name, file_path in modules.items():
+            try:
+                content = file_path.read_text(encoding="utf-8")
+                tree = ast.parse(content)
+
+                # Extract imports
+                imports = self._extract_imports_from_ast(tree, file_path)
+                for imported_module in imports:
+                    # Only add edges for modules we know about (within repo)
+                    # Try exact match first, then partial match
+                    if imported_module in modules:
+                        self.dependency_graph.add_edge(module_name, imported_module)
+                    else:
+                        # Try to find matching module (e.g., "module_a" matches "src.module_a")
+                        matching_module = None
+                        for known_module in modules:
+                            # Check if imported name matches the module name (last part)
+                            if imported_module == known_module.split(".")[-1]:
+                                matching_module = known_module
+                                break
+                        if matching_module:
+                            self.dependency_graph.add_edge(module_name, matching_module)
+            except (SyntaxError, UnicodeDecodeError):
+                # Skip files that can't be parsed
+                continue
+
+    def _path_to_module_name(self, file_path: Path) -> str:
+        """Convert file path to module name (e.g., src/foo/bar.py -> src.foo.bar)."""
+        # Get relative path from repo root
+        try:
+            relative_path = file_path.relative_to(self.repo_path)
+        except ValueError:
+            # File is outside repo, use full path
+            relative_path = file_path
+
+        # Convert to module name
+        parts = [*relative_path.parts[:-1], relative_path.stem]  # Remove .py extension
+        return ".".join(parts)
+
+    def _extract_imports_from_ast(self, tree: ast.AST, file_path: Path) -> list[str]:
+        """
+        Extract imported module names from AST.
+
+        Returns:
+            List of module names (relative to repo root if possible)
+        """
+        imports: set[str] = set()
+
+        for node in ast.walk(tree):
+            if isinstance(node, ast.Import):
+                for alias in node.names:
+                    # Import aliases (e.g., import foo as bar)
+                    if "." in alias.name:
+                        # Extract root module (e.g., foo.bar.baz -> foo)
+                        root_module = alias.name.split(".")[0]
+                        imports.add(root_module)
+                    else:
+                        imports.add(alias.name)
+
+            elif isinstance(node, ast.ImportFrom) and node.module:
+                # From imports (e.g., from foo.bar import baz)
+                if "." in node.module:
+                    # Extract root module
+                    root_module = node.module.split(".")[0]
+                    imports.add(root_module)
+                else:
+                    imports.add(node.module)
+
+        # Try to resolve local imports (relative to current file)
+        resolved_imports: list[str] = []
+        current_module = self._path_to_module_name(file_path)
+
+        for imported in imports:
+            # Skip stdlib imports (common patterns)
+            stdlib_modules = {
+                "sys",
+                "os",
+                "json",
+                "yaml",
+                "pathlib",
+                "typing",
+                "collections",
+                "dataclasses",
+                "enum",
+                "abc",
+                "asyncio",
+                "functools",
+                "itertools",
+                "re",
+                "datetime",
+                "time",
+                "logging",
+                "hashlib",
+                "base64",
+                "urllib",
+                "http",
+                "socket",
+                "threading",
+                "multiprocessing",
+            }
+
+            if imported in stdlib_modules:
+                continue
+
+            # Try to resolve relative imports
+            # If imported module matches a pattern from our repo, resolve it
+            potential_module = self._resolve_local_import(imported, current_module)
+            if potential_module:
+                resolved_imports.append(potential_module)
+            else:
+                # Keep as external dependency
+                resolved_imports.append(imported)
+
+        return resolved_imports
+
+    def _resolve_local_import(self, imported: str, current_module: str) -> str | None:
+        """
+        Try to resolve a local import relative to current module.
+
+        Returns:
+            Resolved module name if found in repo, None otherwise
+        """
+        # Check if it's already in our dependency graph
+        if imported in self.dependency_graph:
+            return imported
+
+        # Try relative import resolution (e.g., from .foo import bar)
+        # This is simplified - full resolution would need to handle package structure
+        current_parts = current_module.split(".")
+        if len(current_parts) > 1:
+            # Try parent package
+            parent_module = ".".join(current_parts[:-1])
+            potential = f"{parent_module}.{imported}"
+            if potential in self.dependency_graph:
+                return potential
+
+        return None
+
+    def _extract_type_hints(self, tree: ast.AST, file_path: Path) -> dict[str, str]:
+        """
+        Extract type hints from function/method signatures.
+
+        Returns:
+            Dictionary mapping function names to their return type hints
+        """
+        type_hints: dict[str, str] = {}
+        module_name = self._path_to_module_name(file_path)
+
+        for node in ast.walk(tree):
+            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
+                func_name = node.name
+                return_type = "None"
+
+                # Extract return type annotation
+                if node.returns:
+                    # Convert AST node to string representation
+                    if isinstance(node.returns, ast.Name):
+                        return_type = node.returns.id
+                    elif isinstance(node.returns, ast.Subscript):
+                        # Handle generics like List[str], Dict[str, int]
+                        container = node.returns.value.id if isinstance(node.returns.value, ast.Name) else "Any"
+                        return_type = str(container)  # Simplified representation
+
+                type_hints[func_name] = return_type
+
+        # Store per module
+        if module_name not in self.type_hints:
+            self.type_hints[module_name] = {}
+        self.type_hints[module_name].update(type_hints)
+
+        return type_hints
+
+    def _detect_async_patterns(self, tree: ast.AST, file_path: Path) -> list[str]:
+        """
+        Detect async/await patterns in code.
+
+        Returns:
+            List of async method/function names
+        """
+        async_methods: list[str] = []
+        module_name = self._path_to_module_name(file_path)
+
+        for node in ast.walk(tree):
+            # Check for async functions
+            if isinstance(node, ast.AsyncFunctionDef):
+                async_methods.append(node.name)
+
+            # Check for await statements (even in sync functions)
+            if isinstance(node, ast.Await):
+                # Find containing function
+                for parent in ast.walk(tree):
+                    if isinstance(parent, (ast.FunctionDef, ast.AsyncFunctionDef)):
+                        for child in ast.walk(parent):
+                            if child == node:
+                                if parent.name not in async_methods:
+                                    async_methods.append(parent.name)
+                                break
+
+        # Store per module
+        self.async_patterns[module_name] = async_methods
+
+        return async_methods
+
+    def _analyze_commit_history(self) -> None:
+        """
+        Mine commit history to identify feature boundaries.
+
+        Uses GitPython to analyze commit messages and associate them with features.
+        Limits analysis to recent commits to avoid performance issues.
+        """
+        try:
+            from git import Repo
+
+            if not (self.repo_path / ".git").exists():
+                return
+
+            repo = Repo(self.repo_path)
+            # Limit to last 100 commits to avoid performance issues with large repositories
+            max_commits = 100
+            commits = list(repo.iter_commits(max_count=max_commits))
+
+            # Map commits to files to features
+            # Note: This mapping would be implemented in a full version
+            # For now, we track commit bounds per feature
+            for _feature in self.features:
+                # Extract potential file paths from feature key
+                # This is simplified - in reality we'd track which files contributed to which features
+                pass
+
+            # Analyze commit messages for feature references
+            for commit in commits:
+                try:
+                    # Skip commits that can't be accessed (corrupted or too old)
+                    # Use commit.message which is lazy-loaded but faster than full commit object
+                    commit_message = commit.message
+                    if isinstance(commit_message, bytes):
+                        commit_message = commit_message.decode("utf-8", errors="ignore")
+                    message = commit_message.lower()
+                    # Look for feature patterns (e.g., FEATURE-001, feat:, feature:)
+                    if "feat" in message or "feature" in message:
+                        # Try to extract feature keys from commit message
+                        feature_match = re.search(r"feature[-\s]?(\d+)", message, re.IGNORECASE)
+                        if feature_match:
+                            feature_num = feature_match.group(1)
+                            commit_hash = commit.hexsha[:8]  # Short hash
+
+                            # Find feature by key format (FEATURE-001, FEATURE-1, etc.)
+                            for feature in self.features:
+                                # Match feature key patterns: FEATURE-001, FEATURE-1, Feature-001, etc.
+                                if re.search(rf"feature[-\s]?{feature_num}", feature.key, re.IGNORECASE):
+                                    # Update commit bounds for this feature
+                                    if feature.key not in self.commit_bounds:
+                                        # First commit found for this feature
+                                        self.commit_bounds[feature.key] = (commit_hash, commit_hash)
+                                    else:
+                                        # Update last commit (commits are in reverse chronological order)
+                                        first_commit, _last_commit = self.commit_bounds[feature.key]
+                                        self.commit_bounds[feature.key] = (first_commit, commit_hash)
+                                    break
+                except Exception:
+                    # Skip individual commits that fail (corrupted, etc.)
+                    continue
+
+        except ImportError:
+            # GitPython not available, skip
+            pass
+        except Exception:
+            # Git operations failed, skip gracefully
+            pass
+
+    def _enhance_features_with_dependencies(self) -> None:
+        """Enhance features with dependency graph information."""
+        for _feature in self.features:
+            # Find dependencies for this feature's module
+            # This is simplified - would need to track which module each feature comes from
+            pass
+
+    def _get_module_dependencies(self, module_name: str) -> list[str]:
+        """Get list of modules that the given module depends on."""
+        if module_name not in self.dependency_graph:
+            return []
+
+        return list(self.dependency_graph.successors(module_name))
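
For orientation, a minimal usage sketch of the class above. It assumes only the constructor signature and the analyze() return type visible in this diff; the ./example_repo path and printed fields are illustrative, with attribute names taken from the Feature and Story keyword arguments used in _extract_feature_from_class and _create_story_from_method_group:

from pathlib import Path

from specfact_cli.analyzers.code_analyzer import CodeAnalyzer

# Hypothetical checkout to analyze; any directory containing Python sources works.
repo = Path("./example_repo")

# confidence_threshold drops features scoring below 0.6 in
# _calculate_feature_confidence; key_format="sequential" renumbers
# feature keys (FEATURE-001, FEATURE-002, ...) after collection.
analyzer = CodeAnalyzer(repo, confidence_threshold=0.6, key_format="sequential")
bundle = analyzer.analyze()

print(bundle.idea.title)
for feature in bundle.features:
    # Story and value points were snapped to the nearest FIBONACCI entry.
    print(feature.key, feature.title, feature.confidence)
    for story in feature.stories:
        print("  ", story.key, story.story_points, story.value_points)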