sourcecode 1.22.0__py3-none-any.whl → 1.23.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sourcecode/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "1.22.0"
3
+ __version__ = "1.23.0"
sourcecode/cli.py CHANGED
@@ -1783,6 +1783,14 @@ def prepare_context_cmd(
1783
1783
  out["risk_areas"] = output.risk_areas
1784
1784
  if output.why_these_files:
1785
1785
  out["reasoning"] = output.why_these_files
1786
+ if output.change_type:
1787
+ out["change_type"] = output.change_type
1788
+ if output.system_impact:
1789
+ out["system_impact"] = output.system_impact
1790
+ if output.dependency_graph_summary:
1791
+ out["dependency_graph_summary"] = output.dependency_graph_summary
1792
+ if output.impact_score_per_file:
1793
+ out["impact_score_per_file"] = output.impact_score_per_file
1786
1794
  if output.limitations:
1787
1795
  out["limitations"] = output.limitations
1788
1796
  if output.symptom:
@@ -329,6 +329,10 @@ class TaskOutput:
329
329
  affected_modules: list[str] = field(default_factory=list)
330
330
  risk_areas: list[dict] = field(default_factory=list)
331
331
  since: Optional[str] = None
332
+ system_impact: dict = field(default_factory=dict)
333
+ change_type: list[str] = field(default_factory=list)
334
+ dependency_graph_summary: dict = field(default_factory=dict)
335
+ impact_score_per_file: dict = field(default_factory=dict)
332
336
 
333
337
 
334
338
  # ─────────────────────────────────────────────────────────────────────────────
@@ -651,6 +655,10 @@ class TaskContextBuilder:
651
655
  _delta_risk_areas: list[dict] = []
652
656
  _delta_why: dict[str, str] = {}
653
657
  _delta_analysis_gaps: list[str] = []
658
+ _delta_system_impact: dict = {}
659
+ _delta_change_type: list[str] = []
660
+ _delta_dep_graph_summary: dict = {}
661
+ _delta_impact_score_per_file: dict = {}
654
662
 
655
663
  if task_name == "delta":
656
664
  _delta_changed_list: list[str] = sorted(_delta_files) if _delta_files else []
@@ -661,6 +669,10 @@ class TaskContextBuilder:
661
669
  _delta_risk_areas,
662
670
  _delta_why,
663
671
  _delta_analysis_gaps,
672
+ _delta_system_impact,
673
+ _delta_change_type,
674
+ _delta_dep_graph_summary,
675
+ _delta_impact_score_per_file,
664
676
  ) = self._build_delta_impact(
665
677
  changed_files=_delta_changed_list,
666
678
  all_paths=all_paths,
@@ -857,8 +869,9 @@ class TaskContextBuilder:
857
869
  changed_files = sorted(_delta_files) if _delta_files else self._get_git_changed_files(since=since)
858
870
  _ep_set = {ep.path for ep in entry_points}
859
871
  # include framework-detected entry points AND files classified as
860
- # entrypoint/controller by artifact taxonomy (CLI mains, Spring controllers)
861
- _EP_ARTIFACT_TYPES = frozenset({"entrypoint", "controller"})
872
+ # entrypoint/controller/security by artifact taxonomy
873
+ # (CLI mains, Spring controllers, Spring Security filters/interceptors)
874
+ _EP_ARTIFACT_TYPES = frozenset({"entrypoint", "controller", "security"})
862
875
  affected_entry_points = sorted({
863
876
  f for f in changed_files
864
877
  if f in _ep_set
@@ -889,6 +902,10 @@ class TaskContextBuilder:
889
902
  affected_modules=_delta_affected_modules,
890
903
  risk_areas=_delta_risk_areas,
891
904
  since=since if task_name == "delta" else None,
905
+ system_impact=_delta_system_impact,
906
+ change_type=_delta_change_type,
907
+ dependency_graph_summary=_delta_dep_graph_summary,
908
+ impact_score_per_file=_delta_impact_score_per_file,
892
909
  )
893
910
 
894
911
  def render_prompt(self, output: TaskOutput) -> str:
@@ -1189,11 +1206,15 @@ class TaskContextBuilder:
1189
1206
  Returns dict: artifact_type, risk_areas, impact_level, is_noise, module, confidence.
1190
1207
  Pure path/name heuristics — no file reads, fully deterministic.
1191
1208
 
1192
- Closed taxonomy (no unknown_* values ever emitted):
1209
+ Closed taxonomy (no unknown_* or generic_* values ever emitted):
1193
1210
  entrypoint | controller | service | repository | mapper | config |
1194
1211
  spring_config | spring_profile | security | domain_model | dto |
1195
- test | build_manifest | documentation | ide_noise | db_migration |
1196
- generic_source
1212
+ test | build_manifest | documentation | ide_noise | db_migration | source
1213
+
1214
+ Fallback order for unmatched source files:
1215
+ 1. Stem keyword match (controller/service/repository/…)
1216
+ 2. Folder path component match (innermost directory first)
1217
+ 3. source (confidence=low — extension only)
1197
1218
  """
1198
1219
  norm = path.replace("\\", "/")
1199
1220
  name = Path(path).name
@@ -1322,9 +1343,45 @@ class TaskContextBuilder:
1322
1343
  if suffix in _CODE_EXTS and any(kw in stem_lower for kw in _DTO_KW):
1323
1344
  return {"artifact_type": "dto", "risk_areas": [], "impact_level": "low", "is_noise": False, "module": module, "confidence": "high"}
1324
1345
 
1325
- # Generic source code closed taxonomy, confidence=low signals uncertain classification
1346
+ # Folder-based disambiguation: check directory path components for layer hints
1347
+ # (innermost directory first) before falling back to unclassified source
1348
+ _FOLDER_TYPE_MAP: dict[str, tuple[str, list[str], str]] = {
1349
+ "controller": ("controller", ["api"], "high"),
1350
+ "controllers": ("controller", ["api"], "high"),
1351
+ "api": ("controller", ["api"], "high"),
1352
+ "web": ("controller", ["api"], "high"),
1353
+ "rest": ("controller", ["api"], "high"),
1354
+ "resource": ("controller", ["api"], "high"),
1355
+ "resources": ("controller", ["api"], "high"),
1356
+ "service": ("service", ["transactions", "business_logic"],"high"),
1357
+ "services": ("service", ["transactions", "business_logic"],"high"),
1358
+ "business": ("service", ["transactions", "business_logic"],"high"),
1359
+ "usecase": ("service", ["business_logic"], "high"),
1360
+ "usecases": ("service", ["business_logic"], "high"),
1361
+ "repository": ("repository", ["persistence"], "high"),
1362
+ "repositories": ("repository", ["persistence"], "high"),
1363
+ "dao": ("repository", ["persistence"], "high"),
1364
+ "persistence": ("repository", ["persistence"], "high"),
1365
+ "mapper": ("mapper", ["persistence"], "high"),
1366
+ "mappers": ("mapper", ["persistence"], "high"),
1367
+ "security": ("security", ["security"], "high"),
1368
+ "auth": ("security", ["security"], "high"),
1369
+ "config": ("config", ["config"], "medium"),
1370
+ "configuration": ("config", ["config"], "medium"),
1371
+ "configs": ("config", ["config"], "medium"),
1372
+ "domain": ("domain_model", ["persistence"], "medium"),
1373
+ "model": ("domain_model", ["persistence"], "medium"),
1374
+ "models": ("domain_model", ["persistence"], "medium"),
1375
+ "entity": ("domain_model", ["persistence"], "medium"),
1376
+ "entities": ("domain_model", ["persistence"], "medium"),
1377
+ }
1326
1378
  if suffix in _CODE_EXTS:
1327
- return {"artifact_type": "generic_source", "risk_areas": [], "impact_level": "medium", "is_noise": False, "module": module, "confidence": "low"}
1379
+ for part in reversed(path_dir_parts): # innermost directory first
1380
+ if part in _FOLDER_TYPE_MAP:
1381
+ atype, risk_areas, impact_level = _FOLDER_TYPE_MAP[part]
1382
+ return {"artifact_type": atype, "risk_areas": risk_areas, "impact_level": impact_level, "is_noise": False, "module": module, "confidence": "medium"}
1383
+ # No stem or folder hint matched — last-resort source type (extension-only classification)
1384
+ return {"artifact_type": "source", "risk_areas": [], "impact_level": "medium", "is_noise": False, "module": module, "confidence": "low"}
1328
1385
 
1329
1386
  # Generic config / data files — fold into config type
1330
1387
  if suffix in _CONFIG_EXTS:
@@ -1337,6 +1394,69 @@ class TaskContextBuilder:
1337
1394
  # Binaries, images, lock files — treat as noise (closed taxonomy: no unknown_*)
1338
1395
  return {"artifact_type": "ide_noise", "risk_areas": [], "impact_level": "noise", "is_noise": True, "module": module, "confidence": "low"}
1339
1396
 
1397
+ def _scan_import_dependents(
1398
+ self,
1399
+ changed_paths: list[str],
1400
+ candidate_paths: list[str],
1401
+ *,
1402
+ max_candidates: int = 40,
1403
+ ) -> dict[str, list[str]]:
1404
+ """Find files in candidate_paths that import/reference each changed file.
1405
+
1406
+ Returns mapping: changed_path → list[dependent_paths].
1407
+ Reads file contents — bounded by max_candidates per changed file.
1408
+ Only scans source files (.py, .java, .kt, .ts, .js, .tsx, .jsx).
1409
+ """
1410
+ import re as _re
1411
+
1412
+ _SCANNABLE = frozenset({".py", ".java", ".kt", ".ts", ".js", ".tsx", ".jsx", ".mjs"})
1413
+ dependents: dict[str, list[str]] = {p: [] for p in changed_paths}
1414
+
1415
+ for changed_path in changed_paths:
1416
+ stem = Path(changed_path).stem
1417
+ suffix = Path(changed_path).suffix.lower()
1418
+ if suffix not in _SCANNABLE:
1419
+ continue
1420
+
1421
+ # Build search patterns per language
1422
+ if suffix == ".py":
1423
+ patterns = [
1424
+ rf"(?:from|import)\s+[^\n]*\b{_re.escape(stem)}\b",
1425
+ ]
1426
+ elif suffix in (".java", ".kt"):
1427
+ patterns = [
1428
+ rf"\bimport\b[^;]*\b{_re.escape(stem)}\b",
1429
+ rf"(?:@Autowired|@Inject|@Resource)[^\n]*\n[^\n]*\b{_re.escape(stem)}\b",
1430
+ rf"\b{_re.escape(stem)}\s+\w", # field/param declaration: FooService fooService
1431
+ ]
1432
+ elif suffix in (".ts", ".tsx", ".js", ".jsx", ".mjs"):
1433
+ patterns = [
1434
+ rf"from\s+['\"][^'\"]*{_re.escape(stem)}['\"]",
1435
+ rf"require\s*\(\s*['\"][^'\"]*{_re.escape(stem)}['\"]",
1436
+ ]
1437
+ else:
1438
+ continue
1439
+
1440
+ combined = _re.compile("|".join(patterns), _re.MULTILINE)
1441
+
1442
+ scanned = 0
1443
+ for candidate in candidate_paths:
1444
+ if candidate == changed_path:
1445
+ continue
1446
+ if Path(candidate).suffix.lower() not in _SCANNABLE:
1447
+ continue
1448
+ if scanned >= max_candidates:
1449
+ break
1450
+ try:
1451
+ text = (self.root / candidate).read_text(encoding="utf-8", errors="ignore")
1452
+ if combined.search(text):
1453
+ dependents[changed_path].append(candidate)
1454
+ except OSError:
1455
+ pass
1456
+ scanned += 1
1457
+
1458
+ return dependents
1459
+
1340
1460
  def _build_delta_impact(
1341
1461
  self,
1342
1462
  changed_files: list[str],
@@ -1368,7 +1488,7 @@ class TaskContextBuilder:
1368
1488
  "spring_profile": 0.50,
1369
1489
  "domain_model": 0.50,
1370
1490
  "build_manifest": 0.45,
1371
- "generic_source": 0.45,
1491
+ "source": 0.45,
1372
1492
  "dto": 0.35,
1373
1493
  "test": 0.30,
1374
1494
  "documentation": 0.25,
@@ -1382,7 +1502,7 @@ class TaskContextBuilder:
1382
1502
  "mapper": "high", "db_migration": "high", "spring_config": "high",
1383
1503
  "config": "medium", "spring_profile": "medium",
1384
1504
  "build_manifest": "medium", "domain_model": "medium",
1385
- "generic_source": "medium",
1505
+ "source": "medium",
1386
1506
  "dto": "low", "test": "low", "documentation": "low", "ide_noise": "noise",
1387
1507
  }
1388
1508
 
@@ -1392,7 +1512,7 @@ class TaskContextBuilder:
1392
1512
  "db_migration": "high", "spring_config": "high",
1393
1513
  "service": "medium", "repository": "medium", "mapper": "medium",
1394
1514
  "config": "medium", "domain_model": "medium",
1395
- "spring_profile": "low", "build_manifest": "low", "generic_source": "low",
1515
+ "spring_profile": "low", "build_manifest": "low", "source": "low",
1396
1516
  "dto": "low", "test": "low", "documentation": "low", "ide_noise": "low",
1397
1517
  }
1398
1518
 
@@ -1410,55 +1530,126 @@ class TaskContextBuilder:
1410
1530
  "domain_model": frozenset({"repository", "service"}),
1411
1531
  "db_migration": frozenset({"repository", "mapper"}),
1412
1532
  "spring_profile": frozenset({"service", "config"}),
1413
- "generic_source": frozenset({"service", "repository"}),
1533
+ "source": frozenset({"service", "repository"}),
1414
1534
  "test": frozenset(),
1415
1535
  "documentation": frozenset(),
1416
1536
  "ide_noise": frozenset(),
1417
1537
  "build_manifest": frozenset(),
1418
1538
  }
1419
1539
 
1540
+ # deterministic change_effect descriptions per artifact type
1541
+ _CHANGE_EFFECT: dict[str, str] = {
1542
+ "entrypoint": "modifies application startup, CLI entry, or framework bootstrap — all request flows may be affected",
1543
+ "controller": "alters HTTP routing, API contract, or response shape — API consumers are affected",
1544
+ "service": "changes business rules, transaction scope, or orchestration logic — callers and dependents affected",
1545
+ "repository": "modifies persistence queries or data access patterns — data consistency and service layer affected",
1546
+ "mapper": "alters SQL-to-object binding or query templates — data shape and repositories affected",
1547
+ "security": "changes authentication flow, access control rules, or session handling — all secured endpoints affected",
1548
+ "spring_config": "modifies bean wiring, datasource, or framework-wide settings — all wired beans potentially affected",
1549
+ "spring_profile": "changes environment-specific overrides — behavior differs per active profile",
1550
+ "config": "adjusts configuration values — all modules reading this config are affected",
1551
+ "build_manifest": "changes dependencies, plugins, or project structure — compile-time and runtime classpath affected",
1552
+ "db_migration": "modifies database schema — existing queries, mappings, and constraints may break",
1553
+ "domain_model": "alters entity structure — cascades to repositories, DTOs, serializers, and mappers",
1554
+ "dto": "changes data transfer contract — serialization and API consumers may break",
1555
+ "test": "modifies test coverage or test behavior — no production code affected",
1556
+ "documentation": "updates documentation only — no runtime impact",
1557
+ "ide_noise": "IDE/tooling artifact — no application impact",
1558
+ "source": "modifies application source — artifact role derived from file path structure",
1559
+ }
1560
+
1561
+ # change_type taxonomy — closed set, derived from artifact type
1562
+ _ARTIFACT_CHANGE_TYPES: dict[str, list[str]] = {
1563
+ "entrypoint": ["behavioral_change", "structural_change"],
1564
+ "security": ["behavioral_change", "security_change"],
1565
+ "controller": ["behavioral_change", "structural_change"],
1566
+ "service": ["behavioral_change"],
1567
+ "repository": ["behavioral_change"],
1568
+ "mapper": ["behavioral_change"],
1569
+ "spring_config": ["configuration_change"],
1570
+ "spring_profile": ["configuration_change"],
1571
+ "config": ["configuration_change"],
1572
+ "build_manifest": ["dependency_change"],
1573
+ "db_migration": ["structural_change"],
1574
+ "domain_model": ["structural_change"],
1575
+ "dto": ["structural_change"],
1576
+ "source": ["behavioral_change"],
1577
+ "test": [],
1578
+ "documentation": [],
1579
+ "ide_noise": [],
1580
+ }
1581
+
1420
1582
  _SEV_ORDER = ["noise", "low", "medium", "high", "critical"]
1421
1583
 
1422
- # primary impact area used in structured reasoning
1423
- def _impact_area(risk_areas: list[str], atype: str) -> str:
1424
- if "security" in risk_areas:
1425
- return "security"
1426
- if "api" in risk_areas:
1427
- return "api"
1428
- if "persistence" in risk_areas or "transactions" in risk_areas:
1429
- return "persistence"
1430
- if "config" in risk_areas or "dependencies" in risk_areas:
1431
- return "config"
1432
- if "tests" in risk_areas:
1433
- return "tests"
1434
- return {
1435
- "controller": "api", "service": "business_logic",
1436
- "repository": "persistence", "mapper": "persistence",
1437
- "security": "security", "config": "config",
1438
- "spring_config": "config", "spring_profile": "config",
1439
- "build_manifest": "build", "domain_model": "persistence",
1440
- "dto": "api", "db_migration": "persistence",
1441
- "test": "tests", "entrypoint": "api",
1442
- "generic_source": "unknown",
1443
- }.get(atype, "unknown")
1444
-
1445
- def _role_in_system(atype: str, in_ep_paths: bool) -> str:
1446
- if in_ep_paths or atype in ("entrypoint", "controller"):
1584
+ # Closed impact_area taxonomy — 9 categories, no "unknown"
1585
+ _ATYPE_IMPACT_AREA: dict[str, str] = {
1586
+ "entrypoint": "api_surface",
1587
+ "controller": "api_surface",
1588
+ "dto": "api_surface",
1589
+ "security": "security_layer",
1590
+ "spring_config": "dependency_injection",
1591
+ "service": "transaction_boundary",
1592
+ "repository": "persistence_layer",
1593
+ "mapper": "persistence_layer",
1594
+ "db_migration": "persistence_layer",
1595
+ "domain_model": "persistence_layer",
1596
+ "config": "configuration",
1597
+ "spring_profile": "configuration",
1598
+ "build_manifest": "build_system",
1599
+ "source": "transaction_boundary",
1600
+ "test": "configuration",
1601
+ "documentation": "configuration",
1602
+ "ide_noise": "configuration",
1603
+ }
1604
+ _UI_PATH_SEGS = frozenset({"frontend", "angular", "react", "webapp", "web-app", "ui", "client-app", "client"})
1605
+ _INTEGRATION_STEMS = frozenset({"client", "adapter", "gateway", "proxy", "stub", "feignclient"})
1606
+
1607
def _classify_impact_area(path: str, risk_areas: list[str], atype: str) -> str:
    """Map a file to one impact-area label.

    Checks run in priority order: UI layer, then integration layer, then the
    per-artifact-type table. ``risk_areas`` is accepted but not consulted.
    """
    file_path = Path(path)
    ext = file_path.suffix.lower()
    segments = set(path.lower().replace("\\", "/").split("/"))

    # UI layer: component extensions always; plain .ts/.js only when some
    # path segment names a frontend directory.
    if ext in (".tsx", ".jsx", ".vue"):
        return "ui_layer"
    if ext in (".ts", ".js") and not segments.isdisjoint(_UI_PATH_SEGS):
        return "ui_layer"

    # Integration layer: the file stem contains an integration keyword.
    stem = file_path.stem.lower()
    for keyword in _INTEGRATION_STEMS:
        if keyword in stem:
            return "integration_layer"

    return _ATYPE_IMPACT_AREA.get(atype, "transaction_boundary")
1621
+
1622
+ # Closed role_in_system taxonomy — 8 roles, no "leaf"/"dependency"
1623
+ def _role_in_system(path: str, atype: str, in_ep_paths: bool) -> str:
1624
+ if atype == "build_manifest":
1625
+ return "build_artifact"
1626
+ if atype == "test":
1627
+ return "test"
1628
+ stem_lower = Path(path).stem.lower()
1629
+ if atype == "dto" or any(kw in stem_lower for kw in ("util", "helper", "constant", "enum", "exception")):
1630
+ return "utility"
1631
+ if in_ep_paths or atype == "entrypoint":
1447
1632
  return "entrypoint"
1448
- if atype in ("config", "spring_config", "spring_profile", "build_manifest"):
1449
- return "config"
1450
- if atype in ("dto", "domain_model", "test", "documentation"):
1451
- return "leaf"
1452
- return "dependency"
1453
-
1454
- def _structured_why(atype: str, module: str, role: str, risk_areas: list[str]) -> str:
1455
- area = _impact_area(risk_areas, atype)
1633
+ if atype == "controller":
1634
+ return "external_interface"
1635
+ if atype == "security":
1636
+ return "external_interface"
1637
+ if atype in ("config", "spring_config", "spring_profile"):
1638
+ return "configuration"
1639
+ if atype in ("repository", "mapper", "db_migration"):
1640
+ return "data_access"
1641
+ return "core_service"
1642
+
1643
+ def _structured_why(path: str, atype: str, module: str, role: str, risk_areas: list[str]) -> str:
1644
+ area = _classify_impact_area(path, risk_areas, atype)
1456
1645
  prop = _PROPAGATION_RISK.get(atype, "low")
1646
+ effect = _CHANGE_EFFECT.get(atype, "modifies application logic")
1457
1647
  parts = [
1458
1648
  f"artifact_type: {atype}",
1459
1649
  f"role_in_system: {role}",
1460
1650
  f"impact_area: {area}",
1461
1651
  f"propagation_risk: {prop}",
1652
+ f"change_effect: {effect}",
1462
1653
  ]
1463
1654
  if module:
1464
1655
  parts.append(f"module: {module}")
@@ -1472,9 +1663,14 @@ class TaskContextBuilder:
1472
1663
  [],
1473
1664
  {},
1474
1665
  ["No changed files found. Check that --since ref exists and the diff is non-empty."],
1666
+ {}, # system_impact
1667
+ [], # change_type
1668
+ {"edges": [], "propagation_depth": 0}, # dependency_graph_summary
1669
+ {}, # impact_score_per_file
1475
1670
  )
1476
1671
 
1477
1672
  ep_paths = {ep.path for ep in entry_points}
1673
+ graph_edges: list[dict] = []
1478
1674
 
1479
1675
  # ── Step 1: classify every changed file ───────────────────────────────
1480
1676
  classifications: dict[str, dict[str, Any]] = {
@@ -1517,8 +1713,8 @@ class TaskContextBuilder:
1517
1713
  wanted_expansion_types = wanted_expansion_types | _EXPANSION_TARGETS.get(atype, frozenset())
1518
1714
 
1519
1715
  in_ep = path in ep_paths
1520
- role = _role_in_system(atype, in_ep)
1521
- why_str = _structured_why(atype, module, role, cls["risk_areas"])
1716
+ role = _role_in_system(path, atype, in_ep)
1717
+ why_str = _structured_why(path, atype, module, role, cls["risk_areas"])
1522
1718
  reason = f"changed since {ref_label} | artifact: {atype} | score: {score:.2f}"
1523
1719
 
1524
1720
  relevant.append(RelevantFile(path=path, role=role, score=round(score, 2), reason=reason, why=why_str))
@@ -1567,9 +1763,12 @@ class TaskContextBuilder:
1567
1763
  )
1568
1764
  ]
1569
1765
  in_ep = path in ep_paths
1570
- role = _role_in_system(rel_atype, in_ep)
1766
+ role = _role_in_system(path, rel_atype, in_ep)
1571
1767
  why_str = (
1572
1768
  f"artifact_type: {rel_atype} | role_in_system: {role}"
1769
+ f" | impact_area: {_classify_impact_area(path, rel_cls['risk_areas'], rel_atype)}"
1770
+ f" | propagation_risk: {_PROPAGATION_RISK.get(rel_atype, 'low')}"
1771
+ f" | change_effect: {_CHANGE_EFFECT.get(rel_atype, 'modifies application logic')}"
1573
1772
  f" | pulled_by: type-aware expansion from {ctx_type} '{ctx_val}'"
1574
1773
  f" | triggered_by: {', '.join(triggers[:3])}"
1575
1774
  )
@@ -1578,10 +1777,208 @@ class TaskContextBuilder:
1578
1777
  path=path, role=role, score=rel_score, reason=reason, why=why_str
1579
1778
  )))
1580
1779
  why[path] = why_str
1780
+ for trigger_name in triggers[:3]:
1781
+ trigger_full = next((f for f in changed_files if Path(f).name == trigger_name), None)
1782
+ if trigger_full:
1783
+ graph_edges.append({"from": trigger_full, "to": path, "edge_type": "module_proximity", "hop": 1})
1581
1784
 
1582
1785
  related.sort(key=lambda x: (-x[0], x[1]))
1583
1786
  relevant.extend(rf for _, _, rf in related[:10])
1584
1787
 
1788
+ # ── Step 3b: import-link expansion (bounded file scan) ────────────────
1789
+ # Find files in the same module that import/reference a changed file.
1790
+ # More precise than directory proximity: based on actual import statements.
1791
+ _import_candidates = [
1792
+ p for p in all_paths
1793
+ if p not in existing_paths and Path(p).suffix.lower() in {
1794
+ ".py", ".java", ".kt", ".ts", ".js", ".tsx", ".jsx", ".mjs"
1795
+ }
1796
+ and (
1797
+ _extract_ddd_domain(p) in affected_modules_set
1798
+ or str(Path(p).parent).replace("\\", "/") in changed_dirs
1799
+ )
1800
+ ]
1801
+ if _import_candidates:
1802
+ _import_dep_map = self._scan_import_dependents(
1803
+ changed_paths=changed_files,
1804
+ candidate_paths=_import_candidates,
1805
+ )
1806
+ _import_seen = {rf.path for rf in relevant}
1807
+ _import_extra: list[tuple[float, str, RelevantFile]] = []
1808
+ for changed_path, dep_paths in _import_dep_map.items():
1809
+ changed_atype = classifications[changed_path]["artifact_type"]
1810
+ for dep_path in dep_paths:
1811
+ if dep_path in _import_seen:
1812
+ continue
1813
+ dep_cls = self._classify_changed_file(dep_path)
1814
+ if dep_cls["is_noise"]:
1815
+ continue
1816
+ dep_atype = dep_cls["artifact_type"]
1817
+ dep_base = _ARTIFACT_SCORE.get(dep_atype, 0.45)
1818
+ dep_score = round(dep_base * 0.70, 2)
1819
+ dep_role = _role_in_system(dep_path, dep_atype, dep_path in ep_paths)
1820
+ why_str = (
1821
+ f"artifact_type: {dep_atype} | role_in_system: {dep_role}"
1822
+ f" | impact_area: {_classify_impact_area(dep_path, dep_cls['risk_areas'], dep_atype)}"
1823
+ f" | propagation_risk: {_PROPAGATION_RISK.get(dep_atype, 'low')}"
1824
+ f" | change_effect: {_CHANGE_EFFECT.get(dep_atype, 'modifies application logic')}"
1825
+ f" | pulled_by: import-link from {Path(changed_path).name}"
1826
+ )
1827
+ reason = f"import-dependent of {Path(changed_path).name} ({changed_atype}) | score: {dep_score:.2f}"
1828
+ _import_extra.append((dep_score, dep_path, RelevantFile(
1829
+ path=dep_path, role=dep_role, score=dep_score,
1830
+ reason=reason, why=why_str,
1831
+ )))
1832
+ why[dep_path] = why_str
1833
+ _import_seen.add(dep_path)
1834
+ graph_edges.append({"from": changed_path, "to": dep_path, "edge_type": "import_dependency", "hop": 1})
1835
+ _import_extra.sort(key=lambda x: (-x[0], x[1]))
1836
+ relevant.extend(rf for _, _, rf in _import_extra[:8])
1837
+
1838
+ # ── Step 3c: hop-2 import propagation (callers of hop-1 dependents) ────
1839
+ _hop1_seeds = list(dict.fromkeys(
1840
+ dep_path
1841
+ for dep_paths in (_import_dep_map.values() if _import_candidates else [])
1842
+ for dep_path in dep_paths
1843
+ ))
1844
+ if _hop1_seeds:
1845
+ _hop2_seen = {rf.path for rf in relevant}
1846
+ _hop2_candidates = [
1847
+ p for p in all_paths
1848
+ if p not in _hop2_seen
1849
+ and Path(p).suffix.lower() in {".py", ".java", ".kt", ".ts", ".js", ".tsx", ".jsx", ".mjs"}
1850
+ and (
1851
+ _extract_ddd_domain(p) in affected_modules_set
1852
+ or str(Path(p).parent).replace("\\", "/") in changed_dirs
1853
+ )
1854
+ ]
1855
+ if _hop2_candidates:
1856
+ _hop2_dep_map = self._scan_import_dependents(
1857
+ changed_paths=_hop1_seeds,
1858
+ candidate_paths=_hop2_candidates,
1859
+ max_candidates=20,
1860
+ )
1861
+ _hop2_extra: list[tuple[float, str, RelevantFile]] = []
1862
+ for hop1_path, dep_paths in _hop2_dep_map.items():
1863
+ hop1_cls = self._classify_changed_file(hop1_path)
1864
+ hop1_atype = hop1_cls["artifact_type"]
1865
+ for dep_path in dep_paths:
1866
+ if dep_path in _hop2_seen:
1867
+ continue
1868
+ dep_cls = self._classify_changed_file(dep_path)
1869
+ if dep_cls["is_noise"]:
1870
+ continue
1871
+ dep_atype = dep_cls["artifact_type"]
1872
+ dep_base = _ARTIFACT_SCORE.get(dep_atype, 0.45)
1873
+ dep_score = round(dep_base * 0.50, 2)
1874
+ dep_role = _role_in_system(dep_path, dep_atype, dep_path in ep_paths)
1875
+ why_str = (
1876
+ f"artifact_type: {dep_atype} | role_in_system: {dep_role}"
1877
+ f" | impact_area: {_classify_impact_area(dep_path, dep_cls['risk_areas'], dep_atype)}"
1878
+ f" | propagation_risk: {_PROPAGATION_RISK.get(dep_atype, 'low')}"
1879
+ f" | change_effect: {_CHANGE_EFFECT.get(dep_atype, 'modifies application logic')}"
1880
+ f" | pulled_by: hop-2 import from {Path(hop1_path).name}"
1881
+ )
1882
+ reason = f"hop-2 import-dependent of {Path(hop1_path).name} ({hop1_atype}) | score: {dep_score:.2f}"
1883
+ _hop2_extra.append((dep_score, dep_path, RelevantFile(
1884
+ path=dep_path, role=dep_role, score=dep_score,
1885
+ reason=reason, why=why_str,
1886
+ )))
1887
+ why[dep_path] = why_str
1888
+ _hop2_seen.add(dep_path)
1889
+ graph_edges.append({"from": hop1_path, "to": dep_path, "edge_type": "import_dependency", "hop": 2})
1890
+ _hop2_extra.sort(key=lambda x: (-x[0], x[1]))
1891
+ relevant.extend(rf for _, _, rf in _hop2_extra[:5])
1892
+
1893
+ # ── Step 3d: per-file impact scores, change_type, system_impact ─────────
1894
+ # Downstream fanout: count graph edges originating from each changed file
1895
+ _downstream_count: dict[str, int] = {f: 0 for f in changed_files}
1896
+ for _edge in graph_edges:
1897
+ if _edge["from"] in _downstream_count:
1898
+ _downstream_count[_edge["from"]] += 1
1899
+
1900
+ impact_score_per_file: dict[str, float] = {}
1901
+ all_change_types: set[str] = set()
1902
+ for _path in changed_files:
1903
+ _cls = classifications[_path]
1904
+ _atype = _cls["artifact_type"]
1905
+ _file_ctypes = _ARTIFACT_CHANGE_TYPES.get(_atype, [])
1906
+ all_change_types.update(_file_ctypes)
1907
+ _base = _ARTIFACT_SCORE.get(_atype, 0.45)
1908
+ _sec_w = 0.20 if (_atype == "security" or "security" in _cls["risk_areas"]) else 0.0
1909
+ _fw = (
1910
+ 0.15 if _atype in {"entrypoint", "spring_config"} else
1911
+ 0.08 if _atype in {"security", "controller"} else
1912
+ 0.0
1913
+ )
1914
+ _fanout = min(0.15, _downstream_count.get(_path, 0) * 0.05)
1915
+ _ctw = (
1916
+ 0.10 if any(ct in ("behavioral_change", "security_change") for ct in _file_ctypes) else
1917
+ 0.05 if any(ct in ("structural_change", "dependency_change") for ct in _file_ctypes) else
1918
+ 0.02
1919
+ )
1920
+ impact_score_per_file[_path] = round(min(1.0, _base * 0.50 + _sec_w + _fw + _fanout + _ctw), 3)
1921
+
1922
+ _CT_ORDER = ["security_change", "behavioral_change", "structural_change",
1923
+ "configuration_change", "dependency_change", "ui_change"]
1924
+ aggregate_change_type = [ct for ct in _CT_ORDER if ct in all_change_types]
1925
+
1926
+ # system_impact block
1927
+ _IMPACT_AREA_TO_SUBSYSTEM = {
1928
+ "api_surface": "api_layer",
1929
+ "security_layer": "security_layer",
1930
+ "dependency_injection": "spring_di_layer",
1931
+ "persistence_layer": "persistence_layer",
1932
+ "transaction_boundary": "transaction_layer",
1933
+ "configuration": "configuration_layer",
1934
+ "build_system": "build_system",
1935
+ "ui_layer": "ui_layer",
1936
+ "integration_layer": "integration_layer",
1937
+ }
1938
+ _seen_subsys: set[str] = set()
1939
+ changed_subsystems: list[str] = []
1940
+ for _p, _cls in classifications.items():
1941
+ if not _cls["is_noise"]:
1942
+ _area = _classify_impact_area(_p, _cls["risk_areas"], _cls["artifact_type"])
1943
+ _subsys = _IMPACT_AREA_TO_SUBSYSTEM.get(_area)
1944
+ if _subsys and _subsys not in _seen_subsys:
1945
+ changed_subsystems.append(_subsys)
1946
+ _seen_subsys.add(_subsys)
1947
+
1948
+ behavioral_changes: list[str] = [
1949
+ f"{Path(_p).name}: {_CHANGE_EFFECT[_cls['artifact_type']]}"
1950
+ for _p, _cls in classifications.items()
1951
+ if not _cls["is_noise"]
1952
+ and any(ct in ("behavioral_change", "security_change")
1953
+ for ct in _ARTIFACT_CHANGE_TYPES.get(_cls["artifact_type"], []))
1954
+ ]
1955
+
1956
+ def _runtime_impact(tc: dict[str, int]) -> list[str]:
1957
+ _ri: list[str] = []
1958
+ if "entrypoint" in tc:
1959
+ _ri.append("Application bootstrap modified — full context restart required before deploy")
1960
+ if "spring_config" in tc:
1961
+ _ri.append("Spring ApplicationContext bean wiring modified — dependent beans rewired on restart")
1962
+ if "security" in tc:
1963
+ _ri.append("Security filter chain modified — all secured endpoints affected after restart")
1964
+ if "db_migration" in tc:
1965
+ _ri.append("Database schema migration pending — execute before deploying application")
1966
+ _svc = tc.get("service", 0)
1967
+ if _svc >= 2:
1968
+ _ri.append(f"{_svc} service(s) modified — verify transaction scope and data consistency")
1969
+ _repo = tc.get("repository", 0) + tc.get("mapper", 0)
1970
+ if _repo > 0:
1971
+ _ri.append(f"{_repo} persistence component(s) modified — verify data access queries")
1972
+ if "build_manifest" in tc:
1973
+ _ri.append("Build manifest modified — dependency resolution required before compile")
1974
+ return _ri
1975
+
1976
+ _max_hop = max((e["hop"] for e in graph_edges), default=0)
1977
+ dependency_graph_summary: dict = {
1978
+ "edges": graph_edges[:30],
1979
+ "propagation_depth": _max_hop,
1980
+ }
1981
+
1585
1982
  # ── Step 4: impact summary ─────────────────────────────────────────────
1586
1983
  type_counts: dict[str, int] = {}
1587
1984
  all_risk_areas: set[str] = set()
@@ -1609,7 +2006,7 @@ class TaskContextBuilder:
1609
2006
  "domain_model": "domain model(s)",
1610
2007
  "dto": "DTO(s)",
1611
2008
  "test": "test file(s)",
1612
- "generic_source": "source file(s)",
2009
+ "source": "source file(s)",
1613
2010
  "documentation": "documentation file(s)",
1614
2011
  }
1615
2012
 
@@ -1648,9 +2045,18 @@ class TaskContextBuilder:
1648
2045
  key=lambda x: (-_SEV_ORDER.index(x["severity"]), x["area"]),
1649
2046
  )
1650
2047
 
2048
+ # ── Build system_impact (needs risk_areas_out + type_counts) ─────────────
2049
+ system_impact = {
2050
+ "changed_subsystems": changed_subsystems,
2051
+ "behavioral_changes": behavioral_changes,
2052
+ "risk_areas": risk_areas_out,
2053
+ "runtime_impact": _runtime_impact(type_counts),
2054
+ }
2055
+
1651
2056
  # ── Step 6: analysis gaps ──────────────────────────────────────────────
2057
+ _hop_desc = f"+ hop-2 import propagation (propagation_depth={_max_hop})" if _max_hop >= 2 else ""
1652
2058
  analysis_gaps: list[str] = [
1653
- "Related file expansion uses type-aware propagation chains + module/directory heuristics import graph not traced",
2059
+ f"Related file expansion uses type-aware propagation chains + bounded import-link scanning {_hop_desc}+ module/directory heuristics".replace(" ", " ").strip(),
1654
2060
  ]
1655
2061
  if noise_count > 0 and meaningful > 0:
1656
2062
  analysis_gaps.append(
@@ -1681,6 +2087,10 @@ class TaskContextBuilder:
1681
2087
  risk_areas_out,
1682
2088
  why,
1683
2089
  analysis_gaps,
2090
+ system_impact,
2091
+ aggregate_change_type,
2092
+ dependency_graph_summary,
2093
+ impact_score_per_file,
1684
2094
  )
1685
2095
 
1686
2096
  def _get_git_changed_files(self, since: Optional[str] = None) -> list[str]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 1.22.0
3
+ Version: 1.23.0
4
4
  Summary: Deterministic codebase context for AI coding agents
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -221,7 +221,7 @@ Description-Content-Type: text/markdown
221
221
 
222
222
  **Compressed AI-ready context for Java/Spring enterprise codebases.**
223
223
 
224
- ![Version](https://img.shields.io/badge/version-1.22.0-blue)
224
+ ![Version](https://img.shields.io/badge/version-1.23.0-blue)
225
225
  ![Python](https://img.shields.io/badge/python-3.10%2B-green)
226
226
 
227
227
  ---
@@ -255,7 +255,7 @@ pipx install sourcecode
255
255
 
256
256
  ```bash
257
257
  sourcecode version
258
- # sourcecode 1.22.0
258
+ # sourcecode 1.23.0
259
259
  ```
260
260
 
261
261
  ---
@@ -1,10 +1,10 @@
1
- sourcecode/__init__.py,sha256=AYpzylZKC4FdV_cDIgkpP-gpSaG-icZE-DD43XcAFXA,103
1
+ sourcecode/__init__.py,sha256=hroJh2LmoLLrGGckoeR4GMbC9gYwm_6WNueh4slqXPM,103
2
2
  sourcecode/adaptive_scanner.py,sha256=RTNExwWPXzjgLaRueT7UuxkPj5ZEToWjGbx1j0LSZ9E,10250
3
3
  sourcecode/architecture_analyzer.py,sha256=MyBa0Hf5HmkudZQDLKrjcWDKETXETXl0mQX1swtTwAA,39091
4
4
  sourcecode/architecture_summary.py,sha256=z34_6v7cSwy98cof2UVciGho7SCrZ93tiqMmq5WNzRQ,20405
5
5
  sourcecode/ast_extractor.py,sha256=XgrZg2DcWcUm9r87cRG3KGO7IK2TIL_N-CvhSbUmmh4,49901
6
6
  sourcecode/classifier.py,sha256=pYve2J1LqtYssU3lYLMDz18PT-CjN5c18QYE7R_IG1Q,7507
7
- sourcecode/cli.py,sha256=gcOs2FiimQi8uS-ORhmkDvAZf3IiJgfUyYutqO1ECaQ,76407
7
+ sourcecode/cli.py,sha256=d2fAGBqy0xy2ubYe7TQ7oIgsMPPEfJj4W02_Gu_-d4M,76822
8
8
  sourcecode/code_notes_analyzer.py,sha256=y1MJBnPZHYp4i6cQCXUb9ATIyifS_qMQWjw_8lPkpsU,9215
9
9
  sourcecode/confidence_analyzer.py,sha256=xw_Jv8pAd0wd8t2vvQlorw8Ih0rSF3YCoFS8K-_4aXg,15762
10
10
  sourcecode/context_scorer.py,sha256=QpChSpsmaAYz91rXA4Ue5xzQmNz_ZboZN09YOHScq1U,14679
@@ -20,7 +20,7 @@ sourcecode/file_classifier.py,sha256=48ly5Z6exkzBy8lNy1AkdP4-oJqIA1zT3LZfffuTyDo
20
20
  sourcecode/git_analyzer.py,sha256=_pCg2V4d2aa17k9hayTzpexAj8syvyk4y9NYNvvgOAI,12802
21
21
  sourcecode/graph_analyzer.py,sha256=iUK-7pSV-cvGqqD2hENdYmhnm0wcXFEyK-xnu5ul8OU,62515
22
22
  sourcecode/metrics_analyzer.py,sha256=m0ENgtqKeBL17kUIK3fmGkgo7UfXBNHxCMj0H_Y5K7c,22750
23
- sourcecode/prepare_context.py,sha256=v9BMh1Ro2CssAPUwo3Ch7ml0R7X8c5c13eJs3e4m6FE,80841
23
+ sourcecode/prepare_context.py,sha256=8lzyOfS-CoVEcj8xJL0rXcoi7jDRR28TgwH1jtA0ok4,104485
24
24
  sourcecode/progress.py,sha256=qn30sWaHOkjTgXsSBmiPkz7Rsbwc5oSlIe6JNEMYp_k,3149
25
25
  sourcecode/ranking_engine.py,sha256=virVglafZufioHpZpwktjMvUiL0TZELWQCQnQNV8dFo,9360
26
26
  sourcecode/redactor.py,sha256=xuGcadGEHaPw4qZXlMDvzMCsr4VOkdp3oBQptHyJk8c,2884
@@ -61,8 +61,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
61
61
  sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
62
62
  sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
63
63
  sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
64
- sourcecode-1.22.0.dist-info/METADATA,sha256=sMO13GIMA6DnMIxi80QFCAwZte4pDIyZ3MiIVeWiEag,20626
65
- sourcecode-1.22.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
66
- sourcecode-1.22.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
67
- sourcecode-1.22.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
68
- sourcecode-1.22.0.dist-info/RECORD,,
64
+ sourcecode-1.23.0.dist-info/METADATA,sha256=uI_zgfrJv6CQCmX2X-KxDiZxRRE1mxqgXz6eE4FKtDQ,20626
65
+ sourcecode-1.23.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
66
+ sourcecode-1.23.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
67
+ sourcecode-1.23.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
68
+ sourcecode-1.23.0.dist-info/RECORD,,