sourcecode 1.20.0__py3-none-any.whl → 1.22.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sourcecode/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "1.20.0"
3
+ __version__ = "1.22.0"
sourcecode/cli.py CHANGED
@@ -1727,9 +1727,9 @@ def prepare_context_cmd(
1727
1727
  "changed_files": False, "affected_entry_points": False,
1728
1728
  },
1729
1729
  "delta": {
1730
- "project_summary": True, "architecture_summary": False,
1730
+ "project_summary": False, "architecture_summary": False,
1731
1731
  "relevant_files": True, "key_dependencies": False,
1732
- "gaps": False, "confidence": True,
1732
+ "gaps": True, "confidence": True,
1733
1733
  "suspected_areas": False, "improvement_opportunities": False,
1734
1734
  "test_gaps": False, "code_notes_summary": False,
1735
1735
  "changed_files": True, "affected_entry_points": True,
@@ -1771,6 +1771,18 @@ def prepare_context_cmd(
1771
1771
  out["changed_files"] = output.changed_files
1772
1772
  if _task_include("affected_entry_points") and output.affected_entry_points:
1773
1773
  out["affected_entry_points"] = output.affected_entry_points
1774
+ # Delta-specific impact fields
1775
+ if task == "delta":
1776
+ if output.since:
1777
+ out["since"] = output.since
1778
+ if output.impact_summary:
1779
+ out["impact_summary"] = output.impact_summary
1780
+ if output.affected_modules:
1781
+ out["affected_modules"] = output.affected_modules
1782
+ if output.risk_areas:
1783
+ out["risk_areas"] = output.risk_areas
1784
+ if output.why_these_files:
1785
+ out["reasoning"] = output.why_these_files
1774
1786
  if output.limitations:
1775
1787
  out["limitations"] = output.limitations
1776
1788
  if output.symptom:
@@ -324,6 +324,11 @@ class TaskOutput:
324
324
  symptom: Optional[str] = None # fix-bug only
325
325
  related_notes: list[dict] = field(default_factory=list) # fix-bug + symptom only
326
326
  symptom_note: Optional[str] = None # fix-bug: cross-layer synonym note
327
+ # delta-specific impact fields
328
+ impact_summary: Optional[str] = None
329
+ affected_modules: list[str] = field(default_factory=list)
330
+ risk_areas: list[dict] = field(default_factory=list)
331
+ since: Optional[str] = None
327
332
 
328
333
 
329
334
  # ─────────────────────────────────────────────────────────────────────────────
@@ -640,14 +645,37 @@ class TaskContextBuilder:
640
645
  test_set = {p for p in all_paths if self._is_test(p)}
641
646
  source_set = {p for p in all_paths if not self._is_test(p) and self._is_source(p)}
642
647
 
643
- relevant_files = self._rank_files(
644
- task_name, spec, all_paths, entry_set, test_set,
645
- monorepo_packages=sm.monorepo_packages if sm.monorepo_packages else None,
646
- git_hotspots=git_hotspots,
647
- uncommitted_files=uncommitted_files,
648
- code_notes=cn_notes_for_ranking if cn_notes_for_ranking else None,
649
- delta_files=_delta_files,
650
- )
648
+ # Delta uses a dedicated impact-analysis path — never the generic ranker.
649
+ _delta_impact_summary: Optional[str] = None
650
+ _delta_affected_modules: list[str] = []
651
+ _delta_risk_areas: list[dict] = []
652
+ _delta_why: dict[str, str] = {}
653
+ _delta_analysis_gaps: list[str] = []
654
+
655
+ if task_name == "delta":
656
+ _delta_changed_list: list[str] = sorted(_delta_files) if _delta_files else []
657
+ (
658
+ relevant_files,
659
+ _delta_impact_summary,
660
+ _delta_affected_modules,
661
+ _delta_risk_areas,
662
+ _delta_why,
663
+ _delta_analysis_gaps,
664
+ ) = self._build_delta_impact(
665
+ changed_files=_delta_changed_list,
666
+ all_paths=all_paths,
667
+ entry_points=entry_points,
668
+ since=since,
669
+ )
670
+ else:
671
+ relevant_files = self._rank_files(
672
+ task_name, spec, all_paths, entry_set, test_set,
673
+ monorepo_packages=sm.monorepo_packages if sm.monorepo_packages else None,
674
+ git_hotspots=git_hotspots,
675
+ uncommitted_files=uncommitted_files,
676
+ code_notes=cn_notes_for_ranking if cn_notes_for_ranking else None,
677
+ delta_files=None,
678
+ )
651
679
 
652
680
  # ── 6b. Symptom keyword boost + related notes (fix-bug + --symptom) ──
653
681
  symptom_keywords: list[str] = []
@@ -805,22 +833,37 @@ class TaskContextBuilder:
805
833
 
806
834
  conf_summary, analysis_gaps = ConfidenceAnalyzer().analyze(sm_for_conf)
807
835
  confidence = conf_summary.overall
808
- gaps = [g.reason for g in analysis_gaps]
809
- if _mybatis_warning:
810
- gaps.append(_mybatis_warning["reason"])
836
+ if task_name == "delta":
837
+ # Use delta-specific gaps; ConfidenceAnalyzer gaps are about full-repo
838
+ # detection quality and are not meaningful for an incremental diff.
839
+ gaps = _delta_analysis_gaps
840
+ if _mybatis_warning:
841
+ gaps.append(_mybatis_warning["reason"])
842
+ else:
843
+ gaps = [g.reason for g in analysis_gaps]
844
+ if _mybatis_warning:
845
+ gaps.append(_mybatis_warning["reason"])
811
846
 
812
847
  # ── 9. why_these_files ────────────────────────────────────────────────
813
- why_these_files: dict[str, str] = {
814
- rf.path: rf.reason for rf in relevant_files
815
- }
848
+ if task_name == "delta":
849
+ why_these_files = _delta_why
850
+ else:
851
+ why_these_files = {rf.path: rf.reason for rf in relevant_files}
816
852
 
817
- # ── 10. Delta: git changed files (reuse pre-computed set from step 5c) ──
853
+ # ── 10. Delta: git changed files + entry points ───────────────────────
818
854
  changed_files: list[str] = []
819
855
  affected_entry_points: list[str] = []
820
856
  if task_name == "delta":
821
857
  changed_files = sorted(_delta_files) if _delta_files else self._get_git_changed_files(since=since)
822
- ep_set = {ep.path for ep in entry_points}
823
- affected_entry_points = [f for f in changed_files if f in ep_set]
858
+ _ep_set = {ep.path for ep in entry_points}
859
+ # include framework-detected entry points AND files classified as
860
+ # entrypoint/controller by artifact taxonomy (CLI mains, Spring controllers)
861
+ _EP_ARTIFACT_TYPES = frozenset({"entrypoint", "controller"})
862
+ affected_entry_points = sorted({
863
+ f for f in changed_files
864
+ if f in _ep_set
865
+ or self._classify_changed_file(f)["artifact_type"] in _EP_ARTIFACT_TYPES
866
+ })
824
867
 
825
868
  return TaskOutput(
826
869
  task=task_name,
@@ -842,6 +885,10 @@ class TaskContextBuilder:
842
885
  symptom=symptom if task_name == "fix-bug" and symptom else None,
843
886
  related_notes=related_notes,
844
887
  symptom_note=symptom_note,
888
+ impact_summary=_delta_impact_summary,
889
+ affected_modules=_delta_affected_modules,
890
+ risk_areas=_delta_risk_areas,
891
+ since=since if task_name == "delta" else None,
845
892
  )
846
893
 
847
894
  def render_prompt(self, output: TaskOutput) -> str:
@@ -1133,6 +1180,509 @@ class TaskContextBuilder:
1133
1180
  def _is_source(self, path: str) -> bool:
1134
1181
  return Path(path).suffix.lower() in _SOURCE_EXTENSIONS
1135
1182
 
1183
+ # ── Delta impact analysis ─────────────────────────────────────────────────
1184
+
1185
@staticmethod
def _classify_changed_file(path: str) -> dict[str, Any]:
    """Classify a changed file by artifact type, risk areas, impact level, and confidence.

    Pure path/name heuristics — no file reads, fully deterministic.  Rules are
    evaluated top to bottom and the first match wins, so their order is part
    of the behaviour (tests are matched before services, security before
    controllers, canonical build/config names before generic stem keywords).

    Closed taxonomy (no unknown_* values ever emitted):
        entrypoint | controller | service | repository | mapper | config |
        spring_config | spring_profile | security | domain_model | dto |
        test | build_manifest | documentation | ide_noise | db_migration |
        generic_source

    Args:
        path: Path of the changed file; forward or back slashes are accepted.

    Returns:
        Dict with keys:
            artifact_type: one of the taxonomy values above.
            risk_areas: list of risk-area labels (may be empty).
            impact_level: "critical" | "high" | "medium" | "low" | "noise".
            is_noise: True only for ``ide_noise`` results.
            module: result of ``_extract_ddd_domain(path)``; "" for files
                under an IDE/tooling directory (the helper is not called).
            confidence: "high", or "low" when only the extension matched.
    """
    norm = path.replace("\\", "/")
    norm_lower = norm.lower()
    # Derive the name parts from the normalised path: on POSIX,
    # Path("a\\b\\Foo.java").name is the whole string, which would defeat
    # every stem-based rule below.
    normalised = Path(norm)
    name_lower = normalised.name.lower()
    stem_lower = normalised.stem.lower()
    suffix = normalised.suffix.lower()

    _CODE_EXTS = frozenset({
        ".py", ".js", ".ts", ".tsx", ".jsx", ".java", ".kt", ".go",
        ".rs", ".rb", ".php", ".cs", ".dart", ".mjs", ".cjs", ".scala",
    })
    _CONFIG_EXTS = frozenset({
        ".yml", ".yaml", ".json", ".xml", ".toml", ".properties",
        ".env", ".cfg", ".ini", ".conf",
    })
    _DOC_EXTS = (".md", ".rst", ".txt", ".adoc")

    # IDE/hidden-tool directories → noise, skip impact analysis
    _IDE_DIR_NAMES = frozenset({
        ".idea", ".vscode", ".eclipse", ".fleet", ".git", ".github",
        ".circleci", ".travis", ".teamcity", ".gradle", ".mvn",
    })
    path_dir_parts = norm_lower.split("/")[:-1]  # all components except filename
    if any(part in _IDE_DIR_NAMES for part in path_dir_parts):
        return {
            "artifact_type": "ide_noise",
            "risk_areas": [],
            "impact_level": "noise",
            "is_noise": True,
            "module": "",
            "confidence": "high",
        }

    module = _extract_ddd_domain(path)

    def _result(
        artifact_type: str,
        risk_areas: list[str],
        impact_level: str,
        confidence: str = "high",
        is_noise: bool = False,
    ) -> dict[str, Any]:
        # Single place that fixes the key set and key order of every result.
        return {
            "artifact_type": artifact_type,
            "risk_areas": risk_areas,
            "impact_level": impact_level,
            "is_noise": is_noise,
            "module": module,
            "confidence": confidence,
        }

    is_code = suffix in _CODE_EXTS

    def _stem_has(keywords: tuple[str, ...]) -> bool:
        return any(kw in stem_lower for kw in keywords)

    # Tests (before other checks to avoid misclassifying TestFoo as service etc.)
    # The length guard keeps a bare "test" stem from matching on its own.
    _is_test = (
        (len(stem_lower) > 4 and (stem_lower.startswith("test") or stem_lower.endswith("test")))
        or stem_lower.endswith(("tests", "spec"))
        or any(t in f"/{norm_lower}/" for t in (
            "/test/", "/tests/", "/spec/", "/specs/", "/__tests__/", "/it/",
        ))
    )
    if _is_test:
        return _result("test", ["tests"], "low")

    # Entrypoints: Spring Boot Application, CLI mains, framework entry files
    _ENTRYPOINT_NAMES = frozenset({
        "main.py", "app.py", "run.py", "server.py", "wsgi.py", "asgi.py",
        "__main__.py", "index.js", "index.ts", "server.js", "server.ts",
        "app.js", "app.ts", "main.js", "main.ts",
    })
    if (
        name_lower in _ENTRYPOINT_NAMES
        or (is_code and stem_lower in ("cli", "manage", "entrypoint", "startup", "launcher"))
        or (suffix in (".java", ".kt") and stem_lower.endswith("application"))
    ):
        return _result("entrypoint", ["api", "config"], "critical")

    # Security surface (extended: interceptor, filter, cors, acl)
    # NOTE(review): these are substring matches, so e.g. "author" matches
    # "auth" — kept as-is; tightening would change existing classifications.
    _SECURITY_KW = ("security", "auth", "jwt", "token", "permission", "role",
                    "credential", "encrypt", "decrypt", "oauth", "saml", "ldap",
                    "password", "secret", "interceptor", "filter", "cors", "acl")
    if is_code and _stem_has(_SECURITY_KW):
        impact = "critical" if _stem_has(("security", "auth", "jwt")) else "high"
        return _result("security", ["security"], impact)

    # API / controller layer
    _API_KW = ("controller", "restcontroller", "resource", "handler",
               "router", "route", "endpoint", "servlet")
    if is_code and _stem_has(_API_KW):
        return _result("controller", ["api"], "high")

    # Business logic / services (extended: facade, usecase, aspect, listener, component)
    _SERVICE_KW = ("service", "serviceimpl", "servicefacade", "facade", "usecase",
                   "interactor", "aspect", "listener", "subscriber", "eventhandler", "component")
    if is_code and _stem_has(_SERVICE_KW):
        return _result("service", ["transactions", "business_logic"], "high")

    # Data access / repositories
    _DAO_KW = ("repository", "repositoryimpl", "dao", "daoimpl", "store", "jparepository")
    if is_code and _stem_has(_DAO_KW):
        return _result("repository", ["persistence"], "high")

    # MyBatis / ORM mappers (any extension: mapper XML files count too)
    if "mapper" in stem_lower:
        return _result("mapper", ["persistence"], "high")

    # Spring / app config files (by canonical name)
    if name_lower in ("application.yml", "application.yaml", "application.properties",
                      "bootstrap.yml", "bootstrap.yaml", "bootstrap.properties"):
        return _result("spring_config", ["config"], "high")
    if name_lower.startswith("application-") and suffix in (".yml", ".yaml", ".properties"):
        return _result("spring_profile", ["config"], "medium")

    _BUILD_MANIFEST_NAMES = frozenset({
        "pom.xml", "build.gradle", "build.gradle.kts",
        "settings.gradle", "settings.gradle.kts",
        "pyproject.toml", "setup.py", "setup.cfg",
        "package.json", "package-lock.json", "yarn.lock",
        "cargo.toml", "go.mod", "go.sum",
        "gemfile", "gemfile.lock", "build.sbt",
        "requirements.txt", "requirements-dev.txt",
    })
    if name_lower in _BUILD_MANIFEST_NAMES:
        return _result("build_manifest", ["config", "dependencies"], "medium")

    # Configuration classes / files
    _CONFIG_STEM_KW = ("config", "configuration", "properties", "settings")
    if is_code and _stem_has(_CONFIG_STEM_KW):
        return _result("config", ["config"], "medium")

    # DB migrations / SQL.  The keyword match is on the whole path, so prose
    # files are excluded: otherwise CHANGELOG.md or docs/migration-guide.md
    # would be reported as a high-impact persistence change.
    _MIGRATION_KW = ("migration", "flyway", "liquibase", "changelog")
    if suffix == ".sql" or (
        suffix not in _DOC_EXTS and any(kw in norm_lower for kw in _MIGRATION_KW)
    ):
        return _result("db_migration", ["persistence"], "high")

    # Domain models / entities
    _ENTITY_KW = ("entity", "model", "domain", "aggregate", "valueobject")
    if is_code and _stem_has(_ENTITY_KW):
        return _result("domain_model", ["persistence"], "medium")

    # DTOs / request-response objects
    _DTO_KW = ("dto", "request", "response", "payload", "command", "query", "event")
    if is_code and _stem_has(_DTO_KW):
        return _result("dto", [], "low")

    # Generic source code — closed taxonomy, confidence=low signals uncertain classification
    if is_code:
        return _result("generic_source", [], "medium", confidence="low")

    # Generic config / data files — fold into config type
    if suffix in _CONFIG_EXTS:
        return _result("config", ["config"], "low", confidence="low")

    # Docs
    if suffix in _DOC_EXTS:
        return _result("documentation", [], "low")

    # Binaries, images, lock files — treat as noise (closed taxonomy: no unknown_*)
    return _result("ide_noise", [], "noise", confidence="low", is_noise=True)
1339
+
1340
def _build_delta_impact(
    self,
    changed_files: list[str],
    all_paths: list[str],
    entry_points: list,
    since: Optional[str],
) -> tuple[list[RelevantFile], str, list[str], list[dict[str, Any]], dict[str, str], list[str]]:
    """Build incremental impact analysis for changed files.

    Changed files are always included in relevant_files (never dropped by score).
    Related files are expanded type-aware: controller→service→repository→mapper chain,
    restricted to files sharing a module or directory with a changed file and
    capped at the 10 best-scoring candidates.
    Scoring is hierarchical by artifact_type, not by heuristic impact_level.

    Args:
        changed_files: Paths reported as changed by the diff.
        all_paths: Every path known for the repository; the expansion pool.
        entry_points: Objects exposing a ``path`` attribute.
        since: Git ref the diff was taken against; only used for labelling
            ("HEAD~1" is shown when it is empty/None).

    Returns:
        (relevant_files, impact_summary, affected_modules, risk_areas,
         why_these_files, analysis_gaps)

        ``why_these_files`` has exactly one entry per path in
        ``relevant_files``.
    """
    # Per-artifact deterministic scores — strictly ordered by semantic role
    _ARTIFACT_SCORE: dict[str, float] = {
        "entrypoint": 0.95,
        "security": 0.90,
        "controller": 0.85,
        "service": 0.80,
        "db_migration": 0.75,
        "repository": 0.70,
        "mapper": 0.65,
        "spring_config": 0.60,
        "config": 0.55,
        "spring_profile": 0.50,
        "domain_model": 0.50,
        "build_manifest": 0.45,
        "generic_source": 0.45,
        "dto": 0.35,
        "test": 0.30,
        "documentation": 0.25,
        "ide_noise": 0.10,
    }

    # impact_level per artifact_type — used for risk_areas severity ordering
    _ARTIFACT_IMPACT: dict[str, str] = {
        "entrypoint": "critical", "security": "critical",
        "controller": "high", "service": "high", "repository": "high",
        "mapper": "high", "db_migration": "high", "spring_config": "high",
        "config": "medium", "spring_profile": "medium",
        "build_manifest": "medium", "domain_model": "medium",
        "generic_source": "medium",
        "dto": "low", "test": "low", "documentation": "low", "ide_noise": "noise",
    }

    # propagation_risk per artifact_type
    _PROPAGATION_RISK: dict[str, str] = {
        "entrypoint": "high", "security": "high", "controller": "high",
        "db_migration": "high", "spring_config": "high",
        "service": "medium", "repository": "medium", "mapper": "medium",
        "config": "medium", "domain_model": "medium",
        "spring_profile": "low", "build_manifest": "low", "generic_source": "low",
        "dto": "low", "test": "low", "documentation": "low", "ide_noise": "low",
    }

    # type-aware expansion: which artifact types a changed type should pull in
    _EXPANSION_TARGETS: dict[str, frozenset[str]] = {
        "controller": frozenset({"service", "security", "dto"}),
        "service": frozenset({"repository", "mapper"}),
        "repository": frozenset({"mapper", "domain_model"}),
        "mapper": frozenset({"repository", "domain_model"}),
        "security": frozenset({"controller", "config", "spring_config"}),
        "spring_config": frozenset({"service", "config", "repository"}),
        "config": frozenset({"service", "repository", "controller"}),
        "entrypoint": frozenset({"security", "config", "spring_config"}),
        "dto": frozenset({"controller", "service"}),
        "domain_model": frozenset({"repository", "service"}),
        "db_migration": frozenset({"repository", "mapper"}),
        "spring_profile": frozenset({"service", "config"}),
        "generic_source": frozenset({"service", "repository"}),
        "test": frozenset(),
        "documentation": frozenset(),
        "ide_noise": frozenset(),
        "build_manifest": frozenset(),
    }

    # Severity rank: noise=0 … critical=4.  One table for every comparison.
    _SEV_ORDER = ["noise", "low", "medium", "high", "critical"]
    _SEV_RANK: dict[str, int] = {sev: idx for idx, sev in enumerate(_SEV_ORDER)}

    # Cap on expansion (non-changed) files appended to relevant_files.
    _MAX_RELATED = 10

    # primary impact area used in structured reasoning
    def _impact_area(risk_areas: list[str], atype: str) -> str:
        if "security" in risk_areas:
            return "security"
        if "api" in risk_areas:
            return "api"
        if "persistence" in risk_areas or "transactions" in risk_areas:
            return "persistence"
        if "config" in risk_areas or "dependencies" in risk_areas:
            return "config"
        if "tests" in risk_areas:
            return "tests"
        return {
            "controller": "api", "service": "business_logic",
            "repository": "persistence", "mapper": "persistence",
            "security": "security", "config": "config",
            "spring_config": "config", "spring_profile": "config",
            "build_manifest": "build", "domain_model": "persistence",
            "dto": "api", "db_migration": "persistence",
            "test": "tests", "entrypoint": "api",
            "generic_source": "unknown",
        }.get(atype, "unknown")

    def _role_in_system(atype: str, in_ep_paths: bool) -> str:
        if in_ep_paths or atype in ("entrypoint", "controller"):
            return "entrypoint"
        if atype in ("config", "spring_config", "spring_profile", "build_manifest"):
            return "config"
        if atype in ("dto", "domain_model", "test", "documentation"):
            return "leaf"
        return "dependency"

    def _structured_why(atype: str, module: str, role: str, risk_areas: list[str]) -> str:
        area = _impact_area(risk_areas, atype)
        prop = _PROPAGATION_RISK.get(atype, "low")
        parts = [
            f"artifact_type: {atype}",
            f"role_in_system: {role}",
            f"impact_area: {area}",
            f"propagation_risk: {prop}",
        ]
        if module:
            parts.append(f"module: {module}")
        return " | ".join(parts)

    def _parent_dir(p: str) -> str:
        return str(Path(p).parent).replace("\\", "/")

    if not changed_files:
        return (
            [],
            "No changes detected — verify the git ref passed to --since",
            [],
            [],
            {},
            ["No changed files found. Check that --since ref exists and the diff is non-empty."],
        )

    ep_paths = {ep.path for ep in entry_points}

    # ── Step 1: classify every changed file ───────────────────────────────
    classifications: dict[str, dict[str, Any]] = {
        f: self._classify_changed_file(f) for f in changed_files
    }

    # ── Step 2: build relevant_files from the changed set ─────────────────
    relevant: list[RelevantFile] = []
    why: dict[str, str] = {}
    affected_modules_set: set[str] = set()
    changed_dirs: set[str] = set()
    risk_acc: dict[str, dict[str, Any]] = {}  # area → {files, severity}
    ref_label = since or "HEAD~1"

    # union of expansion targets across all changed artifact types
    wanted_expansion_types: frozenset[str] = frozenset()

    for path, cls in classifications.items():
        atype = cls["artifact_type"]
        score = _ARTIFACT_SCORE.get(atype, 0.45)
        module = cls["module"]

        if module:
            affected_modules_set.add(module)
        if not cls["is_noise"]:
            parent = _parent_dir(path)
            if parent and parent != ".":
                changed_dirs.add(parent)

        # Each risk area keeps the highest severity seen among its files.
        impact_level = _ARTIFACT_IMPACT.get(atype, "medium")
        for area in cls["risk_areas"]:
            bucket = risk_acc.setdefault(area, {"files": [], "severity": "noise"})
            bucket["files"].append(path)
            if _SEV_RANK[impact_level] > _SEV_RANK[bucket["severity"]]:
                bucket["severity"] = impact_level

        wanted_expansion_types = wanted_expansion_types | _EXPANSION_TARGETS.get(atype, frozenset())

        in_ep = path in ep_paths
        role = _role_in_system(atype, in_ep)
        why_str = _structured_why(atype, module, role, cls["risk_areas"])
        reason = f"changed since {ref_label} | artifact: {atype} | score: {score:.2f}"

        relevant.append(RelevantFile(path=path, role=role, score=round(score, 2), reason=reason, why=why_str))
        why[path] = why_str

    relevant.sort(key=lambda f: (-f.score, f.path))

    # ── Step 3: type-aware expansion to related files ─────────────────────
    existing_paths = {rf.path for rf in relevant}

    # Index changed file names once by module and by parent directory, so the
    # "triggered_by" lookup below is a dict access instead of a rescan of
    # changed_files (and a repeat _extract_ddd_domain call) per candidate.
    names_by_module: dict[Any, list[str]] = {}
    names_by_dir: dict[str, list[str]] = {}
    for f in changed_files:
        fname = Path(f).name
        names_by_module.setdefault(_extract_ddd_domain(f), []).append(fname)
        names_by_dir.setdefault(_parent_dir(f), []).append(fname)

    related: list[tuple[float, str, RelevantFile]] = []
    related_why: dict[str, str] = {}
    for path in all_paths:
        if path in existing_paths:
            continue
        if Path(path).suffix.lower() not in _ALL_EXTENSIONS:
            continue

        rel_cls = self._classify_changed_file(path)
        if rel_cls["is_noise"]:
            continue

        rel_atype = rel_cls["artifact_type"]
        # only expand if this file's type is in the wanted expansion set
        if rel_atype not in wanted_expansion_types:
            continue

        parent = _parent_dir(path)
        path_module = _extract_ddd_domain(path)

        in_same_module = bool(path_module and path_module in affected_modules_set)
        in_same_dir = parent in changed_dirs

        if not (in_same_module or in_same_dir):
            continue

        # Expansion files are discounted relative to a changed file of the same type.
        rel_base = _ARTIFACT_SCORE.get(rel_atype, 0.45)
        rel_score = round(rel_base * 0.60, 2)
        if in_same_module:
            ctx_type, ctx_val = "module", path_module
            triggers = names_by_module.get(path_module, [])
        else:
            ctx_type, ctx_val = "directory", parent
            triggers = names_by_dir.get(parent, [])

        in_ep = path in ep_paths
        role = _role_in_system(rel_atype, in_ep)
        why_str = (
            f"artifact_type: {rel_atype} | role_in_system: {role}"
            f" | pulled_by: type-aware expansion from {ctx_type} '{ctx_val}'"
            f" | triggered_by: {', '.join(triggers[:3])}"
        )
        reason = f"expansion: {ctx_type} '{ctx_val}' | artifact: {rel_atype} | score: {rel_score:.2f}"
        related.append((rel_score, path, RelevantFile(
            path=path, role=role, score=rel_score, reason=reason, why=why_str
        )))
        related_why[path] = why_str

    related.sort(key=lambda x: (-x[0], x[1]))
    # Record reasoning only for candidates that survive the cap, so `why`
    # never describes a file that is absent from relevant_files.
    for _, rel_path, rel_file in related[:_MAX_RELATED]:
        relevant.append(rel_file)
        why[rel_path] = related_why[rel_path]

    # ── Step 4: impact summary ─────────────────────────────────────────────
    type_counts: dict[str, int] = {}
    all_risk_areas: set[str] = set()
    noise_count = 0
    for cls in classifications.values():
        t = cls["artifact_type"]
        type_counts[t] = type_counts.get(t, 0) + 1
        all_risk_areas.update(cls["risk_areas"])
        if cls["is_noise"]:
            noise_count += 1
    meaningful = len(changed_files) - noise_count

    _SUMMARY_LABELS: dict[str, str] = {
        "entrypoint": "entrypoint(s)",
        "security": "security file(s)",
        "controller": "controller(s)",
        "service": "service(s)",
        "repository": "repository/repositories",
        "mapper": "MyBatis mapper(s)",
        "spring_config": "Spring config file(s)",
        "spring_profile": "Spring profile config(s)",
        "config": "configuration file(s)",
        "build_manifest": "build manifest(s)",
        "db_migration": "database migration(s)",
        "domain_model": "domain model(s)",
        "dto": "DTO(s)",
        "test": "test file(s)",
        "generic_source": "source file(s)",
        "documentation": "documentation file(s)",
    }

    if meaningful == 0:
        impact_summary = (
            f"{noise_count} IDE/tooling file(s) changed"
            " — no semantic impact on application logic"
        )
    else:
        # Most severe artifact types first; ties keep first-seen order.
        parts = []
        for atype, count in sorted(
            type_counts.items(),
            key=lambda kv: -_SEV_RANK.get(_ARTIFACT_IMPACT.get(kv[0], "medium"), 0),
        ):
            if atype == "ide_noise":
                continue
            label = _SUMMARY_LABELS.get(atype, f"source file(s) ({atype})")
            parts.append(f"{count} {label}")
        impact_summary = "; ".join(parts) if parts else f"{meaningful} source file(s) changed"
        if all_risk_areas:
            impact_summary += f" — risk areas: {', '.join(sorted(all_risk_areas))}"
        if noise_count > 0:
            impact_summary += f" ({noise_count} IDE/tooling file(s) excluded)"

    # ── Step 5: risk_areas output list ─────────────────────────────────────
    risk_areas_out: list[dict[str, Any]] = sorted(
        [
            {
                "area": area,
                "severity": info["severity"],
                "affected_files": sorted(info["files"])[:5],
            }
            for area, info in risk_acc.items()
        ],
        key=lambda x: (-_SEV_RANK[x["severity"]], x["area"]),
    )

    # ── Step 6: analysis gaps ──────────────────────────────────────────────
    analysis_gaps: list[str] = [
        "Related file expansion uses type-aware propagation chains + module/directory heuristics — import graph not traced",
    ]
    if noise_count > 0 and meaningful > 0:
        analysis_gaps.append(
            f"{noise_count} IDE/tooling file(s) in diff excluded from impact analysis"
        )
    elif noise_count > 0 and meaningful == 0:
        analysis_gaps.append(
            "All changed files are IDE/tooling — no actionable semantic impact detected"
        )
    low_confidence = [f for f, cls in classifications.items() if cls.get("confidence") == "low" and not cls["is_noise"]]
    if low_confidence:
        analysis_gaps.append(
            f"{len(low_confidence)} file(s) classified with low confidence"
            " (artifact type inferred from extension only)"
            " — consider adding stem patterns to _classify_changed_file: "
            + ", ".join(Path(f).name for f in low_confidence[:3])
        )
    if not affected_modules_set and any(not cls["is_noise"] for cls in classifications.values()):
        analysis_gaps.append(
            "DDD module/package structure not detected in changed paths"
            " — related file expansion uses directory proximity only"
        )

    return (
        relevant,
        impact_summary,
        sorted(affected_modules_set),
        risk_areas_out,
        why,
        analysis_gaps,
    )
1685
+
1136
1686
  def _get_git_changed_files(self, since: Optional[str] = None) -> list[str]:
1137
1687
  """Get files changed since a git ref (default: HEAD~1) relative to self.root.
1138
1688
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 1.20.0
3
+ Version: 1.22.0
4
4
  Summary: Deterministic codebase context for AI coding agents
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -221,7 +221,7 @@ Description-Content-Type: text/markdown
221
221
 
222
222
  **Compressed AI-ready context for Java/Spring enterprise codebases.**
223
223
 
224
- ![Version](https://img.shields.io/badge/version-1.20.0-blue)
224
+ ![Version](https://img.shields.io/badge/version-1.22.0-blue)
225
225
  ![Python](https://img.shields.io/badge/python-3.10%2B-green)
226
226
 
227
227
  ---
@@ -255,7 +255,7 @@ pipx install sourcecode
255
255
 
256
256
  ```bash
257
257
  sourcecode version
258
- # sourcecode 1.20.0
258
+ # sourcecode 1.22.0
259
259
  ```
260
260
 
261
261
  ---
@@ -1,10 +1,10 @@
1
- sourcecode/__init__.py,sha256=QZACuNjk_A3P8zRH7TdSQLgIDesiUHq3J12ZlBSdjvo,103
1
+ sourcecode/__init__.py,sha256=AYpzylZKC4FdV_cDIgkpP-gpSaG-icZE-DD43XcAFXA,103
2
2
  sourcecode/adaptive_scanner.py,sha256=RTNExwWPXzjgLaRueT7UuxkPj5ZEToWjGbx1j0LSZ9E,10250
3
3
  sourcecode/architecture_analyzer.py,sha256=MyBa0Hf5HmkudZQDLKrjcWDKETXETXl0mQX1swtTwAA,39091
4
4
  sourcecode/architecture_summary.py,sha256=z34_6v7cSwy98cof2UVciGho7SCrZ93tiqMmq5WNzRQ,20405
5
5
  sourcecode/ast_extractor.py,sha256=XgrZg2DcWcUm9r87cRG3KGO7IK2TIL_N-CvhSbUmmh4,49901
6
6
  sourcecode/classifier.py,sha256=pYve2J1LqtYssU3lYLMDz18PT-CjN5c18QYE7R_IG1Q,7507
7
- sourcecode/cli.py,sha256=5Nhv7GdhG1i76eWRzlv8RCn6UkUJHSYiq6953_bDOBM,75912
7
+ sourcecode/cli.py,sha256=gcOs2FiimQi8uS-ORhmkDvAZf3IiJgfUyYutqO1ECaQ,76407
8
8
  sourcecode/code_notes_analyzer.py,sha256=y1MJBnPZHYp4i6cQCXUb9ATIyifS_qMQWjw_8lPkpsU,9215
9
9
  sourcecode/confidence_analyzer.py,sha256=xw_Jv8pAd0wd8t2vvQlorw8Ih0rSF3YCoFS8K-_4aXg,15762
10
10
  sourcecode/context_scorer.py,sha256=QpChSpsmaAYz91rXA4Ue5xzQmNz_ZboZN09YOHScq1U,14679
@@ -20,7 +20,7 @@ sourcecode/file_classifier.py,sha256=48ly5Z6exkzBy8lNy1AkdP4-oJqIA1zT3LZfffuTyDo
20
20
  sourcecode/git_analyzer.py,sha256=_pCg2V4d2aa17k9hayTzpexAj8syvyk4y9NYNvvgOAI,12802
21
21
  sourcecode/graph_analyzer.py,sha256=iUK-7pSV-cvGqqD2hENdYmhnm0wcXFEyK-xnu5ul8OU,62515
22
22
  sourcecode/metrics_analyzer.py,sha256=m0ENgtqKeBL17kUIK3fmGkgo7UfXBNHxCMj0H_Y5K7c,22750
23
- sourcecode/prepare_context.py,sha256=314QXlwlRj-cTKvKbyDZfaHLECZQBGpSKJJopXfXUQw,53217
23
+ sourcecode/prepare_context.py,sha256=v9BMh1Ro2CssAPUwo3Ch7ml0R7X8c5c13eJs3e4m6FE,80841
24
24
  sourcecode/progress.py,sha256=qn30sWaHOkjTgXsSBmiPkz7Rsbwc5oSlIe6JNEMYp_k,3149
25
25
  sourcecode/ranking_engine.py,sha256=virVglafZufioHpZpwktjMvUiL0TZELWQCQnQNV8dFo,9360
26
26
  sourcecode/redactor.py,sha256=xuGcadGEHaPw4qZXlMDvzMCsr4VOkdp3oBQptHyJk8c,2884
@@ -61,8 +61,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
61
61
  sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
62
62
  sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
63
63
  sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
64
- sourcecode-1.20.0.dist-info/METADATA,sha256=-xA4je3NwLeknIlLOwmGFovavH8n4uy-rxNaTwgSieo,20626
65
- sourcecode-1.20.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
66
- sourcecode-1.20.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
67
- sourcecode-1.20.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
68
- sourcecode-1.20.0.dist-info/RECORD,,
64
+ sourcecode-1.22.0.dist-info/METADATA,sha256=sMO13GIMA6DnMIxi80QFCAwZte4pDIyZ3MiIVeWiEag,20626
65
+ sourcecode-1.22.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
66
+ sourcecode-1.22.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
67
+ sourcecode-1.22.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
68
+ sourcecode-1.22.0.dist-info/RECORD,,