sourcecode 1.33.20__tar.gz → 1.33.22__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. {sourcecode-1.33.20 → sourcecode-1.33.22}/PKG-INFO +2 -2
  2. {sourcecode-1.33.20 → sourcecode-1.33.22}/README.md +1 -1
  3. {sourcecode-1.33.20 → sourcecode-1.33.22}/pyproject.toml +1 -1
  4. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/__init__.py +1 -1
  5. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/cli.py +278 -52
  6. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/mcp/runner.py +11 -6
  7. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/mcp/server.py +61 -0
  8. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/repository_ir.py +8 -0
  9. {sourcecode-1.33.20 → sourcecode-1.33.22}/.github/workflows/build-windows.yml +0 -0
  10. {sourcecode-1.33.20 → sourcecode-1.33.22}/.gitignore +0 -0
  11. {sourcecode-1.33.20 → sourcecode-1.33.22}/.ruff.toml +0 -0
  12. {sourcecode-1.33.20 → sourcecode-1.33.22}/CHANGELOG.md +0 -0
  13. {sourcecode-1.33.20 → sourcecode-1.33.22}/CONTRIBUTING.md +0 -0
  14. {sourcecode-1.33.20 → sourcecode-1.33.22}/LICENSE +0 -0
  15. {sourcecode-1.33.20 → sourcecode-1.33.22}/SECURITY.md +0 -0
  16. {sourcecode-1.33.20 → sourcecode-1.33.22}/raw +0 -0
  17. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/adaptive_scanner.py +0 -0
  18. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/architecture_analyzer.py +0 -0
  19. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/architecture_summary.py +0 -0
  20. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/ast_extractor.py +0 -0
  21. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/cache.py +0 -0
  22. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/canonical_ir.py +0 -0
  23. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/classifier.py +0 -0
  24. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/code_notes_analyzer.py +0 -0
  25. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/confidence_analyzer.py +0 -0
  26. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/context_scorer.py +0 -0
  27. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/context_summarizer.py +0 -0
  28. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/contract_model.py +0 -0
  29. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/contract_pipeline.py +0 -0
  30. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/coverage_parser.py +0 -0
  31. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/dependency_analyzer.py +0 -0
  32. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/detectors/__init__.py +0 -0
  33. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/detectors/base.py +0 -0
  34. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/detectors/csproj_parser.py +0 -0
  35. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/detectors/dart.py +0 -0
  36. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/detectors/dotnet.py +0 -0
  37. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/detectors/elixir.py +0 -0
  38. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/detectors/go.py +0 -0
  39. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/detectors/heuristic.py +0 -0
  40. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/detectors/hybrid.py +0 -0
  41. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/detectors/java.py +0 -0
  42. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/detectors/jvm_ext.py +0 -0
  43. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/detectors/nodejs.py +0 -0
  44. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/detectors/parsers.py +0 -0
  45. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/detectors/php.py +0 -0
  46. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/detectors/project.py +0 -0
  47. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/detectors/python.py +0 -0
  48. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/detectors/ruby.py +0 -0
  49. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/detectors/rust.py +0 -0
  50. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/detectors/systems.py +0 -0
  51. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/detectors/terraform.py +0 -0
  52. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/detectors/tooling.py +0 -0
  53. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/doc_analyzer.py +0 -0
  54. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/entrypoint_classifier.py +0 -0
  55. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/env_analyzer.py +0 -0
  56. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/error_schema.py +0 -0
  57. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/file_classifier.py +0 -0
  58. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/flow_analyzer.py +0 -0
  59. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/git_analyzer.py +0 -0
  60. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/graph_analyzer.py +0 -0
  61. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/license.py +0 -0
  62. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/mcp/__init__.py +0 -0
  63. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/mcp/onboarding/__init__.py +0 -0
  64. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/mcp/onboarding/applier.py +0 -0
  65. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/mcp/onboarding/backup.py +0 -0
  66. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/mcp/onboarding/detector.py +0 -0
  67. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/mcp/onboarding/planner.py +0 -0
  68. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/mcp/orchestrator.py +0 -0
  69. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/mcp/registry.py +0 -0
  70. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/mcp_nudge.py +0 -0
  71. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/metrics_analyzer.py +0 -0
  72. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/output_budget.py +0 -0
  73. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/path_filters.py +0 -0
  74. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/pr_comment_renderer.py +0 -0
  75. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/prepare_context.py +0 -0
  76. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/progress.py +0 -0
  77. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/ranking_engine.py +0 -0
  78. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/redactor.py +0 -0
  79. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/relevance_scorer.py +0 -0
  80. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/repo_classifier.py +0 -0
  81. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/ris.py +0 -0
  82. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/runtime_classifier.py +0 -0
  83. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/scanner.py +0 -0
  84. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/schema.py +0 -0
  85. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/semantic_analyzer.py +0 -0
  86. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/serializer.py +0 -0
  87. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/summarizer.py +0 -0
  88. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/telemetry/__init__.py +0 -0
  89. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/telemetry/config.py +0 -0
  90. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/telemetry/consent.py +0 -0
  91. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/telemetry/events.py +0 -0
  92. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/telemetry/filters.py +0 -0
  93. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/telemetry/transport.py +0 -0
  94. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/tree_utils.py +0 -0
  95. {sourcecode-1.33.20 → sourcecode-1.33.22}/src/sourcecode/workspace.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 1.33.20
3
+ Version: 1.33.22
4
4
  Summary: Persistent structural context and ultra-fast repeated analysis for AI coding agents
5
5
  License-File: LICENSE
6
6
  Keywords: agents,ai,codebase,context,developer-tools,llm
@@ -39,7 +39,7 @@ Description-Content-Type: text/markdown
39
39
 
40
40
  **Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
41
41
 
42
- ![Version](https://img.shields.io/badge/version-1.33.20-blue)
42
+ ![Version](https://img.shields.io/badge/version-1.33.22-blue)
43
43
  ![Python](https://img.shields.io/badge/python-3.10%2B-green)
44
44
 
45
45
  ---
@@ -2,7 +2,7 @@
2
2
 
3
3
  **Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
4
4
 
5
- ![Version](https://img.shields.io/badge/version-1.33.20-blue)
5
+ ![Version](https://img.shields.io/badge/version-1.33.22-blue)
6
6
  ![Python](https://img.shields.io/badge/python-3.10%2B-green)
7
7
 
8
8
  ---
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "sourcecode"
7
- version = "1.33.20"
7
+ version = "1.33.22"
8
8
  description = "Persistent structural context and ultra-fast repeated analysis for AI coding agents"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "1.33.20"
3
+ __version__ = "1.33.22"
@@ -1249,12 +1249,29 @@ def main(
1249
1249
  # ── Lookup ──────────────────────────────────────────────────────
1250
1250
  # Step 1: try L1 to obtain the core_hash needed for L2 key
1251
1251
  _l1_result = _cache_mod.read_core(target, _core_key)
1252
+
1253
+ # P1-A: --env-map misses L1 when base (em=False) exists.
1254
+ # Try the base key so env analysis can be injected lazily (<1 s)
1255
+ # instead of triggering a 17 s full rescan.
1256
+ _l1_needs_env_inject = False
1257
+ if _l1_result is None and env_map:
1258
+ _base_flags = _core_flags_str.replace(",em=True,", ",em=False,")
1259
+ _base_h8 = _hashlib.sha256(_base_flags.encode()).hexdigest()[:8]
1260
+ _base_key = f"{_git_sha}-{_base_h8}"
1261
+ _base_result = _cache_mod.read_core(target, _base_key)
1262
+ if _base_result is not None:
1263
+ _l1_result = _base_result
1264
+ _l1_needs_env_inject = True
1265
+
1252
1266
  if _l1_result is not None:
1253
1267
  _core_dict_l1, _core_hash = _l1_result
1254
1268
  _view_key = f"{_core_hash}-{_view_h}"
1255
1269
 
1256
- # Step 2: try L2 (exact view match)
1257
- _cache_hit_content = _cache_mod.read_view(target, _view_key)
1270
+ # Step 2: try L2 (exact view match).
1271
+ # Skip L2 for --changed-only: the stored view is a previous
1272
+ # diff snapshot that is stale for the current diff.
1273
+ if not changed_only:
1274
+ _cache_hit_content = _cache_mod.read_view(target, _view_key)
1258
1275
 
1259
1276
  # Step 3: L1 hit but L2 miss → rebuild view from core dict
1260
1277
  if _cache_hit_content is None:
@@ -1268,6 +1285,40 @@ def main(
1268
1285
  no_tree=no_tree,
1269
1286
  tree=tree,
1270
1287
  )
1288
+ # P1-A: inject env analysis when base L1 (em=False) was used.
1289
+ # EnvAnalyzer walks only env/config files — typically <1 s.
1290
+ if _rebuilt is not None and _l1_needs_env_inject and compact:
1291
+ try:
1292
+ from sourcecode.env_analyzer import EnvAnalyzer as _EnvA_p1a
1293
+ _env_r_p1a, _env_s_p1a = _EnvA_p1a().analyze(target, {})
1294
+ if _env_s_p1a and (getattr(_env_s_p1a, "total", 0) or _env_r_p1a):
1295
+ _es_p1a: dict = {
1296
+ "total": getattr(_env_s_p1a, "total", 0),
1297
+ "required": getattr(_env_s_p1a, "required_count", 0),
1298
+ }
1299
+ _cats = getattr(_env_s_p1a, "categories", None)
1300
+ if _cats:
1301
+ _es_p1a["categories"] = _cats
1302
+ _rebuilt = dict(_rebuilt)
1303
+ _rebuilt["env_summary"] = _es_p1a
1304
+ if _env_r_p1a:
1305
+ _sorted_er = sorted(
1306
+ _env_r_p1a,
1307
+ key=lambda e: (
1308
+ not getattr(e, "required", False),
1309
+ getattr(e, "key", ""),
1310
+ ),
1311
+ )
1312
+ _rebuilt["env_map"] = [
1313
+ {
1314
+ "key": getattr(e, "key", ""),
1315
+ **({"required": True} if getattr(e, "required", False) else {}),
1316
+ **({"category": getattr(e, "category", None)} if getattr(e, "category", None) else {}),
1317
+ }
1318
+ for e in _sorted_er[:15]
1319
+ ]
1320
+ except Exception:
1321
+ pass # env inject failed — continue without env data
1271
1322
  if _rebuilt is not None:
1272
1323
  # Apply redaction
1273
1324
  if not no_redact:
@@ -1306,8 +1357,8 @@ def main(
1306
1357
  _cache_hit_content = _json_l1.dumps(
1307
1358
  _rebuilt, indent=2, ensure_ascii=False
1308
1359
  )
1309
- # Cache rebuilt view in L2
1310
- if _cache_hit_content:
1360
+ # Cache rebuilt view in L2 (skip for --changed-only: stale diff)
1361
+ if _cache_hit_content and not changed_only:
1311
1362
  _cache_mod.write_view(
1312
1363
  target,
1313
1364
  _view_key,
@@ -1323,40 +1374,104 @@ def main(
1323
1374
  _view_key = ""
1324
1375
  _core_hash = ""
1325
1376
 
1326
- if _cache_hit_content is not None and not changed_only:
1377
+ if _cache_hit_content is not None:
1327
1378
  from sourcecode.serializer import write_output
1328
- if format == "json":
1329
- try:
1330
- from sourcecode.ris import _has_uncommitted_changes as _huc
1331
- _uncommitted = _huc(target)
1332
- except Exception:
1333
- _uncommitted = False
1334
- _hit_source = "L2_view" if (_view_key and _core_hash) else "L1_core"
1335
- _data_scope = "COMPACT" if compact else ("AGENT" if agent else "FULL")
1336
- # Recover generated_at from cached content before overwriting _cache block.
1337
- _cached_generated_at = None
1379
+ _hit_source = "L2_view" if (_view_key and _core_hash) else "L1_core"
1380
+
1381
+ # P0-A/B/C: --changed-only fast path via warm cache.
1382
+ if changed_only:
1383
+ _co_git_ok = False
1384
+ _co_uc_files: set[str] = set()
1338
1385
  try:
1339
- import json as _json_ga
1340
- _cached_generated_at = (
1341
- _json_ga.loads(_cache_hit_content)
1342
- .get("_cache", {})
1343
- .get("generated_at")
1344
- )
1386
+ from sourcecode.git_analyzer import GitAnalyzer as _GitAnalyzerCO
1387
+ _gc_co = _GitAnalyzerCO().analyze(target, depth=1, days=1)
1388
+ _bad_gc_co = {"no_git_repo", "git_not_found", "git_timeout"}
1389
+ if _gc_co and not (_bad_gc_co & set(_gc_co.limitations)):
1390
+ _co_git_ok = True
1391
+ _uc_co = _gc_co.uncommitted_changes
1392
+ if _uc_co:
1393
+ _uc_untracked = {p for p in _uc_co.untracked if not p.endswith("/")}
1394
+ _co_uc_files = set(_uc_co.staged) | set(_uc_co.unstaged) | _uc_untracked
1345
1395
  except Exception:
1346
1396
  pass
1347
- _cache_hit_content = _inject_cache_meta(_cache_hit_content, {
1348
- "cache_source": _hit_source,
1349
- "git_head_at_generation": _git_sha,
1350
- "current_git_head": _git_sha,
1351
- "is_stale": False,
1352
- "has_uncommitted_changes": _uncommitted,
1353
- "generated_at": _cached_generated_at,
1354
- "data_scope": _data_scope,
1355
- })
1356
- write_output(_cache_hit_content, output=output)
1357
- if copy and not output:
1358
- _copy_to_clipboard(_cache_hit_content)
1359
- return
1397
+
1398
+ if not _co_git_ok:
1399
+ # Git unavailable — disable changed_only, fall through to normal cached output.
1400
+ changed_only = False
1401
+ typer.echo("[changed-only] git unavailable — falling back to full scan.", err=True)
1402
+ elif not _co_uc_files:
1403
+ # Clean repo — unified empty schema.
1404
+ _co_clean = json.dumps({
1405
+ "schema_version": "1.0",
1406
+ "changed_files_count": 0,
1407
+ "changed_files": [],
1408
+ "analysis_scope": "empty",
1409
+ "context": None,
1410
+ "_meta": {"changed_only": True, "cache_source": _hit_source},
1411
+ }, ensure_ascii=False)
1412
+ write_output(_co_clean, output=output)
1413
+ if copy and not output:
1414
+ _copy_to_clipboard(_co_clean)
1415
+ return
1416
+ else:
1417
+ # Dirty repo — filter file_paths in cached compact, unified schema.
1418
+ try:
1419
+ _co_base = json.loads(_cache_hit_content)
1420
+ _fps_all = _co_base.get("file_paths", [])
1421
+ _co_base["file_paths"] = [
1422
+ p for p in _fps_all
1423
+ if p in _co_uc_files or _is_always_include_ref(p)
1424
+ ]
1425
+ _co_base.pop("_cache", None)
1426
+ _co_dirty = json.dumps({
1427
+ "schema_version": "1.0",
1428
+ "changed_files_count": len(_co_uc_files),
1429
+ "changed_files": sorted(_co_uc_files),
1430
+ "analysis_scope": "partial",
1431
+ "context": _co_base,
1432
+ "_meta": {"changed_only": True, "cache_source": _hit_source},
1433
+ }, indent=2, ensure_ascii=False)
1434
+ write_output(_co_dirty, output=output)
1435
+ if copy and not output:
1436
+ _copy_to_clipboard(_co_dirty)
1437
+ return
1438
+ except Exception:
1439
+ # Parse failed — fall through to full scan.
1440
+ changed_only = False
1441
+ typer.echo("[changed-only] cache parse failed — falling back to full scan.", err=True)
1442
+
1443
+ if not changed_only:
1444
+ if format == "json":
1445
+ try:
1446
+ from sourcecode.ris import _has_uncommitted_changes as _huc
1447
+ _uncommitted = _huc(target)
1448
+ except Exception:
1449
+ _uncommitted = False
1450
+ _data_scope = "COMPACT" if compact else ("AGENT" if agent else "FULL")
1451
+ # Recover generated_at from cached content before overwriting _cache block.
1452
+ _cached_generated_at = None
1453
+ try:
1454
+ import json as _json_ga
1455
+ _cached_generated_at = (
1456
+ _json_ga.loads(_cache_hit_content)
1457
+ .get("_cache", {})
1458
+ .get("generated_at")
1459
+ )
1460
+ except Exception:
1461
+ pass
1462
+ _cache_hit_content = _inject_cache_meta(_cache_hit_content, {
1463
+ "cache_source": _hit_source,
1464
+ "git_head_at_generation": _git_sha,
1465
+ "current_git_head": _git_sha,
1466
+ "is_stale": False,
1467
+ "has_uncommitted_changes": _uncommitted,
1468
+ "generated_at": _cached_generated_at,
1469
+ "data_scope": _data_scope,
1470
+ })
1471
+ write_output(_cache_hit_content, output=output)
1472
+ if copy and not output:
1473
+ _copy_to_clipboard(_cache_hit_content)
1474
+ return
1360
1475
 
1361
1476
  _extra_excludes: Optional[frozenset[str]] = None
1362
1477
  if exclude:
@@ -1993,12 +2108,12 @@ def main(
1993
2108
  changed_only = False
1994
2109
  if _git_confirmed_clean:
1995
2110
  _nc_payload = json.dumps({
2111
+ "schema_version": "1.0",
1996
2112
  "changed_files_count": 0,
1997
2113
  "changed_files": [],
1998
- "message": "no uncommitted changes detected",
1999
2114
  "analysis_scope": "empty",
2000
- "note": "No uncommitted changes detected. No output produced — use without --changed-only for full context.",
2001
- "_meta": {"changed_only": True},
2115
+ "context": None,
2116
+ "_meta": {"changed_only": True, "cache_source": "none"},
2002
2117
  }, ensure_ascii=False)
2003
2118
  write_output(_nc_payload, output=output)
2004
2119
  raise typer.Exit()
@@ -2092,20 +2207,17 @@ def main(
2092
2207
  # FIX-P0-2: agent mode must honour --format yaml (previously always emitted JSON).
2093
2208
  content = _serialize_dict(data, format)
2094
2209
  elif compact:
2095
- if changed_only and _allowed_changed_files:
2096
- # GAP-5: preserve full entry_points for architecture context even in
2097
- # --changed-only mode. Only filter file_paths and code_notes.
2098
- # ALWAYS-INCLUDE: security-const files must stay in file_paths even when
2099
- # not in the git diff — they resolve Java constant references used in
2100
- # @M3FiltroSeguridad annotations (read-only anchors, not diff output).
2101
- sm = _replace(sm,
2102
- file_paths=[
2103
- p for p in sm.file_paths
2104
- if p in _allowed_changed_files or _is_always_include_ref(p)
2105
- ],
2106
- code_notes=[n for n in sm.code_notes if n.path in _allowed_changed_files],
2107
- )
2210
+ # P0-C: compute compact_view with full sm.file_paths so mybatis/angular
2211
+ # counts are correct; filter file_paths in the output dict, not in sm.
2108
2212
  data = compact_view(sm, no_tree=no_tree, full=full)
2213
+ if changed_only and _allowed_changed_files:
2214
+ # GAP-5: preserve full entry_points; filter file_paths display only.
2215
+ # ALWAYS-INCLUDE: security-const files stay for Java constant refs.
2216
+ _fps_full = data.get("file_paths", [])
2217
+ data["file_paths"] = [
2218
+ p for p in _fps_full
2219
+ if p in _allowed_changed_files or _is_always_include_ref(p)
2220
+ ]
2109
2221
  if not no_redact:
2110
2222
  data = redact_dict(data)
2111
2223
  # P0-1: Apply output budget — safety net for large repos.
@@ -2157,6 +2269,20 @@ def main(
2157
2269
  "generated_at": _dt.datetime.now(_dt.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
2158
2270
  "data_scope": _data_scope_fresh,
2159
2271
  })
2272
+ # P0-B: wrap --changed-only cold-start dirty output in unified schema.
2273
+ if changed_only and compact:
2274
+ try:
2275
+ _co_inner = json.loads(content)
2276
+ content = json.dumps({
2277
+ "schema_version": "1.0",
2278
+ "changed_files_count": len(_allowed_changed_files) if _allowed_changed_files else 0,
2279
+ "changed_files": sorted(_allowed_changed_files) if _allowed_changed_files else [],
2280
+ "analysis_scope": "partial" if _allowed_changed_files else "empty",
2281
+ "context": _co_inner,
2282
+ "_meta": {"changed_only": True, "cache_source": "fresh"},
2283
+ }, indent=2, ensure_ascii=False)
2284
+ except Exception:
2285
+ pass
2160
2286
  write_output(content, output=output)
2161
2287
 
2162
2288
  # Persist to two-layer cache (git SHA unchanged → re-use on next run).
@@ -2169,7 +2295,9 @@ def main(
2169
2295
  # Writes happen in a background daemon thread so cold-run latency is not
2170
2296
  # penalised by gzip encoding + disk I/O. atexit join ensures writes
2171
2297
  # complete on clean exit without blocking the user-visible response.
2172
- if not no_cache and _core_key and not _pipeline_error:
2298
+ # Skip cache write for --changed-only: content is a filtered diff view,
2299
+ # not valid for reuse as a base compact on subsequent runs.
2300
+ if not no_cache and _core_key and not _pipeline_error and not changed_only:
2173
2301
  import atexit as _atexit
2174
2302
  import threading as _threading
2175
2303
 
@@ -2566,6 +2694,42 @@ def prepare_context_cmd(
2566
2694
  from dataclasses import asdict
2567
2695
  import time as _time
2568
2696
 
2697
+ # Task-level cache: keyed on (task, git_head, symptom) so warm calls complete in <1s.
2698
+ # Skip for diff-dependent tasks (delta, review-pr), fast mode, and llm_prompt
2699
+ # (those embed per-call content that must not be served from cache).
2700
+ import subprocess as _pctx_sub
2701
+ import hashlib as _pctx_hash
2702
+ from sourcecode import cache as _pctx_cache
2703
+ _pctx_git_sha = ""
2704
+ _pctx_cache_key = ""
2705
+ _pctx_cacheable = task not in ("delta", "review-pr") and not fast and not llm_prompt
2706
+ if _pctx_cacheable:
2707
+ try:
2708
+ _sha_r2 = _pctx_sub.run(
2709
+ ["git", "-C", str(target), "rev-parse", "--short", "HEAD"],
2710
+ capture_output=True, text=True, timeout=3,
2711
+ )
2712
+ _pctx_git_sha = _sha_r2.stdout.strip()
2713
+ except Exception:
2714
+ pass
2715
+ if _pctx_git_sha:
2716
+ _sym_h = _pctx_hash.sha256((symptom or "").encode()).hexdigest()[:8]
2717
+ _pctx_cache_key = f"pctx-{task}-{_pctx_git_sha}-{_sym_h}-{format or 'json'}"
2718
+ _cached_pctx = _pctx_cache.read(target, _pctx_cache_key)
2719
+ if _cached_pctx is not None:
2720
+ if output_path is not None:
2721
+ output_path.write_text(_cached_pctx, encoding="utf-8")
2722
+ else:
2723
+ sys.stdout.buffer.write(_cached_pctx.encode("utf-8"))
2724
+ if not _cached_pctx.endswith("\n"):
2725
+ sys.stdout.buffer.write(b"\n")
2726
+ sys.stdout.buffer.flush()
2727
+ if copy:
2728
+ _c = _cached_pctx.strip()
2729
+ if _c not in ("{}", "[]", "null"):
2730
+ _copy_to_clipboard(_cached_pctx)
2731
+ return
2732
+
2569
2733
  builder = TaskContextBuilder(target)
2570
2734
  _progress = Progress()
2571
2735
  _phase = f"analyzing ({task})"
@@ -2950,6 +3114,12 @@ def prepare_context_cmd(
2950
3114
  else:
2951
3115
  _pc_content = json.dumps(out, indent=2, ensure_ascii=False)
2952
3116
 
3117
+ if _pctx_cacheable and _pctx_cache_key and format != "github-comment":
3118
+ try:
3119
+ _pctx_cache.write(target, _pctx_cache_key, _pc_content)
3120
+ except Exception:
3121
+ pass
3122
+
2953
3123
  if output_path is not None:
2954
3124
  output_path.write_text(_pc_content, encoding="utf-8")
2955
3125
  else:
@@ -3082,6 +3252,11 @@ def repo_ir_cmd(
3082
3252
  "-c",
3083
3253
  help="Copy output to system clipboard after a successful run. No-op when --output is used or clipboard is unavailable.",
3084
3254
  ),
3255
+ force: bool = typer.Option(
3256
+ False,
3257
+ "--force",
3258
+ help="Bypass the token-size guard and emit output even when estimated tokens exceed 50K.",
3259
+ ),
3085
3260
  ) -> None:
3086
3261
  """Deterministic symbol-level IR for Java repositories.
3087
3262
 
@@ -3179,6 +3354,26 @@ def repo_ir_cmd(
3179
3354
  err=True,
3180
3355
  )
3181
3356
  else:
3357
+ _ir_size = len(output.encode("utf-8"))
3358
+ _ir_tokens_est = _ir_size // 4
3359
+ # P1-C: abort when estimated tokens > 50K unless --force or --output is given.
3360
+ if _ir_tokens_est > 50_000 and not force:
3361
+ _emit_error_json(
3362
+ "OUTPUT_TOO_LARGE",
3363
+ f"Estimated output is ~{_ir_tokens_est // 1000}K tokens — too large for most LLM context windows.",
3364
+ hint=(
3365
+ "Use --summary-only (~5K tokens), --max-nodes N --max-edges N, "
3366
+ "--output FILE to save to disk, or --force to bypass this guard."
3367
+ ),
3368
+ expected="Output under 50K estimated tokens.",
3369
+ )
3370
+ raise typer.Exit(1)
3371
+ if _ir_tokens_est > 10_000:
3372
+ sys.stderr.write(
3373
+ f"[repo-ir] ~{_ir_tokens_est // 1000}K tokens — "
3374
+ "use --summary-only or --output FILE for smaller output.\n"
3375
+ )
3376
+ sys.stderr.flush()
3182
3377
  try:
3183
3378
  sys.stdout.buffer.write(output.encode("utf-8"))
3184
3379
  sys.stdout.buffer.write(b"\n")
@@ -3943,17 +4138,48 @@ def config_cmd() -> None:
3943
4138
  @app.command("cold-start")
3944
4139
  def cold_start_cmd(
3945
4140
  path: Path = typer.Argument(Path("."), help="Repository path (default: current directory)"),
4141
+ compact: bool = typer.Option(
4142
+ False,
4143
+ "--compact",
4144
+ help="Emit a compact subset (~10K tokens): status, git_head, stacks, entry_points, and key_dependencies only.",
4145
+ ),
3946
4146
  ) -> None:
3947
4147
  """Output Repository Intelligence Snapshot bootstrap context as JSON.
3948
4148
 
3949
4149
  Returns instantly from persisted RIS — zero re-analysis cost.
3950
4150
  status: cold_start_ready | cold_start_stale | no_ris
4151
+
4152
+ \b
4153
+ Note: Full output is large (~100K–200K tokens for medium repos).
4154
+ Use --compact for a ~10K token subset safe for direct LLM injection.
4155
+ Use --output FILE to save the full snapshot for local search tools.
3951
4156
  """
3952
4157
  import json as _json
3953
4158
  from sourcecode.ris import get_cold_start_context as _gcs
3954
4159
  target = Path(path).resolve()
3955
4160
  result = _gcs(target)
3956
- typer.echo(_json.dumps(result, indent=2, ensure_ascii=False))
4161
+ if compact:
4162
+ # P1-C: cap at ~10K tokens — keep only fields essential for orientation.
4163
+ _cs_keys = {"status", "git_head", "stacks", "entry_points",
4164
+ "key_dependencies", "project_type", "project_summary",
4165
+ "validation", "_meta"}
4166
+ result = {k: v for k, v in result.items() if k in _cs_keys}
4167
+ result["_meta"] = {**(result.get("_meta") or {}), "compact_mode": True,
4168
+ "full_available": "sourcecode cold-start (without --compact)"}
4169
+ _out = _json.dumps(result, indent=2, ensure_ascii=False)
4170
+ _size = len(_out.encode("utf-8"))
4171
+ _tokens = _size // 4
4172
+ _out_with_meta = _json.loads(_out)
4173
+ _out_with_meta.setdefault("_meta", {})["estimated_tokens"] = _tokens
4174
+ _out = _json.dumps(_out_with_meta, indent=2, ensure_ascii=False)
4175
+ if not compact and _size > 400_000:
4176
+ sys.stderr.write(
4177
+ f"WARNING: Output is ~{_tokens // 1000}K tokens. This exceeds the context window of "
4178
+ "most LLMs (GPT-4o: 128K, Claude Sonnet: 200K). "
4179
+ "Use --compact for a ~10K token subset, or --output FILE to save.\n"
4180
+ )
4181
+ sys.stderr.flush()
4182
+ typer.echo(_out)
3957
4183
 
3958
4184
 
3959
4185
  # ── analyze (legacy alias) ────────────────────────────────────────────────────
@@ -51,15 +51,20 @@ def run_command(args: list[str]) -> Any:
51
51
  stderr_raw = getattr(result, "stderr", "")
52
52
  stdout = stdout_raw.strip() if isinstance(stdout_raw, str) else ""
53
53
  stderr = stderr_raw.strip() if isinstance(stderr_raw, str) else ""
54
- error_text = stderr or stdout
54
+ # P1-B: structured errors (e.g. pro_required) are written to stdout as
55
+ # JSON while stderr carries the human-readable message. Try stdout first
56
+ # for JSON; fall back to stderr so we never lose a structured payload.
55
57
  payload = None
56
- if error_text:
58
+ for _candidate in (stdout, stderr):
59
+ if not _candidate:
60
+ continue
57
61
  try:
58
- parsed = json.loads(error_text)
62
+ _parsed = json.loads(_candidate)
59
63
  except json.JSONDecodeError:
60
- parsed = None
61
- if isinstance(parsed, dict):
62
- payload = parsed
64
+ continue
65
+ if isinstance(_parsed, dict):
66
+ payload = _parsed
67
+ break
63
68
  raise CommandError(
64
69
  f"sourcecode command failed (exit {result.exit_code}). Args: {args}",
65
70
  exit_code=result.exit_code,
@@ -28,6 +28,41 @@ from sourcecode.error_schema import (
28
28
  )
29
29
  from sourcecode.mcp.runner import CommandError, run_command
30
30
 
31
+ # Patch FastMCP's Tool.run to intercept pydantic.ValidationError and return
32
+ # structured JSON instead of the raw "Error executing tool X: 1 validation
33
+ # error..." plain-text string that FastMCP produces by default.
34
+ try:
35
+ import pydantic as _pydantic
36
+ from mcp.server.fastmcp.tools.base import Tool as _FastMCPTool
37
+
38
+ _orig_tool_run = _FastMCPTool.run
39
+
40
+ async def _patched_tool_run(self, arguments, context=None, convert_result=False): # type: ignore[override]
41
+ try:
42
+ return await _orig_tool_run(self, arguments, context=context, convert_result=convert_result)
43
+ except Exception as _exc:
44
+ _cause = getattr(_exc, "__cause__", None)
45
+ if isinstance(_cause, _pydantic.ValidationError):
46
+ _errors = _cause.errors()
47
+ _missing = [str(e.get("loc", ("?",))[0]) for e in _errors if e.get("type") == "missing"]
48
+ _msg = f"Missing required field: {_missing[0]}" if _missing else "Argument validation failed"
49
+ _payload = {
50
+ "success": False,
51
+ "data": None,
52
+ "error": build_error_object(
53
+ INVALID_INPUT_CODE,
54
+ _msg,
55
+ hint="Pass the supported arguments using the documented tool schema.",
56
+ expected=f"{self.name} arguments with required field '{_missing[0]}'" if _missing else f"{self.name} arguments",
57
+ ),
58
+ }
59
+ return _payload
60
+ raise
61
+
62
+ _FastMCPTool.run = _patched_tool_run # type: ignore[method-assign]
63
+ except Exception:
64
+ pass # never break server startup over a patch failure
65
+
31
66
  # FIX-P0-5: MCP server version must match CLI version exactly.
32
67
  # FastMCP does not accept version= in __init__; inject it on the underlying
33
68
  # low-level Server so the MCP initialize handshake reports the correct version.
@@ -64,6 +99,32 @@ def _err(
64
99
  def _coerce_cli_error(exc: Exception, default_message: str) -> CallToolResult:
65
100
  payload = getattr(exc, "payload", None)
66
101
  if isinstance(payload, dict):
102
+ # P1-B: pro_required uses legacy flat format {error:"pro_required", feature, message}.
103
+ # exit code 2 = Pro license required.
104
+ if (
105
+ payload.get("error") == "pro_required"
106
+ or getattr(exc, "exit_code", None) == 2
107
+ and isinstance(payload.get("error"), str)
108
+ and "pro" in payload.get("error", "").lower()
109
+ ):
110
+ feature = payload.get("feature", "")
111
+ msg = payload.get("message", f"'{feature}' requires a Pro license. Run: sourcecode activate <key>")
112
+ structured = {
113
+ "success": False,
114
+ "data": None,
115
+ "error": build_error_object(
116
+ "PRO_REQUIRED",
117
+ msg,
118
+ hint="sourcecode activate <license_key>",
119
+ expected="Active Pro license.",
120
+ ),
121
+ }
122
+ if feature:
123
+ structured["feature"] = feature
124
+ return CallToolResult(
125
+ content=[TextContent(type="text", text=json.dumps(structured))],
126
+ isError=True,
127
+ )
67
128
  if "error" in payload and isinstance(payload["error"], dict):
68
129
  error = payload["error"]
69
130
  normalized = {
@@ -2399,6 +2399,8 @@ def _route_security_from_sym(
2399
2399
  m = _re2.search(r'(?:nombreRecurso\s*=\s*)?["\']([^"\']+)["\']', raw)
2400
2400
  if m:
2401
2401
  return {"policy": "custom_permission", "required_permission": m.group(1)}
2402
+ # Value is a constant reference or empty — still flag the annotation
2403
+ return {"policy": "custom_annotation", "annotation": "@M3FiltroSeguridad", "resource": raw.strip() or None}
2402
2404
  return None
2403
2405
 
2404
2406
  # Method-level first, then class-level fallback
@@ -2569,6 +2571,11 @@ def _build_route_surface(
2569
2571
  for child_fqn, parent_simple in extends_map.items()
2570
2572
  }
2571
2573
 
2574
+ # Build lookup for security_annotations from phase-2 routes
2575
+ _parent_sec_by_sym: dict[str, object] = {
2576
+ r["symbol"]: r.get("security_annotations") for r in routes
2577
+ }
2578
+
2572
2579
  for cls_simple, data in class_info.items():
2573
2580
  if data["own_endpoints"]:
2574
2581
  continue
@@ -2602,6 +2609,7 @@ def _build_route_surface(
2602
2609
  "method": verb,
2603
2610
  "stable_id": stable_id,
2604
2611
  "inheritance_depth": depth,
2612
+ "security_annotations": _parent_sec_by_sym.get(declaring_sym),
2605
2613
  })
2606
2614
  break
2607
2615
  chain = simple_extends.get(chain)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes