sourcecode 1.33.21__tar.gz → 1.33.22__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. {sourcecode-1.33.21 → sourcecode-1.33.22}/PKG-INFO +2 -2
  2. {sourcecode-1.33.21 → sourcecode-1.33.22}/README.md +1 -1
  3. {sourcecode-1.33.21 → sourcecode-1.33.22}/pyproject.toml +1 -1
  4. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/__init__.py +1 -1
  5. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/cli.py +222 -65
  6. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/mcp/runner.py +11 -6
  7. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/mcp/server.py +26 -0
  8. {sourcecode-1.33.21 → sourcecode-1.33.22}/.github/workflows/build-windows.yml +0 -0
  9. {sourcecode-1.33.21 → sourcecode-1.33.22}/.gitignore +0 -0
  10. {sourcecode-1.33.21 → sourcecode-1.33.22}/.ruff.toml +0 -0
  11. {sourcecode-1.33.21 → sourcecode-1.33.22}/CHANGELOG.md +0 -0
  12. {sourcecode-1.33.21 → sourcecode-1.33.22}/CONTRIBUTING.md +0 -0
  13. {sourcecode-1.33.21 → sourcecode-1.33.22}/LICENSE +0 -0
  14. {sourcecode-1.33.21 → sourcecode-1.33.22}/SECURITY.md +0 -0
  15. {sourcecode-1.33.21 → sourcecode-1.33.22}/raw +0 -0
  16. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/adaptive_scanner.py +0 -0
  17. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/architecture_analyzer.py +0 -0
  18. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/architecture_summary.py +0 -0
  19. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/ast_extractor.py +0 -0
  20. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/cache.py +0 -0
  21. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/canonical_ir.py +0 -0
  22. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/classifier.py +0 -0
  23. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/code_notes_analyzer.py +0 -0
  24. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/confidence_analyzer.py +0 -0
  25. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/context_scorer.py +0 -0
  26. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/context_summarizer.py +0 -0
  27. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/contract_model.py +0 -0
  28. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/contract_pipeline.py +0 -0
  29. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/coverage_parser.py +0 -0
  30. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/dependency_analyzer.py +0 -0
  31. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/detectors/__init__.py +0 -0
  32. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/detectors/base.py +0 -0
  33. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/detectors/csproj_parser.py +0 -0
  34. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/detectors/dart.py +0 -0
  35. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/detectors/dotnet.py +0 -0
  36. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/detectors/elixir.py +0 -0
  37. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/detectors/go.py +0 -0
  38. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/detectors/heuristic.py +0 -0
  39. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/detectors/hybrid.py +0 -0
  40. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/detectors/java.py +0 -0
  41. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/detectors/jvm_ext.py +0 -0
  42. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/detectors/nodejs.py +0 -0
  43. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/detectors/parsers.py +0 -0
  44. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/detectors/php.py +0 -0
  45. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/detectors/project.py +0 -0
  46. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/detectors/python.py +0 -0
  47. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/detectors/ruby.py +0 -0
  48. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/detectors/rust.py +0 -0
  49. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/detectors/systems.py +0 -0
  50. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/detectors/terraform.py +0 -0
  51. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/detectors/tooling.py +0 -0
  52. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/doc_analyzer.py +0 -0
  53. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/entrypoint_classifier.py +0 -0
  54. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/env_analyzer.py +0 -0
  55. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/error_schema.py +0 -0
  56. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/file_classifier.py +0 -0
  57. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/flow_analyzer.py +0 -0
  58. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/git_analyzer.py +0 -0
  59. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/graph_analyzer.py +0 -0
  60. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/license.py +0 -0
  61. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/mcp/__init__.py +0 -0
  62. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/mcp/onboarding/__init__.py +0 -0
  63. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/mcp/onboarding/applier.py +0 -0
  64. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/mcp/onboarding/backup.py +0 -0
  65. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/mcp/onboarding/detector.py +0 -0
  66. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/mcp/onboarding/planner.py +0 -0
  67. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/mcp/orchestrator.py +0 -0
  68. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/mcp/registry.py +0 -0
  69. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/mcp_nudge.py +0 -0
  70. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/metrics_analyzer.py +0 -0
  71. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/output_budget.py +0 -0
  72. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/path_filters.py +0 -0
  73. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/pr_comment_renderer.py +0 -0
  74. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/prepare_context.py +0 -0
  75. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/progress.py +0 -0
  76. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/ranking_engine.py +0 -0
  77. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/redactor.py +0 -0
  78. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/relevance_scorer.py +0 -0
  79. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/repo_classifier.py +0 -0
  80. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/repository_ir.py +0 -0
  81. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/ris.py +0 -0
  82. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/runtime_classifier.py +0 -0
  83. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/scanner.py +0 -0
  84. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/schema.py +0 -0
  85. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/semantic_analyzer.py +0 -0
  86. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/serializer.py +0 -0
  87. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/summarizer.py +0 -0
  88. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/telemetry/__init__.py +0 -0
  89. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/telemetry/config.py +0 -0
  90. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/telemetry/consent.py +0 -0
  91. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/telemetry/events.py +0 -0
  92. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/telemetry/filters.py +0 -0
  93. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/telemetry/transport.py +0 -0
  94. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/tree_utils.py +0 -0
  95. {sourcecode-1.33.21 → sourcecode-1.33.22}/src/sourcecode/workspace.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 1.33.21
3
+ Version: 1.33.22
4
4
  Summary: Persistent structural context and ultra-fast repeated analysis for AI coding agents
5
5
  License-File: LICENSE
6
6
  Keywords: agents,ai,codebase,context,developer-tools,llm
@@ -39,7 +39,7 @@ Description-Content-Type: text/markdown
39
39
 
40
40
  **Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
41
41
 
42
- ![Version](https://img.shields.io/badge/version-1.33.21-blue)
42
+ ![Version](https://img.shields.io/badge/version-1.33.22-blue)
43
43
  ![Python](https://img.shields.io/badge/python-3.10%2B-green)
44
44
 
45
45
  ---
@@ -2,7 +2,7 @@
2
2
 
3
3
  **Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
4
4
 
5
- ![Version](https://img.shields.io/badge/version-1.33.21-blue)
5
+ ![Version](https://img.shields.io/badge/version-1.33.22-blue)
6
6
  ![Python](https://img.shields.io/badge/python-3.10%2B-green)
7
7
 
8
8
  ---
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "sourcecode"
7
- version = "1.33.21"
7
+ version = "1.33.22"
8
8
  description = "Persistent structural context and ultra-fast repeated analysis for AI coding agents"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "1.33.21"
3
+ __version__ = "1.33.22"
@@ -1249,12 +1249,29 @@ def main(
1249
1249
  # ── Lookup ──────────────────────────────────────────────────────
1250
1250
  # Step 1: try L1 to obtain the core_hash needed for L2 key
1251
1251
  _l1_result = _cache_mod.read_core(target, _core_key)
1252
+
1253
+ # P1-A: --env-map misses L1 when base (em=False) exists.
1254
+ # Try the base key so env analysis can be injected lazily (<1 s)
1255
+ # instead of triggering a 17 s full rescan.
1256
+ _l1_needs_env_inject = False
1257
+ if _l1_result is None and env_map:
1258
+ _base_flags = _core_flags_str.replace(",em=True,", ",em=False,")
1259
+ _base_h8 = _hashlib.sha256(_base_flags.encode()).hexdigest()[:8]
1260
+ _base_key = f"{_git_sha}-{_base_h8}"
1261
+ _base_result = _cache_mod.read_core(target, _base_key)
1262
+ if _base_result is not None:
1263
+ _l1_result = _base_result
1264
+ _l1_needs_env_inject = True
1265
+
1252
1266
  if _l1_result is not None:
1253
1267
  _core_dict_l1, _core_hash = _l1_result
1254
1268
  _view_key = f"{_core_hash}-{_view_h}"
1255
1269
 
1256
- # Step 2: try L2 (exact view match)
1257
- _cache_hit_content = _cache_mod.read_view(target, _view_key)
1270
+ # Step 2: try L2 (exact view match).
1271
+ # Skip L2 for --changed-only: the stored view is a previous
1272
+ # diff snapshot that is stale for the current diff.
1273
+ if not changed_only:
1274
+ _cache_hit_content = _cache_mod.read_view(target, _view_key)
1258
1275
 
1259
1276
  # Step 3: L1 hit but L2 miss → rebuild view from core dict
1260
1277
  if _cache_hit_content is None:
@@ -1268,6 +1285,40 @@ def main(
1268
1285
  no_tree=no_tree,
1269
1286
  tree=tree,
1270
1287
  )
1288
+ # P1-A: inject env analysis when base L1 (em=False) was used.
1289
+ # EnvAnalyzer walks only env/config files — typically <1 s.
1290
+ if _rebuilt is not None and _l1_needs_env_inject and compact:
1291
+ try:
1292
+ from sourcecode.env_analyzer import EnvAnalyzer as _EnvA_p1a
1293
+ _env_r_p1a, _env_s_p1a = _EnvA_p1a().analyze(target, {})
1294
+ if _env_s_p1a and (getattr(_env_s_p1a, "total", 0) or _env_r_p1a):
1295
+ _es_p1a: dict = {
1296
+ "total": getattr(_env_s_p1a, "total", 0),
1297
+ "required": getattr(_env_s_p1a, "required_count", 0),
1298
+ }
1299
+ _cats = getattr(_env_s_p1a, "categories", None)
1300
+ if _cats:
1301
+ _es_p1a["categories"] = _cats
1302
+ _rebuilt = dict(_rebuilt)
1303
+ _rebuilt["env_summary"] = _es_p1a
1304
+ if _env_r_p1a:
1305
+ _sorted_er = sorted(
1306
+ _env_r_p1a,
1307
+ key=lambda e: (
1308
+ not getattr(e, "required", False),
1309
+ getattr(e, "key", ""),
1310
+ ),
1311
+ )
1312
+ _rebuilt["env_map"] = [
1313
+ {
1314
+ "key": getattr(e, "key", ""),
1315
+ **({"required": True} if getattr(e, "required", False) else {}),
1316
+ **({"category": getattr(e, "category", None)} if getattr(e, "category", None) else {}),
1317
+ }
1318
+ for e in _sorted_er[:15]
1319
+ ]
1320
+ except Exception:
1321
+ pass # env inject failed — continue without env data
1271
1322
  if _rebuilt is not None:
1272
1323
  # Apply redaction
1273
1324
  if not no_redact:
@@ -1306,8 +1357,8 @@ def main(
1306
1357
  _cache_hit_content = _json_l1.dumps(
1307
1358
  _rebuilt, indent=2, ensure_ascii=False
1308
1359
  )
1309
- # Cache rebuilt view in L2
1310
- if _cache_hit_content:
1360
+ # Cache rebuilt view in L2 (skip for --changed-only: stale diff)
1361
+ if _cache_hit_content and not changed_only:
1311
1362
  _cache_mod.write_view(
1312
1363
  target,
1313
1364
  _view_key,
@@ -1323,40 +1374,104 @@ def main(
1323
1374
  _view_key = ""
1324
1375
  _core_hash = ""
1325
1376
 
1326
- if _cache_hit_content is not None and not changed_only:
1377
+ if _cache_hit_content is not None:
1327
1378
  from sourcecode.serializer import write_output
1328
- if format == "json":
1329
- try:
1330
- from sourcecode.ris import _has_uncommitted_changes as _huc
1331
- _uncommitted = _huc(target)
1332
- except Exception:
1333
- _uncommitted = False
1334
- _hit_source = "L2_view" if (_view_key and _core_hash) else "L1_core"
1335
- _data_scope = "COMPACT" if compact else ("AGENT" if agent else "FULL")
1336
- # Recover generated_at from cached content before overwriting _cache block.
1337
- _cached_generated_at = None
1379
+ _hit_source = "L2_view" if (_view_key and _core_hash) else "L1_core"
1380
+
1381
+ # P0-A/B/C: --changed-only fast path via warm cache.
1382
+ if changed_only:
1383
+ _co_git_ok = False
1384
+ _co_uc_files: set[str] = set()
1338
1385
  try:
1339
- import json as _json_ga
1340
- _cached_generated_at = (
1341
- _json_ga.loads(_cache_hit_content)
1342
- .get("_cache", {})
1343
- .get("generated_at")
1344
- )
1386
+ from sourcecode.git_analyzer import GitAnalyzer as _GitAnalyzerCO
1387
+ _gc_co = _GitAnalyzerCO().analyze(target, depth=1, days=1)
1388
+ _bad_gc_co = {"no_git_repo", "git_not_found", "git_timeout"}
1389
+ if _gc_co and not (_bad_gc_co & set(_gc_co.limitations)):
1390
+ _co_git_ok = True
1391
+ _uc_co = _gc_co.uncommitted_changes
1392
+ if _uc_co:
1393
+ _uc_untracked = {p for p in _uc_co.untracked if not p.endswith("/")}
1394
+ _co_uc_files = set(_uc_co.staged) | set(_uc_co.unstaged) | _uc_untracked
1345
1395
  except Exception:
1346
1396
  pass
1347
- _cache_hit_content = _inject_cache_meta(_cache_hit_content, {
1348
- "cache_source": _hit_source,
1349
- "git_head_at_generation": _git_sha,
1350
- "current_git_head": _git_sha,
1351
- "is_stale": False,
1352
- "has_uncommitted_changes": _uncommitted,
1353
- "generated_at": _cached_generated_at,
1354
- "data_scope": _data_scope,
1355
- })
1356
- write_output(_cache_hit_content, output=output)
1357
- if copy and not output:
1358
- _copy_to_clipboard(_cache_hit_content)
1359
- return
1397
+
1398
+ if not _co_git_ok:
1399
+ # Git unavailable — disable changed_only, fall through to normal cached output.
1400
+ changed_only = False
1401
+ typer.echo("[changed-only] git unavailable — falling back to full scan.", err=True)
1402
+ elif not _co_uc_files:
1403
+ # Clean repo — unified empty schema.
1404
+ _co_clean = json.dumps({
1405
+ "schema_version": "1.0",
1406
+ "changed_files_count": 0,
1407
+ "changed_files": [],
1408
+ "analysis_scope": "empty",
1409
+ "context": None,
1410
+ "_meta": {"changed_only": True, "cache_source": _hit_source},
1411
+ }, ensure_ascii=False)
1412
+ write_output(_co_clean, output=output)
1413
+ if copy and not output:
1414
+ _copy_to_clipboard(_co_clean)
1415
+ return
1416
+ else:
1417
+ # Dirty repo — filter file_paths in cached compact, unified schema.
1418
+ try:
1419
+ _co_base = json.loads(_cache_hit_content)
1420
+ _fps_all = _co_base.get("file_paths", [])
1421
+ _co_base["file_paths"] = [
1422
+ p for p in _fps_all
1423
+ if p in _co_uc_files or _is_always_include_ref(p)
1424
+ ]
1425
+ _co_base.pop("_cache", None)
1426
+ _co_dirty = json.dumps({
1427
+ "schema_version": "1.0",
1428
+ "changed_files_count": len(_co_uc_files),
1429
+ "changed_files": sorted(_co_uc_files),
1430
+ "analysis_scope": "partial",
1431
+ "context": _co_base,
1432
+ "_meta": {"changed_only": True, "cache_source": _hit_source},
1433
+ }, indent=2, ensure_ascii=False)
1434
+ write_output(_co_dirty, output=output)
1435
+ if copy and not output:
1436
+ _copy_to_clipboard(_co_dirty)
1437
+ return
1438
+ except Exception:
1439
+ # Parse failed — fall through to full scan.
1440
+ changed_only = False
1441
+ typer.echo("[changed-only] cache parse failed — falling back to full scan.", err=True)
1442
+
1443
+ if not changed_only:
1444
+ if format == "json":
1445
+ try:
1446
+ from sourcecode.ris import _has_uncommitted_changes as _huc
1447
+ _uncommitted = _huc(target)
1448
+ except Exception:
1449
+ _uncommitted = False
1450
+ _data_scope = "COMPACT" if compact else ("AGENT" if agent else "FULL")
1451
+ # Recover generated_at from cached content before overwriting _cache block.
1452
+ _cached_generated_at = None
1453
+ try:
1454
+ import json as _json_ga
1455
+ _cached_generated_at = (
1456
+ _json_ga.loads(_cache_hit_content)
1457
+ .get("_cache", {})
1458
+ .get("generated_at")
1459
+ )
1460
+ except Exception:
1461
+ pass
1462
+ _cache_hit_content = _inject_cache_meta(_cache_hit_content, {
1463
+ "cache_source": _hit_source,
1464
+ "git_head_at_generation": _git_sha,
1465
+ "current_git_head": _git_sha,
1466
+ "is_stale": False,
1467
+ "has_uncommitted_changes": _uncommitted,
1468
+ "generated_at": _cached_generated_at,
1469
+ "data_scope": _data_scope,
1470
+ })
1471
+ write_output(_cache_hit_content, output=output)
1472
+ if copy and not output:
1473
+ _copy_to_clipboard(_cache_hit_content)
1474
+ return
1360
1475
 
1361
1476
  _extra_excludes: Optional[frozenset[str]] = None
1362
1477
  if exclude:
@@ -1993,12 +2108,12 @@ def main(
1993
2108
  changed_only = False
1994
2109
  if _git_confirmed_clean:
1995
2110
  _nc_payload = json.dumps({
2111
+ "schema_version": "1.0",
1996
2112
  "changed_files_count": 0,
1997
2113
  "changed_files": [],
1998
- "message": "no uncommitted changes detected",
1999
2114
  "analysis_scope": "empty",
2000
- "note": "No uncommitted changes detected. No output produced — use without --changed-only for full context.",
2001
- "_meta": {"changed_only": True},
2115
+ "context": None,
2116
+ "_meta": {"changed_only": True, "cache_source": "none"},
2002
2117
  }, ensure_ascii=False)
2003
2118
  write_output(_nc_payload, output=output)
2004
2119
  raise typer.Exit()
@@ -2092,20 +2207,17 @@ def main(
2092
2207
  # FIX-P0-2: agent mode must honour --format yaml (previously always emitted JSON).
2093
2208
  content = _serialize_dict(data, format)
2094
2209
  elif compact:
2095
- if changed_only and _allowed_changed_files:
2096
- # GAP-5: preserve full entry_points for architecture context even in
2097
- # --changed-only mode. Only filter file_paths and code_notes.
2098
- # ALWAYS-INCLUDE: security-const files must stay in file_paths even when
2099
- # not in the git diff — they resolve Java constant references used in
2100
- # @M3FiltroSeguridad annotations (read-only anchors, not diff output).
2101
- sm = _replace(sm,
2102
- file_paths=[
2103
- p for p in sm.file_paths
2104
- if p in _allowed_changed_files or _is_always_include_ref(p)
2105
- ],
2106
- code_notes=[n for n in sm.code_notes if n.path in _allowed_changed_files],
2107
- )
2210
+ # P0-C: compute compact_view with full sm.file_paths so mybatis/angular
2211
+ # counts are correct; filter file_paths in the output dict, not in sm.
2108
2212
  data = compact_view(sm, no_tree=no_tree, full=full)
2213
+ if changed_only and _allowed_changed_files:
2214
+ # GAP-5: preserve full entry_points; filter file_paths display only.
2215
+ # ALWAYS-INCLUDE: security-const files stay for Java constant refs.
2216
+ _fps_full = data.get("file_paths", [])
2217
+ data["file_paths"] = [
2218
+ p for p in _fps_full
2219
+ if p in _allowed_changed_files or _is_always_include_ref(p)
2220
+ ]
2109
2221
  if not no_redact:
2110
2222
  data = redact_dict(data)
2111
2223
  # P0-1: Apply output budget — safety net for large repos.
@@ -2157,6 +2269,20 @@ def main(
2157
2269
  "generated_at": _dt.datetime.now(_dt.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
2158
2270
  "data_scope": _data_scope_fresh,
2159
2271
  })
2272
+ # P0-B: wrap --changed-only cold-start dirty output in unified schema.
2273
+ if changed_only and compact:
2274
+ try:
2275
+ _co_inner = json.loads(content)
2276
+ content = json.dumps({
2277
+ "schema_version": "1.0",
2278
+ "changed_files_count": len(_allowed_changed_files) if _allowed_changed_files else 0,
2279
+ "changed_files": sorted(_allowed_changed_files) if _allowed_changed_files else [],
2280
+ "analysis_scope": "partial" if _allowed_changed_files else "empty",
2281
+ "context": _co_inner,
2282
+ "_meta": {"changed_only": True, "cache_source": "fresh"},
2283
+ }, indent=2, ensure_ascii=False)
2284
+ except Exception:
2285
+ pass
2160
2286
  write_output(content, output=output)
2161
2287
 
2162
2288
  # Persist to two-layer cache (git SHA unchanged → re-use on next run).
@@ -2169,7 +2295,9 @@ def main(
2169
2295
  # Writes happen in a background daemon thread so cold-run latency is not
2170
2296
  # penalised by gzip encoding + disk I/O. atexit join ensures writes
2171
2297
  # complete on clean exit without blocking the user-visible response.
2172
- if not no_cache and _core_key and not _pipeline_error:
2298
+ # Skip cache write for --changed-only: content is a filtered diff view,
2299
+ # not valid for reuse as a base compact on subsequent runs.
2300
+ if not no_cache and _core_key and not _pipeline_error and not changed_only:
2173
2301
  import atexit as _atexit
2174
2302
  import threading as _threading
2175
2303
 
@@ -3124,6 +3252,11 @@ def repo_ir_cmd(
3124
3252
  "-c",
3125
3253
  help="Copy output to system clipboard after a successful run. No-op when --output is used or clipboard is unavailable.",
3126
3254
  ),
3255
+ force: bool = typer.Option(
3256
+ False,
3257
+ "--force",
3258
+ help="Bypass the token-size guard and emit output even when estimated tokens exceed 50K.",
3259
+ ),
3127
3260
  ) -> None:
3128
3261
  """Deterministic symbol-level IR for Java repositories.
3129
3262
 
@@ -3222,13 +3355,23 @@ def repo_ir_cmd(
3222
3355
  )
3223
3356
  else:
3224
3357
  _ir_size = len(output.encode("utf-8"))
3225
- if _ir_size > 400_000:
3226
- _ir_tokens = _ir_size // 4
3358
+ _ir_tokens_est = _ir_size // 4
3359
+ # P1-C: abort when estimated tokens > 50K unless --force or --output is given.
3360
+ if _ir_tokens_est > 50_000 and not force:
3361
+ _emit_error_json(
3362
+ "OUTPUT_TOO_LARGE",
3363
+ f"Estimated output is ~{_ir_tokens_est // 1000}K tokens — too large for most LLM context windows.",
3364
+ hint=(
3365
+ "Use --summary-only (~5K tokens), --max-nodes N --max-edges N, "
3366
+ "--output FILE to save to disk, or --force to bypass this guard."
3367
+ ),
3368
+ expected="Output under 50K estimated tokens.",
3369
+ )
3370
+ raise typer.Exit(1)
3371
+ if _ir_tokens_est > 10_000:
3227
3372
  sys.stderr.write(
3228
- f"WARNING: Output is ~{_ir_tokens // 1000}K tokens. This exceeds the context window of "
3229
- "most LLMs (GPT-4o: 128K, Claude Sonnet: 200K). "
3230
- "Use --compact or --agent for LLM-consumable context. "
3231
- "Use --output FILE to save for local search tools.\n"
3373
+ f"[repo-ir] ~{_ir_tokens_est // 1000}K tokens "
3374
+ "use --summary-only or --output FILE for smaller output.\n"
3232
3375
  )
3233
3376
  sys.stderr.flush()
3234
3377
  try:
@@ -3995,6 +4138,11 @@ def config_cmd() -> None:
3995
4138
  @app.command("cold-start")
3996
4139
  def cold_start_cmd(
3997
4140
  path: Path = typer.Argument(Path("."), help="Repository path (default: current directory)"),
4141
+ compact: bool = typer.Option(
4142
+ False,
4143
+ "--compact",
4144
+ help="Emit a compact subset (~10K tokens): status, git_head, stacks, entry_points, and key_dependencies only.",
4145
+ ),
3998
4146
  ) -> None:
3999
4147
  """Output Repository Intelligence Snapshot bootstrap context as JSON.
4000
4148
 
@@ -4002,24 +4150,33 @@ def cold_start_cmd(
4002
4150
  status: cold_start_ready | cold_start_stale | no_ris
4003
4151
 
4004
4152
  \b
4005
- Note: Produces large output (~100K–200K tokens for medium repos).
4006
- Designed for bootstrap snapshots, not direct LLM injection.
4007
- Use --compact or --agent instead for LLM-consumable context.
4008
- Use --output FILE to save for local search tools.
4153
+ Note: Full output is large (~100K–200K tokens for medium repos).
4154
+ Use --compact for a ~10K token subset safe for direct LLM injection.
4155
+ Use --output FILE to save the full snapshot for local search tools.
4009
4156
  """
4010
4157
  import json as _json
4011
4158
  from sourcecode.ris import get_cold_start_context as _gcs
4012
4159
  target = Path(path).resolve()
4013
4160
  result = _gcs(target)
4161
+ if compact:
4162
+ # P1-C: cap at ~10K tokens — keep only fields essential for orientation.
4163
+ _cs_keys = {"status", "git_head", "stacks", "entry_points",
4164
+ "key_dependencies", "project_type", "project_summary",
4165
+ "validation", "_meta"}
4166
+ result = {k: v for k, v in result.items() if k in _cs_keys}
4167
+ result["_meta"] = {**(result.get("_meta") or {}), "compact_mode": True,
4168
+ "full_available": "sourcecode cold-start (without --compact)"}
4014
4169
  _out = _json.dumps(result, indent=2, ensure_ascii=False)
4015
4170
  _size = len(_out.encode("utf-8"))
4016
- if _size > 400_000:
4017
- _tokens = _size // 4
4171
+ _tokens = _size // 4
4172
+ _out_with_meta = _json.loads(_out)
4173
+ _out_with_meta.setdefault("_meta", {})["estimated_tokens"] = _tokens
4174
+ _out = _json.dumps(_out_with_meta, indent=2, ensure_ascii=False)
4175
+ if not compact and _size > 400_000:
4018
4176
  sys.stderr.write(
4019
4177
  f"WARNING: Output is ~{_tokens // 1000}K tokens. This exceeds the context window of "
4020
4178
  "most LLMs (GPT-4o: 128K, Claude Sonnet: 200K). "
4021
- "Use --compact or --agent for LLM-consumable context. "
4022
- "Use --output FILE to save for local search tools.\n"
4179
+ "Use --compact for a ~10K token subset, or --output FILE to save.\n"
4023
4180
  )
4024
4181
  sys.stderr.flush()
4025
4182
  typer.echo(_out)
@@ -51,15 +51,20 @@ def run_command(args: list[str]) -> Any:
51
51
  stderr_raw = getattr(result, "stderr", "")
52
52
  stdout = stdout_raw.strip() if isinstance(stdout_raw, str) else ""
53
53
  stderr = stderr_raw.strip() if isinstance(stderr_raw, str) else ""
54
- error_text = stderr or stdout
54
+ # P1-B: structured errors (e.g. pro_required) are written to stdout as
55
+ # JSON while stderr carries the human-readable message. Try stdout first
56
+ # for JSON; fall back to stderr so we never lose a structured payload.
55
57
  payload = None
56
- if error_text:
58
+ for _candidate in (stdout, stderr):
59
+ if not _candidate:
60
+ continue
57
61
  try:
58
- parsed = json.loads(error_text)
62
+ _parsed = json.loads(_candidate)
59
63
  except json.JSONDecodeError:
60
- parsed = None
61
- if isinstance(parsed, dict):
62
- payload = parsed
64
+ continue
65
+ if isinstance(_parsed, dict):
66
+ payload = _parsed
67
+ break
63
68
  raise CommandError(
64
69
  f"sourcecode command failed (exit {result.exit_code}). Args: {args}",
65
70
  exit_code=result.exit_code,
@@ -99,6 +99,32 @@ def _err(
99
99
  def _coerce_cli_error(exc: Exception, default_message: str) -> CallToolResult:
100
100
  payload = getattr(exc, "payload", None)
101
101
  if isinstance(payload, dict):
102
+ # P1-B: pro_required uses legacy flat format {error:"pro_required", feature, message}.
103
+ # exit code 2 = Pro license required.
104
+ if (
105
+ payload.get("error") == "pro_required"
106
+ or getattr(exc, "exit_code", None) == 2
107
+ and isinstance(payload.get("error"), str)
108
+ and "pro" in payload.get("error", "").lower()
109
+ ):
110
+ feature = payload.get("feature", "")
111
+ msg = payload.get("message", f"'{feature}' requires a Pro license. Run: sourcecode activate <key>")
112
+ structured = {
113
+ "success": False,
114
+ "data": None,
115
+ "error": build_error_object(
116
+ "PRO_REQUIRED",
117
+ msg,
118
+ hint="sourcecode activate <license_key>",
119
+ expected="Active Pro license.",
120
+ ),
121
+ }
122
+ if feature:
123
+ structured["feature"] = feature
124
+ return CallToolResult(
125
+ content=[TextContent(type="text", text=json.dumps(structured))],
126
+ isError=True,
127
+ )
102
128
  if "error" in payload and isinstance(payload["error"], dict):
103
129
  error = payload["error"]
104
130
  normalized = {
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes