sourcecode 1.33.21__tar.gz → 1.33.23__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. {sourcecode-1.33.21 → sourcecode-1.33.23}/PKG-INFO +2 -2
  2. {sourcecode-1.33.21 → sourcecode-1.33.23}/README.md +1 -1
  3. {sourcecode-1.33.21 → sourcecode-1.33.23}/pyproject.toml +1 -1
  4. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/__init__.py +1 -1
  5. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/cli.py +291 -72
  6. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/env_analyzer.py +1 -1
  7. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/mcp/runner.py +11 -6
  8. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/mcp/server.py +26 -0
  9. {sourcecode-1.33.21 → sourcecode-1.33.23}/.github/workflows/build-windows.yml +0 -0
  10. {sourcecode-1.33.21 → sourcecode-1.33.23}/.gitignore +0 -0
  11. {sourcecode-1.33.21 → sourcecode-1.33.23}/.ruff.toml +0 -0
  12. {sourcecode-1.33.21 → sourcecode-1.33.23}/CHANGELOG.md +0 -0
  13. {sourcecode-1.33.21 → sourcecode-1.33.23}/CONTRIBUTING.md +0 -0
  14. {sourcecode-1.33.21 → sourcecode-1.33.23}/LICENSE +0 -0
  15. {sourcecode-1.33.21 → sourcecode-1.33.23}/SECURITY.md +0 -0
  16. {sourcecode-1.33.21 → sourcecode-1.33.23}/raw +0 -0
  17. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/adaptive_scanner.py +0 -0
  18. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/architecture_analyzer.py +0 -0
  19. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/architecture_summary.py +0 -0
  20. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/ast_extractor.py +0 -0
  21. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/cache.py +0 -0
  22. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/canonical_ir.py +0 -0
  23. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/classifier.py +0 -0
  24. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/code_notes_analyzer.py +0 -0
  25. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/confidence_analyzer.py +0 -0
  26. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/context_scorer.py +0 -0
  27. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/context_summarizer.py +0 -0
  28. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/contract_model.py +0 -0
  29. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/contract_pipeline.py +0 -0
  30. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/coverage_parser.py +0 -0
  31. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/dependency_analyzer.py +0 -0
  32. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/detectors/__init__.py +0 -0
  33. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/detectors/base.py +0 -0
  34. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/detectors/csproj_parser.py +0 -0
  35. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/detectors/dart.py +0 -0
  36. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/detectors/dotnet.py +0 -0
  37. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/detectors/elixir.py +0 -0
  38. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/detectors/go.py +0 -0
  39. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/detectors/heuristic.py +0 -0
  40. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/detectors/hybrid.py +0 -0
  41. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/detectors/java.py +0 -0
  42. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/detectors/jvm_ext.py +0 -0
  43. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/detectors/nodejs.py +0 -0
  44. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/detectors/parsers.py +0 -0
  45. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/detectors/php.py +0 -0
  46. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/detectors/project.py +0 -0
  47. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/detectors/python.py +0 -0
  48. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/detectors/ruby.py +0 -0
  49. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/detectors/rust.py +0 -0
  50. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/detectors/systems.py +0 -0
  51. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/detectors/terraform.py +0 -0
  52. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/detectors/tooling.py +0 -0
  53. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/doc_analyzer.py +0 -0
  54. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/entrypoint_classifier.py +0 -0
  55. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/error_schema.py +0 -0
  56. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/file_classifier.py +0 -0
  57. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/flow_analyzer.py +0 -0
  58. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/git_analyzer.py +0 -0
  59. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/graph_analyzer.py +0 -0
  60. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/license.py +0 -0
  61. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/mcp/__init__.py +0 -0
  62. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/mcp/onboarding/__init__.py +0 -0
  63. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/mcp/onboarding/applier.py +0 -0
  64. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/mcp/onboarding/backup.py +0 -0
  65. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/mcp/onboarding/detector.py +0 -0
  66. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/mcp/onboarding/planner.py +0 -0
  67. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/mcp/orchestrator.py +0 -0
  68. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/mcp/registry.py +0 -0
  69. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/mcp_nudge.py +0 -0
  70. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/metrics_analyzer.py +0 -0
  71. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/output_budget.py +0 -0
  72. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/path_filters.py +0 -0
  73. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/pr_comment_renderer.py +0 -0
  74. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/prepare_context.py +0 -0
  75. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/progress.py +0 -0
  76. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/ranking_engine.py +0 -0
  77. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/redactor.py +0 -0
  78. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/relevance_scorer.py +0 -0
  79. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/repo_classifier.py +0 -0
  80. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/repository_ir.py +0 -0
  81. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/ris.py +0 -0
  82. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/runtime_classifier.py +0 -0
  83. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/scanner.py +0 -0
  84. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/schema.py +0 -0
  85. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/semantic_analyzer.py +0 -0
  86. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/serializer.py +0 -0
  87. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/summarizer.py +0 -0
  88. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/telemetry/__init__.py +0 -0
  89. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/telemetry/config.py +0 -0
  90. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/telemetry/consent.py +0 -0
  91. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/telemetry/events.py +0 -0
  92. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/telemetry/filters.py +0 -0
  93. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/telemetry/transport.py +0 -0
  94. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/tree_utils.py +0 -0
  95. {sourcecode-1.33.21 → sourcecode-1.33.23}/src/sourcecode/workspace.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 1.33.21
3
+ Version: 1.33.23
4
4
  Summary: Persistent structural context and ultra-fast repeated analysis for AI coding agents
5
5
  License-File: LICENSE
6
6
  Keywords: agents,ai,codebase,context,developer-tools,llm
@@ -39,7 +39,7 @@ Description-Content-Type: text/markdown
39
39
 
40
40
  **Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
41
41
 
42
- ![Version](https://img.shields.io/badge/version-1.33.21-blue)
42
+ ![Version](https://img.shields.io/badge/version-1.33.23-blue)
43
43
  ![Python](https://img.shields.io/badge/python-3.10%2B-green)
44
44
 
45
45
  ---
@@ -2,7 +2,7 @@
2
2
 
3
3
  **Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
4
4
 
5
- ![Version](https://img.shields.io/badge/version-1.33.21-blue)
5
+ ![Version](https://img.shields.io/badge/version-1.33.23-blue)
6
6
  ![Python](https://img.shields.io/badge/python-3.10%2B-green)
7
7
 
8
8
  ---
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "sourcecode"
7
- version = "1.33.21"
7
+ version = "1.33.23"
8
8
  description = "Persistent structural context and ultra-fast repeated analysis for AI coding agents"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "1.33.21"
3
+ __version__ = "1.33.23"
@@ -1185,6 +1185,16 @@ def main(
1185
1185
 
1186
1186
  _cache_hit_content: Optional[str] = None
1187
1187
  _git_sha = ""
1188
+ # HEAD SHA is diagnostic metadata — compute unconditionally, not tied to cache.
1189
+ try:
1190
+ _sha_r = _sub.run(
1191
+ ["git", "-C", str(target), "rev-parse", "--short", "HEAD"],
1192
+ capture_output=True, text=True, timeout=3,
1193
+ )
1194
+ _git_sha = _sha_r.stdout.strip()
1195
+ except Exception:
1196
+ pass
1197
+
1188
1198
  _core_key = ""
1189
1199
  _view_key = ""
1190
1200
  _core_hash = ""
@@ -1193,12 +1203,6 @@ def main(
1193
1203
 
1194
1204
  if not no_cache:
1195
1205
  try:
1196
- _sha_r = _sub.run(
1197
- ["git", "-C", str(target), "rev-parse", "--short", "HEAD"],
1198
- capture_output=True, text=True, timeout=3,
1199
- )
1200
- _git_sha = _sha_r.stdout.strip()
1201
-
1202
1206
  # Detect actual git root (may be an ancestor of target for monorepos,
1203
1207
  # multi-project repos, or SVN-migrated trees where .git is in a parent).
1204
1208
  # The original "(target / '.git').exists()" check broke these layouts.
@@ -1249,12 +1253,29 @@ def main(
1249
1253
  # ── Lookup ──────────────────────────────────────────────────────
1250
1254
  # Step 1: try L1 to obtain the core_hash needed for L2 key
1251
1255
  _l1_result = _cache_mod.read_core(target, _core_key)
1256
+
1257
+ # P1-A: --env-map misses L1 when base (em=False) exists.
1258
+ # Try the base key so env analysis can be injected lazily (<1 s)
1259
+ # instead of triggering a 17 s full rescan.
1260
+ _l1_needs_env_inject = False
1261
+ if _l1_result is None and env_map:
1262
+ _base_flags = _core_flags_str.replace(",em=True,", ",em=False,")
1263
+ _base_h8 = _hashlib.sha256(_base_flags.encode()).hexdigest()[:8]
1264
+ _base_key = f"{_git_sha}-{_base_h8}"
1265
+ _base_result = _cache_mod.read_core(target, _base_key)
1266
+ if _base_result is not None:
1267
+ _l1_result = _base_result
1268
+ _l1_needs_env_inject = True
1269
+
1252
1270
  if _l1_result is not None:
1253
1271
  _core_dict_l1, _core_hash = _l1_result
1254
1272
  _view_key = f"{_core_hash}-{_view_h}"
1255
1273
 
1256
- # Step 2: try L2 (exact view match)
1257
- _cache_hit_content = _cache_mod.read_view(target, _view_key)
1274
+ # Step 2: try L2 (exact view match).
1275
+ # Skip L2 for --changed-only: the stored view is a previous
1276
+ # diff snapshot that is stale for the current diff.
1277
+ if not changed_only:
1278
+ _cache_hit_content = _cache_mod.read_view(target, _view_key)
1258
1279
 
1259
1280
  # Step 3: L1 hit but L2 miss → rebuild view from core dict
1260
1281
  if _cache_hit_content is None:
@@ -1268,6 +1289,40 @@ def main(
1268
1289
  no_tree=no_tree,
1269
1290
  tree=tree,
1270
1291
  )
1292
+ # P1-A: inject env analysis when base L1 (em=False) was used.
1293
+ # EnvAnalyzer walks only env/config files — typically <1 s.
1294
+ if _rebuilt is not None and _l1_needs_env_inject and compact:
1295
+ try:
1296
+ from sourcecode.env_analyzer import EnvAnalyzer as _EnvA_p1a
1297
+ _env_r_p1a, _env_s_p1a = _EnvA_p1a().analyze(target, {})
1298
+ if _env_s_p1a and (getattr(_env_s_p1a, "total", 0) or _env_r_p1a):
1299
+ _es_p1a: dict = {
1300
+ "total": getattr(_env_s_p1a, "total", 0),
1301
+ "required": getattr(_env_s_p1a, "required_count", 0),
1302
+ }
1303
+ _cats = getattr(_env_s_p1a, "categories", None)
1304
+ if _cats:
1305
+ _es_p1a["categories"] = _cats
1306
+ _rebuilt = dict(_rebuilt)
1307
+ _rebuilt["env_summary"] = _es_p1a
1308
+ if _env_r_p1a:
1309
+ _sorted_er = sorted(
1310
+ _env_r_p1a,
1311
+ key=lambda e: (
1312
+ not getattr(e, "required", False),
1313
+ getattr(e, "key", ""),
1314
+ ),
1315
+ )
1316
+ _rebuilt["env_map"] = [
1317
+ {
1318
+ "key": getattr(e, "key", ""),
1319
+ **({"required": True} if getattr(e, "required", False) else {}),
1320
+ **({"category": getattr(e, "category", None)} if getattr(e, "category", None) else {}),
1321
+ }
1322
+ for e in _sorted_er[:15]
1323
+ ]
1324
+ except Exception:
1325
+ pass # env inject failed — continue without env data
1271
1326
  if _rebuilt is not None:
1272
1327
  # Apply redaction
1273
1328
  if not no_redact:
@@ -1306,8 +1361,8 @@ def main(
1306
1361
  _cache_hit_content = _json_l1.dumps(
1307
1362
  _rebuilt, indent=2, ensure_ascii=False
1308
1363
  )
1309
- # Cache rebuilt view in L2
1310
- if _cache_hit_content:
1364
+ # Cache rebuilt view in L2 (skip for --changed-only: stale diff)
1365
+ if _cache_hit_content and not changed_only:
1311
1366
  _cache_mod.write_view(
1312
1367
  target,
1313
1368
  _view_key,
@@ -1318,45 +1373,132 @@ def main(
1318
1373
  _cache_hit_content = None # rebuild failed → full analysis
1319
1374
 
1320
1375
  except Exception:
1321
- _git_sha = ""
1322
1376
  _core_key = ""
1323
1377
  _view_key = ""
1324
1378
  _core_hash = ""
1325
1379
 
1326
- if _cache_hit_content is not None and not changed_only:
1380
+ if _cache_hit_content is not None:
1327
1381
  from sourcecode.serializer import write_output
1328
- if format == "json":
1329
- try:
1330
- from sourcecode.ris import _has_uncommitted_changes as _huc
1331
- _uncommitted = _huc(target)
1332
- except Exception:
1333
- _uncommitted = False
1334
- _hit_source = "L2_view" if (_view_key and _core_hash) else "L1_core"
1335
- _data_scope = "COMPACT" if compact else ("AGENT" if agent else "FULL")
1336
- # Recover generated_at from cached content before overwriting _cache block.
1337
- _cached_generated_at = None
1382
+ _hit_source = "L2_view" if (_view_key and _core_hash) else "L1_core"
1383
+
1384
+ # P0-A/B/C: --changed-only fast path via warm cache.
1385
+ if changed_only:
1386
+ _co_git_ok = False
1387
+ _co_uc_files: set[str] = set()
1338
1388
  try:
1339
- import json as _json_ga
1340
- _cached_generated_at = (
1341
- _json_ga.loads(_cache_hit_content)
1342
- .get("_cache", {})
1343
- .get("generated_at")
1344
- )
1389
+ from sourcecode.git_analyzer import GitAnalyzer as _GitAnalyzerCO
1390
+ _gc_co = _GitAnalyzerCO().analyze(target, depth=1, days=1)
1391
+ _bad_gc_co = {"no_git_repo", "git_not_found", "git_timeout"}
1392
+ if _gc_co and not (_bad_gc_co & set(_gc_co.limitations)):
1393
+ _co_git_ok = True
1394
+ _uc_co = _gc_co.uncommitted_changes
1395
+ if _uc_co:
1396
+ _uc_untracked = {p for p in _uc_co.untracked if not p.endswith("/")}
1397
+ _co_uc_files = set(_uc_co.staged) | set(_uc_co.unstaged) | _uc_untracked
1345
1398
  except Exception:
1346
1399
  pass
1347
- _cache_hit_content = _inject_cache_meta(_cache_hit_content, {
1348
- "cache_source": _hit_source,
1349
- "git_head_at_generation": _git_sha,
1350
- "current_git_head": _git_sha,
1351
- "is_stale": False,
1352
- "has_uncommitted_changes": _uncommitted,
1353
- "generated_at": _cached_generated_at,
1354
- "data_scope": _data_scope,
1355
- })
1356
- write_output(_cache_hit_content, output=output)
1357
- if copy and not output:
1358
- _copy_to_clipboard(_cache_hit_content)
1359
- return
1400
+
1401
+ if not _co_git_ok:
1402
+ # Git unavailable — disable changed_only, fall through to normal cached output.
1403
+ changed_only = False
1404
+ typer.echo("[changed-only] git unavailable — falling back to full scan.", err=True)
1405
+ elif not _co_uc_files:
1406
+ # Clean repo — unified empty schema.
1407
+ _co_clean = json.dumps({
1408
+ "schema_version": "1.0",
1409
+ "changed_files_count": 0,
1410
+ "changed_files": [],
1411
+ "analysis_scope": "empty",
1412
+ "context": None,
1413
+ "_meta": {"changed_only": True, "cache_source": _hit_source},
1414
+ }, ensure_ascii=False)
1415
+ write_output(_co_clean, output=output)
1416
+ if copy and not output:
1417
+ _copy_to_clipboard(_co_clean)
1418
+ return
1419
+ else:
1420
+ # Dirty repo — filter file_paths in cached compact, unified schema.
1421
+ try:
1422
+ _co_base = json.loads(_cache_hit_content)
1423
+ _fps_all = _co_base.get("file_paths", [])
1424
+ _co_base["file_paths"] = [
1425
+ p for p in _fps_all
1426
+ if p in _co_uc_files or _is_always_include_ref(p)
1427
+ ]
1428
+ _co_base.pop("_cache", None)
1429
+ _co_dirty = json.dumps({
1430
+ "schema_version": "1.0",
1431
+ "changed_files_count": len(_co_uc_files),
1432
+ "changed_files": sorted(_co_uc_files),
1433
+ "analysis_scope": "partial",
1434
+ "context": _co_base,
1435
+ "_meta": {"changed_only": True, "cache_source": _hit_source},
1436
+ }, indent=2, ensure_ascii=False)
1437
+ write_output(_co_dirty, output=output)
1438
+ if copy and not output:
1439
+ _copy_to_clipboard(_co_dirty)
1440
+ return
1441
+ except Exception:
1442
+ # Parse failed — fall through to full scan.
1443
+ changed_only = False
1444
+ typer.echo("[changed-only] cache parse failed — falling back to full scan.", err=True)
1445
+
1446
+ if not changed_only:
1447
+ if format == "json":
1448
+ try:
1449
+ from sourcecode.ris import _has_uncommitted_changes as _huc
1450
+ _uncommitted = _huc(target)
1451
+ except Exception:
1452
+ _uncommitted = False
1453
+ _data_scope = "COMPACT" if compact else ("AGENT" if agent else "FULL")
1454
+ # Recover generated_at from cached content before overwriting _cache block.
1455
+ _cached_generated_at = None
1456
+ try:
1457
+ import json as _json_ga
1458
+ _cached_generated_at = (
1459
+ _json_ga.loads(_cache_hit_content)
1460
+ .get("_cache", {})
1461
+ .get("generated_at")
1462
+ )
1463
+ except Exception:
1464
+ pass
1465
+ _cache_hit_content = _inject_cache_meta(_cache_hit_content, {
1466
+ "cache_source": _hit_source,
1467
+ "git_head_at_generation": _git_sha,
1468
+ "current_git_head": _git_sha,
1469
+ "is_stale": False,
1470
+ "has_uncommitted_changes": _uncommitted,
1471
+ "generated_at": _cached_generated_at,
1472
+ "data_scope": _data_scope,
1473
+ })
1474
+ # Patch git_context.uncommitted_files when there are working-tree
1475
+ # changes — the cached body has stale 0 from generation time.
1476
+ if git_context and _uncommitted:
1477
+ try:
1478
+ import json as _json_gc
1479
+ import subprocess as _sub_gc
1480
+ _patched = _json_gc.loads(_cache_hit_content)
1481
+ if "git_context" in _patched:
1482
+ _uc_r = _sub_gc.run(
1483
+ ["git", "-C", str(target), "status", "--porcelain"],
1484
+ capture_output=True, text=True, timeout=3,
1485
+ )
1486
+ _uc_count = len(
1487
+ [l for l in _uc_r.stdout.splitlines() if l.strip()]
1488
+ )
1489
+ _patched["git_context"]["uncommitted_files"] = _uc_count
1490
+ _patched["git_context"]["_stale_fields_refreshed"] = [
1491
+ "uncommitted_files"
1492
+ ]
1493
+ _cache_hit_content = _json_gc.dumps(
1494
+ _patched, indent=2, ensure_ascii=False
1495
+ )
1496
+ except Exception:
1497
+ pass # stale value better than crash
1498
+ write_output(_cache_hit_content, output=output)
1499
+ if copy and not output:
1500
+ _copy_to_clipboard(_cache_hit_content)
1501
+ return
1360
1502
 
1361
1503
  _extra_excludes: Optional[frozenset[str]] = None
1362
1504
  if exclude:
@@ -1993,12 +2135,12 @@ def main(
1993
2135
  changed_only = False
1994
2136
  if _git_confirmed_clean:
1995
2137
  _nc_payload = json.dumps({
2138
+ "schema_version": "1.0",
1996
2139
  "changed_files_count": 0,
1997
2140
  "changed_files": [],
1998
- "message": "no uncommitted changes detected",
1999
2141
  "analysis_scope": "empty",
2000
- "note": "No uncommitted changes detected. No output produced — use without --changed-only for full context.",
2001
- "_meta": {"changed_only": True},
2142
+ "context": None,
2143
+ "_meta": {"changed_only": True, "cache_source": "none"},
2002
2144
  }, ensure_ascii=False)
2003
2145
  write_output(_nc_payload, output=output)
2004
2146
  raise typer.Exit()
@@ -2092,20 +2234,17 @@ def main(
2092
2234
  # FIX-P0-2: agent mode must honour --format yaml (previously always emitted JSON).
2093
2235
  content = _serialize_dict(data, format)
2094
2236
  elif compact:
2095
- if changed_only and _allowed_changed_files:
2096
- # GAP-5: preserve full entry_points for architecture context even in
2097
- # --changed-only mode. Only filter file_paths and code_notes.
2098
- # ALWAYS-INCLUDE: security-const files must stay in file_paths even when
2099
- # not in the git diff — they resolve Java constant references used in
2100
- # @M3FiltroSeguridad annotations (read-only anchors, not diff output).
2101
- sm = _replace(sm,
2102
- file_paths=[
2103
- p for p in sm.file_paths
2104
- if p in _allowed_changed_files or _is_always_include_ref(p)
2105
- ],
2106
- code_notes=[n for n in sm.code_notes if n.path in _allowed_changed_files],
2107
- )
2237
+ # P0-C: compute compact_view with full sm.file_paths so mybatis/angular
2238
+ # counts are correct; filter file_paths in the output dict, not in sm.
2108
2239
  data = compact_view(sm, no_tree=no_tree, full=full)
2240
+ if changed_only and _allowed_changed_files:
2241
+ # GAP-5: preserve full entry_points; filter file_paths display only.
2242
+ # ALWAYS-INCLUDE: security-const files stay for Java constant refs.
2243
+ _fps_full = data.get("file_paths", [])
2244
+ data["file_paths"] = [
2245
+ p for p in _fps_full
2246
+ if p in _allowed_changed_files or _is_always_include_ref(p)
2247
+ ]
2109
2248
  if not no_redact:
2110
2249
  data = redact_dict(data)
2111
2250
  # P0-1: Apply output budget — safety net for large repos.
@@ -2157,6 +2296,20 @@ def main(
2157
2296
  "generated_at": _dt.datetime.now(_dt.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
2158
2297
  "data_scope": _data_scope_fresh,
2159
2298
  })
2299
+ # P0-B: wrap --changed-only cold-start dirty output in unified schema.
2300
+ if changed_only and compact:
2301
+ try:
2302
+ _co_inner = json.loads(content)
2303
+ content = json.dumps({
2304
+ "schema_version": "1.0",
2305
+ "changed_files_count": len(_allowed_changed_files) if _allowed_changed_files else 0,
2306
+ "changed_files": sorted(_allowed_changed_files) if _allowed_changed_files else [],
2307
+ "analysis_scope": "partial" if _allowed_changed_files else "empty",
2308
+ "context": _co_inner,
2309
+ "_meta": {"changed_only": True, "cache_source": "fresh"},
2310
+ }, indent=2, ensure_ascii=False)
2311
+ except Exception:
2312
+ pass
2160
2313
  write_output(content, output=output)
2161
2314
 
2162
2315
  # Persist to two-layer cache (git SHA unchanged → re-use on next run).
@@ -2169,7 +2322,9 @@ def main(
2169
2322
  # Writes happen in a background daemon thread so cold-run latency is not
2170
2323
  # penalised by gzip encoding + disk I/O. atexit join ensures writes
2171
2324
  # complete on clean exit without blocking the user-visible response.
2172
- if not no_cache and _core_key and not _pipeline_error:
2325
+ # Skip cache write for --changed-only: content is a filtered diff view,
2326
+ # not valid for reuse as a base compact on subsequent runs.
2327
+ if not no_cache and _core_key and not _pipeline_error and not changed_only:
2173
2328
  import atexit as _atexit
2174
2329
  import threading as _threading
2175
2330
 
@@ -3124,6 +3279,11 @@ def repo_ir_cmd(
3124
3279
  "-c",
3125
3280
  help="Copy output to system clipboard after a successful run. No-op when --output is used or clipboard is unavailable.",
3126
3281
  ),
3282
+ force: bool = typer.Option(
3283
+ False,
3284
+ "--force",
3285
+ help="Bypass the token-size guard and emit output even when estimated tokens exceed 50K.",
3286
+ ),
3127
3287
  ) -> None:
3128
3288
  """Deterministic symbol-level IR for Java repositories.
3129
3289
 
@@ -3222,13 +3382,23 @@ def repo_ir_cmd(
3222
3382
  )
3223
3383
  else:
3224
3384
  _ir_size = len(output.encode("utf-8"))
3225
- if _ir_size > 400_000:
3226
- _ir_tokens = _ir_size // 4
3385
+ _ir_tokens_est = _ir_size // 4
3386
+ # P1-C: abort when estimated tokens > 50K unless --force or --output is given.
3387
+ if _ir_tokens_est > 50_000 and not force:
3388
+ _emit_error_json(
3389
+ "OUTPUT_TOO_LARGE",
3390
+ f"Estimated output is ~{_ir_tokens_est // 1000}K tokens — too large for most LLM context windows.",
3391
+ hint=(
3392
+ "Use --summary-only (~5K tokens), --max-nodes N --max-edges N, "
3393
+ "--output FILE to save to disk, or --force to bypass this guard."
3394
+ ),
3395
+ expected="Output under 50K estimated tokens.",
3396
+ )
3397
+ raise typer.Exit(1)
3398
+ if _ir_tokens_est > 10_000:
3227
3399
  sys.stderr.write(
3228
- f"WARNING: Output is ~{_ir_tokens // 1000}K tokens. This exceeds the context window of "
3229
- "most LLMs (GPT-4o: 128K, Claude Sonnet: 200K). "
3230
- "Use --compact or --agent for LLM-consumable context. "
3231
- "Use --output FILE to save for local search tools.\n"
3400
+ f"[repo-ir] ~{_ir_tokens_est // 1000}K tokens "
3401
+ "use --summary-only or --output FILE for smaller output.\n"
3232
3402
  )
3233
3403
  sys.stderr.flush()
3234
3404
  try:
@@ -3437,6 +3607,18 @@ def endpoints_cmd(
3437
3607
  "-c",
3438
3608
  help="Copy output to system clipboard after a successful run. No-op when --output is used or clipboard is unavailable.",
3439
3609
  ),
3610
+ path_prefix: Optional[str] = typer.Option(
3611
+ None, "--path-prefix", "-p",
3612
+ help="Filter endpoints whose URL path starts with this prefix. Example: /v1/liquidacion",
3613
+ ),
3614
+ controller: Optional[str] = typer.Option(
3615
+ None, "--controller",
3616
+ help="Filter endpoints from this controller class (substring match). Example: LiquidacionJornada",
3617
+ ),
3618
+ limit: Optional[int] = typer.Option(
3619
+ None, "--limit", "-n",
3620
+ help="Maximum number of endpoints to return.",
3621
+ ),
3440
3622
  ) -> None:
3441
3623
  """Extract REST API endpoint surface from Java source files.
3442
3624
 
@@ -3451,6 +3633,9 @@ def endpoints_cmd(
3451
3633
  sourcecode endpoints /path/to/repo
3452
3634
  sourcecode endpoints . --output endpoints.json
3453
3635
  sourcecode endpoints . --format yaml
3636
+ sourcecode endpoints . --path-prefix /v1/liquidacion
3637
+ sourcecode endpoints . --controller LiquidacionJornada
3638
+ sourcecode endpoints . --limit 10
3454
3639
  """
3455
3640
  target = path.resolve()
3456
3641
  if not target.exists() or not target.is_dir():
@@ -3472,6 +3657,26 @@ def endpoints_cmd(
3472
3657
  except Exception:
3473
3658
  pass
3474
3659
 
3660
+ # Apply filters before serialization.
3661
+ _total_before = data.get("total", len(data.get("endpoints", [])))
3662
+ endpoints_list = data.get("endpoints", [])
3663
+ if path_prefix:
3664
+ endpoints_list = [e for e in endpoints_list if e.get("path", "").startswith(path_prefix)]
3665
+ if controller:
3666
+ _ctrl_lower = controller.lower()
3667
+ endpoints_list = [e for e in endpoints_list if _ctrl_lower in e.get("controller", "").lower()]
3668
+ if limit is not None and limit > 0:
3669
+ endpoints_list = endpoints_list[:limit]
3670
+ if path_prefix or controller or limit is not None:
3671
+ data["endpoints"] = endpoints_list
3672
+ data["total"] = len(endpoints_list)
3673
+ data["_filter"] = {
3674
+ "path_prefix": path_prefix,
3675
+ "controller": controller,
3676
+ "limit": limit,
3677
+ "total_before_filter": _total_before,
3678
+ }
3679
+
3475
3680
  output = _serialize_dict(data, format)
3476
3681
 
3477
3682
  if output_path is not None:
@@ -3995,6 +4200,11 @@ def config_cmd() -> None:
3995
4200
  @app.command("cold-start")
3996
4201
  def cold_start_cmd(
3997
4202
  path: Path = typer.Argument(Path("."), help="Repository path (default: current directory)"),
4203
+ compact: bool = typer.Option(
4204
+ False,
4205
+ "--compact",
4206
+ help="Emit a compact subset (~10K tokens): status, git_head, stacks, entry_points, and key_dependencies only.",
4207
+ ),
3998
4208
  ) -> None:
3999
4209
  """Output Repository Intelligence Snapshot bootstrap context as JSON.
4000
4210
 
@@ -4002,24 +4212,33 @@ def cold_start_cmd(
4002
4212
  status: cold_start_ready | cold_start_stale | no_ris
4003
4213
 
4004
4214
  \b
4005
- Note: Produces large output (~100K–200K tokens for medium repos).
4006
- Designed for bootstrap snapshots, not direct LLM injection.
4007
- Use --compact or --agent instead for LLM-consumable context.
4008
- Use --output FILE to save for local search tools.
4215
+ Note: Full output is large (~100K–200K tokens for medium repos).
4216
+ Use --compact for a ~10K token subset safe for direct LLM injection.
4217
+ Use --output FILE to save the full snapshot for local search tools.
4009
4218
  """
4010
4219
  import json as _json
4011
4220
  from sourcecode.ris import get_cold_start_context as _gcs
4012
4221
  target = Path(path).resolve()
4013
4222
  result = _gcs(target)
4223
+ if compact:
4224
+ # P1-C: cap at ~10K tokens — keep only fields essential for orientation.
4225
+ _cs_keys = {"status", "git_head", "stacks", "entry_points",
4226
+ "key_dependencies", "project_type", "project_summary",
4227
+ "validation", "_meta"}
4228
+ result = {k: v for k, v in result.items() if k in _cs_keys}
4229
+ result["_meta"] = {**(result.get("_meta") or {}), "compact_mode": True,
4230
+ "full_available": "sourcecode cold-start (without --compact)"}
4014
4231
  _out = _json.dumps(result, indent=2, ensure_ascii=False)
4015
4232
  _size = len(_out.encode("utf-8"))
4016
- if _size > 400_000:
4017
- _tokens = _size // 4
4233
+ _tokens = _size // 4
4234
+ _out_with_meta = _json.loads(_out)
4235
+ _out_with_meta.setdefault("_meta", {})["estimated_tokens"] = _tokens
4236
+ _out = _json.dumps(_out_with_meta, indent=2, ensure_ascii=False)
4237
+ if not compact and _size > 400_000:
4018
4238
  sys.stderr.write(
4019
4239
  f"WARNING: Output is ~{_tokens // 1000}K tokens. This exceeds the context window of "
4020
4240
  "most LLMs (GPT-4o: 128K, Claude Sonnet: 200K). "
4021
- "Use --compact or --agent for LLM-consumable context. "
4022
- "Use --output FILE to save for local search tools.\n"
4241
+ "Use --compact for a ~10K token subset, or --output FILE to save.\n"
4023
4242
  )
4024
4243
  sys.stderr.flush()
4025
4244
  typer.echo(_out)
@@ -85,7 +85,7 @@ _PATTERNS: list[tuple[str, re.Pattern]] = [
85
85
  r"""System\.getenv\(\s*["']([A-Z][A-Z0-9_]*)["']\s*\)"""
86
86
  )),
87
87
  ("java_spring_value", re.compile(
88
- r"""@Value\(\s*["']\$\{([A-Z][A-Z0-9_]*)(?::[^}]*)?\}["']\s*\)"""
88
+ r"""@Value\(\s*["']\$\{([A-Za-z][A-Za-z0-9._\-]*)(?::[^}]*)?\}["']\s*\)"""
89
89
  )),
90
90
  ("php_getenv", re.compile(
91
91
  r"""getenv\(\s*["']([A-Z][A-Z0-9_]*)["']\s*\)"""
@@ -51,15 +51,20 @@ def run_command(args: list[str]) -> Any:
51
51
  stderr_raw = getattr(result, "stderr", "")
52
52
  stdout = stdout_raw.strip() if isinstance(stdout_raw, str) else ""
53
53
  stderr = stderr_raw.strip() if isinstance(stderr_raw, str) else ""
54
- error_text = stderr or stdout
54
+ # P1-B: structured errors (e.g. pro_required) are written to stdout as
55
+ # JSON while stderr carries the human-readable message. Try stdout first
56
+ # for JSON; fall back to stderr so we never lose a structured payload.
55
57
  payload = None
56
- if error_text:
58
+ for _candidate in (stdout, stderr):
59
+ if not _candidate:
60
+ continue
57
61
  try:
58
- parsed = json.loads(error_text)
62
+ _parsed = json.loads(_candidate)
59
63
  except json.JSONDecodeError:
60
- parsed = None
61
- if isinstance(parsed, dict):
62
- payload = parsed
64
+ continue
65
+ if isinstance(_parsed, dict):
66
+ payload = _parsed
67
+ break
63
68
  raise CommandError(
64
69
  f"sourcecode command failed (exit {result.exit_code}). Args: {args}",
65
70
  exit_code=result.exit_code,
@@ -99,6 +99,32 @@ def _err(
99
99
  def _coerce_cli_error(exc: Exception, default_message: str) -> CallToolResult:
100
100
  payload = getattr(exc, "payload", None)
101
101
  if isinstance(payload, dict):
102
+ # P1-B: pro_required uses legacy flat format {error:"pro_required", feature, message}.
103
+ # exit code 2 = Pro license required.
104
+ if (
105
+ payload.get("error") == "pro_required"
106
+ or getattr(exc, "exit_code", None) == 2
107
+ and isinstance(payload.get("error"), str)
108
+ and "pro" in payload.get("error", "").lower()
109
+ ):
110
+ feature = payload.get("feature", "")
111
+ msg = payload.get("message", f"'{feature}' requires a Pro license. Run: sourcecode activate <key>")
112
+ structured = {
113
+ "success": False,
114
+ "data": None,
115
+ "error": build_error_object(
116
+ "PRO_REQUIRED",
117
+ msg,
118
+ hint="sourcecode activate <license_key>",
119
+ expected="Active Pro license.",
120
+ ),
121
+ }
122
+ if feature:
123
+ structured["feature"] = feature
124
+ return CallToolResult(
125
+ content=[TextContent(type="text", text=json.dumps(structured))],
126
+ isError=True,
127
+ )
102
128
  if "error" in payload and isinstance(payload["error"], dict):
103
129
  error = payload["error"]
104
130
  normalized = {
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes