sourcecode 0.47.0__py3-none-any.whl → 0.49.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sourcecode/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "0.47.0"
3
+ __version__ = "0.49.0"
sourcecode/cli.py CHANGED
@@ -1445,8 +1445,10 @@ def main(
1445
1445
  data = agent_view(sm)
1446
1446
  # When contract pipeline ran (mode=contract, no legacy flags), include
1447
1447
  # per-file contracts in agent output so agents get structural context.
1448
+ # Remove file_relevance — contracts cover this signal with more detail.
1448
1449
  if _is_contract_mode and sm.file_contracts:
1449
1450
  from sourcecode.serializer import _serialize_contract_minimal
1451
+ data.pop("file_relevance", None)
1450
1452
  data["contracts"] = [_serialize_contract_minimal(c) for c in sm.file_contracts]
1451
1453
  if sm.contract_summary is not None:
1452
1454
  cs = sm.contract_summary
@@ -177,8 +177,8 @@ class ContextScorer:
177
177
  node_scores: dict[str, NodeScore],
178
178
  contracts: list[Any],
179
179
  *,
180
- budget: int = 30,
181
- min_score: float = 0.05,
180
+ budget: int = 25,
181
+ min_score: float = 0.15,
182
182
  ) -> list[str]:
183
183
  """Greedy minimum-sufficient subgraph selection with diversity re-ranking.
184
184
 
@@ -17,6 +17,7 @@ from typing import Any, Literal, Optional
17
17
 
18
18
  from sourcecode.ast_extractor import AstExtractor, _LANGUAGE_MAP
19
19
  from sourcecode.contract_model import ContractSummary, FileContract
20
+ from sourcecode.entrypoint_classifier import is_production_entry_point
20
21
  from sourcecode.ranking_engine import RankingEngine
21
22
  from sourcecode.relevance_scorer import RelevanceScorer
22
23
  from sourcecode.schema import EntryPoint, MonorepoPackageInfo
@@ -25,7 +26,9 @@ from sourcecode.schema import EntryPoint, MonorepoPackageInfo
25
26
  # Constants
26
27
  # ---------------------------------------------------------------------------
27
28
 
28
- _MAX_FILES = 500 # hard cap on files extracted per run
29
+ _MAX_FILES = 500 # hard cap on files extracted per run
30
+ _MAX_CONTRACTS = 15 # default top-N output cap — omit rather than flood
31
+ _MIN_CONTRACT_SCORE = 0.15 # drop contracts below this relevance threshold
29
32
  _SRC_EXTENSIONS: frozenset[str] = frozenset(_LANGUAGE_MAP.keys())
30
33
 
31
34
 
@@ -178,12 +181,20 @@ class ContractPipeline:
178
181
  max_importers: int = 50,
179
182
  semantic_calls: Optional[list] = None,
180
183
  code_notes: Optional[list] = None,
184
+ max_contracts: Optional[int] = _MAX_CONTRACTS,
181
185
  ) -> tuple[list[FileContract], ContractSummary]:
182
186
  """Run the full extraction pipeline.
183
187
 
184
188
  Returns (ranked_contracts, summary).
185
189
  """
186
- entry_paths = {ep.path.replace("\\", "/") for ep in (entry_points or [])}
190
+ # Only treat production-classified entrypoints as anchors.
191
+ # Convention/heuristic entrypoints (index.ts barrels, plugin examples)
192
+ # must not bubble to the top via is_entrypoint=True ranking boost.
193
+ entry_paths = {
194
+ ep.path.replace("\\", "/")
195
+ for ep in (entry_points or [])
196
+ if is_production_entry_point(ep)
197
+ }
187
198
  scorer = RelevanceScorer(monorepo_packages)
188
199
  engine = RankingEngine(monorepo_packages)
189
200
 
@@ -299,16 +310,25 @@ class ContractPipeline:
299
310
  max_importers=max_importers,
300
311
  )
301
312
 
302
- # 9. Entrypoints-only filter
313
+ # 9. Entrypoints-only filter — production entry points only, no export spill
303
314
  if entrypoints_only and not symbol:
304
- contracts = [c for c in contracts if c.is_entrypoint or c.exports]
305
-
306
- # 10. Compress types if requested
315
+ contracts = [c for c in contracts if c.is_entrypoint]
316
+
317
+ # 10. Top-N cap — enforce max_contracts when not in symbol-search mode.
318
+ # Symbol searches must return all matching files; budget applies only to
319
+ # the default architectural briefing use case.
320
+ if symbol is None and max_contracts is not None:
321
+ contracts = [
322
+ c for c in contracts
323
+ if c.relevance_score >= _MIN_CONTRACT_SCORE or c.is_entrypoint
324
+ ][:max_contracts]
325
+
326
+ # 11. Compress types if requested
307
327
  if compress_types:
308
328
  for c in contracts:
309
329
  _compress_contract_types(c)
310
330
 
311
- # 11. Apply max_symbols limit (limits total exports across all contracts)
331
+ # 12. Apply max_symbols limit (limits total exports across all contracts)
312
332
  if max_symbols is not None and max_symbols > 0:
313
333
  contracts = _limit_symbols(contracts, max_symbols)
314
334
 
@@ -758,7 +758,7 @@ class TaskContextBuilder:
758
758
  )
759
759
  for total, path, rf in scored
760
760
  }
761
- _selected = _ctx.select_subgraph(_ns, contracts=[], budget=15, min_score=0.05)
761
+ _selected = _ctx.select_subgraph(_ns, contracts=[], budget=15, min_score=0.15)
762
762
  _rf_map = {path: rf for _, path, rf in scored}
763
763
  return [_rf_map[p] for p in _selected if p in _rf_map]
764
764
  except Exception:
@@ -124,8 +124,9 @@ _HIGH_VALUE_SUFFIXES: frozenset[str] = frozenset({
124
124
  })
125
125
 
126
126
  _ENTRYPOINT_STEMS: frozenset[str] = frozenset({
127
- "main", "cli", "app", "server", "index", "__main__",
127
+ "main", "cli", "app", "server", "__main__",
128
128
  "application", "bootstrap", "entry",
129
+ # "index" removed — barrel export convention, not a runtime entrypoint signal
129
130
  })
130
131
 
131
132
 
sourcecode/serializer.py CHANGED
@@ -31,7 +31,7 @@ from sourcecode.schema import (
31
31
  _EP_PRODUCTION_CAP = 5 # max production entry points in default output
32
32
  _EP_DEV_CAP = 3 # max development entry points in default output
33
33
  _FILE_RELEVANCE_LIMIT = 10 # max files in file_relevance section
34
- _FILE_RELEVANCE_MIN_COMBINED = 0.20 # minimum combined score to appear (out of ~2.0 max)
34
+ _FILE_RELEVANCE_MIN_COMBINED = 0.40 # minimum combined score — must earn inclusion
35
35
  _PROD_DEPS_CAP = 10 # max production dependencies shown
36
36
  _SECONDARY_DEPS_CAP = 5 # max per dev/test/build dependency group
37
37
  _MONOREPO_PKGS_CAP = 8 # max workspace/runtime packages shown
@@ -1130,9 +1130,6 @@ def _contract_view_minimal(
1130
1130
  "project": project,
1131
1131
  }
1132
1132
 
1133
- if sm.metadata.traversal_topology:
1134
- result["traversal"] = sm.metadata.traversal_topology
1135
-
1136
1133
  # Per-file contracts
1137
1134
  if contracts:
1138
1135
  serialized: list[dict[str, Any]] = []
@@ -1310,6 +1307,10 @@ def _compress_sig(name: str, sig: str, max_len: int = 100) -> str:
1310
1307
  return full
1311
1308
 
1312
1309
 
1310
+ _MAX_FN_PER_CONTRACT = 5 # max function signatures per contract (token budget)
1311
+ _MAX_SIG_LEN = 60 # max chars per compressed signature
1312
+
1313
+
1313
1314
  def _serialize_contract_minimal(c: Any) -> dict[str, Any]:
1314
1315
  """Serialize one FileContract to minimal format."""
1315
1316
  item: dict[str, Any] = {"path": c.path, "role": c.role}
@@ -1317,38 +1318,47 @@ def _serialize_contract_minimal(c: Any) -> dict[str, Any]:
1317
1318
  if c.is_changed:
1318
1319
  item["changed"] = True
1319
1320
 
1320
- # Exports: flat string for functions/unknown, {name,k} for others
1321
- # When all exports are same non-function kind, group them
1322
- if c.exports:
1323
- exs: list[Any] = []
1324
- kinds = {e.kind for e in c.exports}
1325
- if len(kinds) == 1 and "function" not in kinds and "unknown" not in kinds:
1326
- # All same non-function kind — compact: {"k": "class", "names": [...]}
1327
- only_kind = next(iter(kinds))
1328
- exs = [{"k": only_kind, "names": sorted(e.name for e in c.exports)}]
1329
- else:
1330
- for e in sorted(c.exports, key=lambda e: e.name):
1331
- if e.kind in ("function", "unknown"):
1332
- exs.append(e.name)
1333
- else:
1334
- exs.append({"name": e.name, "k": e.kind})
1335
- item["exports"] = exs
1336
-
1337
- # External deps (non-stdlib already filtered in extractor)
1338
- if c.dependencies:
1339
- item["deps"] = sorted(c.dependencies)
1340
-
1341
- # Exported function signatures — compressed
1321
+ # Exported function signatures — compressed, capped
1342
1322
  exported_names = {e.name for e in c.exports}
1323
+ fn_names_in_sigs: set[str] = set()
1343
1324
  if c.functions:
1344
1325
  fns = []
1345
1326
  for f in sorted(c.functions, key=lambda f: f.name):
1346
1327
  if not (f.exported or f.name in exported_names):
1347
1328
  continue
1348
- fns.append(_compress_sig(f.name, f.signature))
1329
+ fns.append(_compress_sig(f.name, f.signature, max_len=_MAX_SIG_LEN))
1330
+ fn_names_in_sigs.add(f.name)
1331
+ if len(fns) >= _MAX_FN_PER_CONTRACT:
1332
+ break
1349
1333
  if fns:
1350
1334
  item["fn"] = fns
1351
1335
 
1336
+ # Exports: omit function names already shown in fn; keep non-function exports
1337
+ if c.exports:
1338
+ exs: list[Any] = []
1339
+ non_fn_exports = [e for e in c.exports if e.kind not in ("function", "unknown")]
1340
+ fn_exports_not_in_sig = [
1341
+ e for e in c.exports
1342
+ if e.kind in ("function", "unknown") and e.name not in fn_names_in_sigs
1343
+ ]
1344
+ remaining = non_fn_exports + fn_exports_not_in_sig
1345
+ if remaining:
1346
+ kinds = {e.kind for e in remaining}
1347
+ if len(kinds) == 1 and "function" not in kinds and "unknown" not in kinds:
1348
+ only_kind = next(iter(kinds))
1349
+ exs = [{"k": only_kind, "names": sorted(e.name for e in remaining)}]
1350
+ else:
1351
+ for e in sorted(remaining, key=lambda e: e.name):
1352
+ if e.kind in ("function", "unknown"):
1353
+ exs.append(e.name)
1354
+ else:
1355
+ exs.append({"name": e.name, "k": e.kind})
1356
+ item["exports"] = exs
1357
+
1358
+ # External deps (non-stdlib already filtered in extractor)
1359
+ if c.dependencies:
1360
+ item["deps"] = sorted(c.dependencies)
1361
+
1352
1362
  # Types: skip if fully covered by exports (avoids duplication in model files)
1353
1363
  if c.types:
1354
1364
  export_names_set = {e.name for e in c.exports}
@@ -1363,12 +1373,6 @@ def _serialize_contract_minimal(c: Any) -> dict[str, Any]:
1363
1373
  if c.hooks_used:
1364
1374
  item["hooks"] = c.hooks_used
1365
1375
 
1366
- # Ranking signals: why this file was ranked here
1367
- if getattr(c, "ranking_reasons", None):
1368
- non_trivial = [r for r in c.ranking_reasons if r not in ("source file", "noise")]
1369
- if non_trivial:
1370
- item["why"] = non_trivial
1371
-
1372
1376
  return item
1373
1377
 
1374
1378
 
@@ -1409,12 +1413,12 @@ def _contract_view_standard(
1409
1413
  "package_manager": s.package_manager}
1410
1414
  for s in sm.stacks
1411
1415
  ],
1412
- "entry_points": ep_groups["production"],
1416
+ "entry_points": ep_groups["production"][:_EP_PRODUCTION_CAP],
1413
1417
  }
1414
1418
  if sm.metadata.traversal_topology:
1415
1419
  result["traversal"] = sm.metadata.traversal_topology
1416
1420
  if ep_groups["development"]:
1417
- result["development_entry_points"] = ep_groups["development"]
1421
+ result["development_entry_points"] = ep_groups["development"][:_EP_DEV_CAP]
1418
1422
 
1419
1423
  if sm.confidence_summary is not None:
1420
1424
  result["confidence"] = {
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 0.47.0
3
+ Version: 0.49.0
4
4
  Summary: Deterministic codebase context for AI coding agents
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -1,16 +1,16 @@
1
- sourcecode/__init__.py,sha256=d1ZO7BWBwtFn6pK50mntp9_F0e-laNVU7vBBdeZXfU0,103
1
+ sourcecode/__init__.py,sha256=W3DJGnBZMJZBnvn9pO7FSLfHppERKWNuRgtqy1X-umM,103
2
2
  sourcecode/adaptive_scanner.py,sha256=6dh34C2qZXyRbw-8xBhbEwDdXanM6CRFRWayVoYITnA,10190
3
3
  sourcecode/architecture_analyzer.py,sha256=O4AXc7l_WTzIXrcAzstqZy-TGKNaFa6p3MzpgVjaO8g,27749
4
4
  sourcecode/architecture_summary.py,sha256=rSY5MRiaz4N1YdG0pqDTDuFjSN7PO_Zplx-dtNzv2Yo,19985
5
5
  sourcecode/ast_extractor.py,sha256=0OHQwTUBBc9lmqPLryVeB1z8dGIC6NhLlar800CD9oI,41129
6
6
  sourcecode/classifier.py,sha256=GKTMN8qKZX7ponSwDJfN08RrasI4CVpq1_gFBgEopps,7093
7
- sourcecode/cli.py,sha256=BC83AWW3oSIWcQM8IPN0tU-1bSb2NyhkMS_Fyi3bRRc,71887
7
+ sourcecode/cli.py,sha256=YusMOF5OfihL3nBw66LcANRFSiVHugPrXE0vPIycjLQ,72016
8
8
  sourcecode/code_notes_analyzer.py,sha256=rRd8bFYV0krjlxxQV0wenwE9K7pVpUQSR7KvSvUQKw4,9226
9
9
  sourcecode/confidence_analyzer.py,sha256=HxJMPLI5ulqtkncnv98W4iVO6yMbpQo87VuxiuNbDmY,12167
10
- sourcecode/context_scorer.py,sha256=nhppAo80fblAqcB9Ns0iQd21TZUrl2mQMo_xzPgavRE,14679
10
+ sourcecode/context_scorer.py,sha256=QpChSpsmaAYz91rXA4Ue5xzQmNz_ZboZN09YOHScq1U,14679
11
11
  sourcecode/context_summarizer.py,sha256=CiQrfBEzun949bWvmLabWoj2HhPn6Lw62ofqnsy0FlQ,6503
12
12
  sourcecode/contract_model.py,sha256=gCf9-Kj0G7l0lvRTAcRfFAfMgs1Rpizv4mKovQLYUkw,3434
13
- sourcecode/contract_pipeline.py,sha256=wWkm0_zScUy-Q-mrAG57qysYxjNPOybjObxpwp859e0,24615
13
+ sourcecode/contract_pipeline.py,sha256=eann6PZJ8Bg2zavaOhmfN_oy9rMWNSkKPDW68rRZV8E,25686
14
14
  sourcecode/coverage_parser.py,sha256=q0LeZJaX1bnntLu-ImksdBsMlpsVmk_iUfSaB4eaJGo,19702
15
15
  sourcecode/dependency_analyzer.py,sha256=Exq0BfInvfS5iAg9xAr6WI2uPNuotkIudTKcYJcRhB8,52757
16
16
  sourcecode/doc_analyzer.py,sha256=TttdS7mndKQhyJCfJnnAsyGCJrf-TIL7oXxDlTLUFKE,21248
@@ -20,16 +20,16 @@ sourcecode/file_classifier.py,sha256=_KfFIIolharaIxbSTrCkaWauQIqNHCyor_n47RGyDh8
20
20
  sourcecode/git_analyzer.py,sha256=PD3eNWydznQ6KLNpxGzBqizIHoPIKevfwz9Xyf_pDt4,11600
21
21
  sourcecode/graph_analyzer.py,sha256=hMOsLLz9B0UnQ4xwbHdgr3bFvqpw0bQ8kN-xmEn3Krk,64156
22
22
  sourcecode/metrics_analyzer.py,sha256=e2cFwB9XubFq_dIVsP2PLjpr4wX0N6ulb3ol3sGDUeo,20777
23
- sourcecode/prepare_context.py,sha256=pSVti4gjfBQM24bBCnEW74uZ_c6XijX6z4f8Bt-DkSY,33943
23
+ sourcecode/prepare_context.py,sha256=n7NghZJt8zPt7bzMVpk6gvHlQfhwDYjuLJjgHSOTfD4,33943
24
24
  sourcecode/ranking_engine.py,sha256=virVglafZufioHpZpwktjMvUiL0TZELWQCQnQNV8dFo,9360
25
25
  sourcecode/redactor.py,sha256=xuGcadGEHaPw4qZXlMDvzMCsr4VOkdp3oBQptHyJk8c,2884
26
- sourcecode/relevance_scorer.py,sha256=E74w7nlsNVobO3LqKHiMtBd84ONwGp8uDpwXJEjRtLA,8330
26
+ sourcecode/relevance_scorer.py,sha256=MYF4FFkveAQps9SmTeTlh6ODiBz2F--_hWNeHMLtUHQ,8405
27
27
  sourcecode/repo_classifier.py,sha256=FG1vaWKdWXsWdl-S8hjVMiTqcwgaRXkDyvK4rPcOGtQ,22681
28
28
  sourcecode/runtime_classifier.py,sha256=zWX3r3HCKHc-qtIobErOa8aKMmaoPYREtJKvPcBGPjQ,14792
29
29
  sourcecode/scanner.py,sha256=aM3h9-DCQ3xKpeHpHYdo2vX6T5P95HA_YwZbkAVNwmo,8288
30
30
  sourcecode/schema.py,sha256=ofEge9hTWHOTjeWt7ceCDQWzP-uhhenrYX2usjW2KVU,22759
31
31
  sourcecode/semantic_analyzer.py,sha256=16EFTgM7ooW0m5gNUKOlTSn7IEMLSzKmzQn-cWaSqjs,82604
32
- sourcecode/serializer.py,sha256=Bt7HLDu6qt6NllsYhoPEeA1Nrk8nDkwZByymwuF3NKs,62595
32
+ sourcecode/serializer.py,sha256=nh8DNGVPVszy60YnWGVH_sLyskgDN973glPIMzNeFWA,62843
33
33
  sourcecode/summarizer.py,sha256=ZuzIdm3t8A-d5MuQL0TSNLrd-L0IQIuguIxeNXMNJf8,16070
34
34
  sourcecode/tree_utils.py,sha256=Fj9OIuUksBvgibNd3feog0sMDjVypJzPexp5lvMoYWI,1424
35
35
  sourcecode/workspace.py,sha256=X_6NmNnitvT3_38V-JDChydo_sR68s249hLFlrQskU0,8271
@@ -60,8 +60,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
60
60
  sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
61
61
  sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
62
62
  sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
63
- sourcecode-0.47.0.dist-info/METADATA,sha256=5jHdy2fsqrC8KEFvA7Z_ZcunHN_zUyDcYafe0X1tKWs,25209
64
- sourcecode-0.47.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
65
- sourcecode-0.47.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
66
- sourcecode-0.47.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
67
- sourcecode-0.47.0.dist-info/RECORD,,
63
+ sourcecode-0.49.0.dist-info/METADATA,sha256=5FVQYOuzhccMc8oiJ-tPJPr3XJqrdDzWRWf32W8HqWk,25209
64
+ sourcecode-0.49.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
65
+ sourcecode-0.49.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
66
+ sourcecode-0.49.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
67
+ sourcecode-0.49.0.dist-info/RECORD,,