sourcecode 1.33.13__tar.gz → 1.33.15__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (95)
  1. {sourcecode-1.33.13 → sourcecode-1.33.15}/PKG-INFO +1 -1
  2. {sourcecode-1.33.13 → sourcecode-1.33.15}/pyproject.toml +1 -1
  3. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/__init__.py +1 -1
  4. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/file_classifier.py +9 -6
  5. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/prepare_context.py +50 -15
  6. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/ris.py +12 -0
  7. {sourcecode-1.33.13 → sourcecode-1.33.15}/.github/workflows/build-windows.yml +0 -0
  8. {sourcecode-1.33.13 → sourcecode-1.33.15}/.gitignore +0 -0
  9. {sourcecode-1.33.13 → sourcecode-1.33.15}/.ruff.toml +0 -0
  10. {sourcecode-1.33.13 → sourcecode-1.33.15}/CHANGELOG.md +0 -0
  11. {sourcecode-1.33.13 → sourcecode-1.33.15}/CONTRIBUTING.md +0 -0
  12. {sourcecode-1.33.13 → sourcecode-1.33.15}/LICENSE +0 -0
  13. {sourcecode-1.33.13 → sourcecode-1.33.15}/README.md +0 -0
  14. {sourcecode-1.33.13 → sourcecode-1.33.15}/SECURITY.md +0 -0
  15. {sourcecode-1.33.13 → sourcecode-1.33.15}/raw +0 -0
  16. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/adaptive_scanner.py +0 -0
  17. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/architecture_analyzer.py +0 -0
  18. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/architecture_summary.py +0 -0
  19. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/ast_extractor.py +0 -0
  20. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/cache.py +0 -0
  21. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/canonical_ir.py +0 -0
  22. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/classifier.py +0 -0
  23. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/cli.py +0 -0
  24. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/code_notes_analyzer.py +0 -0
  25. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/confidence_analyzer.py +0 -0
  26. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/context_scorer.py +0 -0
  27. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/context_summarizer.py +0 -0
  28. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/contract_model.py +0 -0
  29. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/contract_pipeline.py +0 -0
  30. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/coverage_parser.py +0 -0
  31. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/dependency_analyzer.py +0 -0
  32. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/detectors/__init__.py +0 -0
  33. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/detectors/base.py +0 -0
  34. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/detectors/csproj_parser.py +0 -0
  35. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/detectors/dart.py +0 -0
  36. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/detectors/dotnet.py +0 -0
  37. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/detectors/elixir.py +0 -0
  38. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/detectors/go.py +0 -0
  39. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/detectors/heuristic.py +0 -0
  40. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/detectors/hybrid.py +0 -0
  41. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/detectors/java.py +0 -0
  42. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/detectors/jvm_ext.py +0 -0
  43. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/detectors/nodejs.py +0 -0
  44. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/detectors/parsers.py +0 -0
  45. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/detectors/php.py +0 -0
  46. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/detectors/project.py +0 -0
  47. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/detectors/python.py +0 -0
  48. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/detectors/ruby.py +0 -0
  49. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/detectors/rust.py +0 -0
  50. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/detectors/systems.py +0 -0
  51. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/detectors/terraform.py +0 -0
  52. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/detectors/tooling.py +0 -0
  53. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/doc_analyzer.py +0 -0
  54. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/entrypoint_classifier.py +0 -0
  55. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/env_analyzer.py +0 -0
  56. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/error_schema.py +0 -0
  57. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/flow_analyzer.py +0 -0
  58. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/git_analyzer.py +0 -0
  59. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/graph_analyzer.py +0 -0
  60. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/license.py +0 -0
  61. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/mcp/__init__.py +0 -0
  62. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/mcp/onboarding/__init__.py +0 -0
  63. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/mcp/onboarding/applier.py +0 -0
  64. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/mcp/onboarding/backup.py +0 -0
  65. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/mcp/onboarding/detector.py +0 -0
  66. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/mcp/onboarding/planner.py +0 -0
  67. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/mcp/orchestrator.py +0 -0
  68. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/mcp/registry.py +0 -0
  69. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/mcp/runner.py +0 -0
  70. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/mcp/server.py +0 -0
  71. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/mcp_nudge.py +0 -0
  72. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/metrics_analyzer.py +0 -0
  73. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/output_budget.py +0 -0
  74. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/path_filters.py +0 -0
  75. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/pr_comment_renderer.py +0 -0
  76. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/progress.py +0 -0
  77. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/ranking_engine.py +0 -0
  78. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/redactor.py +0 -0
  79. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/relevance_scorer.py +0 -0
  80. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/repo_classifier.py +0 -0
  81. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/repository_ir.py +0 -0
  82. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/runtime_classifier.py +0 -0
  83. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/scanner.py +0 -0
  84. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/schema.py +0 -0
  85. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/semantic_analyzer.py +0 -0
  86. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/serializer.py +0 -0
  87. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/summarizer.py +0 -0
  88. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/telemetry/__init__.py +0 -0
  89. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/telemetry/config.py +0 -0
  90. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/telemetry/consent.py +0 -0
  91. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/telemetry/events.py +0 -0
  92. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/telemetry/filters.py +0 -0
  93. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/telemetry/transport.py +0 -0
  94. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/tree_utils.py +0 -0
  95. {sourcecode-1.33.13 → sourcecode-1.33.15}/src/sourcecode/workspace.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 1.33.13
3
+ Version: 1.33.15
4
4
  Summary: Persistent structural context and ultra-fast repeated analysis for AI coding agents
5
5
  License-File: LICENSE
6
6
  Keywords: agents,ai,codebase,context,developer-tools,llm
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "sourcecode"
7
- version = "1.33.13"
7
+ version = "1.33.15"
8
8
  description = "Persistent structural context and ultra-fast repeated analysis for AI coding agents"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "1.33.13"
3
+ __version__ = "1.33.15"
@@ -187,16 +187,19 @@ class FileClassifier:
187
187
  if java_class is not None:
188
188
  return java_class
189
189
 
190
- if self._has_any_import(imports, _API_IMPORTS):
191
- evidence = self._matched_imports(imports, _API_IMPORTS)
190
+ # Fix 4: call _matched_imports once per category instead of twice
191
+ # (_has_any_import was calling _matched_imports and discarding the result,
192
+ # then the caller invoked it again to get the evidence — halving throughput).
193
+ evidence = self._matched_imports(imports, _API_IMPORTS)
194
+ if evidence:
192
195
  return FileClassification(norm, "api_layer", "high", 0.82, "imports API/server framework", evidence)
193
196
 
194
- if self._has_any_import(imports, _DB_IMPORTS):
195
- evidence = self._matched_imports(imports, _DB_IMPORTS)
197
+ evidence = self._matched_imports(imports, _DB_IMPORTS)
198
+ if evidence:
196
199
  return FileClassification(norm, "database_layer", "high", 0.78, "imports database/persistence dependency", evidence)
197
200
 
198
- if self._has_any_import(imports, _INFRA_IMPORTS):
199
- evidence = self._matched_imports(imports, _INFRA_IMPORTS)
201
+ evidence = self._matched_imports(imports, _INFRA_IMPORTS)
202
+ if evidence:
200
203
  return FileClassification(norm, "infrastructure", "high", 0.72, "imports infrastructure dependency", evidence)
201
204
 
202
205
  role = self._package_role(norm)
@@ -1781,7 +1781,12 @@ class TaskContextBuilder:
1781
1781
  # Pass 4: inject files whose path matches symptom keywords.
1782
1782
  # CamelCase-expand the filename stem so "OfflineSessionLoader" matches
1783
1783
  # the keyword "offline" even without an explicit directory separator.
1784
+ # Large repos: cap per-keyword injections so a common term like
1785
+ # "authentication" (50+ path matches in an IAM repo) cannot flood the
1786
+ # candidate list and push specific terms like "ldap" out of the budget.
1784
1787
  _p4_dirs_of_injected: set[str] = set() # directories of high-score injects
1788
+ _P4_KW_CAP = 15 # max path-injections per keyword in large repos
1789
+ _p4_kw_counts: dict[str, int] = {}
1785
1790
  for _p in all_paths:
1786
1791
  if _p in _existing_paths:
1787
1792
  continue
@@ -1798,6 +1803,16 @@ class TaskContextBuilder:
1798
1803
  _matching_kws = [kw for kw in symptom_keywords if kw in _p_search]
1799
1804
  if not _matching_kws:
1800
1805
  continue
1806
+ # In large repos, skip keywords already at cap; keep file only if at
1807
+ # least one keyword still has quota (multi-kw matches exhaust each
1808
+ # keyword's quota independently so specific terms survive longer).
1809
+ if _is_large_repo:
1810
+ _matching_kws = [
1811
+ kw for kw in _matching_kws
1812
+ if _p4_kw_counts.get(kw, 0) < _P4_KW_CAP
1813
+ ]
1814
+ if not _matching_kws:
1815
+ continue
1801
1816
  _boost = 0.2 * len(_matching_kws)
1802
1817
  _injected_score = round(min(0.5 + _boost, 1.0), 2)
1803
1818
  _first_kw = _matching_kws[0]
@@ -1810,6 +1825,9 @@ class TaskContextBuilder:
1810
1825
  ))
1811
1826
  _existing_paths.add(_p)
1812
1827
  _sx_direct_path.append(_p)
1828
+ if _is_large_repo:
1829
+ for _kw in _matching_kws:
1830
+ _p4_kw_counts[_kw] = _p4_kw_counts.get(_kw, 0) + 1
1813
1831
  if _injected_score >= 0.7:
1814
1832
  _p4_dirs_of_injected.add(str(Path(_p).parent))
1815
1833
 
@@ -1818,6 +1836,15 @@ class TaskContextBuilder:
1818
1836
  # the candidate pool (e.g. AkitaBaseService containing setLoading).
1819
1837
  _src_exts = frozenset({".java", ".py", ".ts", ".js", ".kt", ".go"})
1820
1838
  _frontend_kws = [kw for kw in symptom_keywords if kw in _FRONTEND_SYMPTOM_MAP]
1839
+ # Fix 5: In large repos, skip frontend→backend synonym grep for keywords
1840
+ # that already have direct path matches — those are backend terms (e.g.
1841
+ # "login" in an IAM repo) that don't need UI→service-layer translation.
1842
+ # Prevents "authentication" grep flooding keycloak with SAML adapter files.
1843
+ if _is_large_repo and _frontend_kws:
1844
+ _frontend_kws = [
1845
+ kw for kw in _frontend_kws
1846
+ if not any(kw in p.lower() for p in _sx_direct_path)
1847
+ ]
1821
1848
  _backend_terms_set: list[str] = []
1822
1849
  if _frontend_kws:
1823
1850
  _bt: list[str] = []
@@ -1864,9 +1891,15 @@ class TaskContextBuilder:
1864
1891
  # architecturally adjacent classes that don't mention symptom keywords
1865
1892
  # in their own name (e.g. InfinispanOfflineSessionCacheEntryLifespan…
1866
1893
  # siblings in the same infinispan/ package).
1894
+ # Large repos: cap total co-location injections so that a keyword
1895
+ # matching many directories doesn't flood the candidate list.
1867
1896
  if _is_large_repo and _p4_dirs_of_injected:
1868
1897
  _coloc_existing = {rf.path for rf in relevant_files}
1898
+ _P4C_CAP = 30
1899
+ _coloc_count = 0
1869
1900
  for _cp in all_paths:
1901
+ if _coloc_count >= _P4C_CAP:
1902
+ break
1870
1903
  if _cp in _coloc_existing:
1871
1904
  continue
1872
1905
  if Path(_cp).suffix.lower() not in _src_exts:
@@ -1880,6 +1913,7 @@ class TaskContextBuilder:
1880
1913
  why="directory proximity injection",
1881
1914
  ))
1882
1915
  _coloc_existing.add(_cp)
1916
+ _coloc_count += 1
1883
1917
 
1884
1918
  # Sort before content scan so top candidates get read first.
1885
1919
  # In large repos: prioritise symptom_match files within each score band
@@ -1898,6 +1932,7 @@ class TaskContextBuilder:
1898
1932
  _no_scan_candidates = relevant_files[_CONTENT_SCAN_LIMIT:]
1899
1933
 
1900
1934
  _boosted: list[RelevantFile] = []
1935
+ _raw_signals: dict[str, float] = {} # uncapped accumulated signal per file
1901
1936
  _scanned_body: dict[str, str] = {} # cache for graph expansion (Pass 5)
1902
1937
  for _rf in _scan_candidates:
1903
1938
  _extra = 0.0
@@ -1971,7 +2006,9 @@ class TaskContextBuilder:
1971
2006
  elif _extra_syn > 0:
1972
2007
  _new_reason = _rf.reason + f", synonym-match backend (+{_extra_syn:.2f})"
1973
2008
 
1974
- _final_score = round(min(_rf.score + _total_extra, 1.0), 2)
2009
+ _raw_signal = _rf.score + _total_extra # uncapped for ranking
2010
+ _raw_signals[_rf.path] = _raw_signal
2011
+ _final_score = round(min(_raw_signal, 1.0), 2)
1975
2012
  _boosted.append(RelevantFile(
1976
2013
  path=_rf.path,
1977
2014
  role=_rf.role,
@@ -1980,21 +2017,14 @@ class TaskContextBuilder:
1980
2017
  why=_rf.why,
1981
2018
  ))
1982
2019
 
1983
- # Use total boost as a secondary sort key so symptom-matched files
1984
- # that were boosted from a lower base score rank above structural
1985
- # files that coincidentally reach the same capped score of 1.0.
1986
- # This prevents budget-trimming from discarding the most relevant files.
1987
- _boost_totals: dict[str, float] = {}
1988
- for _rf in _scan_candidates:
1989
- pass # populated below
1990
- _boost_totals = {}
1991
- for _idx, _rf in enumerate(_scan_candidates):
1992
- _b_rf = _boosted[_idx]
1993
- _boost_totals[_b_rf.path] = round(_b_rf.score - _rf.score, 4)
1994
-
2020
+ # Sort by uncapped raw signal so files with more accumulated evidence
2021
+ # (path matches + content hits + commit matches) rank above files that
2022
+ # merely cap at the same display score of 1.0.
2023
+ # _raw_signals holds each file's full sum before the display cap.
2024
+ # Files not content-scanned (_no_scan_candidates) use their base score.
1995
2025
  relevant_files = sorted(
1996
2026
  _boosted + _no_scan_candidates,
1997
- key=lambda rf: (-rf.score, -_boost_totals.get(rf.path, 0)),
2027
+ key=lambda rf: -_raw_signals.get(rf.path, rf.score),
1998
2028
  )
1999
2029
 
2000
2030
  # Pass 5: reverse graph expansion from high-score seed nodes.
@@ -2093,9 +2123,14 @@ class TaskContextBuilder:
2093
2123
  if _gx_new:
2094
2124
  relevant_files = sorted(
2095
2125
  relevant_files + _gx_new,
2096
- key=lambda rf: (-rf.score, -_boost_totals.get(rf.path, 0)),
2126
+ key=lambda rf: -_raw_signals.get(rf.path, rf.score),
2097
2127
  )
2098
2128
 
2129
+ # Fix 2: Cap output for large repos to stay within agent context budgets.
2130
+ # Raw signal sort above ensures highest-signal files survive the cut.
2131
+ if _is_large_repo and len(relevant_files) > 40:
2132
+ relevant_files = relevant_files[:40]
2133
+
2099
2134
  # Synonym note (only when synonyms actually fired)
2100
2135
  if _frontend_kws and _sx_synonyms:
2101
2136
  symptom_note = (
@@ -437,6 +437,18 @@ def get_cold_start_context(repo_root: Path) -> dict:
437
437
  "endpoints": endpoints,
438
438
  "hotspots": ris.git_context_snapshot.get("hotspots", []),
439
439
  "validation": _validation,
440
+ # Fix 3: _cache wrapper for backward compat with CLI schema consumers.
441
+ # CLI outputs inject _cache via _inject_cache_meta; MCP cold-start path
442
+ # skips that step, leaving agents that read _cache.cache_source with None.
443
+ "_cache": {
444
+ "cache_source": "RIS",
445
+ "git_head_at_generation": ris.git_head or "",
446
+ "current_git_head": current_head or "",
447
+ "is_stale": stale,
448
+ "has_uncommitted_changes": uncommitted,
449
+ "generated_at": ris.last_updated_at,
450
+ "data_scope": "RIS_BOOTSTRAP",
451
+ },
440
452
  }
441
453
  if not endpoints and _is_java:
442
454
  result["endpoints_hint"] = (
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes