sourcecode 0.36.0__py3-none-any.whl → 0.38.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sourcecode/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "0.36.0"
3
+ __version__ = "0.38.0"
@@ -0,0 +1,258 @@
1
"""Adaptive file tree scanner with topology-aware depth budgets.

Replaces pure depth filtering with relevance-oriented traversal:
- Source roots (packages/*/src, apps/*/src) get deep scan budgets.
- Low-signal directories (docs/, benchmarks/) are limited to 2 levels.
- Generated/excluded directories (dist/, node_modules/) are skipped.
- Unclassified directories fall back to the base depth limit.

Drop-in replacement for FileScanner: same scan_tree() and find_manifests()
interface, same output format (None = file, dict = directory).
"""

# The docstring must be the first statement in the module to be picked up as
# ``__doc__``; a ``from __future__`` import is allowed to follow it.
from __future__ import annotations
14
+
15
+ import os
16
+ from pathlib import Path
17
+ from typing import Any, Optional, cast
18
+
19
+ from pathspec import GitIgnoreSpec
20
+
21
+ from sourcecode.repo_classifier import RepoTopology
22
+ from sourcecode.scanner import DEFAULT_EXCLUDES, MANIFEST_NAMES
23
+
24
+
25
class AdaptiveScanner:
    """Build a repository file tree using per-directory depth limits.

    With a *topology*, the depth limit of each directory depends on how the
    topology classified it: paths inside a source root get
    ``scan_budget.source_depth`` extra levels, paths inside a low-signal root
    get ``scan_budget.low_signal_depth`` extra levels, and generated roots
    that sit directly under the repository root are excluded by name.

    Without a topology every directory uses ``base_depth`` (intended to
    match FileScanner's behaviour).
    """

    def __init__(
        self,
        root: Path,
        topology: Optional[RepoTopology] = None,
        base_depth: int = 4,
        extra_excludes: Optional[frozenset[str]] = None,
    ) -> None:
        """Store configuration and pre-compute the prefix lookup tables.

        Args:
            root: Directory to scan; resolved to an absolute path.
            topology: Optional classification that drives per-path budgets.
            base_depth: Depth limit for paths the topology does not classify.
            extra_excludes: Directory names to skip on top of DEFAULT_EXCLUDES.
        """
        self.root = root.resolve()
        self.topology = topology
        self.base_depth = base_depth
        self._excludes = DEFAULT_EXCLUDES | (extra_excludes or frozenset())
        self._gitignore_spec: Optional[GitIgnoreSpec] = None

        # Each table entry is (path_parts, max_absolute_depth), where the
        # depth is counted from the repository root rather than from the
        # classified directory itself.
        self._source_prefixes: list[tuple[tuple[str, ...], int]] = []
        self._low_signal_prefixes: list[tuple[tuple[str, ...], int]] = []
        self._extra_exclude_names: frozenset[str] = frozenset()

        if topology is None:
            return

        limits = topology.scan_budget
        self._source_prefixes = self._prefix_table(
            topology.source_roots, limits.source_depth
        )
        self._low_signal_prefixes = self._prefix_table(
            topology.low_signal_roots, limits.low_signal_depth
        )
        # Only single-component generated roots are turned into name-based
        # excludes, so os.walk never descends into them.
        self._extra_exclude_names = frozenset(
            entry.path
            for entry in topology.generated_roots
            if "/" not in entry.path
        )

    @staticmethod
    def _prefix_table(
        roots: Any, extra_levels: int
    ) -> list[tuple[tuple[str, ...], int]]:
        """Turn classified roots into ``(path_parts, max_absolute_depth)`` rows.

        Roots whose path has no non-empty component are dropped.
        """
        table: list[tuple[tuple[str, ...], int]] = []
        for entry in roots:
            segments = tuple(filter(None, entry.path.split("/")))
            if segments:
                table.append((segments, len(segments) + extra_levels))
        return table

    # ------------------------------------------------------------------
    # Gitignore
    # ------------------------------------------------------------------

    def _load_gitignore_spec(self) -> GitIgnoreSpec:
        """Return the spec for ``<root>/.gitignore``, parsing it on first use.

        A missing or unreadable file yields a spec built from no lines.
        """
        cached = self._gitignore_spec
        if cached is not None:
            return cached

        ignore_file = self.root / ".gitignore"
        patterns: list[str] = []
        if ignore_file.exists():
            try:
                text = ignore_file.read_text(encoding="utf-8", errors="replace")
            except OSError:
                # Best effort: an unreadable .gitignore is treated as empty.
                text = ""
            patterns = text.splitlines()

        spec = GitIgnoreSpec.from_lines(patterns)
        self._gitignore_spec = spec
        return spec

    def _is_excluded_by_gitignore(self, rel_path: str, is_dir: bool) -> bool:
        """Tell whether *rel_path* matches the root .gitignore.

        Directories are matched with a trailing slash appended.
        """
        suffix = "/" if is_dir else ""
        return self._load_gitignore_spec().match_file(rel_path + suffix)

    # ------------------------------------------------------------------
    # Depth budget computation — the core of adaptive traversal
    # ------------------------------------------------------------------

    def _compute_max_depth(self, rel_parts: tuple[str, ...]) -> int:
        """Return the maximum absolute depth allowed at *rel_parts*.

        Depth is the number of path components below the repository root.
        The caller stops descending once a directory's depth is greater than
        or equal to the returned value.

        Resolution order:
            1. At or inside a source root -> that root's limit.
            2. Ancestor of one or more source roots -> the largest of those
               limits and ``base_depth``, so every root stays reachable.
            3. At or inside a low-signal root -> that root's limit.
            4. Otherwise -> ``base_depth``.
        """
        if not (self._source_prefixes or self._low_signal_prefixes):
            return self.base_depth

        here = len(rel_parts)
        reachable = self.base_depth
        is_ancestor = False

        for prefix, limit in self._source_prefixes:
            size = len(prefix)
            if here < size:
                # rel_parts is shorter than the root: is it a leading part?
                if prefix[:here] == rel_parts:
                    is_ancestor = True
                    reachable = max(reachable, limit)
                continue
            if rel_parts[:size] == prefix:
                return limit  # the first matching source root wins

        if is_ancestor:
            return reachable

        return next(
            (
                limit
                for prefix, limit in self._low_signal_prefixes
                if here >= len(prefix) and rel_parts[: len(prefix)] == prefix
            ),
            self.base_depth,
        )

    # ------------------------------------------------------------------
    # Main traversal
    # ------------------------------------------------------------------

    def scan_tree(self) -> dict[str, Any]:
        """Walk the repository and return the nested file tree.

        Returns:
            A dict in which ``None`` marks a file (D-02) and a nested dict
            marks a directory (D-01). Directories at their depth limit appear
            as empty dicts because the walk does not list their contents.
        """
        self._load_gitignore_spec()
        tree: dict[str, Any] = {}
        skip_names = self._excludes | self._extra_exclude_names
        ignored = self._is_excluded_by_gitignore

        for dirpath, subdirs, files in os.walk(self.root, followlinks=False):
            here = Path(dirpath)
            try:
                relative = here.relative_to(self.root)
            except ValueError:
                continue

            parts = relative.parts
            if len(parts) >= self._compute_max_depth(parts):
                del subdirs[:]  # in-place, so os.walk stops descending
                continue

            kept: list[str] = []
            for name in subdirs:
                if name in skip_names:
                    continue
                if (here / name).is_symlink():
                    continue
                rel_dir = str(relative / name) if parts else name
                if ignored(rel_dir, is_dir=True):
                    continue
                kept.append(name)
            # Slice assignment mutates the list os.walk is iterating from.
            subdirs[:] = kept

            node = self._get_or_create_node(tree, parts)

            for name in files:
                # Names starting with "-" are skipped (shell redirect artifacts).
                if name.startswith("-") or (here / name).is_symlink():
                    continue
                rel_file = str(relative / name) if parts else name
                if not ignored(rel_file, is_dir=False):
                    node[name] = None  # D-02: None = file

            # Accepted sub-directories always get a dict node.
            for name in kept:
                node.setdefault(name, {})

        return tree

    def _get_or_create_node(
        self, tree: dict[str, Any], parts: tuple[str, ...]
    ) -> dict[str, Any]:
        """Return the dict node at *parts* inside *tree*, creating it if needed.

        A missing component, or one currently stored as ``None`` (a file),
        is replaced with a new empty dict.
        """
        cursor = tree
        for segment in parts:
            child = cursor.get(segment)
            if child is None:
                child = cursor[segment] = {}
            cursor = cast(dict[str, Any], child)
        return cursor

    # ------------------------------------------------------------------
    # Manifest discovery — same interface as FileScanner
    # ------------------------------------------------------------------

    @staticmethod
    def _append_manifests(directory: Path, sink: list[str]) -> None:
        """Append to *sink* every non-symlink MANIFEST_NAMES file in *directory*."""
        for manifest_name in MANIFEST_NAMES:
            candidate = directory / manifest_name
            if candidate.exists() and not candidate.is_symlink():
                sink.append(str(candidate))

    def find_manifests(self) -> list[str]:
        """Find manifest files at depth 0-1.

        Looks in the root, then in each immediate child directory that is not
        a symlink, not in the exclude set and not hidden (leading ".").

        Returns:
            Manifest paths as strings, root-level manifests first. If listing
            the root raises PermissionError, the paths found so far are kept.
        """
        found: list[str] = []
        self._append_manifests(self.root, found)
        try:
            for child in self.root.iterdir():
                if not child.is_dir() or child.is_symlink():
                    continue
                if child.name in self._excludes or child.name.startswith("."):
                    continue
                self._append_manifests(child, found)
        except PermissionError:
            pass
        return found
sourcecode/cli.py CHANGED
@@ -384,7 +384,8 @@ def main(
384
384
  no_tree: bool = typer.Option(
385
385
  False,
386
386
  "--no-tree",
387
- help="(Deprecated) Previously suppressed file_tree. The file tree is excluded by default — this flag is now a no-op. Use --tree to include the file tree.",
387
+ hidden=True,
388
+ help="(Removed) No-op. File tree is excluded by default. Use --tree to include it.",
388
389
  ),
389
390
  tree: bool = typer.Option(
390
391
  False,
@@ -516,13 +517,13 @@ def main(
516
517
  "contract",
517
518
  "--mode",
518
519
  help=(
519
- "Output mode: contract|minimal (default) | standard | deep | hybrid | raw. "
520
- "contract/minimal: minimal per-file contracts — exports, signatures, deps. Smallest output. "
520
+ "Output mode: contract (default) | standard | raw. "
521
+ "contract: minimal per-file contracts — exports, signatures, deps. "
522
+ "Smallest output, recommended for AI agents. "
523
+ "minimal is accepted as an alias for contract. "
521
524
  "standard: full per-file detail with imports, relevance scores, extraction method. "
522
- "deep: standard + optional analysis sections (deps, env, git). "
523
- "hybrid: contracts + compact bodies for top-ranked files. "
524
- "raw: legacy project-level analysis (stacks, entry points, dependencies). "
525
- "contract/minimal is the recommended default for AI coding agents."
525
+ "raw: project-level analysis only (stacks, entry points, dependency summary). "
526
+ "No per-file contracts."
526
527
  ),
527
528
  ),
528
529
  max_symbols: Optional[int] = typer.Option(
@@ -534,7 +535,8 @@ def main(
534
535
  dependency_depth: int = typer.Option(
535
536
  0,
536
537
  "--dependency-depth",
537
- help="Transitive import traversal depth for contract mode (0 = direct only, N = follow N levels).",
538
+ hidden=True,
539
+ help="(Removed) Transitive resolution is not implemented. Pass 0 or omit.",
538
540
  min=0,
539
541
  max=5,
540
542
  ),
@@ -561,7 +563,8 @@ def main(
561
563
  compress_types: bool = typer.Option(
562
564
  False,
563
565
  "--compress-types",
564
- help="Contract mode: abbreviate verbose type signatures (React.FC → FC, Promise<X> stays).",
566
+ hidden=True,
567
+ help="(Removed) No observable effect when type signatures are not extracted. Omit.",
565
568
  ),
566
569
  symbol: Optional[str] = typer.Option(
567
570
  None,
@@ -589,8 +592,20 @@ def main(
589
592
  _t0 = time.monotonic()
590
593
 
591
594
  # Validate new flag choices
592
- _MODE_CHOICES = ("contract", "minimal", "standard", "deep", "hybrid", "raw")
593
- if mode not in _MODE_CHOICES:
595
+ _MODE_CHOICES = ("contract", "minimal", "standard", "raw")
596
+ _DEPRECATED_MODES: dict[str, str] = {
597
+ "hybrid": "contract",
598
+ "deep": "standard",
599
+ }
600
+ if mode in _DEPRECATED_MODES:
601
+ fallback = _DEPRECATED_MODES[mode]
602
+ typer.echo(
603
+ f"[deprecated] --mode {mode} is removed: produced identical output to --mode {fallback}. "
604
+ f"Using --mode {fallback}.",
605
+ err=True,
606
+ )
607
+ mode = fallback
608
+ elif mode not in _MODE_CHOICES:
594
609
  typer.echo(
595
610
  f"Error: invalid value '{mode}' for --mode. Valid options: {', '.join(_MODE_CHOICES)}",
596
611
  err=True,
@@ -604,6 +619,22 @@ def main(
604
619
  )
605
620
  raise typer.Exit(code=1)
606
621
 
622
+ if dependency_depth > 0:
623
+ typer.echo(
624
+ f"[warning] --dependency-depth {dependency_depth} has no effect: "
625
+ "transitive import resolution is not implemented for npm/yarn/pip projects. "
626
+ "Using depth=0 (direct dependencies only).",
627
+ err=True,
628
+ )
629
+ dependency_depth = 0
630
+
631
+ if compress_types:
632
+ typer.echo(
633
+ "[deprecated] --compress-types is removed: type signatures are rarely extracted "
634
+ "at default depth. Flag ignored.",
635
+ err=True,
636
+ )
637
+
607
638
  # Validate format choices
608
639
  if format not in FORMAT_CHOICES:
609
640
  typer.echo(
@@ -634,9 +665,9 @@ def main(
634
665
  raise typer.Exit(code=1)
635
666
 
636
667
  # Normalize mode aliases
637
- _CONTRACT_MODES = frozenset({"contract", "minimal", "standard", "deep", "hybrid"})
668
+ _CONTRACT_MODES = frozenset({"contract", "minimal", "standard"})
638
669
  if mode == "minimal":
639
- mode = "contract" # minimal is the canonical default contract rendering
670
+ mode = "contract" # minimal is a documented alias for contract
640
671
  elif mode not in _CONTRACT_MODES and mode != "raw":
641
672
  mode = "contract" # unknown → safe default
642
673
 
@@ -648,15 +679,13 @@ def main(
648
679
  compact or agent or tree or format == "yaml" or trace_pipeline
649
680
  or docs or semantics or graph_modules or full_metrics or architecture
650
681
  )
651
- if mode in ("contract", "standard", "deep") and _legacy_flags_active:
682
+ if mode in ("contract", "standard") and _legacy_flags_active:
652
683
  mode = "raw"
653
684
 
654
685
  # Map mode to contract_view depth
655
686
  _CONTRACT_DEPTH = {
656
687
  "contract": "minimal",
657
688
  "standard": "standard",
658
- "deep": "deep",
659
- "hybrid": "minimal", # hybrid adds bodies via pipeline, minimal header
660
689
  }
661
690
 
662
691
  # --- Import analysis modules ---
@@ -685,6 +714,13 @@ def main(
685
714
  # 1. Scan directory (SCAN-01 to SCAN-05)
686
715
  redactor = SecretRedactor(enabled=not no_redact)
687
716
 
717
+ # Classify repository topology before scanning. This is a shallow
718
+ # filesystem read (depth 0-1 only) and completes in milliseconds.
719
+ # The topology drives per-directory depth budgets in AdaptiveScanner.
720
+ from sourcecode.adaptive_scanner import AdaptiveScanner
721
+ from sourcecode.repo_classifier import RepoClassifier
722
+ _topology = RepoClassifier().classify(target)
723
+
688
724
  # Detect manifests before scan to adjust depth.
689
725
  # find_manifests() only looks at depth 0-1, does not need the full tree.
690
726
  _pre_scanner = FileScanner(target, max_depth=1)
@@ -706,7 +742,7 @@ def main(
706
742
  no_tree = True # agents never need the raw file tree
707
743
  typer.echo("[agent] dependencies env-map code-notes (no-tree)", err=True)
708
744
 
709
- scanner = FileScanner(target, max_depth=effective_depth)
745
+ scanner = AdaptiveScanner(target, topology=_topology, base_depth=effective_depth)
710
746
  raw_tree = scanner.scan_tree()
711
747
 
712
748
  # 2. Filter .env and *.secret entries from file tree (SEC-02, all levels)
@@ -746,6 +782,17 @@ def main(
746
782
  detector = ProjectDetector(build_default_detectors())
747
783
  workspace_analysis = WorkspaceAnalyzer().analyze(target, manifests)
748
784
 
785
+ # Adaptive traversal handles monorepo source root discovery automatically.
786
+ # Emit a diagnostic when topology confidence is low so users know why.
787
+ import sys as _sys
788
+ if _topology.workspace_type == "monorepo" and _topology.confidence < 0.5:
789
+ if _sys.stderr.isatty():
790
+ typer.echo(
791
+ "[traversal] monorepo detected but source root confidence is low "
792
+ f"({_topology.confidence:.0%}). Use --depth 8 or higher if files are missing.",
793
+ err=True,
794
+ )
795
+
749
796
  # --compact implicitly enables lightweight analysis passes so that
750
797
  # dependency_summary, env_summary and code_notes_summary are never null.
751
798
  if compact:
@@ -854,7 +901,8 @@ def main(
854
901
  workspace_root = target / workspace.path
855
902
  if not workspace_root.exists() or not workspace_root.is_dir():
856
903
  continue
857
- workspace_scanner = FileScanner(workspace_root, max_depth=depth)
904
+ _ws_topology = RepoClassifier().classify(workspace_root)
905
+ workspace_scanner = AdaptiveScanner(workspace_root, topology=_ws_topology, base_depth=depth)
858
906
  workspace_tree = filter_sensitive_files(workspace_scanner.scan_tree())
859
907
  workspace_manifests = workspace_scanner.find_manifests()
860
908
  workspace_stacks, workspace_entry_points, _ = detector.detect(
@@ -966,6 +1014,7 @@ def main(
966
1014
  metadata = AnalysisMetadata(
967
1015
  analyzed_path=str(target),
968
1016
  analyzer_fingerprints=_fingerprints,
1017
+ traversal_topology=_topology.as_dict(),
969
1018
  )
970
1019
  sm = SourceMap(
971
1020
  metadata=metadata,
@@ -995,7 +1044,7 @@ def main(
995
1044
  target / ws.path,
996
1045
  (
997
1046
  filter_sensitive_files(
998
- FileScanner(target / ws.path, max_depth=depth).scan_tree()
1047
+ AdaptiveScanner(target / ws.path, base_depth=depth).scan_tree()
999
1048
  )
1000
1049
  ),
1001
1050
  workspace=ws.path,
@@ -1244,7 +1293,7 @@ def main(
1244
1293
  sm = _replace(sm, pipeline_trace=_trace.build_trace())
1245
1294
 
1246
1295
  # Contract pipeline — runs for mode=contract|standard|deep|hybrid (skip for raw)
1247
- _is_contract_mode = mode in ("contract", "standard", "deep", "hybrid")
1296
+ _is_contract_mode = mode in ("contract", "standard")
1248
1297
  if _is_contract_mode:
1249
1298
  from sourcecode.contract_pipeline import ContractPipeline
1250
1299
  _cp = ContractPipeline()
@@ -1263,6 +1312,13 @@ def main(
1263
1312
  compress_types=compress_types,
1264
1313
  )
1265
1314
  sm = _replace(sm, file_contracts=_contracts, contract_summary=_contract_summary)
1315
+ if symbol is not None and len(_contracts) == 0:
1316
+ typer.echo(
1317
+ f"[warning] --symbol '{symbol}' matched 0 files. "
1318
+ "The symbol may not exist at the current --depth, or the name may differ in case. "
1319
+ "Try --depth 8 or verify the symbol name.",
1320
+ err=True,
1321
+ )
1266
1322
  if agent:
1267
1323
  typer.echo(f"[contract] {len(_contracts)} files extracted ({_contract_summary.method_breakdown})", err=True)
1268
1324
 
@@ -198,6 +198,13 @@ def _is_hotspot_admin(path: str) -> bool:
198
198
  for suffix in _HOTSPOT_ADMIN_SUFFIXES:
199
199
  if filename.endswith(suffix):
200
200
  return True
201
+ # Localized changelogs: CHANGELOG.zh-CN.md, CHANGES.en-US.md, etc.
202
+ _lower = filename.lower()
203
+ if _lower.startswith("changelog.") or _lower.startswith("changes."):
204
+ return True
205
+ # lerna.json and root-level package.json are modified by version bumps, not dev work
206
+ if filename in ("lerna.json",):
207
+ return True
201
208
  return False
202
209
 
203
210
 
@@ -219,6 +219,16 @@ class MetricsAnalyzer:
219
219
  if fm.language != "unknown":
220
220
  languages.add(fm.language)
221
221
 
222
+ # Emit explicit limitation when JS/TS files are present but complexity is unavailable.
223
+ # This prevents agents from assuming null complexity means "no functions found".
224
+ _js_ts_count = sum(1 for r in records if r.language in ("javascript", "typescript") and r.complexity_availability == "unavailable")
225
+ if _js_ts_count > 0:
226
+ limitations.append(
227
+ f"cyclomatic_complexity_unavailable: {_js_ts_count} JS/TS file(s) — "
228
+ "complexity requires tree-sitter (pip install 'sourcecode[ast]'). "
229
+ "null complexity fields are expected, not an error."
230
+ )
231
+
222
232
  summary = MetricsSummary(
223
233
  requested=True,
224
234
  file_count=len(records),
@@ -0,0 +1,570 @@
1
"""Repository topology classifier for adaptive traversal.

Detects monorepo vs single-package structure, identifies source roots,
low-signal directories, and generated content. Feeds AdaptiveScanner
with per-path depth budgets so traversal is relevance-oriented, not
purely structural.
"""

# The docstring must be the first statement in the module to be picked up as
# ``__doc__``; a ``from __future__`` import is allowed to follow it.
from __future__ import annotations
10
+
11
+ import json
12
+ from dataclasses import dataclass, field
13
+ from pathlib import Path
14
+ from typing import Any, Optional
15
+
16
+ # ---------------------------------------------------------------------------
17
+ # Signal tables
18
+ # ---------------------------------------------------------------------------
19
+
20
# NOTE(review): every table below holds bare directory names, file names or
# file extensions. The matching logic that consumes them lives in
# RepoClassifier helpers that are not visible in this excerpt — confirm there
# whether comparison is case-sensitive and at which depths it applies.

# Top-level dirs that almost always contain actual source code
_SOURCE_DIRS: frozenset[str] = frozenset({
    "src", "lib", "source", "sources", "core",
    "app", "server", "client", "backend", "frontend",
    "cmd", "pkg",  # Go conventions
    "main",  # Java src/main
    "kotlin", "java", "scala",  # JVM source dirs
})

# First-level dirs that act as workspace containers in monorepos
_WORKSPACE_CONTAINERS: frozenset[str] = frozenset({
    "packages", "apps", "libs", "services", "internal",
    "plugins", "modules", "components", "crates",
    "workspaces", "projects",
})

# Directories with low signal value for AI code understanding
# (docs, benchmarks, examples, fixtures, tooling, editor/CI config, assets)
_LOW_SIGNAL_DIRS: frozenset[str] = frozenset({
    "docs", "doc", "documentation", "docsrc", "website", "site",
    "benchmark", "benchmarks", "bench", "perf", "perfs",
    "examples", "example", "demo", "demos", "sample", "samples",
    "fixtures", "fixture", "__fixtures__",
    "scripts", "script", "tools", "tool",
    "ci", ".ci",
    "storybook", "stories", "__stories__",
    "sandbox", "playground", "playgrounds",
    "migrations", "migration",
    ".github", ".vscode", ".claude", ".cursor", ".idea",
    "themes", "theme",
    "static", "public", "assets",
})

# Directories to skip entirely — generated content and dependency stores
# (build output, virtualenvs, tool caches, vendored code, VCS metadata)
_GENERATED_DIRS: frozenset[str] = frozenset({
    "dist", "build", "out", "output", "release", "releases",
    "target", "coverage", ".next", ".nuxt", ".svelte-kit",
    ".turbo", "node_modules", "__pycache__",
    ".venv", "venv", "env",
    ".mypy_cache", ".pytest_cache", ".ruff_cache",
    ".nyc_output", ".tox",
    "generated", ".generated", "gen", "_gen",
    ".cache", "cache",
    "vendor",
    ".git",
})

# Manifest file names that mark a directory as a source package
_PACKAGE_MANIFESTS: frozenset[str] = frozenset({
    "package.json", "pyproject.toml", "setup.py", "setup.cfg",
    "go.mod", "Cargo.toml", "pom.xml", "build.gradle",
    "build.gradle.kts", "composer.json", "Gemfile", "pubspec.yaml",
})

# Source file extensions — presence signals a directory has real code
# (each entry includes the leading dot)
_SOURCE_EXTENSIONS: frozenset[str] = frozenset({
    ".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs",
    ".py", ".go", ".rs", ".java", ".kt", ".rb",
    ".cs", ".swift", ".scala", ".cpp", ".c", ".h",
})
79
+
80
+
81
+ # ---------------------------------------------------------------------------
82
+ # Data structures
83
+ # ---------------------------------------------------------------------------
84
+
85
@dataclass
class SourceRoot:
    """One classified directory, with its traversal priority.

    Attributes:
        path: Repo-relative path written with forward slashes.
        signal: One of "high", "medium", "low" or "excluded".
        reason: Human-readable explanation of the classification.
        priority: Traversal priority in the range 0.0-1.0.
    """

    path: str
    signal: str
    reason: str
    priority: float
92
+
93
+
94
@dataclass
class ScanBudget:
    """Traversal limits attached to a repository topology.

    Attributes:
        max_files: File-count budget (default 2000).
        base_depth: Depth cap for paths that are not classified.
        source_depth: Additional levels allowed inside source roots.
        low_signal_depth: Additional levels allowed inside low-signal roots.
    """

    max_files: int = 2000
    base_depth: int = 4
    source_depth: int = 8
    low_signal_depth: int = 2
101
+
102
+
103
@dataclass
class RepoTopology:
    """Classification result that AdaptiveScanner uses to budget traversal.

    Built by RepoClassifier.classify(). Directories are grouped into three
    lists: source code, low-value content, and generated/excluded content.

    Attributes:
        workspace_type: "monorepo", "single-package" or "unknown".
        source_roots: Directories classified as source code.
        low_signal_roots: Directories classified as low-value content.
        generated_roots: Directories classified as generated/excluded.
        package_manager: Detected package manager name, or "unknown".
        confidence: Confidence score for the classification.
        scan_budget: Depth and file limits derived for this topology.
    """

    workspace_type: str = "unknown"
    source_roots: list[SourceRoot] = field(default_factory=list)
    low_signal_roots: list[SourceRoot] = field(default_factory=list)
    generated_roots: list[SourceRoot] = field(default_factory=list)
    package_manager: str = "unknown"
    confidence: float = 0.0
    scan_budget: ScanBudget = field(default_factory=ScanBudget)

    def as_dict(self) -> dict[str, Any]:
        """Return a plain-dict summary of the topology.

        Source roots are emitted as ``{path, reason, priority}`` with the
        priority rounded to two decimals; low-signal and generated roots are
        emitted as path strings only. ``scan_budget`` carries the three depth
        values; ``max_files`` is not included.
        """
        budget = self.scan_budget

        source_entries = []
        for entry in self.source_roots:
            source_entries.append(
                {
                    "path": entry.path,
                    "reason": entry.reason,
                    "priority": round(entry.priority, 2),
                }
            )

        summary: dict[str, Any] = {}
        summary["workspace_type"] = self.workspace_type
        summary["source_roots"] = source_entries
        summary["low_signal_roots"] = [entry.path for entry in self.low_signal_roots]
        summary["generated_roots"] = [entry.path for entry in self.generated_roots]
        summary["package_manager"] = self.package_manager
        summary["confidence"] = round(self.confidence, 2)
        summary["scan_budget"] = {
            "base_depth": budget.base_depth,
            "source_depth": budget.source_depth,
            "low_signal_depth": budget.low_signal_depth,
        }
        return summary
136
+
137
+
138
+ # ---------------------------------------------------------------------------
139
+ # RepoClassifier
140
+ # ---------------------------------------------------------------------------
141
+
142
+ class RepoClassifier:
143
+ """Detects repository topology and classifies directories for adaptive traversal.
144
+
145
+ Reads workspace config files (pnpm-workspace.yaml, package.json workspaces,
146
+ turbo.json, nx.json, lerna.json, go.work, Cargo.toml), resolves package
147
+ glob patterns, and identifies which directories contain real source code
148
+ vs. docs, benchmarks, or generated content.
149
+
150
+ Classification is fast: only depth-0 and depth-1 filesystem reads.
151
+ """
152
+
153
def classify(self, root: Path) -> RepoTopology:
    """Classify the repository at *root* and return its topology.

    Detects the package manager and workspace markers, lists the
    non-symlink directories directly under *root*, classifies them into
    source, low-signal and generated roots, and then derives the workspace
    type, confidence and scan budget.

    Args:
        root: Repository root directory.

    Returns:
        A populated RepoTopology; source roots are ordered by descending
        priority.
    """
    result = RepoTopology()
    result.package_manager = self._detect_package_manager(root)

    marker_files = self._detect_markers(root)
    globs = self._read_workspace_patterns(root, marker_files)

    try:
        top_dirs = [
            entry
            for entry in sorted(root.iterdir())
            if entry.is_dir() and not entry.is_symlink()
        ]
    except PermissionError:
        # An unreadable root is treated as having no child directories.
        top_dirs = []

    has_workspace_signal = bool(marker_files) or bool(globs)
    sources = self._find_source_roots(root, top_dirs, globs, has_workspace_signal)
    low_signal = self._find_low_signal_roots(root, top_dirs, sources)
    generated = self._find_generated_roots(root, top_dirs)

    # Monorepo heuristic: explicit markers/patterns, OR at least two source
    # roots discovered through containers/workspaces with no top-level
    # source directory alongside them.
    via_container = 0
    top_level_src = False
    for entry in sources:
        if "container:" in entry.reason or "workspace:" in entry.reason:
            via_container += 1
        if entry.reason == "top_level_source":
            top_level_src = True

    is_monorepo = has_workspace_signal or (via_container >= 2 and not top_level_src)
    result.workspace_type = "monorepo" if is_monorepo else "single-package"

    result.source_roots = sorted(sources, key=lambda entry: -entry.priority)
    result.low_signal_roots = low_signal
    result.generated_roots = generated
    result.confidence = self._compute_confidence(result, is_monorepo)
    result.scan_budget = self._compute_budget(result)
    return result
198
+
199
+ # ------------------------------------------------------------------
200
+ # Package manager detection
201
+ # ------------------------------------------------------------------
202
+
203
+ def _detect_package_manager(self, root: Path) -> str:
204
+ if (root / "pnpm-lock.yaml").exists() or (root / "pnpm-workspace.yaml").exists():
205
+ return "pnpm"
206
+ if (root / "yarn.lock").exists():
207
+ return "yarn"
208
+ if (root / "bun.lockb").exists() or (root / "bun.lock").exists():
209
+ return "bun"
210
+ if (root / "package-lock.json").exists():
211
+ return "npm"
212
+ if (root / "go.work").exists():
213
+ return "go-workspace"
214
+ if (root / "go.mod").exists():
215
+ return "go-modules"
216
+ if (root / "Cargo.toml").exists():
217
+ return "cargo"
218
+ if (root / "uv.lock").exists():
219
+ return "uv"
220
+ if (root / "Pipfile").exists():
221
+ return "pipenv"
222
+ if (root / "pyproject.toml").exists() or (root / "setup.py").exists():
223
+ return "python"
224
+ return "unknown"
225
+
226
+ # ------------------------------------------------------------------
227
+ # Workspace marker detection
228
+ # ------------------------------------------------------------------
229
+
230
+ def _detect_markers(self, root: Path) -> list[str]:
231
+ """Return list of workspace marker file names present at root."""
232
+ markers: list[str] = []
233
+ for name in ("pnpm-workspace.yaml", "go.work", "turbo.json", "lerna.json", "nx.json"):
234
+ if (root / name).exists():
235
+ markers.append(name)
236
+
237
+ cargo = root / "Cargo.toml"
238
+ if cargo.exists():
239
+ try:
240
+ content = cargo.read_text(encoding="utf-8", errors="replace")
241
+ if "[workspace]" in content:
242
+ markers.append("Cargo.toml[workspace]")
243
+ except OSError:
244
+ pass
245
+
246
+ pkg = root / "package.json"
247
+ if pkg.exists():
248
+ try:
249
+ data = json.loads(pkg.read_text(encoding="utf-8", errors="replace"))
250
+ if "workspaces" in data:
251
+ markers.append("package.json[workspaces]")
252
+ except (json.JSONDecodeError, OSError, ValueError):
253
+ pass
254
+
255
+ return markers
256
+
257
+ # ------------------------------------------------------------------
258
+ # Workspace pattern extraction from config files
259
+ # ------------------------------------------------------------------
260
+
261
def _read_workspace_patterns(self, root: Path, markers: list[str]) -> list[str]:
    """Collect workspace glob patterns for every marker present in *markers*.

    Extractors run in a fixed order (pnpm, npm workspaces, nx, lerna,
    Cargo, go.work). The result keeps the first occurrence of each
    pattern and drops later duplicates.
    """
    extractors = (
        ("pnpm-workspace.yaml", self._patterns_from_pnpm),
        ("package.json[workspaces]", self._patterns_from_npm_workspaces),
        ("nx.json", self._patterns_from_nx),
        ("lerna.json", self._patterns_from_lerna),
        ("Cargo.toml[workspace]", self._patterns_from_cargo_workspace),
        ("go.work", self._patterns_from_go_work),
    )

    # A dict is used as an insertion-ordered set.
    ordered: dict[str, None] = {}
    for marker, extract in extractors:
        if marker not in markers:
            continue
        for pattern in extract(root):
            ordered.setdefault(pattern, None)
    return list(ordered)
284
+
285
def _patterns_from_pnpm(self, root: Path) -> list[str]:
    """Return the entries of the ``packages`` list in pnpm-workspace.yaml.

    This is a small line-based reader, not a YAML parser. It understands
    a top-level ``packages:`` key followed by ``- item`` lines, and the
    one-line flow form ``packages: [a, b]``. List items that belong to
    other top-level keys, nested mappings, and comments are ignored.
    Negated entries (``!pattern``) are returned verbatim.

    Returns an empty list when the file cannot be read.
    """
    try:
        content = (root / "pnpm-workspace.yaml").read_text(encoding="utf-8", errors="replace")
    except OSError:
        return []

    def _unquote(token: str) -> str:
        """Strip surrounding quotes, or a trailing ``#`` comment if unquoted."""
        token = token.strip()
        if token[:1] in ("'", '"'):
            closing = token.find(token[0], 1)
            return token[1:closing] if closing != -1 else token[1:]
        return token.split(" #", 1)[0].strip()

    patterns: list[str] = []
    in_packages = False
    # A leading BOM would otherwise become part of the first key name.
    for raw in content.lstrip("\ufeff").splitlines():
        line = raw.strip()
        if not line or line.startswith("#"):
            continue

        if line.startswith("-"):
            # List item: only meaningful while inside the packages section.
            if in_packages:
                item = _unquote(line[1:])
                if item:
                    patterns.append(item)
            continue

        if raw[0].isspace():
            # Indented non-list line: a nested mapping entry of some section.
            continue

        # Unindented "key: value" line starts a new top-level section.
        key, _, value = line.partition(":")
        in_packages = key.strip().strip("'\"") == "packages"
        value = value.strip()
        if in_packages and value.startswith("["):
            inner = value[1:].split("]", 1)[0]
            for token in inner.split(","):
                item = _unquote(token)
                if item:
                    patterns.append(item)

    return patterns
296
+
297
def _patterns_from_npm_workspaces(self, root: Path) -> list[str]:
    """Return the ``workspaces`` patterns declared in package.json.

    Two shapes are accepted: a plain list (``"workspaces": [...]``) and
    an object with a ``packages`` list (``"workspaces": {"packages": [...]}``).
    Any other shape, an unreadable file, or invalid JSON yields an empty
    list. Entries are coerced to ``str``.
    """
    try:
        data = json.loads((root / "package.json").read_text(encoding="utf-8", errors="replace"))
    except (json.JSONDecodeError, OSError, ValueError):
        return []

    # Valid JSON is not necessarily an object; .get() only exists on dicts.
    if not isinstance(data, dict):
        return []

    ws = data.get("workspaces", [])
    if isinstance(ws, dict):
        ws = ws.get("packages", [])
    # Reject scalars here: iterating a string would yield single characters.
    if not isinstance(ws, list):
        return []
    return [str(p) for p in ws]
308
+
309
def _patterns_from_nx(self, root: Path) -> list[str]:
    """Return ``<dir>/*`` patterns from the ``workspaceLayout`` of nx.json.

    ``appsDir`` is emitted before ``libsDir``; either may be absent.
    Returns an empty list when the file is unreadable, is not valid JSON,
    or does not contain an object-valued ``workspaceLayout``.
    """
    try:
        data = json.loads((root / "nx.json").read_text(encoding="utf-8", errors="replace"))
    except (json.JSONDecodeError, OSError, ValueError):
        return []

    # Both the document root and workspaceLayout must be JSON objects;
    # e.g. "workspaceLayout": null would otherwise fail on the "in" test.
    layout = data.get("workspaceLayout") if isinstance(data, dict) else None
    if not isinstance(layout, dict):
        return []

    return [f"{layout[key]}/*" for key in ("appsDir", "libsDir") if key in layout]
321
+
322
def _patterns_from_lerna(self, root: Path) -> list[str]:
    """Return the ``packages`` patterns declared in lerna.json.

    When the key is absent the pattern list defaults to ``["packages/*"]``.
    Returns an empty list when the file is unreadable, is not valid JSON,
    is not a JSON object, or ``packages`` is not a list. Entries are
    coerced to ``str``.
    """
    try:
        data = json.loads((root / "lerna.json").read_text(encoding="utf-8", errors="replace"))
    except (json.JSONDecodeError, OSError, ValueError):
        return []

    # Valid JSON is not necessarily an object; .get() only exists on dicts.
    if not isinstance(data, dict):
        return []

    pkgs = data.get("packages", ["packages/*"])
    return [str(p) for p in pkgs] if isinstance(pkgs, list) else []
329
+
330
def _patterns_from_cargo_workspace(self, root: Path) -> list[str]:
    """Return the quoted entries of ``members`` arrays in Cargo.toml.

    This is a line-based reader, not a TOML parser. Capture starts on a
    line whose key (the text before ``=``) is ``members`` or
    ``default-members`` (optionally dotted, e.g. ``workspace.members``)
    and ends on the first line with a ``]`` outside quotes. Every quoted
    string in between is returned, whether or not it contains a ``/``.
    Full-line ``#`` comments are skipped.

    Returns an empty list when the file cannot be read.
    """
    try:
        content = (root / "Cargo.toml").read_text(encoding="utf-8", errors="replace")
    except OSError:
        return []

    member_keys = ("members", "default-members")
    patterns: list[str] = []
    in_members = False
    for line in content.splitlines():
        text = line.strip()
        if text.startswith("#"):
            continue

        if not in_members:
            key, sep, value = text.partition("=")
            if not sep or key.strip().rsplit(".", 1)[-1] not in member_keys:
                continue
            in_members = True
            text = value  # only the right-hand side can hold member strings

        # Treat both TOML quote styles alike: after splitting on the quote
        # character, odd-indexed pieces are string contents and even-indexed
        # pieces are the syntax between them.
        pieces = text.replace("'", '"').split('"')
        patterns.extend(piece for piece in pieces[1::2] if piece)
        if "]" in "".join(pieces[0::2]):
            in_members = False

    return patterns
351
+
352
def _patterns_from_go_work(self, root: Path) -> list[str]:
    """Return the module directories named by ``use`` directives in go.work.

    Handles the single-line form (``use ./dir``) and entries of the block
    form (lines starting with ``./`` between ``use (`` and ``)``). For each
    entry the trailing ``//`` comment, the leading ``./`` and any trailing
    ``/`` are removed. Entries that reduce to nothing or to ``.`` (the
    workspace root itself) are skipped.

    Returns an empty list when the file cannot be read.
    """
    try:
        content = (root / "go.work").read_text(encoding="utf-8", errors="replace")
    except OSError:
        return []

    patterns: list[str] = []
    for line in content.splitlines():
        # Drop a trailing "// comment" before interpreting the directive.
        stripped = line.split("//", 1)[0].strip()
        if stripped.startswith("use "):
            # "use (" opens a block and carries no path of its own.
            target = stripped[4:].strip().strip("()").strip()
        elif stripped.startswith("./"):
            target = stripped
        else:
            continue

        target = target.removeprefix("./").rstrip("/")
        if target and target != ".":
            patterns.append(target)
    return patterns
367
+
368
+ # ------------------------------------------------------------------
369
+ # Source root discovery
370
+ # ------------------------------------------------------------------
371
+
372
def _find_source_roots(
    self,
    root: Path,
    root_children: list[Path],
    workspace_patterns: list[str],
    is_monorepo: bool,
) -> list[SourceRoot]:
    """Identify directories that contain actual source code.

    Candidates are collected in four passes, in this order; a path is only
    recorded the first time it is seen:

    1. Directories matched by *workspace_patterns* (globbed under *root*).
    2. Children of root-level directories named in ``_WORKSPACE_CONTAINERS``.
    3. Root-level directories named in ``_SOURCE_DIRS``.
    4. Workspace container directories that themselves show a source signal.

    For passes 1 and 2, each package directory contributes its ``src``,
    ``lib`` and ``source`` subdirectories when present; otherwise the
    package directory itself is recorded if ``_has_source_signal`` accepts
    it. Symlinks and paths rejected by ``_is_allowed_path`` are skipped.

    *is_monorepo* is accepted for interface compatibility and is not read
    by this method.
    """
    result: list[SourceRoot] = []
    seen: set[str] = set()

    def _add(path_str: str, reason: str, priority: float) -> None:
        if path_str not in seen:
            seen.add(path_str)
            result.append(SourceRoot(
                path=path_str, signal="high", reason=reason, priority=priority
            ))

    def _register_package(
        pkg_dir: Path,
        nested_reason: str,
        nested_priority: float,
        flat_reason: str,
        flat_priority: float,
    ) -> None:
        """Record the source dirs of one package, or the package itself."""
        if not pkg_dir.is_dir() or pkg_dir.is_symlink():
            return
        try:
            rel = pkg_dir.relative_to(root)
        except ValueError:
            return
        rel_str = str(rel).replace("\\", "/")
        if not self._is_allowed_path(rel_str):
            return

        found_src = False
        for src_name in ("src", "lib", "source"):
            src_dir = pkg_dir / src_name
            if src_dir.is_dir() and not src_dir.is_symlink():
                _add(f"{rel_str}/{src_name}", nested_reason, nested_priority)
                found_src = True

        if not found_src and self._has_source_signal(pkg_dir):
            _add(rel_str, flat_reason, flat_priority)

    # 1. Resolve workspace glob patterns → packages → src/
    for pattern in workspace_patterns:
        try:
            for pkg_dir in sorted(root.glob(pattern)):
                _register_package(
                    pkg_dir,
                    f"workspace:{pattern}", 0.92,
                    f"workspace_flat:{pattern}", 0.72,
                )
        except Exception:
            # Deliberately broad: patterns come from user config files and
            # a bad one must not abort discovery for the remaining patterns.
            continue

    # 2. Check known workspace container dirs even without explicit patterns
    for child in root_children:
        name = child.name
        if name not in _WORKSPACE_CONTAINERS:
            continue
        try:
            for pkg_dir in sorted(child.iterdir()):
                _register_package(
                    pkg_dir,
                    f"container:{name}", 0.88,
                    f"container_flat:{name}", 0.68,
                )
        except OSError:
            # Covers PermissionError as well as a child that is not (or is
            # no longer) a listable directory.
            continue

    # 3. Top-level source dirs (single-package repos or workspace containers)
    for child in root_children:
        name = child.name
        if name in _SOURCE_DIRS and name not in _GENERATED_DIRS:
            try:
                rel_str = str(child.relative_to(root)).replace("\\", "/")
                _add(rel_str, "top_level_source", 0.95)
            except ValueError:
                pass

    # 4. Workspace containers themselves if they contain source files at root
    for child in root_children:
        name = child.name
        if name in _WORKSPACE_CONTAINERS and name not in _GENERATED_DIRS:
            try:
                rel_str = str(child.relative_to(root)).replace("\\", "/")
            except ValueError:
                continue
            # Check `seen` first so already-recorded containers are not probed.
            if rel_str not in seen and self._has_source_signal(child):
                _add(rel_str, f"workspace_container_source:{name}", 0.55)

    return result
467
+
468
def _has_source_signal(self, directory: Path) -> bool:
    """Report whether *directory* directly holds a manifest or a source file.

    A manifest is any name in ``_PACKAGE_MANIFESTS``; a source file is a
    regular file whose lower-cased suffix is in ``_SOURCE_EXTENSIONS``.
    Only immediate children are inspected. A ``PermissionError`` while
    listing the directory counts as "no signal".
    """
    if any((directory / manifest).exists() for manifest in _PACKAGE_MANIFESTS):
        return True

    try:
        return any(
            (item.is_file() and item.suffix.lower() in _SOURCE_EXTENSIONS)
            or item.name in _PACKAGE_MANIFESTS
            for item in directory.iterdir()
        )
    except PermissionError:
        return False
482
+
483
def _is_allowed_path(self, rel_str: str) -> bool:
    """Return True unless some ``/``-separated component is in ``_GENERATED_DIRS``."""
    return not any(component in _GENERATED_DIRS for component in rel_str.split("/"))
486
+
487
+ # ------------------------------------------------------------------
488
+ # Low-signal root discovery
489
+ # ------------------------------------------------------------------
490
+
491
def _find_low_signal_roots(
    self,
    root: Path,
    root_children: list[Path],
    source_roots: list[SourceRoot],
) -> list[SourceRoot]:
    """Classify root-level entries that carry little signal.

    An entry is skipped when its name is the first path component of any
    source root or is listed in ``_GENERATED_DIRS``. Of the rest, names in
    ``_LOW_SIGNAL_DIRS`` are recorded with priority 0.15 and names that
    start with ``.`` with priority 0.05; everything else is left
    unclassified.
    """
    claimed_by_source = {sr.path.partition("/")[0] for sr in source_roots}
    found: list[SourceRoot] = []

    for entry in root_children:
        entry_name = entry.name
        if entry_name in claimed_by_source or entry_name in _GENERATED_DIRS:
            continue
        try:
            relative = str(entry.relative_to(root)).replace("\\", "/")
        except ValueError:
            continue

        if entry_name in _LOW_SIGNAL_DIRS:
            why, weight = f"low_signal:{entry_name}", 0.15
        elif entry_name.startswith("."):
            why, weight = "hidden_dir", 0.05
        else:
            continue

        found.append(SourceRoot(
            path=relative, signal="low",
            reason=why, priority=weight,
        ))

    return found
522
+
523
+ # ------------------------------------------------------------------
524
+ # Generated root discovery
525
+ # ------------------------------------------------------------------
526
+
527
def _find_generated_roots(
    self,
    root: Path,
    root_children: list[Path],
) -> list[SourceRoot]:
    """Return an ``excluded`` entry for each child named in ``_GENERATED_DIRS``.

    The entry's path is the child's bare name; *root* is not consulted.
    """
    return [
        SourceRoot(
            path=entry.name, signal="excluded",
            reason=f"generated:{entry.name}", priority=0.0,
        )
        for entry in root_children
        if entry.name in _GENERATED_DIRS
    ]
542
+
543
+ # ------------------------------------------------------------------
544
+ # Budget and confidence
545
+ # ------------------------------------------------------------------
546
+
547
def _compute_confidence(self, topology: RepoTopology, is_monorepo: bool) -> float:
    """Map the number of detected source roots to a confidence score.

    No roots gives 0.30; exactly one gives 0.75 for a monorepo and 0.80
    otherwise; two to four give 0.85; five or more give 0.95.
    """
    root_count = len(topology.source_roots)
    if root_count < 1:
        return 0.30
    if root_count < 2:
        return 0.75 if is_monorepo else 0.80
    return 0.95 if root_count >= 5 else 0.85
556
+
557
def _compute_budget(self, topology: RepoTopology) -> ScanBudget:
    """Build the scan budget for *topology*.

    Only the base depth varies: 4 when ``workspace_type`` is
    ``"monorepo"``, 6 otherwise. The file cap (2000), source depth (8)
    and low-signal depth (2) are the same in both cases.
    """
    base_depth = 4 if topology.workspace_type == "monorepo" else 6
    return ScanBudget(
        max_files=2000,
        base_depth=base_depth,
        source_depth=8,
        low_signal_depth=2,
    )
sourcecode/schema.py CHANGED
@@ -34,6 +34,7 @@ class AnalysisMetadata:
34
34
  sourcecode_version: str = field(default_factory=_sourcecode_version)
35
35
  analyzed_path: str = ""
36
36
  analyzer_fingerprints: dict[str, str] = field(default_factory=dict)
37
+ traversal_topology: Optional[dict[str, Any]] = None
37
38
 
38
39
 
39
40
  @dataclass
sourcecode/serializer.py CHANGED
@@ -923,10 +923,13 @@ def _contract_view_minimal(
923
923
 
924
924
  result: dict[str, Any] = {
925
925
  "schema_version": sm.metadata.schema_version,
926
- "mode": "minimal",
926
+ "mode": "contract",
927
927
  "project": project,
928
928
  }
929
929
 
930
+ if sm.metadata.traversal_topology:
931
+ result["traversal"] = sm.metadata.traversal_topology
932
+
930
933
  # Per-file contracts
931
934
  if contracts:
932
935
  serialized: list[dict[str, Any]] = []
@@ -949,9 +952,28 @@ def _contract_view_minimal(
949
952
 
950
953
  if sm.env_summary is not None and sm.env_summary.requested:
951
954
  result["env_summary"] = asdict(sm.env_summary)
955
+ if sm.env_map:
956
+ # Include top-20 env entries sorted by required first, then name.
957
+ # Agents read the summary count but need the actual keys to act on them.
958
+ _sorted_env = sorted(sm.env_map, key=lambda e: (not getattr(e, "required", False), getattr(e, "name", "")))
959
+ result["env_map"] = [
960
+ {k: v for k, v in asdict(e).items() if v is not None and v != ""}
961
+ for e in _sorted_env[:20]
962
+ ]
952
963
 
953
964
  if sm.code_notes_summary is not None and sm.code_notes_summary.requested:
954
965
  result["code_notes_summary"] = asdict(sm.code_notes_summary)
966
+ if sm.code_notes:
967
+ # Include top-20 notes by severity: BUG > FIXME > DEPRECATED > TODO > others.
968
+ _SEVERITY_ORDER = {"BUG": 0, "FIXME": 1, "DEPRECATED": 2, "TODO": 3, "HACK": 4, "WARNING": 5}
969
+ _sorted_notes = sorted(
970
+ sm.code_notes,
971
+ key=lambda n: (_SEVERITY_ORDER.get(getattr(n, "kind", "").upper(), 9), getattr(n, "path", "")),
972
+ )
973
+ result["code_notes"] = [
974
+ {k: v for k, v in asdict(n).items() if v is not None and v != ""}
975
+ for n in _sorted_notes[:20]
976
+ ]
955
977
 
956
978
  if sm.git_context is not None and sm.git_context.requested:
957
979
  result["git_context"] = asdict(sm.git_context)
@@ -1151,6 +1173,8 @@ def _contract_view_standard(
1151
1173
  ],
1152
1174
  "entry_points": ep_groups["production"],
1153
1175
  }
1176
+ if sm.metadata.traversal_topology:
1177
+ result["traversal"] = sm.metadata.traversal_topology
1154
1178
  if ep_groups["development"]:
1155
1179
  result["development_entry_points"] = ep_groups["development"]
1156
1180
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 0.36.0
3
+ Version: 0.38.0
4
4
  Summary: Deterministic codebase context for AI coding agents
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -1,9 +1,10 @@
1
- sourcecode/__init__.py,sha256=X_JznUsRmn3yELdS1V1zFgnDITiz-h3a451D5eLoTnY,103
1
+ sourcecode/__init__.py,sha256=RjrfBH06OIJiq-xk4Hadj8Zl3Soer5r1Ct1ogF0xqaU,103
2
+ sourcecode/adaptive_scanner.py,sha256=6dh34C2qZXyRbw-8xBhbEwDdXanM6CRFRWayVoYITnA,10190
2
3
  sourcecode/architecture_analyzer.py,sha256=H6noGgVArUJ25z1qC0fFA0KvJJeHZYyhKvKSkOyWHUk,23096
3
4
  sourcecode/architecture_summary.py,sha256=rSY5MRiaz4N1YdG0pqDTDuFjSN7PO_Zplx-dtNzv2Yo,19985
4
5
  sourcecode/ast_extractor.py,sha256=0OHQwTUBBc9lmqPLryVeB1z8dGIC6NhLlar800CD9oI,41129
5
6
  sourcecode/classifier.py,sha256=GKTMN8qKZX7ponSwDJfN08RrasI4CVpq1_gFBgEopps,7093
6
- sourcecode/cli.py,sha256=O1ObfcxvhMYMXjd6otx6G0fE9ethIAX4qDUpUUjOxgY,63167
7
+ sourcecode/cli.py,sha256=dJ0kkwC0pQ4LJyhjlbtHKSpD-TvRQQyhdhvjRCHPA8o,65280
7
8
  sourcecode/code_notes_analyzer.py,sha256=rRd8bFYV0krjlxxQV0wenwE9K7pVpUQSR7KvSvUQKw4,9226
8
9
  sourcecode/confidence_analyzer.py,sha256=HxJMPLI5ulqtkncnv98W4iVO6yMbpQo87VuxiuNbDmY,12167
9
10
  sourcecode/context_summarizer.py,sha256=CiQrfBEzun949bWvmLabWoj2HhPn6Lw62ofqnsy0FlQ,6503
@@ -15,17 +16,18 @@ sourcecode/doc_analyzer.py,sha256=Ec3orx6vBKsh5cNM3-F4y2Got2KuKx8w3dErwtdtM-A,19
15
16
  sourcecode/entrypoint_classifier.py,sha256=a69dMGyxCTd_LOm3oqj-EXWpRmbmeujN7T1mr2eJ1as,3877
16
17
  sourcecode/env_analyzer.py,sha256=slvq-eT24RVMNczLNDlZbe0hU8JXIIPxybqubvrrnSQ,14409
17
18
  sourcecode/file_classifier.py,sha256=_KfFIIolharaIxbSTrCkaWauQIqNHCyor_n47RGyDh8,8577
18
- sourcecode/git_analyzer.py,sha256=saI5wtHBEOXBhdk7SrVR7ArSM6MFkyGgukvGRuD9WRc,9638
19
+ sourcecode/git_analyzer.py,sha256=s7tJTd_GAczhrH7j9JhBNp7ozhkW3lzBN0TMNwFqJwE,9977
19
20
  sourcecode/graph_analyzer.py,sha256=hMOsLLz9B0UnQ4xwbHdgr3bFvqpw0bQ8kN-xmEn3Krk,64156
20
- sourcecode/metrics_analyzer.py,sha256=4uh11v-Q0gdrN87BOxuFWUym3N3AOkOuy21K5N8peB8,20126
21
+ sourcecode/metrics_analyzer.py,sha256=e2cFwB9XubFq_dIVsP2PLjpr4wX0N6ulb3ol3sGDUeo,20777
21
22
  sourcecode/prepare_context.py,sha256=vxEzr8czS3MFbdTx4hBJQlJLrl9cuvbHdL3ZokxFkvo,31384
22
23
  sourcecode/redactor.py,sha256=xuGcadGEHaPw4qZXlMDvzMCsr4VOkdp3oBQptHyJk8c,2884
23
24
  sourcecode/relevance_scorer.py,sha256=ea7_7AHVgahVEWK3ebKOpG67agzG_pGICu5f2KgzrIA,8133
25
+ sourcecode/repo_classifier.py,sha256=FG1vaWKdWXsWdl-S8hjVMiTqcwgaRXkDyvK4rPcOGtQ,22681
24
26
  sourcecode/runtime_classifier.py,sha256=zWX3r3HCKHc-qtIobErOa8aKMmaoPYREtJKvPcBGPjQ,14792
25
27
  sourcecode/scanner.py,sha256=aM3h9-DCQ3xKpeHpHYdo2vX6T5P95HA_YwZbkAVNwmo,8288
26
- sourcecode/schema.py,sha256=AShu_bcP30TYaw4Dl1nYy8aFnBCKxrUli3LhU3MZTjs,20739
28
+ sourcecode/schema.py,sha256=dVA-3EbHBakHLkgeZF-LfjKClEFRgPZkzblXpDTshFA,20796
27
29
  sourcecode/semantic_analyzer.py,sha256=asQfJf-EhzYaOTA-iMuZsrVXtbW7SV2WEKCxgsxa88Y,79413
28
- sourcecode/serializer.py,sha256=8ONhYuJ25AjMfZImwveYjVSHjIFDCuBRjCKNjs94bEA,50057
30
+ sourcecode/serializer.py,sha256=qJRJV_z-T_wU615KMA1ez5IIeV3wcexh29lY4-fcgjs,51329
29
31
  sourcecode/summarizer.py,sha256=ZuzIdm3t8A-d5MuQL0TSNLrd-L0IQIuguIxeNXMNJf8,16070
30
32
  sourcecode/tree_utils.py,sha256=Fj9OIuUksBvgibNd3feog0sMDjVypJzPexp5lvMoYWI,1424
31
33
  sourcecode/workspace.py,sha256=fQlVoNx8S-fSHpKoJ0JBvEHCFkxszH0KZVJed1i3TRk,6845
@@ -56,8 +58,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
56
58
  sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
57
59
  sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
58
60
  sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
59
- sourcecode-0.36.0.dist-info/METADATA,sha256=vQc2-46U5UyDGNaaVWbr5Jb7_eBOgJAYIPpYAD_aiwA,25209
60
- sourcecode-0.36.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
61
- sourcecode-0.36.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
62
- sourcecode-0.36.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
63
- sourcecode-0.36.0.dist-info/RECORD,,
61
+ sourcecode-0.38.0.dist-info/METADATA,sha256=-RJ8bdDTHeuGmWN-iNo4eYkjPTuSnfriYYD1O59Gmwc,25209
62
+ sourcecode-0.38.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
63
+ sourcecode-0.38.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
64
+ sourcecode-0.38.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
65
+ sourcecode-0.38.0.dist-info/RECORD,,