sourcecode 0.37.0__tar.gz → 0.38.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144) hide show
  1. {sourcecode-0.37.0 → sourcecode-0.38.0}/PKG-INFO +1 -1
  2. {sourcecode-0.37.0 → sourcecode-0.38.0}/pyproject.toml +1 -1
  3. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/__init__.py +1 -1
  4. sourcecode-0.38.0/src/sourcecode/adaptive_scanner.py +258 -0
  5. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/cli.py +17 -10
  6. sourcecode-0.38.0/src/sourcecode/repo_classifier.py +570 -0
  7. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/schema.py +1 -0
  8. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/serializer.py +5 -0
  9. {sourcecode-0.37.0 → sourcecode-0.38.0}/.agents/skills/source-command-gsd-join-discord/SKILL.md +0 -0
  10. {sourcecode-0.37.0 → sourcecode-0.38.0}/.agents/skills/source-command-gsd-review-backlog/SKILL.md +0 -0
  11. {sourcecode-0.37.0 → sourcecode-0.38.0}/.agents/skills/source-command-gsd-workstreams/SKILL.md +0 -0
  12. {sourcecode-0.37.0 → sourcecode-0.38.0}/.gitignore +0 -0
  13. {sourcecode-0.37.0 → sourcecode-0.38.0}/.ruff.toml +0 -0
  14. {sourcecode-0.37.0 → sourcecode-0.38.0}/CONTRIBUTING.md +0 -0
  15. {sourcecode-0.37.0 → sourcecode-0.38.0}/LICENSE +0 -0
  16. {sourcecode-0.37.0 → sourcecode-0.38.0}/README.md +0 -0
  17. {sourcecode-0.37.0 → sourcecode-0.38.0}/SECURITY.md +0 -0
  18. {sourcecode-0.37.0 → sourcecode-0.38.0}/docs/privacy.md +0 -0
  19. {sourcecode-0.37.0 → sourcecode-0.38.0}/docs/schema.md +0 -0
  20. {sourcecode-0.37.0 → sourcecode-0.38.0}/raw +0 -0
  21. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/architecture_analyzer.py +0 -0
  22. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/architecture_summary.py +0 -0
  23. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/ast_extractor.py +0 -0
  24. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/classifier.py +0 -0
  25. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/code_notes_analyzer.py +0 -0
  26. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/confidence_analyzer.py +0 -0
  27. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/context_summarizer.py +0 -0
  28. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/contract_model.py +0 -0
  29. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/contract_pipeline.py +0 -0
  30. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/coverage_parser.py +0 -0
  31. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/dependency_analyzer.py +0 -0
  32. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/detectors/__init__.py +0 -0
  33. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/detectors/base.py +0 -0
  34. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/detectors/csproj_parser.py +0 -0
  35. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/detectors/dart.py +0 -0
  36. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/detectors/dotnet.py +0 -0
  37. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/detectors/elixir.py +0 -0
  38. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/detectors/go.py +0 -0
  39. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/detectors/heuristic.py +0 -0
  40. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/detectors/hybrid.py +0 -0
  41. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/detectors/java.py +0 -0
  42. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/detectors/jvm_ext.py +0 -0
  43. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/detectors/nodejs.py +0 -0
  44. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/detectors/parsers.py +0 -0
  45. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/detectors/php.py +0 -0
  46. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/detectors/project.py +0 -0
  47. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/detectors/python.py +0 -0
  48. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/detectors/ruby.py +0 -0
  49. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/detectors/rust.py +0 -0
  50. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/detectors/systems.py +0 -0
  51. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/detectors/terraform.py +0 -0
  52. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/detectors/tooling.py +0 -0
  53. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/doc_analyzer.py +0 -0
  54. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/entrypoint_classifier.py +0 -0
  55. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/env_analyzer.py +0 -0
  56. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/file_classifier.py +0 -0
  57. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/git_analyzer.py +0 -0
  58. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/graph_analyzer.py +0 -0
  59. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/metrics_analyzer.py +0 -0
  60. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/prepare_context.py +0 -0
  61. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/redactor.py +0 -0
  62. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/relevance_scorer.py +0 -0
  63. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/runtime_classifier.py +0 -0
  64. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/scanner.py +0 -0
  65. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/semantic_analyzer.py +0 -0
  66. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/summarizer.py +0 -0
  67. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/telemetry/__init__.py +0 -0
  68. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/telemetry/config.py +0 -0
  69. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/telemetry/consent.py +0 -0
  70. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/telemetry/events.py +0 -0
  71. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/telemetry/filters.py +0 -0
  72. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/telemetry/transport.py +0 -0
  73. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/tree_utils.py +0 -0
  74. {sourcecode-0.37.0 → sourcecode-0.38.0}/src/sourcecode/workspace.py +0 -0
  75. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/__init__.py +0 -0
  76. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/conftest.py +0 -0
  77. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/fixtures/coverage.xml +0 -0
  78. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/fixtures/fastapi_app/pyproject.toml +0 -0
  79. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/fixtures/fastapi_app/src/main.py +0 -0
  80. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/fixtures/go_service/cmd/api/main.go +0 -0
  81. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/fixtures/go_service/go.mod +0 -0
  82. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/fixtures/jacoco.xml +0 -0
  83. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/fixtures/lcov.info +0 -0
  84. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/fixtures/nextjs_app/app/page.tsx +0 -0
  85. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/fixtures/nextjs_app/package.json +0 -0
  86. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/fixtures/nextjs_app/pnpm-lock.yaml +0 -0
  87. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/fixtures/pnpm_monorepo/apps/web/app/page.tsx +0 -0
  88. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/fixtures/pnpm_monorepo/apps/web/package.json +0 -0
  89. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/fixtures/pnpm_monorepo/packages/api/main.py +0 -0
  90. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/fixtures/pnpm_monorepo/packages/api/pyproject.toml +0 -0
  91. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/fixtures/pnpm_monorepo/pnpm-workspace.yaml +0 -0
  92. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_architecture_analyzer.py +0 -0
  93. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_architecture_summary.py +0 -0
  94. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_ast_extractor.py +0 -0
  95. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_classifier.py +0 -0
  96. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_cli.py +0 -0
  97. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_code_notes_analyzer.py +0 -0
  98. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_contract_pipeline.py +0 -0
  99. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_coverage_parser.py +0 -0
  100. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_cross_consistency.py +0 -0
  101. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_dependency_analyzer_node_python.py +0 -0
  102. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_dependency_analyzer_polyglot.py +0 -0
  103. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_dependency_schema.py +0 -0
  104. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_detector_dotnet.py +0 -0
  105. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_detector_go_rust_java.py +0 -0
  106. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_detector_nodejs.py +0 -0
  107. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_detector_php_ruby_dart.py +0 -0
  108. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_detector_python.py +0 -0
  109. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_detector_universal_managed.py +0 -0
  110. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_detector_universal_systems.py +0 -0
  111. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_detectors_base.py +0 -0
  112. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_doc_analyzer_jsdom.py +0 -0
  113. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_doc_analyzer_python.py +0 -0
  114. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_graph_analyzer_polyglot.py +0 -0
  115. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_graph_analyzer_python_node.py +0 -0
  116. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_graph_schema.py +0 -0
  117. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_hybrid_inference.py +0 -0
  118. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_integration.py +0 -0
  119. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_integration_dependencies.py +0 -0
  120. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_integration_detection.py +0 -0
  121. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_integration_docs.py +0 -0
  122. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_integration_graph_modules.py +0 -0
  123. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_integration_lqn.py +0 -0
  124. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_integration_metrics.py +0 -0
  125. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_integration_multistack.py +0 -0
  126. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_integration_semantics.py +0 -0
  127. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_integration_universal.py +0 -0
  128. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_metrics_analyzer.py +0 -0
  129. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_packaging.py +0 -0
  130. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_phase1_improvements.py +0 -0
  131. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_pipeline_integrity.py +0 -0
  132. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_real_projects.py +0 -0
  133. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_redactor.py +0 -0
  134. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_scanner.py +0 -0
  135. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_schema.py +0 -0
  136. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_schema_normalization.py +0 -0
  137. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_semantic_analyzer_node.py +0 -0
  138. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_semantic_analyzer_python.py +0 -0
  139. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_semantic_import_resolution.py +0 -0
  140. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_semantic_schema.py +0 -0
  141. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_signal_hierarchy.py +0 -0
  142. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_summarizer.py +0 -0
  143. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_telemetry.py +0 -0
  144. {sourcecode-0.37.0 → sourcecode-0.38.0}/tests/test_workspace_analyzer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 0.37.0
3
+ Version: 0.38.0
4
4
  Summary: Deterministic codebase context for AI coding agents
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "sourcecode"
7
- version = "0.37.0"
7
+ version = "0.38.0"
8
8
  description = "Deterministic codebase context for AI coding agents"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "0.37.0"
3
+ __version__ = "0.38.0"
@@ -0,0 +1,258 @@
1
"""Adaptive file tree scanner with topology-aware depth budgets.

Replaces pure depth filtering with relevance-oriented traversal:
  - Source roots (packages/*/src, apps/*/src) get deep scan budgets.
  - Low-signal directories (docs/, benchmarks/) are limited to 2 levels.
  - Generated/excluded directories (dist/, node_modules/) are skipped.
  - Unclassified directories fall back to the base depth limit.

Drop-in replacement for FileScanner: same scan_tree() and find_manifests()
interface, same output format (None = file, dict = directory).
"""

# The docstring must be the first statement for Python to bind it to
# ``__doc__``; a ``from __future__`` import is allowed to follow it.
from __future__ import annotations
14
+
15
+ import os
16
+ from pathlib import Path
17
+ from typing import Any, Optional, cast
18
+
19
+ from pathspec import GitIgnoreSpec
20
+
21
+ from sourcecode.repo_classifier import RepoTopology
22
+ from sourcecode.scanner import DEFAULT_EXCLUDES, MANIFEST_NAMES
23
+
24
+
25
class AdaptiveScanner:
    """File tree scanner driven by repository topology.

    When *topology* is provided, traversal depth is controlled per-directory:
    directories inside source roots receive a deep budget; low-signal dirs
    are restricted; generated dirs are excluded entirely.

    When *topology* is None, falls back to the base depth limit for every
    directory.

    The scanner exposes the same ``scan_tree()`` / ``find_manifests()``
    interface as ``FileScanner`` and produces the same tree shape
    (``None`` = file, ``dict`` = directory).
    """

    def __init__(
        self,
        root: Path,
        topology: Optional[RepoTopology] = None,
        base_depth: int = 4,
        extra_excludes: Optional[frozenset[str]] = None,
    ) -> None:
        """Prepare lookup tables for the traversal.

        Args:
            root: Directory to scan; resolved to an absolute path.
            topology: Optional repository classification. Its
                ``source_roots``, ``low_signal_roots`` and ``generated_roots``
                entries (each exposing a ``/``-separated ``path``) and its
                ``scan_budget`` (``source_depth`` / ``low_signal_depth``)
                drive the per-directory budgets.
            base_depth: Depth limit for directories the topology does not
                classify, and the minimum budget granted to source roots.
            extra_excludes: Additional directory names to skip, on top of
                ``DEFAULT_EXCLUDES``.
        """
        self.root = root.resolve()
        self.topology = topology
        self.base_depth = base_depth
        self._excludes = DEFAULT_EXCLUDES | (extra_excludes or frozenset())
        self._gitignore_spec: Optional[GitIgnoreSpec] = None

        # Lookup tables pre-computed from the topology.
        #
        # Each prefix entry is (path_parts_tuple, max_absolute_depth):
        #   source prefix     -> (src_parts, len(src_parts) + source_depth)
        #   low-signal prefix -> (ls_parts, len(ls_parts) + low_signal_depth)
        #
        # "max_absolute_depth" is measured from the repo root, not from the
        # classified directory. A directory whose depth is >= that value is
        # not listed and not descended into.
        self._source_prefixes: list[tuple[tuple[str, ...], int]] = []
        self._low_signal_prefixes: list[tuple[tuple[str, ...], int]] = []
        # Directory names skipped wherever they appear (single-component
        # generated roots), and full relative paths skipped only at that
        # exact location (multi-component generated roots).
        self._extra_exclude_names: frozenset[str] = frozenset()
        self._generated_paths: frozenset[tuple[str, ...]] = frozenset()

        if topology is None:
            return

        budget = topology.scan_budget
        for sr in topology.source_roots:
            parts = self._split_path(sr.path)
            if parts:
                self._source_prefixes.append(
                    (parts, len(parts) + budget.source_depth)
                )

        for lr in topology.low_signal_roots:
            parts = self._split_path(lr.path)
            if parts:
                self._low_signal_prefixes.append(
                    (parts, len(parts) + budget.low_signal_depth)
                )

        # Generated roots are normalised exactly like the other root kinds so
        # that "dist/" and "dist" are equivalent, and nested generated roots
        # (e.g. "packages/a/out") are honoured instead of silently dropped.
        top_level_names: set[str] = set()
        nested_paths: set[tuple[str, ...]] = set()
        for gr in topology.generated_roots:
            parts = self._split_path(gr.path)
            if len(parts) == 1:
                top_level_names.add(parts[0])
            elif parts:
                nested_paths.add(parts)
        self._extra_exclude_names = frozenset(top_level_names)
        self._generated_paths = frozenset(nested_paths)

    @staticmethod
    def _split_path(path: str) -> tuple[str, ...]:
        """Split a ``/``-separated path into components, dropping empty ones."""
        return tuple(p for p in path.split("/") if p)

    # ------------------------------------------------------------------
    # Gitignore
    # ------------------------------------------------------------------

    def _load_gitignore_spec(self) -> GitIgnoreSpec:
        """Return the spec built from ``<root>/.gitignore``, loading it once.

        A missing or unreadable file yields a spec built from no lines.
        """
        if self._gitignore_spec is None:
            gitignore = self.root / ".gitignore"
            lines: list[str] = []
            if gitignore.exists():
                try:
                    lines = gitignore.read_text(
                        encoding="utf-8", errors="replace"
                    ).splitlines()
                except OSError:
                    # Best effort: an unreadable .gitignore is treated as empty.
                    pass
            self._gitignore_spec = GitIgnoreSpec.from_lines(lines)
        return self._gitignore_spec

    def _is_excluded_by_gitignore(self, rel_path: str, is_dir: bool) -> bool:
        """Return whether *rel_path* (relative to the root) is gitignored.

        Directories are matched with a trailing ``/`` appended.
        """
        spec = self._load_gitignore_spec()
        path_to_match = rel_path + "/" if is_dir else rel_path
        return spec.match_file(path_to_match)

    # ------------------------------------------------------------------
    # Depth budget computation — the core of adaptive traversal
    # ------------------------------------------------------------------

    def _compute_max_depth(self, rel_parts: tuple[str, ...]) -> int:
        """Return the maximum absolute depth allowed at *rel_parts*.

        Depth is the number of path components from the repo root. The scan
        stops at a directory (dirnames cleared, files skipped) when its depth
        is >= the returned value.

        Priority order:
          1. Inside a source root, or an ancestor of one -> the largest
             matching source budget, never less than ``base_depth``.
          2. Inside a low-signal root -> restricted budget.
          3. Default -> ``base_depth``.

        All source roots are considered rather than stopping at the first
        match, so overlapping roots give the same answer regardless of their
        order and every nested root stays reachable.
        """
        if not self._source_prefixes and not self._low_signal_prefixes:
            return self.base_depth

        current_depth = len(rel_parts)

        source_best: Optional[int] = None
        for src_parts, src_max in self._source_prefixes:
            n = len(src_parts)
            if current_depth >= n:
                # At or inside the source root.
                matched = rel_parts[:n] == src_parts
            else:
                # Ancestor of the source root: traversal must pass through.
                matched = src_parts[:current_depth] == rel_parts
            if matched and (source_best is None or src_max > source_best):
                source_best = src_max

        if source_best is not None:
            # A larger explicit base depth must not be reduced by the
            # topology budget.
            return max(source_best, self.base_depth)

        # Low-signal roots apply only when no source root claims the path.
        for ls_parts, ls_max in self._low_signal_prefixes:
            n = len(ls_parts)
            if current_depth >= n and rel_parts[:n] == ls_parts:
                return ls_max

        return self.base_depth

    # ------------------------------------------------------------------
    # Main traversal
    # ------------------------------------------------------------------

    def scan_tree(self) -> dict[str, Any]:
        """Build the nested file tree dictionary.

        Returns:
            A dict where ``None`` marks a file (D-02) and a nested dict marks
            a directory (D-01). Depth limits are applied per directory via
            ``_compute_max_depth``. Symlinks, gitignored entries, excluded
            directory names and generated roots are omitted.
        """
        self._load_gitignore_spec()
        root_tree: dict[str, Any] = {}
        all_excludes = self._excludes | self._extra_exclude_names
        generated_paths = self._generated_paths

        for dirpath, dirnames, filenames in os.walk(self.root, followlinks=False):
            current = Path(dirpath)
            try:
                rel = current.relative_to(self.root)
            except ValueError:
                continue

            rel_parts = rel.parts
            depth = len(rel_parts)

            if depth >= self._compute_max_depth(rel_parts):
                # Over budget: neither list this directory nor descend.
                dirnames.clear()
                continue

            # Prune in place (slice assignment) so os.walk skips the
            # rejected subdirectories.
            dirnames[:] = [
                d for d in dirnames
                if d not in all_excludes
                and rel_parts + (d,) not in generated_paths
                and not (current / d).is_symlink()
                and not self._is_excluded_by_gitignore(
                    str(rel / d) if rel_parts else d,
                    is_dir=True,
                )
            ]

            node = self._get_or_create_node(root_tree, rel_parts)

            for fname in filenames:
                # Skip flag-shaped names (shell redirect artifacts).
                if fname.startswith("-"):
                    continue
                if (current / fname).is_symlink():
                    continue
                rel_file = str(rel / fname) if rel_parts else fname
                if self._is_excluded_by_gitignore(rel_file, is_dir=False):
                    continue
                node[fname] = None  # D-02: None = file

            # Accepted subdirectories appear as dict nodes even when the
            # walk later stops at them because of their depth budget.
            for d in dirnames:
                if d not in node:
                    node[d] = {}

        return root_tree

    def _get_or_create_node(
        self, tree: dict[str, Any], parts: tuple[str, ...]
    ) -> dict[str, Any]:
        """Return the dict node at *parts* inside *tree*, creating it if needed.

        A component that is missing, or currently stored as ``None``, is
        replaced by an empty dict.
        """
        node = tree
        for part in parts:
            if part not in node or node[part] is None:
                node[part] = {}
            node = cast(dict[str, Any], node[part])
        return node

    # ------------------------------------------------------------------
    # Manifest discovery — same interface as FileScanner
    # ------------------------------------------------------------------

    def find_manifests(self) -> list[str]:
        """Find manifest files at depth 0-1.

        Returns:
            Paths (as strings) of every ``MANIFEST_NAMES`` entry found
            directly in the root, followed by those found in immediate
            child directories. Symlinks, hidden directories and directories
            named in the base exclude set are skipped. A ``PermissionError``
            while listing the root ends the child search quietly.
        """
        manifests: list[str] = []
        for name in MANIFEST_NAMES:
            candidate = self.root / name
            if candidate.exists() and not candidate.is_symlink():
                manifests.append(str(candidate))
        try:
            for child in self.root.iterdir():
                if (
                    child.is_dir()
                    and not child.is_symlink()
                    and child.name not in self._excludes
                    and not child.name.startswith(".")
                ):
                    for name in MANIFEST_NAMES:
                        candidate = child / name
                        if candidate.exists() and not candidate.is_symlink():
                            manifests.append(str(candidate))
        except PermissionError:
            pass
        return manifests
@@ -714,6 +714,13 @@ def main(
714
714
  # 1. Scan directory (SCAN-01 to SCAN-05)
715
715
  redactor = SecretRedactor(enabled=not no_redact)
716
716
 
717
+ # Classify repository topology before scanning. This is a shallow
718
+ # filesystem read (depth 0-1 only) and completes in milliseconds.
719
+ # The topology drives per-directory depth budgets in AdaptiveScanner.
720
+ from sourcecode.adaptive_scanner import AdaptiveScanner
721
+ from sourcecode.repo_classifier import RepoClassifier
722
+ _topology = RepoClassifier().classify(target)
723
+
717
724
  # Detect manifests before scan to adjust depth.
718
725
  # find_manifests() only looks at depth 0-1, does not need the full tree.
719
726
  _pre_scanner = FileScanner(target, max_depth=1)
@@ -735,7 +742,7 @@ def main(
735
742
  no_tree = True # agents never need the raw file tree
736
743
  typer.echo("[agent] dependencies env-map code-notes (no-tree)", err=True)
737
744
 
738
- scanner = FileScanner(target, max_depth=effective_depth)
745
+ scanner = AdaptiveScanner(target, topology=_topology, base_depth=effective_depth)
739
746
  raw_tree = scanner.scan_tree()
740
747
 
741
748
  # 2. Filter .env and *.secret entries from file tree (SEC-02, all levels)
@@ -775,16 +782,14 @@ def main(
775
782
  detector = ProjectDetector(build_default_detectors())
776
783
  workspace_analysis = WorkspaceAnalyzer().analyze(target, manifests)
777
784
 
778
- # Warn when scanning a monorepo at default depth — typical package sources
779
- # (packages/*/src/) live at depth 5+, so default depth=4 silently misses them.
780
- # Only emit to TTY to avoid contaminating piped/CI output; agents read analysis_gaps.
785
+ # Adaptive traversal handles monorepo source root discovery automatically.
786
+ # Emit a diagnostic when topology confidence is low so users know why.
781
787
  import sys as _sys
782
- if workspace_analysis.is_monorepo and depth <= 4 and effective_depth <= 4:
788
+ if _topology.workspace_type == "monorepo" and _topology.confidence < 0.5:
783
789
  if _sys.stderr.isatty():
784
790
  typer.echo(
785
- f"[warning] monorepo detected with --depth {depth}. "
786
- "Source files in packages/*/src/ (depth 5+) may be invisible. "
787
- "Use --depth 6 or higher for full coverage.",
791
+ "[traversal] monorepo detected but source root confidence is low "
792
+ f"({_topology.confidence:.0%}). Use --depth 8 or higher if files are missing.",
788
793
  err=True,
789
794
  )
790
795
 
@@ -896,7 +901,8 @@ def main(
896
901
  workspace_root = target / workspace.path
897
902
  if not workspace_root.exists() or not workspace_root.is_dir():
898
903
  continue
899
- workspace_scanner = FileScanner(workspace_root, max_depth=depth)
904
+ _ws_topology = RepoClassifier().classify(workspace_root)
905
+ workspace_scanner = AdaptiveScanner(workspace_root, topology=_ws_topology, base_depth=depth)
900
906
  workspace_tree = filter_sensitive_files(workspace_scanner.scan_tree())
901
907
  workspace_manifests = workspace_scanner.find_manifests()
902
908
  workspace_stacks, workspace_entry_points, _ = detector.detect(
@@ -1008,6 +1014,7 @@ def main(
1008
1014
  metadata = AnalysisMetadata(
1009
1015
  analyzed_path=str(target),
1010
1016
  analyzer_fingerprints=_fingerprints,
1017
+ traversal_topology=_topology.as_dict(),
1011
1018
  )
1012
1019
  sm = SourceMap(
1013
1020
  metadata=metadata,
@@ -1037,7 +1044,7 @@ def main(
1037
1044
  target / ws.path,
1038
1045
  (
1039
1046
  filter_sensitive_files(
1040
- FileScanner(target / ws.path, max_depth=depth).scan_tree()
1047
+ AdaptiveScanner(target / ws.path, base_depth=depth).scan_tree()
1041
1048
  )
1042
1049
  ),
1043
1050
  workspace=ws.path,