sourcecode 0.37.0__tar.gz → 0.39.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144) hide show
  1. {sourcecode-0.37.0 → sourcecode-0.39.0}/PKG-INFO +1 -1
  2. {sourcecode-0.37.0 → sourcecode-0.39.0}/pyproject.toml +1 -1
  3. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/__init__.py +1 -1
  4. sourcecode-0.39.0/src/sourcecode/adaptive_scanner.py +258 -0
  5. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/cli.py +67 -11
  6. sourcecode-0.39.0/src/sourcecode/repo_classifier.py +570 -0
  7. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/schema.py +1 -0
  8. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/serializer.py +5 -0
  9. {sourcecode-0.37.0 → sourcecode-0.39.0}/.agents/skills/source-command-gsd-join-discord/SKILL.md +0 -0
  10. {sourcecode-0.37.0 → sourcecode-0.39.0}/.agents/skills/source-command-gsd-review-backlog/SKILL.md +0 -0
  11. {sourcecode-0.37.0 → sourcecode-0.39.0}/.agents/skills/source-command-gsd-workstreams/SKILL.md +0 -0
  12. {sourcecode-0.37.0 → sourcecode-0.39.0}/.gitignore +0 -0
  13. {sourcecode-0.37.0 → sourcecode-0.39.0}/.ruff.toml +0 -0
  14. {sourcecode-0.37.0 → sourcecode-0.39.0}/CONTRIBUTING.md +0 -0
  15. {sourcecode-0.37.0 → sourcecode-0.39.0}/LICENSE +0 -0
  16. {sourcecode-0.37.0 → sourcecode-0.39.0}/README.md +0 -0
  17. {sourcecode-0.37.0 → sourcecode-0.39.0}/SECURITY.md +0 -0
  18. {sourcecode-0.37.0 → sourcecode-0.39.0}/docs/privacy.md +0 -0
  19. {sourcecode-0.37.0 → sourcecode-0.39.0}/docs/schema.md +0 -0
  20. {sourcecode-0.37.0 → sourcecode-0.39.0}/raw +0 -0
  21. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/architecture_analyzer.py +0 -0
  22. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/architecture_summary.py +0 -0
  23. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/ast_extractor.py +0 -0
  24. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/classifier.py +0 -0
  25. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/code_notes_analyzer.py +0 -0
  26. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/confidence_analyzer.py +0 -0
  27. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/context_summarizer.py +0 -0
  28. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/contract_model.py +0 -0
  29. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/contract_pipeline.py +0 -0
  30. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/coverage_parser.py +0 -0
  31. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/dependency_analyzer.py +0 -0
  32. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/__init__.py +0 -0
  33. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/base.py +0 -0
  34. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/csproj_parser.py +0 -0
  35. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/dart.py +0 -0
  36. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/dotnet.py +0 -0
  37. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/elixir.py +0 -0
  38. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/go.py +0 -0
  39. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/heuristic.py +0 -0
  40. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/hybrid.py +0 -0
  41. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/java.py +0 -0
  42. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/jvm_ext.py +0 -0
  43. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/nodejs.py +0 -0
  44. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/parsers.py +0 -0
  45. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/php.py +0 -0
  46. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/project.py +0 -0
  47. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/python.py +0 -0
  48. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/ruby.py +0 -0
  49. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/rust.py +0 -0
  50. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/systems.py +0 -0
  51. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/terraform.py +0 -0
  52. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/tooling.py +0 -0
  53. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/doc_analyzer.py +0 -0
  54. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/entrypoint_classifier.py +0 -0
  55. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/env_analyzer.py +0 -0
  56. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/file_classifier.py +0 -0
  57. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/git_analyzer.py +0 -0
  58. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/graph_analyzer.py +0 -0
  59. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/metrics_analyzer.py +0 -0
  60. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/prepare_context.py +0 -0
  61. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/redactor.py +0 -0
  62. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/relevance_scorer.py +0 -0
  63. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/runtime_classifier.py +0 -0
  64. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/scanner.py +0 -0
  65. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/semantic_analyzer.py +0 -0
  66. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/summarizer.py +0 -0
  67. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/telemetry/__init__.py +0 -0
  68. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/telemetry/config.py +0 -0
  69. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/telemetry/consent.py +0 -0
  70. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/telemetry/events.py +0 -0
  71. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/telemetry/filters.py +0 -0
  72. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/telemetry/transport.py +0 -0
  73. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/tree_utils.py +0 -0
  74. {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/workspace.py +0 -0
  75. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/__init__.py +0 -0
  76. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/conftest.py +0 -0
  77. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/fixtures/coverage.xml +0 -0
  78. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/fixtures/fastapi_app/pyproject.toml +0 -0
  79. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/fixtures/fastapi_app/src/main.py +0 -0
  80. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/fixtures/go_service/cmd/api/main.go +0 -0
  81. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/fixtures/go_service/go.mod +0 -0
  82. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/fixtures/jacoco.xml +0 -0
  83. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/fixtures/lcov.info +0 -0
  84. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/fixtures/nextjs_app/app/page.tsx +0 -0
  85. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/fixtures/nextjs_app/package.json +0 -0
  86. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/fixtures/nextjs_app/pnpm-lock.yaml +0 -0
  87. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/fixtures/pnpm_monorepo/apps/web/app/page.tsx +0 -0
  88. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/fixtures/pnpm_monorepo/apps/web/package.json +0 -0
  89. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/fixtures/pnpm_monorepo/packages/api/main.py +0 -0
  90. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/fixtures/pnpm_monorepo/packages/api/pyproject.toml +0 -0
  91. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/fixtures/pnpm_monorepo/pnpm-workspace.yaml +0 -0
  92. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_architecture_analyzer.py +0 -0
  93. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_architecture_summary.py +0 -0
  94. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_ast_extractor.py +0 -0
  95. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_classifier.py +0 -0
  96. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_cli.py +0 -0
  97. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_code_notes_analyzer.py +0 -0
  98. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_contract_pipeline.py +0 -0
  99. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_coverage_parser.py +0 -0
  100. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_cross_consistency.py +0 -0
  101. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_dependency_analyzer_node_python.py +0 -0
  102. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_dependency_analyzer_polyglot.py +0 -0
  103. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_dependency_schema.py +0 -0
  104. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_detector_dotnet.py +0 -0
  105. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_detector_go_rust_java.py +0 -0
  106. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_detector_nodejs.py +0 -0
  107. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_detector_php_ruby_dart.py +0 -0
  108. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_detector_python.py +0 -0
  109. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_detector_universal_managed.py +0 -0
  110. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_detector_universal_systems.py +0 -0
  111. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_detectors_base.py +0 -0
  112. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_doc_analyzer_jsdom.py +0 -0
  113. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_doc_analyzer_python.py +0 -0
  114. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_graph_analyzer_polyglot.py +0 -0
  115. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_graph_analyzer_python_node.py +0 -0
  116. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_graph_schema.py +0 -0
  117. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_hybrid_inference.py +0 -0
  118. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_integration.py +0 -0
  119. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_integration_dependencies.py +0 -0
  120. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_integration_detection.py +0 -0
  121. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_integration_docs.py +0 -0
  122. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_integration_graph_modules.py +0 -0
  123. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_integration_lqn.py +0 -0
  124. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_integration_metrics.py +0 -0
  125. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_integration_multistack.py +0 -0
  126. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_integration_semantics.py +0 -0
  127. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_integration_universal.py +0 -0
  128. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_metrics_analyzer.py +0 -0
  129. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_packaging.py +0 -0
  130. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_phase1_improvements.py +0 -0
  131. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_pipeline_integrity.py +0 -0
  132. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_real_projects.py +0 -0
  133. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_redactor.py +0 -0
  134. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_scanner.py +0 -0
  135. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_schema.py +0 -0
  136. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_schema_normalization.py +0 -0
  137. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_semantic_analyzer_node.py +0 -0
  138. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_semantic_analyzer_python.py +0 -0
  139. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_semantic_import_resolution.py +0 -0
  140. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_semantic_schema.py +0 -0
  141. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_signal_hierarchy.py +0 -0
  142. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_summarizer.py +0 -0
  143. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_telemetry.py +0 -0
  144. {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_workspace_analyzer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 0.37.0
3
+ Version: 0.39.0
4
4
  Summary: Deterministic codebase context for AI coding agents
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "sourcecode"
7
- version = "0.37.0"
7
+ version = "0.39.0"
8
8
  description = "Deterministic codebase context for AI coding agents"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "0.37.0"
3
+ __version__ = "0.39.0"
@@ -0,0 +1,258 @@
1
"""Adaptive file tree scanner with topology-aware depth budgets.

Replaces pure depth filtering with relevance-oriented traversal:
- Source roots (packages/*/src, apps/*/src) get deep scan budgets.
- Low-signal directories (docs/, benchmarks/) are limited to 2 levels.
- Generated/excluded directories (dist/, node_modules/) are skipped.
- Unclassified directories fall back to the base depth limit.

Drop-in replacement for FileScanner: same scan_tree() and find_manifests()
interface, same output format (None = file, dict = directory).
"""

# The docstring must be the first statement of the module to become
# ``__doc__``; a future statement is still valid directly after it.
from __future__ import annotations

import os
from pathlib import Path
from typing import Any, Optional, cast

from pathspec import GitIgnoreSpec

from sourcecode.repo_classifier import RepoTopology
from sourcecode.scanner import DEFAULT_EXCLUDES, MANIFEST_NAMES
23
+
24
+
25
class AdaptiveScanner:
    """File tree scanner driven by repository topology.

    When *topology* is provided, traversal depth is controlled per-directory:
    directories inside source roots receive a deep budget, low-signal roots
    get their own (typically shallower) budget, and generated roots that sit
    directly under the repository root are excluded by directory name.

    When *topology* is None, every directory uses the base depth limit.
    """

    def __init__(
        self,
        root: Path,
        topology: "Optional[RepoTopology]" = None,
        base_depth: int = 4,
        extra_excludes: Optional[frozenset[str]] = None,
    ) -> None:
        """Prepare the scanner and pre-compute per-root depth budgets.

        Args:
            root: Directory to scan; stored resolved.
            topology: Optional repository classification providing
                ``source_roots``, ``low_signal_roots``, ``generated_roots``
                and ``scan_budget``.
            base_depth: Depth limit for directories no root claims.
            extra_excludes: Additional directory names to skip, merged with
                ``DEFAULT_EXCLUDES``.
        """
        self.root = root.resolve()
        self.topology = topology
        self.base_depth = base_depth
        self._excludes = DEFAULT_EXCLUDES | (extra_excludes or frozenset())
        self._gitignore_spec: "Optional[GitIgnoreSpec]" = None

        # Lookup tables derived from the topology. Classification during
        # traversal is a linear pass over these (usually short) lists.
        #
        # Each entry is (path_parts_tuple, max_absolute_depth):
        #   source prefix     -> (src_parts, len(src_parts) + source_depth)
        #   low-signal prefix -> (ls_parts, len(ls_parts) + low_signal_depth)
        #
        # "max_absolute_depth" is measured from the repo root, not from the
        # classified directory. A directory whose depth is >= that value is
        # neither listed nor descended into.
        self._source_prefixes: list[tuple[tuple[str, ...], int]] = []
        self._low_signal_prefixes: list[tuple[tuple[str, ...], int]] = []
        self._extra_exclude_names: frozenset[str] = frozenset()

        if topology is None:
            return

        budget = topology.scan_budget
        for source_root in topology.source_roots:
            parts = self._split_root_path(source_root.path)
            if parts:
                self._source_prefixes.append(
                    (parts, len(parts) + budget.source_depth)
                )

        for low_root in topology.low_signal_roots:
            parts = self._split_root_path(low_root.path)
            if parts:
                self._low_signal_prefixes.append(
                    (parts, len(parts) + budget.low_signal_depth)
                )

        # Generated roots directly under the repo root are added to the
        # excluded names so os.walk never enters them. Paths are normalised
        # the same way as source/low-signal roots, so "dist/" counts as the
        # top-level directory "dist" instead of being silently ignored.
        generated_parts = (
            self._split_root_path(generated_root.path)
            for generated_root in topology.generated_roots
        )
        self._extra_exclude_names = frozenset(
            parts[0] for parts in generated_parts if len(parts) == 1
        )

    @staticmethod
    def _split_root_path(path: str) -> tuple[str, ...]:
        """Split a '/'-separated root path into its non-empty components.

        Empty segments (leading, trailing or doubled slashes) and "."
        segments are dropped, so "dist/", "./dist" and "dist" are equivalent.
        """
        return tuple(part for part in path.split("/") if part and part != ".")

    # ------------------------------------------------------------------
    # Gitignore
    # ------------------------------------------------------------------

    def _load_gitignore_spec(self) -> "GitIgnoreSpec":
        """Return the spec parsed from ``<root>/.gitignore``, cached after first use.

        A missing or unreadable file yields a spec built from no lines.
        """
        if self._gitignore_spec is None:
            gitignore = self.root / ".gitignore"
            lines: list[str] = []
            if gitignore.exists():
                try:
                    lines = gitignore.read_text(
                        encoding="utf-8", errors="replace"
                    ).splitlines()
                except OSError:
                    # Best effort: an unreadable .gitignore means "no rules".
                    pass
            self._gitignore_spec = GitIgnoreSpec.from_lines(lines)
        return self._gitignore_spec

    def _is_excluded_by_gitignore(self, rel_path: str, is_dir: bool) -> bool:
        """Tell whether *rel_path* (relative to the root) matches the gitignore spec.

        Directories are matched with a trailing "/" appended.
        """
        spec = self._load_gitignore_spec()
        path_to_match = rel_path + "/" if is_dir else rel_path
        return spec.match_file(path_to_match)

    # ------------------------------------------------------------------
    # Depth budget computation — the core of adaptive traversal
    # ------------------------------------------------------------------

    def _compute_max_depth(self, rel_parts: tuple[str, ...]) -> int:
        """Return the maximum absolute depth allowed at *rel_parts*.

        Depth is the number of path components from the repo root. The
        caller stops (clears dirnames, lists no files) when the directory's
        depth is >= the returned value.

        Priority order:
          1. At or inside a source root -> that root's budget.
          2. Ancestor of a source root -> the larger of base_depth and the
             biggest budget among the roots below, so all stay reachable.
          3. At or inside a low-signal root -> that root's budget.
          4. Otherwise -> base_depth.
        """
        if not self._source_prefixes and not self._low_signal_prefixes:
            return self.base_depth

        current_depth = len(rel_parts)
        ancestor_best = self.base_depth
        found_ancestor = False

        for src_parts, src_max in self._source_prefixes:
            n = len(src_parts)
            if current_depth >= n:
                if rel_parts[:n] == src_parts:
                    # Definite source territory: first matching root wins.
                    return src_max
            elif src_parts[:current_depth] == rel_parts:
                # rel_parts is a proper prefix of this source root.
                found_ancestor = True
                ancestor_best = max(ancestor_best, src_max)

        if found_ancestor:
            return ancestor_best

        # Low-signal roots apply only when no source root claimed the path.
        for ls_parts, ls_max in self._low_signal_prefixes:
            n = len(ls_parts)
            if current_depth >= n and rel_parts[:n] == ls_parts:
                return ls_max

        return self.base_depth

    # ------------------------------------------------------------------
    # Main traversal
    # ------------------------------------------------------------------

    def scan_tree(self) -> dict[str, Any]:
        """Build the nested file tree dictionary.

        Returns a dict where None = file (D-02) and dict = directory (D-01).
        Depth limits are applied per-directory via ``_compute_max_depth``.
        Symlinks, excluded directory names, gitignored paths and file names
        starting with "-" are left out.
        """
        self._load_gitignore_spec()
        root_tree: dict[str, Any] = {}
        all_excludes = self._excludes | self._extra_exclude_names

        for dirpath, dirnames, filenames in os.walk(self.root, followlinks=False):
            current = Path(dirpath)
            try:
                rel_parts = current.relative_to(self.root).parts
            except ValueError:
                continue

            if len(rel_parts) >= self._compute_max_depth(rel_parts):
                # Budget exhausted: the parent already recorded this
                # directory as an empty dict; do not list or descend.
                dirnames.clear()
                continue

            # Filter dirnames in-place (critical: slice assignment) so that
            # os.walk prunes the rejected subtrees.
            dirnames[:] = [
                d for d in dirnames
                if d not in all_excludes
                and not (current / d).is_symlink()
                and not self._is_excluded_by_gitignore(
                    "/".join((*rel_parts, d)), is_dir=True
                )
            ]

            node = self._get_or_create_node(root_tree, rel_parts)

            for fname in filenames:
                # Skip flag-shaped names (shell redirect artifacts)
                if fname.startswith("-"):
                    continue
                if (current / fname).is_symlink():
                    continue
                rel_file = "/".join((*rel_parts, fname))
                if self._is_excluded_by_gitignore(rel_file, is_dir=False):
                    continue
                node[fname] = None  # D-02: None = file

            # Ensure accepted subdirs exist as dict nodes even when the
            # depth budget later prevents listing their contents.
            for d in dirnames:
                if d not in node:
                    node[d] = {}

        return root_tree

    def _get_or_create_node(
        self, tree: dict[str, Any], parts: tuple[str, ...]
    ) -> dict[str, Any]:
        """Return the dict node at *parts* inside *tree*, creating missing levels.

        An entry that is absent or currently None is replaced by a new dict.
        """
        node = tree
        for part in parts:
            if part not in node or node[part] is None:
                node[part] = {}
            node = cast(dict[str, Any], node[part])
        return node

    # ------------------------------------------------------------------
    # Manifest discovery — same interface as FileScanner
    # ------------------------------------------------------------------

    def find_manifests(self) -> list[str]:
        """Find manifest files at depth 0-1.

        Returns depth-0 root manifests followed by depth-1 sub-package
        manifests, as absolute path strings. Symlinks, hidden directories
        and excluded directories are skipped. Excluded directories include
        the top-level generated roots taken from the topology, so the result
        never points into a directory that scan_tree() leaves out.
        """
        manifests: list[str] = []
        for name in MANIFEST_NAMES:
            candidate = self.root / name
            if candidate.exists() and not candidate.is_symlink():
                manifests.append(str(candidate))

        excluded_names = self._excludes | self._extra_exclude_names
        try:
            for child in self.root.iterdir():
                if (
                    child.is_dir()
                    and not child.is_symlink()
                    and child.name not in excluded_names
                    and not child.name.startswith(".")
                ):
                    for name in MANIFEST_NAMES:
                        candidate = child / name
                        if candidate.exists() and not candidate.is_symlink():
                            manifests.append(str(candidate))
        except PermissionError:
            # Best effort: keep whatever was collected before the failure.
            pass
        return manifests
@@ -220,6 +220,29 @@ def _preprocess_argv() -> None:
220
220
  _sys.argv = _sys.argv[:1] + modified
221
221
 
222
222
 
223
+ def _copy_to_clipboard(content: str) -> bool:
224
+ """Copy text to system clipboard. Returns True on success, False otherwise (never raises)."""
225
+ import subprocess
226
+ import sys as _sys
227
+ try:
228
+ if _sys.platform == "darwin":
229
+ subprocess.run(["pbcopy"], input=content.encode("utf-8"), check=True, timeout=10)
230
+ return True
231
+ elif _sys.platform == "win32":
232
+ subprocess.run(["clip"], input=content.encode("utf-16"), check=True, timeout=10)
233
+ return True
234
+ else:
235
+ for cmd in (["xclip", "-selection", "clipboard"], ["xsel", "--clipboard", "--input"]):
236
+ try:
237
+ subprocess.run(cmd, input=content.encode("utf-8"), check=True, timeout=10)
238
+ return True
239
+ except (FileNotFoundError, subprocess.CalledProcessError):
240
+ continue
241
+ return False
242
+ except Exception:
243
+ return False
244
+
245
+
223
246
  app = typer.Typer(
224
247
  name="sourcecode",
225
248
  help=_HELP,
@@ -571,6 +594,12 @@ def main(
571
594
  "--symbol",
572
595
  help="Contract mode: extract localized context for a specific symbol name. Returns defining file + all importers.",
573
596
  ),
597
+ copy: bool = typer.Option(
598
+ False,
599
+ "--copy",
600
+ "-c",
601
+ help="Copy output to system clipboard after a successful run. No-op when --output is used or clipboard is unavailable.",
602
+ ),
574
603
  ) -> None:
575
604
  """Analyze a repository and produce structured context for AI coding agents.
576
605
 
@@ -714,6 +743,13 @@ def main(
714
743
  # 1. Scan directory (SCAN-01 to SCAN-05)
715
744
  redactor = SecretRedactor(enabled=not no_redact)
716
745
 
746
+ # Classify repository topology before scanning. This is a shallow
747
+ # filesystem read (depth 0-1 only) and completes in milliseconds.
748
+ # The topology drives per-directory depth budgets in AdaptiveScanner.
749
+ from sourcecode.adaptive_scanner import AdaptiveScanner
750
+ from sourcecode.repo_classifier import RepoClassifier
751
+ _topology = RepoClassifier().classify(target)
752
+
717
753
  # Detect manifests before scan to adjust depth.
718
754
  # find_manifests() only looks at depth 0-1, does not need the full tree.
719
755
  _pre_scanner = FileScanner(target, max_depth=1)
@@ -735,7 +771,7 @@ def main(
735
771
  no_tree = True # agents never need the raw file tree
736
772
  typer.echo("[agent] dependencies env-map code-notes (no-tree)", err=True)
737
773
 
738
- scanner = FileScanner(target, max_depth=effective_depth)
774
+ scanner = AdaptiveScanner(target, topology=_topology, base_depth=effective_depth)
739
775
  raw_tree = scanner.scan_tree()
740
776
 
741
777
  # 2. Filter .env and *.secret entries from file tree (SEC-02, all levels)
@@ -775,16 +811,14 @@ def main(
775
811
  detector = ProjectDetector(build_default_detectors())
776
812
  workspace_analysis = WorkspaceAnalyzer().analyze(target, manifests)
777
813
 
778
- # Warn when scanning a monorepo at default depth — typical package sources
779
- # (packages/*/src/) live at depth 5+, so default depth=4 silently misses them.
780
- # Only emit to TTY to avoid contaminating piped/CI output; agents read analysis_gaps.
814
+ # Adaptive traversal handles monorepo source root discovery automatically.
815
+ # Emit a diagnostic when topology confidence is low so users know why.
781
816
  import sys as _sys
782
- if workspace_analysis.is_monorepo and depth <= 4 and effective_depth <= 4:
817
+ if _topology.workspace_type == "monorepo" and _topology.confidence < 0.5:
783
818
  if _sys.stderr.isatty():
784
819
  typer.echo(
785
- f"[warning] monorepo detected with --depth {depth}. "
786
- "Source files in packages/*/src/ (depth 5+) may be invisible. "
787
- "Use --depth 6 or higher for full coverage.",
820
+ "[traversal] monorepo detected but source root confidence is low "
821
+ f"({_topology.confidence:.0%}). Use --depth 8 or higher if files are missing.",
788
822
  err=True,
789
823
  )
790
824
 
@@ -896,7 +930,8 @@ def main(
896
930
  workspace_root = target / workspace.path
897
931
  if not workspace_root.exists() or not workspace_root.is_dir():
898
932
  continue
899
- workspace_scanner = FileScanner(workspace_root, max_depth=depth)
933
+ _ws_topology = RepoClassifier().classify(workspace_root)
934
+ workspace_scanner = AdaptiveScanner(workspace_root, topology=_ws_topology, base_depth=depth)
900
935
  workspace_tree = filter_sensitive_files(workspace_scanner.scan_tree())
901
936
  workspace_manifests = workspace_scanner.find_manifests()
902
937
  workspace_stacks, workspace_entry_points, _ = detector.detect(
@@ -1008,6 +1043,7 @@ def main(
1008
1043
  metadata = AnalysisMetadata(
1009
1044
  analyzed_path=str(target),
1010
1045
  analyzer_fingerprints=_fingerprints,
1046
+ traversal_topology=_topology.as_dict(),
1011
1047
  )
1012
1048
  sm = SourceMap(
1013
1049
  metadata=metadata,
@@ -1037,7 +1073,7 @@ def main(
1037
1073
  target / ws.path,
1038
1074
  (
1039
1075
  filter_sensitive_files(
1040
- FileScanner(target / ws.path, max_depth=depth).scan_tree()
1076
+ AdaptiveScanner(target / ws.path, base_depth=depth).scan_tree()
1041
1077
  )
1042
1078
  ),
1043
1079
  workspace=ws.path,
@@ -1379,6 +1415,13 @@ def main(
1379
1415
  # 6. Write output (CLI-04)
1380
1416
  write_output(content, output=output)
1381
1417
 
1418
+ # 7. Clipboard copy (--copy / -c)
1419
+ if copy and output is None:
1420
+ _trimmed = content.strip()
1421
+ if _trimmed and _trimmed not in ("{}", "[]", "null"):
1422
+ if _copy_to_clipboard(content):
1423
+ typer.echo("✓ copied to clipboard", err=True)
1424
+
1382
1425
 
1383
1426
  @app.command("prepare-context")
1384
1427
  def prepare_context_cmd(
@@ -1410,6 +1453,12 @@ def prepare_context_cmd(
1410
1453
  "--dry-run",
1411
1454
  help="Show what would be analyzed without running it",
1412
1455
  ),
1456
+ copy: bool = typer.Option(
1457
+ False,
1458
+ "--copy",
1459
+ "-c",
1460
+ help="Copy output to system clipboard after a successful run. No-op when clipboard is unavailable.",
1461
+ ),
1413
1462
  ) -> None:
1414
1463
  """Task-specific context for AI coding agents.
1415
1464
 
@@ -1507,7 +1556,14 @@ def prepare_context_cmd(
1507
1556
  if llm_prompt:
1508
1557
  out["llm_prompt"] = builder.render_prompt(output)
1509
1558
 
1510
- typer.echo(json.dumps(out, indent=2, ensure_ascii=False))
1559
+ _pc_content = json.dumps(out, indent=2, ensure_ascii=False)
1560
+ typer.echo(_pc_content)
1561
+
1562
+ if copy:
1563
+ _trimmed = _pc_content.strip()
1564
+ if _trimmed and _trimmed not in ("{}", "[]", "null"):
1565
+ if _copy_to_clipboard(_pc_content):
1566
+ typer.echo("✓ copied to clipboard", err=True)
1511
1567
 
1512
1568
 
1513
1569
  # ── Telemetry commands ────────────────────────────────────────────────────────