sourcecode 0.37.0__tar.gz → 0.39.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sourcecode-0.37.0 → sourcecode-0.39.0}/PKG-INFO +1 -1
- {sourcecode-0.37.0 → sourcecode-0.39.0}/pyproject.toml +1 -1
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/__init__.py +1 -1
- sourcecode-0.39.0/src/sourcecode/adaptive_scanner.py +258 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/cli.py +67 -11
- sourcecode-0.39.0/src/sourcecode/repo_classifier.py +570 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/schema.py +1 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/serializer.py +5 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/.agents/skills/source-command-gsd-join-discord/SKILL.md +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/.agents/skills/source-command-gsd-review-backlog/SKILL.md +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/.agents/skills/source-command-gsd-workstreams/SKILL.md +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/.gitignore +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/.ruff.toml +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/CONTRIBUTING.md +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/LICENSE +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/README.md +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/SECURITY.md +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/docs/privacy.md +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/docs/schema.md +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/raw +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/architecture_analyzer.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/architecture_summary.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/ast_extractor.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/classifier.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/code_notes_analyzer.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/confidence_analyzer.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/context_summarizer.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/contract_model.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/contract_pipeline.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/coverage_parser.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/dependency_analyzer.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/__init__.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/base.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/csproj_parser.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/dart.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/dotnet.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/elixir.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/go.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/heuristic.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/hybrid.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/java.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/jvm_ext.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/nodejs.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/parsers.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/php.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/project.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/python.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/ruby.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/rust.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/systems.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/terraform.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/detectors/tooling.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/doc_analyzer.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/entrypoint_classifier.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/env_analyzer.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/file_classifier.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/git_analyzer.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/graph_analyzer.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/metrics_analyzer.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/prepare_context.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/redactor.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/relevance_scorer.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/runtime_classifier.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/scanner.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/semantic_analyzer.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/summarizer.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/telemetry/__init__.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/telemetry/config.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/telemetry/consent.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/telemetry/events.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/telemetry/filters.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/telemetry/transport.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/tree_utils.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/src/sourcecode/workspace.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/__init__.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/conftest.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/fixtures/coverage.xml +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/fixtures/fastapi_app/pyproject.toml +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/fixtures/fastapi_app/src/main.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/fixtures/go_service/cmd/api/main.go +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/fixtures/go_service/go.mod +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/fixtures/jacoco.xml +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/fixtures/lcov.info +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/fixtures/nextjs_app/app/page.tsx +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/fixtures/nextjs_app/package.json +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/fixtures/nextjs_app/pnpm-lock.yaml +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/fixtures/pnpm_monorepo/apps/web/app/page.tsx +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/fixtures/pnpm_monorepo/apps/web/package.json +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/fixtures/pnpm_monorepo/packages/api/main.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/fixtures/pnpm_monorepo/packages/api/pyproject.toml +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/fixtures/pnpm_monorepo/pnpm-workspace.yaml +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_architecture_analyzer.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_architecture_summary.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_ast_extractor.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_classifier.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_cli.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_code_notes_analyzer.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_contract_pipeline.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_coverage_parser.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_cross_consistency.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_dependency_analyzer_node_python.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_dependency_analyzer_polyglot.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_dependency_schema.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_detector_dotnet.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_detector_go_rust_java.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_detector_nodejs.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_detector_php_ruby_dart.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_detector_python.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_detector_universal_managed.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_detector_universal_systems.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_detectors_base.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_doc_analyzer_jsdom.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_doc_analyzer_python.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_graph_analyzer_polyglot.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_graph_analyzer_python_node.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_graph_schema.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_hybrid_inference.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_integration.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_integration_dependencies.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_integration_detection.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_integration_docs.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_integration_graph_modules.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_integration_lqn.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_integration_metrics.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_integration_multistack.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_integration_semantics.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_integration_universal.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_metrics_analyzer.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_packaging.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_phase1_improvements.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_pipeline_integrity.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_real_projects.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_redactor.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_scanner.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_schema.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_schema_normalization.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_semantic_analyzer_node.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_semantic_analyzer_python.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_semantic_import_resolution.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_semantic_schema.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_signal_hierarchy.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_summarizer.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_telemetry.py +0 -0
- {sourcecode-0.37.0 → sourcecode-0.39.0}/tests/test_workspace_analyzer.py +0 -0
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
"""Adaptive file tree scanner with topology-aware depth budgets.
|
|
4
|
+
|
|
5
|
+
Replaces pure depth filtering with relevance-oriented traversal:
|
|
6
|
+
- Source roots (packages/*/src, apps/*/src) get deep scan budgets.
|
|
7
|
+
- Low-signal directories (docs/, benchmarks/) are limited to 2 levels.
|
|
8
|
+
- Generated/excluded directories (dist/, node_modules/) are skipped.
|
|
9
|
+
- Unclassified directories fall back to the base depth limit.
|
|
10
|
+
|
|
11
|
+
Drop-in replacement for FileScanner: same scan_tree() and find_manifests()
|
|
12
|
+
interface, same output format (None = file, dict = directory).
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Any, Optional, cast
|
|
18
|
+
|
|
19
|
+
from pathspec import GitIgnoreSpec
|
|
20
|
+
|
|
21
|
+
from sourcecode.repo_classifier import RepoTopology
|
|
22
|
+
from sourcecode.scanner import DEFAULT_EXCLUDES, MANIFEST_NAMES
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class AdaptiveScanner:
    """File tree scanner driven by repository topology.

    When *topology* is provided, traversal depth is controlled per-directory:
    directories inside source roots receive a deep budget, low-signal
    directories are restricted, and generated directories are excluded.

    When *topology* is None, every directory uses the base depth limit.

    The output format of :meth:`scan_tree` is a nested dict in which a value
    of ``None`` marks a file and a ``dict`` marks a directory.
    """

    def __init__(
        self,
        root: Path,
        topology: Optional[RepoTopology] = None,
        base_depth: int = 4,
        extra_excludes: Optional[frozenset[str]] = None,
    ) -> None:
        """Prepare the scanner and pre-compute topology lookup tables.

        Args:
            root: Directory to scan; it is resolved to an absolute path.
            topology: Optional repository topology. Its ``source_roots``,
                ``low_signal_roots``, ``generated_roots`` and ``scan_budget``
                attributes are read here.
            base_depth: Depth limit for directories without a classification.
            extra_excludes: Additional directory names to skip at any depth.
        """
        self.root = root.resolve()
        self.topology = topology
        self.base_depth = base_depth
        self._excludes = DEFAULT_EXCLUDES | (extra_excludes or frozenset())
        self._gitignore_spec: Optional[GitIgnoreSpec] = None

        # Each prefix entry is (path_parts, max_absolute_depth), where
        # max_absolute_depth = len(path_parts) + budget for that category.
        # "Absolute" means measured from the repo root, not from the
        # classified directory.  A directory whose own depth is >= the
        # returned maximum is pruned (neither its files nor its children
        # are listed).
        self._source_prefixes: list[tuple[tuple[str, ...], int]] = []
        self._low_signal_prefixes: list[tuple[tuple[str, ...], int]] = []
        # Names of top-level generated roots; matched by name at any depth,
        # exactly like the default excludes.
        self._extra_exclude_names: frozenset[str] = frozenset()
        # Nested generated roots, matched by their full relative path.
        self._generated_paths: frozenset[tuple[str, ...]] = frozenset()

        if topology is None:
            return

        budget = topology.scan_budget

        for sr in topology.source_roots:
            parts = self._split_path(sr.path)
            if parts:
                self._source_prefixes.append(
                    (parts, len(parts) + budget.source_depth)
                )

        for lr in topology.low_signal_roots:
            parts = self._split_path(lr.path)
            if parts:
                self._low_signal_prefixes.append(
                    (parts, len(parts) + budget.low_signal_depth)
                )

        generated_names: set[str] = set()
        generated_paths: set[tuple[str, ...]] = set()
        for gr in topology.generated_roots:
            parts = self._split_path(gr.path)
            if len(parts) == 1:
                # Single-component root: exclude by name so os.walk never
                # enters it.
                generated_names.add(parts[0])
            elif parts:
                # Nested root (e.g. "packages/web/dist"): exclude only that
                # exact path, not every directory sharing its last name.
                generated_paths.add(parts)
        self._extra_exclude_names = frozenset(generated_names)
        self._generated_paths = frozenset(generated_paths)

    @staticmethod
    def _split_path(path: str) -> tuple[str, ...]:
        """Split a "/"-separated relative path into its non-empty components."""
        return tuple(part for part in path.split("/") if part)

    # ------------------------------------------------------------------
    # Gitignore
    # ------------------------------------------------------------------

    def _load_gitignore_spec(self) -> GitIgnoreSpec:
        """Return the spec built from ``<root>/.gitignore``, loading it once.

        A missing or unreadable ``.gitignore`` yields a spec built from no
        lines.  Only the root-level file is consulted.
        """
        if self._gitignore_spec is None:
            gitignore = self.root / ".gitignore"
            lines: list[str] = []
            if gitignore.exists():
                try:
                    lines = gitignore.read_text(
                        encoding="utf-8", errors="replace"
                    ).splitlines()
                except OSError:
                    # Best effort: an unreadable .gitignore must not abort
                    # the scan; fall back to "nothing ignored".
                    pass
            self._gitignore_spec = GitIgnoreSpec.from_lines(lines)
        return self._gitignore_spec

    def _is_excluded_by_gitignore(self, rel_path: str, is_dir: bool) -> bool:
        """Return True when *rel_path* matches the root ``.gitignore``.

        Directories are matched with a trailing "/" appended so that
        directory-only patterns can apply to them.
        """
        spec = self._load_gitignore_spec()
        path_to_match = rel_path + "/" if is_dir else rel_path
        return spec.match_file(path_to_match)

    # ------------------------------------------------------------------
    # Depth budget computation — the core of adaptive traversal
    # ------------------------------------------------------------------

    def _compute_max_depth(self, rel_parts: tuple[str, ...]) -> int:
        """Return the maximum absolute depth allowed at *rel_parts*.

        Depth is the number of path components from the repo root.  The scan
        prunes a directory when its depth is >= the returned value.

        Priority order:
          1. At or inside a source root → that root's budget.  When several
             source roots match (nested roots), the largest budget wins so
             the result does not depend on list order.  If the directory is
             also an ancestor of a deeper source root, that root's budget is
             taken into account so it stays reachable.
          2. Ancestor of a source root → the largest such budget, never less
             than ``base_depth``.
          3. At or inside a low-signal root → that root's budget, capped at
             ``base_depth`` so a low-signal directory is never scanned deeper
             than an unclassified one.
          4. Default → ``base_depth``.
        """
        if not self._source_prefixes and not self._low_signal_prefixes:
            return self.base_depth

        current_depth = len(rel_parts)
        inside_best: Optional[int] = None
        ancestor_best: Optional[int] = None

        for src_parts, src_max in self._source_prefixes:
            n = len(src_parts)
            if current_depth >= n:
                # rel_parts is the source root itself or a descendant.
                if rel_parts[:n] == src_parts:
                    if inside_best is None or src_max > inside_best:
                        inside_best = src_max
            elif src_parts[:current_depth] == rel_parts:
                # rel_parts is a strict ancestor of this source root.
                if ancestor_best is None or src_max > ancestor_best:
                    ancestor_best = src_max

        if inside_best is not None:
            if ancestor_best is not None:
                return max(inside_best, ancestor_best)
            return inside_best

        if ancestor_best is not None:
            return max(self.base_depth, ancestor_best)

        # Low-signal roots apply only when no source root claimed the path.
        for ls_parts, ls_max in self._low_signal_prefixes:
            n = len(ls_parts)
            if current_depth >= n and rel_parts[:n] == ls_parts:
                return min(ls_max, self.base_depth)

        return self.base_depth

    # ------------------------------------------------------------------
    # Main traversal
    # ------------------------------------------------------------------

    def scan_tree(self) -> dict[str, Any]:
        """Build the nested file tree dictionary.

        Returns a dict where ``None`` marks a file and a ``dict`` marks a
        directory.  Depth limits are applied per directory through
        :meth:`_compute_max_depth`.  Symlinks, default/extra excludes,
        generated roots and root-``.gitignore`` matches are skipped.
        """
        self._load_gitignore_spec()
        root_tree: dict[str, Any] = {}
        all_excludes = self._excludes | self._extra_exclude_names
        generated_paths = self._generated_paths

        for dirpath, dirnames, filenames in os.walk(self.root, followlinks=False):
            current = Path(dirpath)
            try:
                rel = current.relative_to(self.root)
            except ValueError:
                continue

            rel_parts = rel.parts
            depth = len(rel_parts)

            if depth >= self._compute_max_depth(rel_parts):
                # Budget exhausted: do not list files and do not descend.
                dirnames.clear()
                continue

            # Filter dirnames in place (slice assignment) — os.walk only
            # honours pruning done on the very list object it handed out.
            dirnames[:] = [
                d
                for d in dirnames
                if d not in all_excludes
                and rel_parts + (d,) not in generated_paths
                and not (current / d).is_symlink()
                and not self._is_excluded_by_gitignore(
                    str(rel / d) if rel_parts else d,
                    is_dir=True,
                )
            ]

            node = self._get_or_create_node(root_tree, rel_parts)

            for fname in filenames:
                # Skip flag-shaped names (shell redirect artifacts).
                if fname.startswith("-"):
                    continue
                if (current / fname).is_symlink():
                    continue
                rel_file = str(rel / fname) if rel_parts else fname
                if self._is_excluded_by_gitignore(rel_file, is_dir=False):
                    continue
                node[fname] = None  # None = file

            # Accepted subdirectories appear as dict nodes even when the
            # walk later prunes them for exceeding their depth budget.
            for d in dirnames:
                if d not in node:
                    node[d] = {}

        return root_tree

    def _get_or_create_node(
        self, tree: dict[str, Any], parts: tuple[str, ...]
    ) -> dict[str, Any]:
        """Return the directory node at *parts*, creating missing levels.

        An existing ``None`` entry (a file marker) on the way is replaced by
        an empty dict.
        """
        node = tree
        for part in parts:
            if part not in node or node[part] is None:
                node[part] = {}
            node = cast(dict[str, Any], node[part])
        return node

    # ------------------------------------------------------------------
    # Manifest discovery
    # ------------------------------------------------------------------

    def find_manifests(self) -> list[str]:
        """Find manifest files at depth 0-1.

        Returns the paths (as strings) of every ``MANIFEST_NAMES`` entry that
        exists directly under the root, followed by those found directly
        under each immediate child directory.  Symlinks, excluded names and
        hidden (dot-prefixed) child directories are skipped.  A
        ``PermissionError`` while listing the root is ignored.
        """
        manifests: list[str] = []
        for name in MANIFEST_NAMES:
            candidate = self.root / name
            if candidate.exists() and not candidate.is_symlink():
                manifests.append(str(candidate))
        try:
            for child in self.root.iterdir():
                if (
                    child.is_dir()
                    and not child.is_symlink()
                    and child.name not in self._excludes
                    and not child.name.startswith(".")
                ):
                    for name in MANIFEST_NAMES:
                        candidate = child / name
                        if candidate.exists() and not candidate.is_symlink():
                            manifests.append(str(candidate))
        except PermissionError:
            # Best effort: return whatever was collected before the failure.
            pass
        return manifests
|
|
@@ -220,6 +220,29 @@ def _preprocess_argv() -> None:
|
|
|
220
220
|
_sys.argv = _sys.argv[:1] + modified
|
|
221
221
|
|
|
222
222
|
|
|
223
|
+
def _copy_to_clipboard(content: str) -> bool:
|
|
224
|
+
"""Copy text to system clipboard. Returns True on success, False otherwise (never raises)."""
|
|
225
|
+
import subprocess
|
|
226
|
+
import sys as _sys
|
|
227
|
+
try:
|
|
228
|
+
if _sys.platform == "darwin":
|
|
229
|
+
subprocess.run(["pbcopy"], input=content.encode("utf-8"), check=True, timeout=10)
|
|
230
|
+
return True
|
|
231
|
+
elif _sys.platform == "win32":
|
|
232
|
+
subprocess.run(["clip"], input=content.encode("utf-16"), check=True, timeout=10)
|
|
233
|
+
return True
|
|
234
|
+
else:
|
|
235
|
+
for cmd in (["xclip", "-selection", "clipboard"], ["xsel", "--clipboard", "--input"]):
|
|
236
|
+
try:
|
|
237
|
+
subprocess.run(cmd, input=content.encode("utf-8"), check=True, timeout=10)
|
|
238
|
+
return True
|
|
239
|
+
except (FileNotFoundError, subprocess.CalledProcessError):
|
|
240
|
+
continue
|
|
241
|
+
return False
|
|
242
|
+
except Exception:
|
|
243
|
+
return False
|
|
244
|
+
|
|
245
|
+
|
|
223
246
|
app = typer.Typer(
|
|
224
247
|
name="sourcecode",
|
|
225
248
|
help=_HELP,
|
|
@@ -571,6 +594,12 @@ def main(
|
|
|
571
594
|
"--symbol",
|
|
572
595
|
help="Contract mode: extract localized context for a specific symbol name. Returns defining file + all importers.",
|
|
573
596
|
),
|
|
597
|
+
copy: bool = typer.Option(
|
|
598
|
+
False,
|
|
599
|
+
"--copy",
|
|
600
|
+
"-c",
|
|
601
|
+
help="Copy output to system clipboard after a successful run. No-op when --output is used or clipboard is unavailable.",
|
|
602
|
+
),
|
|
574
603
|
) -> None:
|
|
575
604
|
"""Analyze a repository and produce structured context for AI coding agents.
|
|
576
605
|
|
|
@@ -714,6 +743,13 @@ def main(
|
|
|
714
743
|
# 1. Scan directory (SCAN-01 to SCAN-05)
|
|
715
744
|
redactor = SecretRedactor(enabled=not no_redact)
|
|
716
745
|
|
|
746
|
+
# Classify repository topology before scanning. This is a shallow
|
|
747
|
+
# filesystem read (depth 0-1 only) and completes in milliseconds.
|
|
748
|
+
# The topology drives per-directory depth budgets in AdaptiveScanner.
|
|
749
|
+
from sourcecode.adaptive_scanner import AdaptiveScanner
|
|
750
|
+
from sourcecode.repo_classifier import RepoClassifier
|
|
751
|
+
_topology = RepoClassifier().classify(target)
|
|
752
|
+
|
|
717
753
|
# Detect manifests before scan to adjust depth.
|
|
718
754
|
# find_manifests() only looks at depth 0-1, does not need the full tree.
|
|
719
755
|
_pre_scanner = FileScanner(target, max_depth=1)
|
|
@@ -735,7 +771,7 @@ def main(
|
|
|
735
771
|
no_tree = True # agents never need the raw file tree
|
|
736
772
|
typer.echo("[agent] dependencies env-map code-notes (no-tree)", err=True)
|
|
737
773
|
|
|
738
|
-
scanner =
|
|
774
|
+
scanner = AdaptiveScanner(target, topology=_topology, base_depth=effective_depth)
|
|
739
775
|
raw_tree = scanner.scan_tree()
|
|
740
776
|
|
|
741
777
|
# 2. Filter .env and *.secret entries from file tree (SEC-02, all levels)
|
|
@@ -775,16 +811,14 @@ def main(
|
|
|
775
811
|
detector = ProjectDetector(build_default_detectors())
|
|
776
812
|
workspace_analysis = WorkspaceAnalyzer().analyze(target, manifests)
|
|
777
813
|
|
|
778
|
-
#
|
|
779
|
-
#
|
|
780
|
-
# Only emit to TTY to avoid contaminating piped/CI output; agents read analysis_gaps.
|
|
814
|
+
# Adaptive traversal handles monorepo source root discovery automatically.
|
|
815
|
+
# Emit a diagnostic when topology confidence is low so users know why.
|
|
781
816
|
import sys as _sys
|
|
782
|
-
if
|
|
817
|
+
if _topology.workspace_type == "monorepo" and _topology.confidence < 0.5:
|
|
783
818
|
if _sys.stderr.isatty():
|
|
784
819
|
typer.echo(
|
|
785
|
-
|
|
786
|
-
"
|
|
787
|
-
"Use --depth 6 or higher for full coverage.",
|
|
820
|
+
"[traversal] monorepo detected but source root confidence is low "
|
|
821
|
+
f"({_topology.confidence:.0%}). Use --depth 8 or higher if files are missing.",
|
|
788
822
|
err=True,
|
|
789
823
|
)
|
|
790
824
|
|
|
@@ -896,7 +930,8 @@ def main(
|
|
|
896
930
|
workspace_root = target / workspace.path
|
|
897
931
|
if not workspace_root.exists() or not workspace_root.is_dir():
|
|
898
932
|
continue
|
|
899
|
-
|
|
933
|
+
_ws_topology = RepoClassifier().classify(workspace_root)
|
|
934
|
+
workspace_scanner = AdaptiveScanner(workspace_root, topology=_ws_topology, base_depth=depth)
|
|
900
935
|
workspace_tree = filter_sensitive_files(workspace_scanner.scan_tree())
|
|
901
936
|
workspace_manifests = workspace_scanner.find_manifests()
|
|
902
937
|
workspace_stacks, workspace_entry_points, _ = detector.detect(
|
|
@@ -1008,6 +1043,7 @@ def main(
|
|
|
1008
1043
|
metadata = AnalysisMetadata(
|
|
1009
1044
|
analyzed_path=str(target),
|
|
1010
1045
|
analyzer_fingerprints=_fingerprints,
|
|
1046
|
+
traversal_topology=_topology.as_dict(),
|
|
1011
1047
|
)
|
|
1012
1048
|
sm = SourceMap(
|
|
1013
1049
|
metadata=metadata,
|
|
@@ -1037,7 +1073,7 @@ def main(
|
|
|
1037
1073
|
target / ws.path,
|
|
1038
1074
|
(
|
|
1039
1075
|
filter_sensitive_files(
|
|
1040
|
-
|
|
1076
|
+
AdaptiveScanner(target / ws.path, base_depth=depth).scan_tree()
|
|
1041
1077
|
)
|
|
1042
1078
|
),
|
|
1043
1079
|
workspace=ws.path,
|
|
@@ -1379,6 +1415,13 @@ def main(
|
|
|
1379
1415
|
# 6. Write output (CLI-04)
|
|
1380
1416
|
write_output(content, output=output)
|
|
1381
1417
|
|
|
1418
|
+
# 7. Clipboard copy (--copy / -c)
|
|
1419
|
+
if copy and output is None:
|
|
1420
|
+
_trimmed = content.strip()
|
|
1421
|
+
if _trimmed and _trimmed not in ("{}", "[]", "null"):
|
|
1422
|
+
if _copy_to_clipboard(content):
|
|
1423
|
+
typer.echo("✓ copied to clipboard", err=True)
|
|
1424
|
+
|
|
1382
1425
|
|
|
1383
1426
|
@app.command("prepare-context")
|
|
1384
1427
|
def prepare_context_cmd(
|
|
@@ -1410,6 +1453,12 @@ def prepare_context_cmd(
|
|
|
1410
1453
|
"--dry-run",
|
|
1411
1454
|
help="Show what would be analyzed without running it",
|
|
1412
1455
|
),
|
|
1456
|
+
copy: bool = typer.Option(
|
|
1457
|
+
False,
|
|
1458
|
+
"--copy",
|
|
1459
|
+
"-c",
|
|
1460
|
+
help="Copy output to system clipboard after a successful run. No-op when clipboard is unavailable.",
|
|
1461
|
+
),
|
|
1413
1462
|
) -> None:
|
|
1414
1463
|
"""Task-specific context for AI coding agents.
|
|
1415
1464
|
|
|
@@ -1507,7 +1556,14 @@ def prepare_context_cmd(
|
|
|
1507
1556
|
if llm_prompt:
|
|
1508
1557
|
out["llm_prompt"] = builder.render_prompt(output)
|
|
1509
1558
|
|
|
1510
|
-
|
|
1559
|
+
_pc_content = json.dumps(out, indent=2, ensure_ascii=False)
|
|
1560
|
+
typer.echo(_pc_content)
|
|
1561
|
+
|
|
1562
|
+
if copy:
|
|
1563
|
+
_trimmed = _pc_content.strip()
|
|
1564
|
+
if _trimmed and _trimmed not in ("{}", "[]", "null"):
|
|
1565
|
+
if _copy_to_clipboard(_pc_content):
|
|
1566
|
+
typer.echo("✓ copied to clipboard", err=True)
|
|
1511
1567
|
|
|
1512
1568
|
|
|
1513
1569
|
# ── Telemetry commands ────────────────────────────────────────────────────────
|