sourcecode 0.36.0__tar.gz → 0.38.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sourcecode-0.36.0 → sourcecode-0.38.0}/PKG-INFO +1 -1
- {sourcecode-0.36.0 → sourcecode-0.38.0}/pyproject.toml +1 -1
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/__init__.py +1 -1
- sourcecode-0.38.0/src/sourcecode/adaptive_scanner.py +258 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/cli.py +76 -20
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/git_analyzer.py +7 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/metrics_analyzer.py +10 -0
- sourcecode-0.38.0/src/sourcecode/repo_classifier.py +570 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/schema.py +1 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/serializer.py +25 -1
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_contract_pipeline.py +2 -2
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_integration.py +1 -1
- {sourcecode-0.36.0 → sourcecode-0.38.0}/.agents/skills/source-command-gsd-join-discord/SKILL.md +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/.agents/skills/source-command-gsd-review-backlog/SKILL.md +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/.agents/skills/source-command-gsd-workstreams/SKILL.md +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/.gitignore +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/.ruff.toml +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/CONTRIBUTING.md +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/LICENSE +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/README.md +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/SECURITY.md +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/docs/privacy.md +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/docs/schema.md +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/raw +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/architecture_analyzer.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/architecture_summary.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/ast_extractor.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/classifier.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/code_notes_analyzer.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/confidence_analyzer.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/context_summarizer.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/contract_model.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/contract_pipeline.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/coverage_parser.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/dependency_analyzer.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/detectors/__init__.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/detectors/base.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/detectors/csproj_parser.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/detectors/dart.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/detectors/dotnet.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/detectors/elixir.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/detectors/go.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/detectors/heuristic.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/detectors/hybrid.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/detectors/java.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/detectors/jvm_ext.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/detectors/nodejs.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/detectors/parsers.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/detectors/php.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/detectors/project.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/detectors/python.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/detectors/ruby.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/detectors/rust.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/detectors/systems.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/detectors/terraform.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/detectors/tooling.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/doc_analyzer.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/entrypoint_classifier.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/env_analyzer.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/file_classifier.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/graph_analyzer.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/prepare_context.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/redactor.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/relevance_scorer.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/runtime_classifier.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/scanner.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/semantic_analyzer.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/summarizer.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/telemetry/__init__.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/telemetry/config.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/telemetry/consent.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/telemetry/events.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/telemetry/filters.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/telemetry/transport.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/tree_utils.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/src/sourcecode/workspace.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/__init__.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/conftest.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/fixtures/coverage.xml +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/fixtures/fastapi_app/pyproject.toml +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/fixtures/fastapi_app/src/main.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/fixtures/go_service/cmd/api/main.go +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/fixtures/go_service/go.mod +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/fixtures/jacoco.xml +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/fixtures/lcov.info +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/fixtures/nextjs_app/app/page.tsx +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/fixtures/nextjs_app/package.json +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/fixtures/nextjs_app/pnpm-lock.yaml +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/fixtures/pnpm_monorepo/apps/web/app/page.tsx +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/fixtures/pnpm_monorepo/apps/web/package.json +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/fixtures/pnpm_monorepo/packages/api/main.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/fixtures/pnpm_monorepo/packages/api/pyproject.toml +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/fixtures/pnpm_monorepo/pnpm-workspace.yaml +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_architecture_analyzer.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_architecture_summary.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_ast_extractor.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_classifier.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_cli.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_code_notes_analyzer.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_coverage_parser.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_cross_consistency.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_dependency_analyzer_node_python.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_dependency_analyzer_polyglot.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_dependency_schema.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_detector_dotnet.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_detector_go_rust_java.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_detector_nodejs.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_detector_php_ruby_dart.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_detector_python.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_detector_universal_managed.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_detector_universal_systems.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_detectors_base.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_doc_analyzer_jsdom.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_doc_analyzer_python.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_graph_analyzer_polyglot.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_graph_analyzer_python_node.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_graph_schema.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_hybrid_inference.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_integration_dependencies.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_integration_detection.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_integration_docs.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_integration_graph_modules.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_integration_lqn.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_integration_metrics.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_integration_multistack.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_integration_semantics.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_integration_universal.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_metrics_analyzer.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_packaging.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_phase1_improvements.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_pipeline_integrity.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_real_projects.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_redactor.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_scanner.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_schema.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_schema_normalization.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_semantic_analyzer_node.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_semantic_analyzer_python.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_semantic_import_resolution.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_semantic_schema.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_signal_hierarchy.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_summarizer.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_telemetry.py +0 -0
- {sourcecode-0.36.0 → sourcecode-0.38.0}/tests/test_workspace_analyzer.py +0 -0
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
"""Adaptive file tree scanner with topology-aware depth budgets.
|
|
4
|
+
|
|
5
|
+
Replaces pure depth filtering with relevance-oriented traversal:
|
|
6
|
+
- Source roots (packages/*/src, apps/*/src) get deep scan budgets.
|
|
7
|
+
- Low-signal directories (docs/, benchmarks/) are limited to 2 levels.
|
|
8
|
+
- Generated/excluded directories (dist/, node_modules/) are skipped.
|
|
9
|
+
- Unclassified directories fall back to the base depth limit.
|
|
10
|
+
|
|
11
|
+
Drop-in replacement for FileScanner: same scan_tree() and find_manifests()
|
|
12
|
+
interface, same output format (None = file, dict = directory).
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Any, Optional, cast
|
|
18
|
+
|
|
19
|
+
from pathspec import GitIgnoreSpec
|
|
20
|
+
|
|
21
|
+
from sourcecode.repo_classifier import RepoTopology
|
|
22
|
+
from sourcecode.scanner import DEFAULT_EXCLUDES, MANIFEST_NAMES
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class AdaptiveScanner:
    """File tree scanner driven by repository topology.

    When *topology* is provided, traversal depth is controlled per-directory:
    directories inside source roots receive a deep budget; low-signal dirs
    are restricted; top-level generated dirs are excluded entirely.

    When *topology* is None, every directory falls back to *base_depth*.

    The public surface is ``scan_tree()`` and ``find_manifests()``; the tree
    format is ``None`` for a file and a nested ``dict`` for a directory.
    """

    def __init__(
        self,
        root: Path,
        topology: "Optional[RepoTopology]" = None,
        base_depth: int = 4,
        extra_excludes: Optional[frozenset[str]] = None,
    ) -> None:
        """Prepare the scanner and pre-compute per-prefix depth budgets.

        Args:
            root: Directory to scan; resolved to an absolute path.
            topology: Optional repository classification. Its
                ``source_roots``, ``low_signal_roots``, ``generated_roots``
                and ``scan_budget`` are read here.
            base_depth: Depth limit for directories with no classification.
            extra_excludes: Additional directory names to skip, merged with
                ``DEFAULT_EXCLUDES``.
        """
        self.root = root.resolve()
        self.topology = topology
        self.base_depth = base_depth
        self._excludes = DEFAULT_EXCLUDES | (extra_excludes or frozenset())
        # Loaded lazily by _load_gitignore_spec().
        self._gitignore_spec: Optional[GitIgnoreSpec] = None

        # Lookup tables derived from the topology.
        #
        # Each entry is (path_parts_tuple, max_absolute_depth):
        #   source prefix     -> (src_parts, len(src_parts) + source_depth)
        #   low-signal prefix -> (ls_parts,  len(ls_parts)  + low_signal_depth)
        #
        # "max_absolute_depth" is measured from the repo root, not from the
        # classified directory.  A directory at depth D is listed while
        # D < max; at D >= max its dirnames are cleared and its files skipped.
        self._source_prefixes: list[tuple[tuple[str, ...], int]] = []
        self._low_signal_prefixes: list[tuple[tuple[str, ...], int]] = []
        self._extra_exclude_names: frozenset[str] = frozenset()

        if topology is not None:
            budget = topology.scan_budget

            for sr in topology.source_roots:
                parts = self._split_path(sr.path)
                if parts:
                    self._source_prefixes.append(
                        (parts, len(parts) + budget.source_depth)
                    )

            for lr in topology.low_signal_roots:
                parts = self._split_path(lr.path)
                if parts:
                    self._low_signal_prefixes.append(
                        (parts, len(parts) + budget.low_signal_depth)
                    )

            # Generated roots that are a single path component are added to
            # the exclude names so os.walk never descends into them.
            self._extra_exclude_names = frozenset(
                gr.path for gr in topology.generated_roots if "/" not in gr.path
            )

    @staticmethod
    def _split_path(path: str) -> tuple[str, ...]:
        """Split a "/"-separated relative path into its non-empty components."""
        return tuple(part for part in path.split("/") if part)

    # ------------------------------------------------------------------
    # Gitignore
    # ------------------------------------------------------------------

    def _load_gitignore_spec(self) -> "GitIgnoreSpec":
        """Return the spec built from ``<root>/.gitignore``, loading it once.

        A missing or unreadable .gitignore yields a spec built from no lines.
        """
        if self._gitignore_spec is None:
            gitignore = self.root / ".gitignore"
            lines: list[str] = []
            if gitignore.exists():
                try:
                    lines = gitignore.read_text(
                        encoding="utf-8", errors="replace"
                    ).splitlines()
                except OSError:
                    # Best effort: an unreadable .gitignore is treated as empty.
                    pass
            self._gitignore_spec = GitIgnoreSpec.from_lines(lines)
        return self._gitignore_spec

    def _is_excluded_by_gitignore(self, rel_path: str, is_dir: bool) -> bool:
        """Return whether *rel_path* (relative to the root) matches .gitignore.

        Directories are matched with a trailing "/" appended.
        """
        spec = self._load_gitignore_spec()
        path_to_match = rel_path + "/" if is_dir else rel_path
        return spec.match_file(path_to_match)

    # ------------------------------------------------------------------
    # Depth budget computation — the core of adaptive traversal
    # ------------------------------------------------------------------

    def _compute_max_depth(self, rel_parts: tuple[str, ...]) -> int:
        """Return the maximum absolute depth allowed at *rel_parts*.

        Depth is the number of path components from the repo root.  The
        caller stops (dirnames cleared, files skipped) when the directory's
        own depth is >= the returned value.

        Priority order:
          1. At or inside a source root -> that root's budget.  When several
             source roots match (nested roots), the largest budget wins, and
             deeper source roots below this directory are also considered so
             they remain reachable.
          2. Ancestor of a source root  -> at least base_depth, raised to the
             largest budget of the source roots underneath.
          3. Inside a low-signal root   -> restricted budget.
          4. Default                    -> base_depth.

        Args:
            rel_parts: Path components of the directory relative to the root
                (empty tuple for the root itself).
        """
        if not self._source_prefixes and not self._low_signal_prefixes:
            return self.base_depth

        current_depth = len(rel_parts)

        inside_best: Optional[int] = None    # budgets of roots containing us
        ancestor_best: Optional[int] = None  # budgets of roots below us

        # All source roots are inspected (no early exit) so the answer does
        # not depend on the order in which the topology lists them.
        for src_parts, src_max in self._source_prefixes:
            n = len(src_parts)
            if current_depth >= n:
                if rel_parts[:n] == src_parts:
                    if inside_best is None or src_max > inside_best:
                        inside_best = src_max
            elif src_parts[:current_depth] == rel_parts:
                if ancestor_best is None or src_max > ancestor_best:
                    ancestor_best = src_max

        if inside_best is not None:
            if ancestor_best is not None and ancestor_best > inside_best:
                # A deeper source root lives below this directory; allow
                # traversal far enough to reach and scan it.
                return ancestor_best
            return inside_best

        if ancestor_best is not None:
            return max(self.base_depth, ancestor_best)

        # Low-signal roots apply only when no source root is involved.
        for ls_parts, ls_max in self._low_signal_prefixes:
            n = len(ls_parts)
            if current_depth >= n and rel_parts[:n] == ls_parts:
                return ls_max

        return self.base_depth

    # ------------------------------------------------------------------
    # Main traversal
    # ------------------------------------------------------------------

    def scan_tree(self) -> dict[str, Any]:
        """Build the nested file tree dictionary.

        Returns dict where None = file (D-02) and dict = directory (D-01).
        Depth limits are applied per-directory via _compute_max_depth().
        Symlinks, names in the exclude sets, .gitignore matches and files
        whose name starts with "-" are omitted.
        """
        self._load_gitignore_spec()
        root_tree: dict[str, Any] = {}
        all_excludes = self._excludes | self._extra_exclude_names

        for dirpath, dirnames, filenames in os.walk(self.root, followlinks=False):
            current = Path(dirpath)
            try:
                rel = current.relative_to(self.root)
            except ValueError:
                continue

            rel_parts = rel.parts
            depth = len(rel_parts)

            if depth >= self._compute_max_depth(rel_parts):
                # Budget exhausted: do not descend and do not list files.
                dirnames.clear()
                continue

            # Slice assignment is required: os.walk only honours pruning
            # done in-place on the list object it handed us.
            dirnames[:] = [
                d for d in dirnames
                if d not in all_excludes
                and not (current / d).is_symlink()
                and not self._is_excluded_by_gitignore(
                    str(rel / d) if rel_parts else d,
                    is_dir=True,
                )
            ]

            node = self._get_or_create_node(root_tree, rel_parts)

            for fname in filenames:
                # Skip flag-shaped names (shell redirect artifacts)
                if fname.startswith("-"):
                    continue
                if (current / fname).is_symlink():
                    continue
                rel_file = str(rel / fname) if rel_parts else fname
                if self._is_excluded_by_gitignore(rel_file, is_dir=False):
                    continue
                node[fname] = None  # D-02: None = file

            # Accepted subdirs appear as (possibly empty) dict nodes even if
            # their own depth budget prevents listing their contents.
            for d in dirnames:
                if d not in node:
                    node[d] = {}

        return root_tree

    def _get_or_create_node(
        self, tree: dict[str, Any], parts: tuple[str, ...]
    ) -> dict[str, Any]:
        """Walk *parts* down from *tree*, creating dict nodes as needed.

        A missing entry, or an entry currently holding ``None``, is replaced
        by an empty dict.  Returns the node for the final component (or
        *tree* itself when *parts* is empty).
        """
        node = tree
        for part in parts:
            if part not in node or node[part] is None:
                node[part] = {}
            node = cast(dict[str, Any], node[part])
        return node

    # ------------------------------------------------------------------
    # Manifest discovery
    # ------------------------------------------------------------------

    def _manifests_in(self, directory: Path) -> list[str]:
        """Return paths of non-symlink MANIFEST_NAMES entries directly in *directory*."""
        found: list[str] = []
        for name in MANIFEST_NAMES:
            candidate = directory / name
            if candidate.exists() and not candidate.is_symlink():
                found.append(str(candidate))
        return found

    def find_manifests(self) -> list[str]:
        """Find manifest files at depth 0-1.

        Root-level manifests come first, followed by manifests located
        directly inside each immediate subdirectory.  Subdirectories that are
        symlinks, are named in the exclude set, or start with "." are not
        inspected.  A PermissionError while listing the root ends the
        subdirectory pass and returns what was collected so far.
        """
        manifests = self._manifests_in(self.root)
        try:
            for child in self.root.iterdir():
                if (
                    child.is_dir()
                    and not child.is_symlink()
                    and child.name not in self._excludes
                    and not child.name.startswith(".")
                ):
                    manifests.extend(self._manifests_in(child))
        except PermissionError:
            # Best effort: keep the manifests gathered before the failure.
            pass
        return manifests
|
|
@@ -384,7 +384,8 @@ def main(
|
|
|
384
384
|
no_tree: bool = typer.Option(
|
|
385
385
|
False,
|
|
386
386
|
"--no-tree",
|
|
387
|
-
|
|
387
|
+
hidden=True,
|
|
388
|
+
help="(Removed) No-op. File tree is excluded by default. Use --tree to include it.",
|
|
388
389
|
),
|
|
389
390
|
tree: bool = typer.Option(
|
|
390
391
|
False,
|
|
@@ -516,13 +517,13 @@ def main(
|
|
|
516
517
|
"contract",
|
|
517
518
|
"--mode",
|
|
518
519
|
help=(
|
|
519
|
-
"Output mode: contract
|
|
520
|
-
"contract
|
|
520
|
+
"Output mode: contract (default) | standard | raw. "
|
|
521
|
+
"contract: minimal per-file contracts — exports, signatures, deps. "
|
|
522
|
+
"Smallest output, recommended for AI agents. "
|
|
523
|
+
"minimal is accepted as an alias for contract. "
|
|
521
524
|
"standard: full per-file detail with imports, relevance scores, extraction method. "
|
|
522
|
-
"
|
|
523
|
-
"
|
|
524
|
-
"raw: legacy project-level analysis (stacks, entry points, dependencies). "
|
|
525
|
-
"contract/minimal is the recommended default for AI coding agents."
|
|
525
|
+
"raw: project-level analysis only (stacks, entry points, dependency summary). "
|
|
526
|
+
"No per-file contracts."
|
|
526
527
|
),
|
|
527
528
|
),
|
|
528
529
|
max_symbols: Optional[int] = typer.Option(
|
|
@@ -534,7 +535,8 @@ def main(
|
|
|
534
535
|
dependency_depth: int = typer.Option(
|
|
535
536
|
0,
|
|
536
537
|
"--dependency-depth",
|
|
537
|
-
|
|
538
|
+
hidden=True,
|
|
539
|
+
help="(Removed) Transitive resolution is not implemented. Pass 0 or omit.",
|
|
538
540
|
min=0,
|
|
539
541
|
max=5,
|
|
540
542
|
),
|
|
@@ -561,7 +563,8 @@ def main(
|
|
|
561
563
|
compress_types: bool = typer.Option(
|
|
562
564
|
False,
|
|
563
565
|
"--compress-types",
|
|
564
|
-
|
|
566
|
+
hidden=True,
|
|
567
|
+
help="(Removed) No observable effect when type signatures are not extracted. Omit.",
|
|
565
568
|
),
|
|
566
569
|
symbol: Optional[str] = typer.Option(
|
|
567
570
|
None,
|
|
@@ -589,8 +592,20 @@ def main(
|
|
|
589
592
|
_t0 = time.monotonic()
|
|
590
593
|
|
|
591
594
|
# Validate new flag choices
|
|
592
|
-
_MODE_CHOICES = ("contract", "minimal", "standard", "
|
|
593
|
-
|
|
595
|
+
_MODE_CHOICES = ("contract", "minimal", "standard", "raw")
|
|
596
|
+
_DEPRECATED_MODES: dict[str, str] = {
|
|
597
|
+
"hybrid": "contract",
|
|
598
|
+
"deep": "standard",
|
|
599
|
+
}
|
|
600
|
+
if mode in _DEPRECATED_MODES:
|
|
601
|
+
fallback = _DEPRECATED_MODES[mode]
|
|
602
|
+
typer.echo(
|
|
603
|
+
f"[deprecated] --mode {mode} is removed: produced identical output to --mode {fallback}. "
|
|
604
|
+
f"Using --mode {fallback}.",
|
|
605
|
+
err=True,
|
|
606
|
+
)
|
|
607
|
+
mode = fallback
|
|
608
|
+
elif mode not in _MODE_CHOICES:
|
|
594
609
|
typer.echo(
|
|
595
610
|
f"Error: invalid value '{mode}' for --mode. Valid options: {', '.join(_MODE_CHOICES)}",
|
|
596
611
|
err=True,
|
|
@@ -604,6 +619,22 @@ def main(
|
|
|
604
619
|
)
|
|
605
620
|
raise typer.Exit(code=1)
|
|
606
621
|
|
|
622
|
+
if dependency_depth > 0:
|
|
623
|
+
typer.echo(
|
|
624
|
+
f"[warning] --dependency-depth {dependency_depth} has no effect: "
|
|
625
|
+
"transitive import resolution is not implemented for npm/yarn/pip projects. "
|
|
626
|
+
"Using depth=0 (direct dependencies only).",
|
|
627
|
+
err=True,
|
|
628
|
+
)
|
|
629
|
+
dependency_depth = 0
|
|
630
|
+
|
|
631
|
+
if compress_types:
|
|
632
|
+
typer.echo(
|
|
633
|
+
"[deprecated] --compress-types is removed: type signatures are rarely extracted "
|
|
634
|
+
"at default depth. Flag ignored.",
|
|
635
|
+
err=True,
|
|
636
|
+
)
|
|
637
|
+
|
|
607
638
|
# Validate format choices
|
|
608
639
|
if format not in FORMAT_CHOICES:
|
|
609
640
|
typer.echo(
|
|
@@ -634,9 +665,9 @@ def main(
|
|
|
634
665
|
raise typer.Exit(code=1)
|
|
635
666
|
|
|
636
667
|
# Normalize mode aliases
|
|
637
|
-
_CONTRACT_MODES = frozenset({"contract", "minimal", "standard"
|
|
668
|
+
_CONTRACT_MODES = frozenset({"contract", "minimal", "standard"})
|
|
638
669
|
if mode == "minimal":
|
|
639
|
-
mode = "contract" # minimal is
|
|
670
|
+
mode = "contract" # minimal is a documented alias for contract
|
|
640
671
|
elif mode not in _CONTRACT_MODES and mode != "raw":
|
|
641
672
|
mode = "contract" # unknown → safe default
|
|
642
673
|
|
|
@@ -648,15 +679,13 @@ def main(
|
|
|
648
679
|
compact or agent or tree or format == "yaml" or trace_pipeline
|
|
649
680
|
or docs or semantics or graph_modules or full_metrics or architecture
|
|
650
681
|
)
|
|
651
|
-
if mode in ("contract", "standard"
|
|
682
|
+
if mode in ("contract", "standard") and _legacy_flags_active:
|
|
652
683
|
mode = "raw"
|
|
653
684
|
|
|
654
685
|
# Map mode to contract_view depth
|
|
655
686
|
_CONTRACT_DEPTH = {
|
|
656
687
|
"contract": "minimal",
|
|
657
688
|
"standard": "standard",
|
|
658
|
-
"deep": "deep",
|
|
659
|
-
"hybrid": "minimal", # hybrid adds bodies via pipeline, minimal header
|
|
660
689
|
}
|
|
661
690
|
|
|
662
691
|
# --- Import analysis modules ---
|
|
@@ -685,6 +714,13 @@ def main(
|
|
|
685
714
|
# 1. Scan directory (SCAN-01 to SCAN-05)
|
|
686
715
|
redactor = SecretRedactor(enabled=not no_redact)
|
|
687
716
|
|
|
717
|
+
# Classify repository topology before scanning. This is a shallow
|
|
718
|
+
# filesystem read (depth 0-1 only) and completes in milliseconds.
|
|
719
|
+
# The topology drives per-directory depth budgets in AdaptiveScanner.
|
|
720
|
+
from sourcecode.adaptive_scanner import AdaptiveScanner
|
|
721
|
+
from sourcecode.repo_classifier import RepoClassifier
|
|
722
|
+
_topology = RepoClassifier().classify(target)
|
|
723
|
+
|
|
688
724
|
# Detect manifests before scan to adjust depth.
|
|
689
725
|
# find_manifests() only looks at depth 0-1, does not need the full tree.
|
|
690
726
|
_pre_scanner = FileScanner(target, max_depth=1)
|
|
@@ -706,7 +742,7 @@ def main(
|
|
|
706
742
|
no_tree = True # agents never need the raw file tree
|
|
707
743
|
typer.echo("[agent] dependencies env-map code-notes (no-tree)", err=True)
|
|
708
744
|
|
|
709
|
-
scanner =
|
|
745
|
+
scanner = AdaptiveScanner(target, topology=_topology, base_depth=effective_depth)
|
|
710
746
|
raw_tree = scanner.scan_tree()
|
|
711
747
|
|
|
712
748
|
# 2. Filter .env and *.secret entries from file tree (SEC-02, all levels)
|
|
@@ -746,6 +782,17 @@ def main(
|
|
|
746
782
|
detector = ProjectDetector(build_default_detectors())
|
|
747
783
|
workspace_analysis = WorkspaceAnalyzer().analyze(target, manifests)
|
|
748
784
|
|
|
785
|
+
# Adaptive traversal handles monorepo source root discovery automatically.
|
|
786
|
+
# Emit a diagnostic when topology confidence is low so users know why.
|
|
787
|
+
import sys as _sys
|
|
788
|
+
if _topology.workspace_type == "monorepo" and _topology.confidence < 0.5:
|
|
789
|
+
if _sys.stderr.isatty():
|
|
790
|
+
typer.echo(
|
|
791
|
+
"[traversal] monorepo detected but source root confidence is low "
|
|
792
|
+
f"({_topology.confidence:.0%}). Use --depth 8 or higher if files are missing.",
|
|
793
|
+
err=True,
|
|
794
|
+
)
|
|
795
|
+
|
|
749
796
|
# --compact implicitly enables lightweight analysis passes so that
|
|
750
797
|
# dependency_summary, env_summary and code_notes_summary are never null.
|
|
751
798
|
if compact:
|
|
@@ -854,7 +901,8 @@ def main(
|
|
|
854
901
|
workspace_root = target / workspace.path
|
|
855
902
|
if not workspace_root.exists() or not workspace_root.is_dir():
|
|
856
903
|
continue
|
|
857
|
-
|
|
904
|
+
_ws_topology = RepoClassifier().classify(workspace_root)
|
|
905
|
+
workspace_scanner = AdaptiveScanner(workspace_root, topology=_ws_topology, base_depth=depth)
|
|
858
906
|
workspace_tree = filter_sensitive_files(workspace_scanner.scan_tree())
|
|
859
907
|
workspace_manifests = workspace_scanner.find_manifests()
|
|
860
908
|
workspace_stacks, workspace_entry_points, _ = detector.detect(
|
|
@@ -966,6 +1014,7 @@ def main(
|
|
|
966
1014
|
metadata = AnalysisMetadata(
|
|
967
1015
|
analyzed_path=str(target),
|
|
968
1016
|
analyzer_fingerprints=_fingerprints,
|
|
1017
|
+
traversal_topology=_topology.as_dict(),
|
|
969
1018
|
)
|
|
970
1019
|
sm = SourceMap(
|
|
971
1020
|
metadata=metadata,
|
|
@@ -995,7 +1044,7 @@ def main(
|
|
|
995
1044
|
target / ws.path,
|
|
996
1045
|
(
|
|
997
1046
|
filter_sensitive_files(
|
|
998
|
-
|
|
1047
|
+
AdaptiveScanner(target / ws.path, base_depth=depth).scan_tree()
|
|
999
1048
|
)
|
|
1000
1049
|
),
|
|
1001
1050
|
workspace=ws.path,
|
|
@@ -1244,7 +1293,7 @@ def main(
|
|
|
1244
1293
|
sm = _replace(sm, pipeline_trace=_trace.build_trace())
|
|
1245
1294
|
|
|
1246
1295
|
# Contract pipeline — runs for mode=contract|standard (skip for raw)
|
|
1247
|
-
_is_contract_mode = mode in ("contract", "standard"
|
|
1296
|
+
_is_contract_mode = mode in ("contract", "standard")
|
|
1248
1297
|
if _is_contract_mode:
|
|
1249
1298
|
from sourcecode.contract_pipeline import ContractPipeline
|
|
1250
1299
|
_cp = ContractPipeline()
|
|
@@ -1263,6 +1312,13 @@ def main(
|
|
|
1263
1312
|
compress_types=compress_types,
|
|
1264
1313
|
)
|
|
1265
1314
|
sm = _replace(sm, file_contracts=_contracts, contract_summary=_contract_summary)
|
|
1315
|
+
if symbol is not None and len(_contracts) == 0:
|
|
1316
|
+
typer.echo(
|
|
1317
|
+
f"[warning] --symbol '{symbol}' matched 0 files. "
|
|
1318
|
+
"The symbol may not exist at the current --depth, or the name may differ in case. "
|
|
1319
|
+
"Try --depth 8 or verify the symbol name.",
|
|
1320
|
+
err=True,
|
|
1321
|
+
)
|
|
1266
1322
|
if agent:
|
|
1267
1323
|
typer.echo(f"[contract] {len(_contracts)} files extracted ({_contract_summary.method_breakdown})", err=True)
|
|
1268
1324
|
|
|
@@ -198,6 +198,13 @@ def _is_hotspot_admin(path: str) -> bool:
|
|
|
198
198
|
for suffix in _HOTSPOT_ADMIN_SUFFIXES:
|
|
199
199
|
if filename.endswith(suffix):
|
|
200
200
|
return True
|
|
201
|
+
# Localized changelogs: CHANGELOG.zh-CN.md, CHANGES.en-US.md, etc.
|
|
202
|
+
_lower = filename.lower()
|
|
203
|
+
if _lower.startswith("changelog.") or _lower.startswith("changes."):
|
|
204
|
+
return True
|
|
205
|
+
# lerna.json is modified by version bumps, not dev work (only lerna.json is matched here)
|
|
206
|
+
if filename in ("lerna.json",):
|
|
207
|
+
return True
|
|
201
208
|
return False
|
|
202
209
|
|
|
203
210
|
|
|
@@ -219,6 +219,16 @@ class MetricsAnalyzer:
|
|
|
219
219
|
if fm.language != "unknown":
|
|
220
220
|
languages.add(fm.language)
|
|
221
221
|
|
|
222
|
+
# Emit explicit limitation when JS/TS files are present but complexity is unavailable.
|
|
223
|
+
# This prevents agents from assuming null complexity means "no functions found".
|
|
224
|
+
_js_ts_count = sum(1 for r in records if r.language in ("javascript", "typescript") and r.complexity_availability == "unavailable")
|
|
225
|
+
if _js_ts_count > 0:
|
|
226
|
+
limitations.append(
|
|
227
|
+
f"cyclomatic_complexity_unavailable: {_js_ts_count} JS/TS file(s) — "
|
|
228
|
+
"complexity requires tree-sitter (pip install 'sourcecode[ast]'). "
|
|
229
|
+
"null complexity fields are expected, not an error."
|
|
230
|
+
)
|
|
231
|
+
|
|
222
232
|
summary = MetricsSummary(
|
|
223
233
|
requested=True,
|
|
224
234
|
file_count=len(records),
|