sourcecode 0.33.0__tar.gz → 0.35.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sourcecode-0.33.0 → sourcecode-0.35.0}/PKG-INFO +1 -1
- {sourcecode-0.33.0 → sourcecode-0.35.0}/pyproject.toml +1 -1
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/__init__.py +1 -1
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/ast_extractor.py +71 -6
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/cli.py +28 -9
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/contract_pipeline.py +67 -19
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/relevance_scorer.py +54 -7
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/serializer.py +290 -53
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_ast_extractor.py +4 -2
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_contract_pipeline.py +17 -14
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_integration.py +9 -7
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_integration_detection.py +4 -4
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_integration_lqn.py +4 -3
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_integration_metrics.py +2 -2
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_integration_multistack.py +2 -2
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_integration_universal.py +3 -3
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_pipeline_integrity.py +7 -7
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_real_projects.py +4 -4
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_signal_hierarchy.py +6 -6
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_summarizer.py +4 -4
- {sourcecode-0.33.0 → sourcecode-0.35.0}/.agents/skills/source-command-gsd-join-discord/SKILL.md +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/.agents/skills/source-command-gsd-review-backlog/SKILL.md +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/.agents/skills/source-command-gsd-workstreams/SKILL.md +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/.gitignore +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/.ruff.toml +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/CONTRIBUTING.md +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/LICENSE +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/README.md +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/SECURITY.md +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/docs/privacy.md +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/docs/schema.md +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/raw +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/architecture_analyzer.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/architecture_summary.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/classifier.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/code_notes_analyzer.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/confidence_analyzer.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/context_summarizer.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/contract_model.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/coverage_parser.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/dependency_analyzer.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/detectors/__init__.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/detectors/base.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/detectors/csproj_parser.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/detectors/dart.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/detectors/dotnet.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/detectors/elixir.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/detectors/go.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/detectors/heuristic.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/detectors/hybrid.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/detectors/java.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/detectors/jvm_ext.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/detectors/nodejs.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/detectors/parsers.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/detectors/php.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/detectors/project.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/detectors/python.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/detectors/ruby.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/detectors/rust.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/detectors/systems.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/detectors/terraform.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/detectors/tooling.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/doc_analyzer.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/entrypoint_classifier.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/env_analyzer.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/file_classifier.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/git_analyzer.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/graph_analyzer.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/metrics_analyzer.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/prepare_context.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/redactor.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/runtime_classifier.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/scanner.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/schema.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/semantic_analyzer.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/summarizer.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/telemetry/__init__.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/telemetry/config.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/telemetry/consent.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/telemetry/events.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/telemetry/filters.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/telemetry/transport.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/tree_utils.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/src/sourcecode/workspace.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/__init__.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/conftest.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/fixtures/coverage.xml +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/fixtures/fastapi_app/pyproject.toml +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/fixtures/fastapi_app/src/main.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/fixtures/go_service/cmd/api/main.go +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/fixtures/go_service/go.mod +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/fixtures/jacoco.xml +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/fixtures/lcov.info +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/fixtures/nextjs_app/app/page.tsx +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/fixtures/nextjs_app/package.json +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/fixtures/nextjs_app/pnpm-lock.yaml +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/fixtures/pnpm_monorepo/apps/web/app/page.tsx +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/fixtures/pnpm_monorepo/apps/web/package.json +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/fixtures/pnpm_monorepo/packages/api/main.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/fixtures/pnpm_monorepo/packages/api/pyproject.toml +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/fixtures/pnpm_monorepo/pnpm-workspace.yaml +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_architecture_analyzer.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_architecture_summary.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_classifier.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_cli.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_code_notes_analyzer.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_coverage_parser.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_cross_consistency.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_dependency_analyzer_node_python.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_dependency_analyzer_polyglot.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_dependency_schema.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_detector_dotnet.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_detector_go_rust_java.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_detector_nodejs.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_detector_php_ruby_dart.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_detector_python.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_detector_universal_managed.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_detector_universal_systems.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_detectors_base.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_doc_analyzer_jsdom.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_doc_analyzer_python.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_graph_analyzer_polyglot.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_graph_analyzer_python_node.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_graph_schema.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_hybrid_inference.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_integration_dependencies.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_integration_docs.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_integration_graph_modules.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_integration_semantics.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_metrics_analyzer.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_packaging.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_phase1_improvements.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_redactor.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_scanner.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_schema.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_schema_normalization.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_semantic_analyzer_node.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_semantic_analyzer_python.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_semantic_import_resolution.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_semantic_schema.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_telemetry.py +0 -0
- {sourcecode-0.33.0 → sourcecode-0.35.0}/tests/test_workspace_analyzer.py +0 -0
|
@@ -13,6 +13,7 @@ Install tree-sitter for best TS/JS results:
|
|
|
13
13
|
|
|
14
14
|
import ast
|
|
15
15
|
import re
|
|
16
|
+
import sys
|
|
16
17
|
from pathlib import Path
|
|
17
18
|
from typing import Any, Iterator, Optional
|
|
18
19
|
|
|
@@ -31,6 +32,45 @@ from sourcecode.contract_model import (
|
|
|
31
32
|
|
|
32
33
|
_MAX_FILE_SIZE = 200_000 # bytes — skip files larger than this
|
|
33
34
|
|
|
35
|
+
# Python stdlib module names — used to filter noise from import lists.
|
|
36
|
+
# sys.stdlib_module_names is available in Python 3.10+; fall back to a
|
|
37
|
+
# curated set for 3.9 compatibility.
|
|
38
|
+
if hasattr(sys, "stdlib_module_names"):
|
|
39
|
+
_PY_STDLIB: frozenset[str] = sys.stdlib_module_names # type: ignore[attr-defined]
|
|
40
|
+
else:
|
|
41
|
+
_PY_STDLIB: frozenset[str] = frozenset({ # type: ignore[no-redef]
|
|
42
|
+
"__future__", "_thread", "abc", "aifc", "argparse", "array", "ast",
|
|
43
|
+
"asynchat", "asyncio", "asyncore", "atexit", "audioop", "base64",
|
|
44
|
+
"bdb", "binascii", "binhex", "bisect", "builtins", "bz2", "calendar",
|
|
45
|
+
"cgi", "cgitb", "chunk", "cmath", "cmd", "code", "codecs", "codeop",
|
|
46
|
+
"collections", "colorsys", "compileall", "concurrent", "configparser",
|
|
47
|
+
"contextlib", "contextvars", "copy", "copyreg", "cProfile", "csv",
|
|
48
|
+
"ctypes", "curses", "dataclasses", "datetime", "dbm", "decimal",
|
|
49
|
+
"difflib", "dis", "doctest", "email", "encodings", "enum", "errno",
|
|
50
|
+
"faulthandler", "fcntl", "filecmp", "fileinput", "fnmatch", "fractions",
|
|
51
|
+
"ftplib", "functools", "gc", "getopt", "getpass", "gettext", "glob",
|
|
52
|
+
"grp", "gzip", "hashlib", "heapq", "hmac", "html", "http", "idlelib",
|
|
53
|
+
"imaplib", "importlib", "inspect", "io", "ipaddress", "itertools",
|
|
54
|
+
"json", "keyword", "lib2to3", "linecache", "locale", "logging", "lzma",
|
|
55
|
+
"mailbox", "marshal", "math", "mimetypes", "mmap", "modulefinder",
|
|
56
|
+
"multiprocessing", "netrc", "nntplib", "numbers", "operator", "optparse",
|
|
57
|
+
"os", "pathlib", "pdb", "pickle", "pickletools", "pipes", "pkgutil",
|
|
58
|
+
"platform", "plistlib", "poplib", "posix", "posixpath", "pprint",
|
|
59
|
+
"profile", "pstats", "pty", "pwd", "py_compile", "pyclbr", "pydoc",
|
|
60
|
+
"queue", "quopri", "random", "re", "readline", "reprlib", "resource",
|
|
61
|
+
"rlcompleter", "runpy", "sched", "secrets", "select", "selectors",
|
|
62
|
+
"shelve", "shlex", "shutil", "signal", "site", "smtpd", "smtplib",
|
|
63
|
+
"sndhdr", "socket", "socketserver", "sqlite3", "ssl", "stat",
|
|
64
|
+
"statistics", "string", "stringprep", "struct", "subprocess", "sunau",
|
|
65
|
+
"symtable", "sys", "sysconfig", "syslog", "tabnanny", "tarfile",
|
|
66
|
+
"tempfile", "termios", "test", "textwrap", "threading", "time",
|
|
67
|
+
"timeit", "tkinter", "token", "tokenize", "tomllib", "trace",
|
|
68
|
+
"traceback", "tracemalloc", "tty", "types", "typing", "unicodedata",
|
|
69
|
+
"unittest", "urllib", "uuid", "venv", "warnings", "wave", "weakref",
|
|
70
|
+
"webbrowser", "wsgiref", "xml", "xmlrpc", "zipapp", "zipfile",
|
|
71
|
+
"zipimport", "zlib", "zoneinfo",
|
|
72
|
+
})
|
|
73
|
+
|
|
34
74
|
_LANGUAGE_MAP: dict[str, str] = {
|
|
35
75
|
".py": "python",
|
|
36
76
|
".ts": "typescript",
|
|
@@ -333,7 +373,8 @@ def _ts_types(root: Any, src: bytes) -> list[TypeDefinition]:
|
|
|
333
373
|
continue
|
|
334
374
|
name = _text(name_n, src)
|
|
335
375
|
fields: list[TypeField] = []
|
|
336
|
-
|
|
376
|
+
# "interface_body" in tree-sitter-typescript >= 0.21; "object_type" in older builds
|
|
377
|
+
body_n = _find_child(node, "interface_body", "object_type")
|
|
337
378
|
if body_n:
|
|
338
379
|
for prop in _walk(body_n):
|
|
339
380
|
if prop.type in ("property_signature", "method_signature"):
|
|
@@ -345,7 +386,7 @@ def _ts_types(root: Any, src: bytes) -> list[TypeDefinition]:
|
|
|
345
386
|
required = not any(c.type == "?" for c in prop.children)
|
|
346
387
|
fields.append(TypeField(name=prop_name, type=type_text, required=required))
|
|
347
388
|
extends: list[str] = []
|
|
348
|
-
heritage_n = _find_child(node, "extends_type_clause", "class_heritage")
|
|
389
|
+
heritage_n = _find_child(node, "extends_type_clause", "extends_clause", "class_heritage")
|
|
349
390
|
if heritage_n:
|
|
350
391
|
for ext_n in _walk(heritage_n):
|
|
351
392
|
if ext_n.type == "type_identifier":
|
|
@@ -389,6 +430,25 @@ def _ts_hooks(root: Any, src: bytes) -> list[str]:
|
|
|
389
430
|
return sorted(used)
|
|
390
431
|
|
|
391
432
|
|
|
433
|
+
def _merge_imports(imports: list[ImportRecord]) -> list[ImportRecord]:
    """Collapse ImportRecords that share a ``source`` into a single record.

    A module may be imported by several statements in one file (for example
    ``import { A }`` and ``import type { B }``).  The result contains each
    source once, in order of first appearance.

    For a source seen more than once:
      * ``symbols`` is the sorted union of all symbols seen so far;
      * ``kind`` is the earliest kind that is not ``"side_effect"`` (a
        side-effect import only wins if every record for the source is one).

    A source that appears only once is passed through untouched (its symbols
    are NOT re-sorted).
    """
    by_source: dict[str, ImportRecord] = {}
    for record in imports:
        previous = by_source.get(record.source)
        if previous is None:
            # First sighting of this source: keep the record as-is.
            by_source[record.source] = record
            continue

        union = sorted({*previous.symbols, *record.symbols})
        if previous.kind == "side_effect":
            # A bare `import "x"` carries no information; defer to the newcomer.
            chosen_kind = record.kind
        else:
            chosen_kind = previous.kind
        by_source[record.source] = ImportRecord(
            source=record.source, symbols=union, kind=chosen_kind
        )
    return list(by_source.values())
|
|
450
|
+
|
|
451
|
+
|
|
392
452
|
def _extract_ts_js_tree_sitter(path: str, source: str, lang_obj: Any, language: str) -> FileContract:
|
|
393
453
|
try:
|
|
394
454
|
parser = _get_parser(lang_obj)
|
|
@@ -396,7 +456,7 @@ def _extract_ts_js_tree_sitter(path: str, source: str, lang_obj: Any, language:
|
|
|
396
456
|
tree = parser.parse(src_bytes)
|
|
397
457
|
root = tree.root_node
|
|
398
458
|
|
|
399
|
-
imports = _ts_imports(root, src_bytes)
|
|
459
|
+
imports = _merge_imports(_ts_imports(root, src_bytes))
|
|
400
460
|
exports = _ts_exports(root, src_bytes)
|
|
401
461
|
exported_names = {e.name for e in exports}
|
|
402
462
|
functions = _ts_functions(root, src_bytes, exported_names)
|
|
@@ -729,9 +789,10 @@ def _py_signature(node: ast.FunctionDef | ast.AsyncFunctionDef) -> str:
|
|
|
729
789
|
sig += f" -> {ast.unparse(node.returns)}"
|
|
730
790
|
except Exception:
|
|
731
791
|
pass
|
|
732
|
-
#
|
|
733
|
-
|
|
734
|
-
|
|
792
|
+
# Keep full signature — serializer applies per-mode compression.
|
|
793
|
+
# Hard cap at 2000 to prevent pathological cases.
|
|
794
|
+
if len(sig) > 2000:
|
|
795
|
+
sig = sig[:1997] + "..."
|
|
735
796
|
return sig
|
|
736
797
|
|
|
737
798
|
|
|
@@ -840,6 +901,10 @@ def _extract_python(path: str, source: str) -> FileContract:
|
|
|
840
901
|
if exported or name in all_names:
|
|
841
902
|
exports.append(ExportRecord(name=name, kind="class"))
|
|
842
903
|
|
|
904
|
+
# Filter stdlib from imports — they add noise without signal for agents
|
|
905
|
+
_stdlib_roots = {m.split(".")[0] for m in _PY_STDLIB}
|
|
906
|
+
imports = [i for i in imports if i.source.split(".")[0] not in _stdlib_roots]
|
|
907
|
+
|
|
843
908
|
deps = sorted({
|
|
844
909
|
imp.source.split(".")[0]
|
|
845
910
|
for imp in imports
|
|
@@ -516,11 +516,13 @@ def main(
|
|
|
516
516
|
"contract",
|
|
517
517
|
"--mode",
|
|
518
518
|
help=(
|
|
519
|
-
"Output mode: contract (default) | hybrid | raw. "
|
|
520
|
-
"contract: per-file
|
|
519
|
+
"Output mode: contract|minimal (default) | standard | deep | hybrid | raw. "
|
|
520
|
+
"contract/minimal: minimal per-file contracts — exports, signatures, deps. Smallest output. "
|
|
521
|
+
"standard: full per-file detail with imports, relevance scores, extraction method. "
|
|
522
|
+
"deep: standard + optional analysis sections (deps, env, git). "
|
|
521
523
|
"hybrid: contracts + compact bodies for top-ranked files. "
|
|
522
524
|
"raw: legacy project-level analysis (stacks, entry points, dependencies). "
|
|
523
|
-
"contract
|
|
525
|
+
"contract/minimal is the recommended default for AI coding agents."
|
|
524
526
|
),
|
|
525
527
|
),
|
|
526
528
|
max_symbols: Optional[int] = typer.Option(
|
|
@@ -587,7 +589,7 @@ def main(
|
|
|
587
589
|
_t0 = time.monotonic()
|
|
588
590
|
|
|
589
591
|
# Validate new flag choices
|
|
590
|
-
_MODE_CHOICES = ("contract", "hybrid", "raw")
|
|
592
|
+
_MODE_CHOICES = ("contract", "minimal", "standard", "deep", "hybrid", "raw")
|
|
591
593
|
if mode not in _MODE_CHOICES:
|
|
592
594
|
typer.echo(
|
|
593
595
|
f"Error: invalid value '{mode}' for --mode. Valid options: {', '.join(_MODE_CHOICES)}",
|
|
@@ -631,6 +633,13 @@ def main(
|
|
|
631
633
|
typer.echo(f"Error: '{target}' is not a directory.", err=True)
|
|
632
634
|
raise typer.Exit(code=1)
|
|
633
635
|
|
|
636
|
+
# Normalize mode aliases
|
|
637
|
+
_CONTRACT_MODES = frozenset({"contract", "minimal", "standard", "deep", "hybrid"})
|
|
638
|
+
if mode == "minimal":
|
|
639
|
+
mode = "contract" # minimal is the canonical default contract rendering
|
|
640
|
+
elif mode not in _CONTRACT_MODES and mode != "raw":
|
|
641
|
+
mode = "contract" # unknown → safe default
|
|
642
|
+
|
|
634
643
|
# Legacy flags imply raw mode unless --mode was explicitly overridden.
|
|
635
644
|
# These flags produce standard_view-only output sections not in contract_view.
|
|
636
645
|
# Preserves backward compat: callers using any legacy flag get their previous format.
|
|
@@ -639,9 +648,17 @@ def main(
|
|
|
639
648
|
compact or agent or tree or format == "yaml" or trace_pipeline
|
|
640
649
|
or docs or semantics or graph_modules or full_metrics or architecture
|
|
641
650
|
)
|
|
642
|
-
if mode
|
|
651
|
+
if mode in ("contract", "standard", "deep") and _legacy_flags_active:
|
|
643
652
|
mode = "raw"
|
|
644
653
|
|
|
654
|
+
# Map mode to contract_view depth
|
|
655
|
+
_CONTRACT_DEPTH = {
|
|
656
|
+
"contract": "minimal",
|
|
657
|
+
"standard": "standard",
|
|
658
|
+
"deep": "deep",
|
|
659
|
+
"hybrid": "minimal", # hybrid adds bodies via pipeline, minimal header
|
|
660
|
+
}
|
|
661
|
+
|
|
645
662
|
# --- Import analysis modules ---
|
|
646
663
|
from dataclasses import asdict, replace
|
|
647
664
|
|
|
@@ -1226,8 +1243,9 @@ def main(
|
|
|
1226
1243
|
))
|
|
1227
1244
|
sm = _replace(sm, pipeline_trace=_trace.build_trace())
|
|
1228
1245
|
|
|
1229
|
-
# Contract pipeline — runs for mode=contract|hybrid (skip for raw)
|
|
1230
|
-
|
|
1246
|
+
# Contract pipeline — runs for mode=contract|standard|deep|hybrid (skip for raw)
|
|
1247
|
+
_is_contract_mode = mode in ("contract", "standard", "deep", "hybrid")
|
|
1248
|
+
if _is_contract_mode:
|
|
1231
1249
|
from sourcecode.contract_pipeline import ContractPipeline
|
|
1232
1250
|
_cp = ContractPipeline()
|
|
1233
1251
|
_contracts, _contract_summary = _cp.run(
|
|
@@ -1249,9 +1267,10 @@ def main(
|
|
|
1249
1267
|
typer.echo(f"[contract] {len(_contracts)} files extracted ({_contract_summary.method_breakdown})", err=True)
|
|
1250
1268
|
|
|
1251
1269
|
# 4. Serialize
|
|
1252
|
-
if
|
|
1270
|
+
if _is_contract_mode:
|
|
1253
1271
|
from sourcecode.serializer import contract_view as _contract_view
|
|
1254
|
-
|
|
1272
|
+
_depth = _CONTRACT_DEPTH.get(mode, "minimal")
|
|
1273
|
+
data = _contract_view(sm, emit_graph=emit_graph, depth=_depth)
|
|
1255
1274
|
if not no_redact:
|
|
1256
1275
|
data = redact_dict(data)
|
|
1257
1276
|
content = json.dumps(data, indent=2, ensure_ascii=False)
|
|
@@ -25,6 +25,23 @@ from sourcecode.schema import EntryPoint, MonorepoPackageInfo
|
|
|
25
25
|
_MAX_FILES = 500 # hard cap on files extracted per run
|
|
26
26
|
_SRC_EXTENSIONS: frozenset[str] = frozenset(_LANGUAGE_MAP.keys())
|
|
27
27
|
|
|
28
|
+
# Role-based score adjustments applied after contract extraction.
|
|
29
|
+
# Runtime roles get a boost; config/util are neutral or penalized.
|
|
30
|
+
_ROLE_SCORE: dict[str, float] = {
|
|
31
|
+
"entrypoint": 0.15,
|
|
32
|
+
"service": 0.10,
|
|
33
|
+
"route": 0.10,
|
|
34
|
+
"api": 0.08,
|
|
35
|
+
"middleware": 0.06,
|
|
36
|
+
"store": 0.05,
|
|
37
|
+
"model": 0.05,
|
|
38
|
+
"hook": 0.05,
|
|
39
|
+
"component": 0.03,
|
|
40
|
+
"util": 0.00,
|
|
41
|
+
"config": -0.10,
|
|
42
|
+
"unknown": 0.00,
|
|
43
|
+
}
|
|
44
|
+
|
|
28
45
|
RankStrategy = Literal["relevance", "centrality", "git-churn"]
|
|
29
46
|
|
|
30
47
|
|
|
@@ -206,9 +223,9 @@ class ContractPipeline:
|
|
|
206
223
|
if changed_only:
|
|
207
224
|
src_paths = [p for p in src_paths if p in changed_files]
|
|
208
225
|
|
|
209
|
-
# Apply max_files cap
|
|
210
|
-
|
|
211
|
-
|
|
226
|
+
# Apply max_files cap — bypass when symbol search to ensure defining files are found.
|
|
227
|
+
# A symbol query over a large repo needs all files; result set is small after filtering.
|
|
228
|
+
if symbol is None and len(src_paths) > self.max_files:
|
|
212
229
|
src_paths = sorted(
|
|
213
230
|
src_paths,
|
|
214
231
|
key=lambda p: (p in entry_paths, scorer.score(p)),
|
|
@@ -255,23 +272,9 @@ class ContractPipeline:
|
|
|
255
272
|
# 7. Rank
|
|
256
273
|
contracts = self._rank(contracts, rank_by)
|
|
257
274
|
|
|
258
|
-
# 8. Symbol filter — keep files that
|
|
275
|
+
# 8. Symbol filter — keep files that define or import the symbol
|
|
259
276
|
if symbol:
|
|
260
|
-
|
|
261
|
-
c for c in contracts
|
|
262
|
-
if any(e.name == symbol for e in c.exports)
|
|
263
|
-
or any(f.name == symbol for f in c.functions)
|
|
264
|
-
or symbol in {t.name for t in c.types}
|
|
265
|
-
]
|
|
266
|
-
# Also pull in direct importers (fan_in sourcing)
|
|
267
|
-
importer_paths = {
|
|
268
|
-
c.path for c in contracts
|
|
269
|
-
for imp in c.imports
|
|
270
|
-
if symbol in imp.symbols
|
|
271
|
-
}
|
|
272
|
-
importer_contracts = [c for c in contracts if c.path in importer_paths]
|
|
273
|
-
symbol_contracts = list({c.path: c for c in symbol_contracts + importer_contracts}.values())
|
|
274
|
-
contracts = sorted(symbol_contracts, key=lambda c: -c.relevance_score)
|
|
277
|
+
contracts = _filter_by_symbol(contracts, symbol)
|
|
275
278
|
|
|
276
279
|
# 9. Entrypoints-only filter
|
|
277
280
|
if entrypoints_only and not symbol:
|
|
@@ -323,6 +326,9 @@ class ContractPipeline:
|
|
|
323
326
|
churn_score = min(churn.get(c.path, 0) / 20.0, 0.1)
|
|
324
327
|
base += churn_score
|
|
325
328
|
|
|
329
|
+
# Role-based boost: runtime roles score higher than auxiliary
|
|
330
|
+
base += _ROLE_SCORE.get(c.role, 0.0)
|
|
331
|
+
|
|
326
332
|
return min(1.0, base)
|
|
327
333
|
|
|
328
334
|
def _rank(self, contracts: list[FileContract], rank_by: RankStrategy) -> list[FileContract]:
|
|
@@ -385,6 +391,48 @@ def _limit_symbols(contracts: list[FileContract], max_symbols: int) -> list[File
|
|
|
385
391
|
return result
|
|
386
392
|
|
|
387
393
|
|
|
394
|
+
# ---------------------------------------------------------------------------
|
|
395
|
+
# Symbol-aware filter
|
|
396
|
+
# ---------------------------------------------------------------------------
|
|
397
|
+
|
|
398
|
+
def _filter_by_symbol(contracts: list[FileContract], symbol: str) -> list[FileContract]:
|
|
399
|
+
"""Return contracts that define or import *symbol*.
|
|
400
|
+
|
|
401
|
+
Matching strategy:
|
|
402
|
+
1. Exact match on export/function/type names.
|
|
403
|
+
2. Case-insensitive fallback when exact match yields nothing.
|
|
404
|
+
3. Importer contracts: files that name the symbol in their imports.
|
|
405
|
+
|
|
406
|
+
Defining contracts are ranked first; importers follow.
|
|
407
|
+
"""
|
|
408
|
+
def _defines(c: FileContract, sym: str, case: bool) -> bool:
|
|
409
|
+
cmp = (lambda a, b: a.lower() == b.lower()) if case else (lambda a, b: a == b)
|
|
410
|
+
return (
|
|
411
|
+
any(cmp(e.name, sym) for e in c.exports)
|
|
412
|
+
or any(cmp(f.name, sym) for f in c.functions)
|
|
413
|
+
or any(cmp(t.name, sym) for t in c.types)
|
|
414
|
+
)
|
|
415
|
+
|
|
416
|
+
def _imports(c: FileContract, sym: str, case: bool) -> bool:
|
|
417
|
+
if case:
|
|
418
|
+
sym_l = sym.lower()
|
|
419
|
+
return any(sym_l == s.lower() for imp in c.imports for s in imp.symbols)
|
|
420
|
+
return any(sym in imp.symbols for imp in c.imports)
|
|
421
|
+
|
|
422
|
+
# Exact match first
|
|
423
|
+
defining = [c for c in contracts if _defines(c, symbol, case=False)]
|
|
424
|
+
if not defining:
|
|
425
|
+
defining = [c for c in contracts if _defines(c, symbol, case=True)]
|
|
426
|
+
|
|
427
|
+
importer_paths = {c.path for c in contracts if _imports(c, symbol, case=len(defining) == 0)}
|
|
428
|
+
# Exclude files already in defining set
|
|
429
|
+
defining_paths = {c.path for c in defining}
|
|
430
|
+
importers = [c for c in contracts if c.path in importer_paths and c.path not in defining_paths]
|
|
431
|
+
|
|
432
|
+
merged = list({c.path: c for c in defining + importers}.values())
|
|
433
|
+
return sorted(merged, key=lambda c: (c.path not in defining_paths, -c.relevance_score))
|
|
434
|
+
|
|
435
|
+
|
|
388
436
|
# ---------------------------------------------------------------------------
|
|
389
437
|
# Dependency graph emission
|
|
390
438
|
# ---------------------------------------------------------------------------
|
|
@@ -82,8 +82,42 @@ _AUXILIARY_DIR_PATTERNS: list[re.Pattern[str]] = [
|
|
|
82
82
|
re.compile(r"(?:^|/)scripts?(?:/|$)"),
|
|
83
83
|
re.compile(r"(?:^|/)tools?(?:/|$)"),
|
|
84
84
|
re.compile(r"(?:^|/)ci(?:/|$)"),
|
|
85
|
+
re.compile(r"(?:^|/)migrations?(?:/|$)"),
|
|
86
|
+
re.compile(r"(?:^|/)generated?(?:/|$)"),
|
|
87
|
+
re.compile(r"(?:^|/)storybook(?:/|$)"),
|
|
88
|
+
re.compile(r"(?:^|/)stories(?:/|$)"),
|
|
85
89
|
]
|
|
86
90
|
|
|
91
|
+
# Test file patterns — scored low, excluded from default contract output
|
|
92
|
+
_TEST_FILE_PATTERNS: tuple[str, ...] = (
|
|
93
|
+
"_test.", ".test.", ".spec.", "test_", "conftest", "_spec.",
|
|
94
|
+
)
|
|
95
|
+
_TEST_DIR_MARKERS: frozenset[str] = frozenset({
|
|
96
|
+
"/test/", "/tests/", "/spec/", "/specs/", "/__tests__/", "/__mocks__/",
|
|
97
|
+
})
|
|
98
|
+
|
|
99
|
+
# Config/tooling filenames that are low runtime-relevance
|
|
100
|
+
_LOW_RUNTIME_STEMS: frozenset[str] = frozenset({
|
|
101
|
+
"setup", "setup.cfg", "pyproject", "package", "package-lock",
|
|
102
|
+
"yarn.lock", "pnpm-lock", "composer", "gemfile", "podfile",
|
|
103
|
+
"dockerfile", "docker-compose", "makefile", "rakefile",
|
|
104
|
+
"gruntfile", "gulpfile", "webpack.config", "vite.config",
|
|
105
|
+
"rollup.config", "babel.config", "jest.config", "vitest.config",
|
|
106
|
+
"tsconfig", "jsconfig", ".eslintrc", ".prettierrc", ".editorconfig",
|
|
107
|
+
# doc-site tooling
|
|
108
|
+
"rspress", "rspress.config", "docusaurus.config", "docusaurus",
|
|
109
|
+
"vuepress.config", "vuepress", "nextra.config",
|
|
110
|
+
"astro.config", "gatsby.config", "gatsby-config",
|
|
111
|
+
# build/workspace orchestration
|
|
112
|
+
"turbo", "turbo.config", "nx", "nx.config", "lerna",
|
|
113
|
+
"esbuild.config", "swc.config", "postcss.config",
|
|
114
|
+
"tailwind.config", "tailwind",
|
|
115
|
+
# storybook
|
|
116
|
+
"main.storybook", "preview.storybook",
|
|
117
|
+
# playwright / cypress / e2e
|
|
118
|
+
"playwright.config", "cypress.config",
|
|
119
|
+
})
|
|
120
|
+
|
|
87
121
|
_HIGH_VALUE_SUFFIXES: frozenset[str] = frozenset({
|
|
88
122
|
".py", ".ts", ".tsx", ".js", ".jsx", ".mjs",
|
|
89
123
|
".go", ".java", ".kt", ".rs", ".rb", ".cs",
|
|
@@ -114,7 +148,7 @@ class RelevanceScorer:
|
|
|
114
148
|
|
|
115
149
|
base = 0.3
|
|
116
150
|
|
|
117
|
-
# Package role boost
|
|
151
|
+
# Package role boost — runtime code scores high, tooling/docs low
|
|
118
152
|
role = self._package_role(norm)
|
|
119
153
|
role_boost = {
|
|
120
154
|
"runtime_core": 0.4,
|
|
@@ -124,10 +158,10 @@ class RelevanceScorer:
|
|
|
124
158
|
"composition_layer": 0.2,
|
|
125
159
|
"plugin_package": 0.15,
|
|
126
160
|
"infrastructure_layer": 0.15,
|
|
127
|
-
"tooling_layer": -0.
|
|
128
|
-
"docs_layer": -0.
|
|
129
|
-
"test_layer": 0.
|
|
130
|
-
"benchmark_layer": -0.
|
|
161
|
+
"tooling_layer": -0.15,
|
|
162
|
+
"docs_layer": -0.25,
|
|
163
|
+
"test_layer": -0.1,
|
|
164
|
+
"benchmark_layer": -0.25,
|
|
131
165
|
}.get(role, 0.0)
|
|
132
166
|
base += role_boost
|
|
133
167
|
|
|
@@ -141,9 +175,22 @@ class RelevanceScorer:
|
|
|
141
175
|
if stem in _ENTRYPOINT_STEMS:
|
|
142
176
|
base += 0.15
|
|
143
177
|
|
|
144
|
-
#
|
|
178
|
+
# Test file penalty — tests are useful for coverage but not for
|
|
179
|
+
# understanding architecture or editing production code
|
|
180
|
+
fname = Path(norm).name.lower()
|
|
181
|
+
if (any(m in f"/{norm}/" for m in _TEST_DIR_MARKERS)
|
|
182
|
+
or any(fname.startswith(p.strip(".")) or p in fname
|
|
183
|
+
for p in _TEST_FILE_PATTERNS)):
|
|
184
|
+
base -= 0.30
|
|
185
|
+
|
|
186
|
+
# Config/tooling filename penalty — stronger than before
|
|
187
|
+
if stem.lower() in _LOW_RUNTIME_STEMS:
|
|
188
|
+
base -= 0.30
|
|
189
|
+
|
|
190
|
+
# Auxiliary dir penalty (docs, examples, demos, fixtures, scripts…)
|
|
191
|
+
# Aggressive: these almost never belong in top-ranked agent context
|
|
145
192
|
if self._is_auxiliary(norm):
|
|
146
|
-
base -= 0.
|
|
193
|
+
base -= 0.40
|
|
147
194
|
|
|
148
195
|
return max(0.0, min(1.0, base))
|
|
149
196
|
|