sourcecode 0.35.0__py3-none-any.whl → 0.37.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sourcecode/__init__.py +1 -1
- sourcecode/ast_extractor.py +21 -6
- sourcecode/cli.py +66 -17
- sourcecode/contract_pipeline.py +178 -21
- sourcecode/git_analyzer.py +7 -0
- sourcecode/metrics_analyzer.py +10 -0
- sourcecode/serializer.py +20 -1
- {sourcecode-0.35.0.dist-info → sourcecode-0.37.0.dist-info}/METADATA +1 -1
- {sourcecode-0.35.0.dist-info → sourcecode-0.37.0.dist-info}/RECORD +12 -12
- {sourcecode-0.35.0.dist-info → sourcecode-0.37.0.dist-info}/WHEEL +0 -0
- {sourcecode-0.35.0.dist-info → sourcecode-0.37.0.dist-info}/entry_points.txt +0 -0
- {sourcecode-0.35.0.dist-info → sourcecode-0.37.0.dist-info}/licenses/LICENSE +0 -0
sourcecode/__init__.py
CHANGED
sourcecode/ast_extractor.py
CHANGED
|
@@ -296,11 +296,20 @@ def _ts_exports(root: Any, src: bytes) -> list[ExportRecord]:
|
|
|
296
296
|
handled = True
|
|
297
297
|
|
|
298
298
|
if not handled and is_default:
|
|
299
|
-
# export default <expression>
|
|
299
|
+
# export default <expression> — preserve local binding name when available
|
|
300
300
|
for child in node.children:
|
|
301
301
|
if child.type not in ("export", "default", ";") and not child.type.startswith("comment"):
|
|
302
|
-
|
|
303
|
-
|
|
302
|
+
if child.type in ("identifier", "type_identifier"):
|
|
303
|
+
# export default app → name="app"
|
|
304
|
+
name = _text(child, src)
|
|
305
|
+
elif child.type == "call_expression":
|
|
306
|
+
# export default defineConfig({}) → name="defineConfig"
|
|
307
|
+
fn_n = _find_child(child, "identifier")
|
|
308
|
+
name = _text(fn_n, src) if fn_n else "default"
|
|
309
|
+
else:
|
|
310
|
+
# object/array/other expression — look one level deep
|
|
311
|
+
name_n = _find_child(child, "identifier", "type_identifier")
|
|
312
|
+
name = _text(name_n, src) if name_n else "default"
|
|
304
313
|
records.append(ExportRecord(name=name, kind="default"))
|
|
305
314
|
break
|
|
306
315
|
|
|
@@ -692,7 +701,7 @@ def _heuristic_ts_types(source: str) -> list[TypeDefinition]:
|
|
|
692
701
|
return types
|
|
693
702
|
|
|
694
703
|
|
|
695
|
-
def _extract_ts_js_heuristic(path: str, source: str, language: str) -> FileContract:
|
|
704
|
+
def _extract_ts_js_heuristic(path: str, source: str, language: str, *, ts_installed: bool = False) -> FileContract:
|
|
696
705
|
imports = _heuristic_ts_imports(source)
|
|
697
706
|
exports = _heuristic_ts_exports(source)
|
|
698
707
|
exported_names = {e.name for e in exports}
|
|
@@ -711,6 +720,12 @@ def _extract_ts_js_heuristic(path: str, source: str, language: str) -> FileContr
|
|
|
711
720
|
if not imp.source.startswith(".") and not imp.source.startswith("/")
|
|
712
721
|
})
|
|
713
722
|
|
|
723
|
+
# Distinguish "tree-sitter absent" from "language parser not loaded"
|
|
724
|
+
if ts_installed:
|
|
725
|
+
lim = f"ts_lang_missing: tree-sitter parser for {language!r} not loaded; install sourcecode[ast]"
|
|
726
|
+
else:
|
|
727
|
+
lim = "tree_sitter_unavailable: install sourcecode[ast] for full TS/JS extraction"
|
|
728
|
+
|
|
714
729
|
return FileContract(
|
|
715
730
|
path=path,
|
|
716
731
|
language=language,
|
|
@@ -721,7 +736,7 @@ def _extract_ts_js_heuristic(path: str, source: str, language: str) -> FileContr
|
|
|
721
736
|
hooks_used=hooks_used,
|
|
722
737
|
dependencies=deps,
|
|
723
738
|
extraction_method="heuristic",
|
|
724
|
-
limitations=[
|
|
739
|
+
limitations=[lim],
|
|
725
740
|
)
|
|
726
741
|
|
|
727
742
|
|
|
@@ -1039,7 +1054,7 @@ class AstExtractor:
|
|
|
1039
1054
|
if lang_obj is not None:
|
|
1040
1055
|
contract = _extract_ts_js_tree_sitter(rel_path, source, lang_obj, language)
|
|
1041
1056
|
else:
|
|
1042
|
-
contract = _extract_ts_js_heuristic(rel_path, source, language)
|
|
1057
|
+
contract = _extract_ts_js_heuristic(rel_path, source, language, ts_installed=True)
|
|
1043
1058
|
else:
|
|
1044
1059
|
contract = _extract_ts_js_heuristic(rel_path, source, language)
|
|
1045
1060
|
|
sourcecode/cli.py
CHANGED
|
@@ -384,7 +384,8 @@ def main(
|
|
|
384
384
|
no_tree: bool = typer.Option(
|
|
385
385
|
False,
|
|
386
386
|
"--no-tree",
|
|
387
|
-
|
|
387
|
+
hidden=True,
|
|
388
|
+
help="(Removed) No-op. File tree is excluded by default. Use --tree to include it.",
|
|
388
389
|
),
|
|
389
390
|
tree: bool = typer.Option(
|
|
390
391
|
False,
|
|
@@ -516,13 +517,13 @@ def main(
|
|
|
516
517
|
"contract",
|
|
517
518
|
"--mode",
|
|
518
519
|
help=(
|
|
519
|
-
"Output mode: contract
|
|
520
|
-
"contract
|
|
520
|
+
"Output mode: contract (default) | standard | raw. "
|
|
521
|
+
"contract: minimal per-file contracts — exports, signatures, deps. "
|
|
522
|
+
"Smallest output, recommended for AI agents. "
|
|
523
|
+
"minimal is accepted as an alias for contract. "
|
|
521
524
|
"standard: full per-file detail with imports, relevance scores, extraction method. "
|
|
522
|
-
"
|
|
523
|
-
"
|
|
524
|
-
"raw: legacy project-level analysis (stacks, entry points, dependencies). "
|
|
525
|
-
"contract/minimal is the recommended default for AI coding agents."
|
|
525
|
+
"raw: project-level analysis only (stacks, entry points, dependency summary). "
|
|
526
|
+
"No per-file contracts."
|
|
526
527
|
),
|
|
527
528
|
),
|
|
528
529
|
max_symbols: Optional[int] = typer.Option(
|
|
@@ -534,7 +535,8 @@ def main(
|
|
|
534
535
|
dependency_depth: int = typer.Option(
|
|
535
536
|
0,
|
|
536
537
|
"--dependency-depth",
|
|
537
|
-
|
|
538
|
+
hidden=True,
|
|
539
|
+
help="(Removed) Transitive resolution is not implemented. Pass 0 or omit.",
|
|
538
540
|
min=0,
|
|
539
541
|
max=5,
|
|
540
542
|
),
|
|
@@ -561,7 +563,8 @@ def main(
|
|
|
561
563
|
compress_types: bool = typer.Option(
|
|
562
564
|
False,
|
|
563
565
|
"--compress-types",
|
|
564
|
-
|
|
566
|
+
hidden=True,
|
|
567
|
+
help="(Removed) No observable effect when type signatures are not extracted. Omit.",
|
|
565
568
|
),
|
|
566
569
|
symbol: Optional[str] = typer.Option(
|
|
567
570
|
None,
|
|
@@ -589,8 +592,20 @@ def main(
|
|
|
589
592
|
_t0 = time.monotonic()
|
|
590
593
|
|
|
591
594
|
# Validate new flag choices
|
|
592
|
-
_MODE_CHOICES = ("contract", "minimal", "standard", "
|
|
593
|
-
|
|
595
|
+
_MODE_CHOICES = ("contract", "minimal", "standard", "raw")
|
|
596
|
+
_DEPRECATED_MODES: dict[str, str] = {
|
|
597
|
+
"hybrid": "contract",
|
|
598
|
+
"deep": "standard",
|
|
599
|
+
}
|
|
600
|
+
if mode in _DEPRECATED_MODES:
|
|
601
|
+
fallback = _DEPRECATED_MODES[mode]
|
|
602
|
+
typer.echo(
|
|
603
|
+
f"[deprecated] --mode {mode} is removed: produced identical output to --mode {fallback}. "
|
|
604
|
+
f"Using --mode {fallback}.",
|
|
605
|
+
err=True,
|
|
606
|
+
)
|
|
607
|
+
mode = fallback
|
|
608
|
+
elif mode not in _MODE_CHOICES:
|
|
594
609
|
typer.echo(
|
|
595
610
|
f"Error: invalid value '{mode}' for --mode. Valid options: {', '.join(_MODE_CHOICES)}",
|
|
596
611
|
err=True,
|
|
@@ -604,6 +619,22 @@ def main(
|
|
|
604
619
|
)
|
|
605
620
|
raise typer.Exit(code=1)
|
|
606
621
|
|
|
622
|
+
if dependency_depth > 0:
|
|
623
|
+
typer.echo(
|
|
624
|
+
f"[warning] --dependency-depth {dependency_depth} has no effect: "
|
|
625
|
+
"transitive import resolution is not implemented for npm/yarn/pip projects. "
|
|
626
|
+
"Using depth=0 (direct dependencies only).",
|
|
627
|
+
err=True,
|
|
628
|
+
)
|
|
629
|
+
dependency_depth = 0
|
|
630
|
+
|
|
631
|
+
if compress_types:
|
|
632
|
+
typer.echo(
|
|
633
|
+
"[deprecated] --compress-types is removed: type signatures are rarely extracted "
|
|
634
|
+
"at default depth. Flag ignored.",
|
|
635
|
+
err=True,
|
|
636
|
+
)
|
|
637
|
+
|
|
607
638
|
# Validate format choices
|
|
608
639
|
if format not in FORMAT_CHOICES:
|
|
609
640
|
typer.echo(
|
|
@@ -634,9 +665,9 @@ def main(
|
|
|
634
665
|
raise typer.Exit(code=1)
|
|
635
666
|
|
|
636
667
|
# Normalize mode aliases
|
|
637
|
-
_CONTRACT_MODES = frozenset({"contract", "minimal", "standard"
|
|
668
|
+
_CONTRACT_MODES = frozenset({"contract", "minimal", "standard"})
|
|
638
669
|
if mode == "minimal":
|
|
639
|
-
mode = "contract" # minimal is
|
|
670
|
+
mode = "contract" # minimal is a documented alias for contract
|
|
640
671
|
elif mode not in _CONTRACT_MODES and mode != "raw":
|
|
641
672
|
mode = "contract" # unknown → safe default
|
|
642
673
|
|
|
@@ -648,15 +679,13 @@ def main(
|
|
|
648
679
|
compact or agent or tree or format == "yaml" or trace_pipeline
|
|
649
680
|
or docs or semantics or graph_modules or full_metrics or architecture
|
|
650
681
|
)
|
|
651
|
-
if mode in ("contract", "standard"
|
|
682
|
+
if mode in ("contract", "standard") and _legacy_flags_active:
|
|
652
683
|
mode = "raw"
|
|
653
684
|
|
|
654
685
|
# Map mode to contract_view depth
|
|
655
686
|
_CONTRACT_DEPTH = {
|
|
656
687
|
"contract": "minimal",
|
|
657
688
|
"standard": "standard",
|
|
658
|
-
"deep": "deep",
|
|
659
|
-
"hybrid": "minimal", # hybrid adds bodies via pipeline, minimal header
|
|
660
689
|
}
|
|
661
690
|
|
|
662
691
|
# --- Import analysis modules ---
|
|
@@ -746,6 +775,19 @@ def main(
|
|
|
746
775
|
detector = ProjectDetector(build_default_detectors())
|
|
747
776
|
workspace_analysis = WorkspaceAnalyzer().analyze(target, manifests)
|
|
748
777
|
|
|
778
|
+
# Warn when scanning a monorepo at default depth — typical package sources
|
|
779
|
+
# (packages/*/src/) live at depth 5+, so default depth=4 silently misses them.
|
|
780
|
+
# Only emit to TTY to avoid contaminating piped/CI output; agents read analysis_gaps.
|
|
781
|
+
import sys as _sys
|
|
782
|
+
if workspace_analysis.is_monorepo and depth <= 4 and effective_depth <= 4:
|
|
783
|
+
if _sys.stderr.isatty():
|
|
784
|
+
typer.echo(
|
|
785
|
+
f"[warning] monorepo detected with --depth {depth}. "
|
|
786
|
+
"Source files in packages/*/src/ (depth 5+) may be invisible. "
|
|
787
|
+
"Use --depth 6 or higher for full coverage.",
|
|
788
|
+
err=True,
|
|
789
|
+
)
|
|
790
|
+
|
|
749
791
|
# --compact implicitly enables lightweight analysis passes so that
|
|
750
792
|
# dependency_summary, env_summary and code_notes_summary are never null.
|
|
751
793
|
if compact:
|
|
@@ -1244,7 +1286,7 @@ def main(
|
|
|
1244
1286
|
sm = _replace(sm, pipeline_trace=_trace.build_trace())
|
|
1245
1287
|
|
|
1246
1288
|
# Contract pipeline — runs for mode=contract|standard|deep|hybrid (skip for raw)
|
|
1247
|
-
_is_contract_mode = mode in ("contract", "standard"
|
|
1289
|
+
_is_contract_mode = mode in ("contract", "standard")
|
|
1248
1290
|
if _is_contract_mode:
|
|
1249
1291
|
from sourcecode.contract_pipeline import ContractPipeline
|
|
1250
1292
|
_cp = ContractPipeline()
|
|
@@ -1263,6 +1305,13 @@ def main(
|
|
|
1263
1305
|
compress_types=compress_types,
|
|
1264
1306
|
)
|
|
1265
1307
|
sm = _replace(sm, file_contracts=_contracts, contract_summary=_contract_summary)
|
|
1308
|
+
if symbol is not None and len(_contracts) == 0:
|
|
1309
|
+
typer.echo(
|
|
1310
|
+
f"[warning] --symbol '{symbol}' matched 0 files. "
|
|
1311
|
+
"The symbol may not exist at the current --depth, or the name may differ in case. "
|
|
1312
|
+
"Try --depth 8 or verify the symbol name.",
|
|
1313
|
+
err=True,
|
|
1314
|
+
)
|
|
1266
1315
|
if agent:
|
|
1267
1316
|
typer.echo(f"[contract] {len(_contracts)} files extracted ({_contract_summary.method_breakdown})", err=True)
|
|
1268
1317
|
|
sourcecode/contract_pipeline.py
CHANGED
|
@@ -8,6 +8,8 @@ Produces a list of FileContracts ranked by semantic importance,
|
|
|
8
8
|
with fan-in/fan-out computed from the import graph.
|
|
9
9
|
"""
|
|
10
10
|
|
|
11
|
+
import os
|
|
12
|
+
import re
|
|
11
13
|
import subprocess
|
|
12
14
|
from collections import Counter
|
|
13
15
|
from pathlib import Path
|
|
@@ -275,6 +277,16 @@ class ContractPipeline:
|
|
|
275
277
|
# 8. Symbol filter — keep files that define or import the symbol
|
|
276
278
|
if symbol:
|
|
277
279
|
contracts = _filter_by_symbol(contracts, symbol)
|
|
280
|
+
# When shallow scan missed the defining file (deep monorepo), fall back
|
|
281
|
+
# to a grep-based filesystem search over the full directory tree.
|
|
282
|
+
if not contracts:
|
|
283
|
+
contracts = self._symbol_deep_scan(
|
|
284
|
+
root, symbol,
|
|
285
|
+
known_paths=set(src_paths),
|
|
286
|
+
entry_paths=entry_paths,
|
|
287
|
+
changed_files=changed_files,
|
|
288
|
+
scorer=scorer,
|
|
289
|
+
)
|
|
278
290
|
|
|
279
291
|
# 9. Entrypoints-only filter
|
|
280
292
|
if entrypoints_only and not symbol:
|
|
@@ -340,6 +352,38 @@ class ContractPipeline:
|
|
|
340
352
|
# Default: relevance
|
|
341
353
|
return sorted(contracts, key=lambda c: (-c.is_entrypoint, -c.relevance_score))
|
|
342
354
|
|
|
355
|
+
def _symbol_deep_scan(
|
|
356
|
+
self,
|
|
357
|
+
root: Path,
|
|
358
|
+
symbol: str,
|
|
359
|
+
known_paths: set[str],
|
|
360
|
+
entry_paths: set[str],
|
|
361
|
+
changed_files: set[str],
|
|
362
|
+
scorer: RelevanceScorer,
|
|
363
|
+
) -> list[FileContract]:
|
|
364
|
+
"""Grep-based fallback when the shallow scan missed the defining files.
|
|
365
|
+
|
|
366
|
+
Searches the full directory tree for source files containing *symbol*,
|
|
367
|
+
extracts contracts for candidates not already processed, then re-applies
|
|
368
|
+
the symbol filter. Fan-in/fan-out are not computed for these contracts.
|
|
369
|
+
"""
|
|
370
|
+
candidates = _find_symbol_files(root, symbol, known_paths, scorer)
|
|
371
|
+
if not candidates:
|
|
372
|
+
return []
|
|
373
|
+
|
|
374
|
+
extra: list[FileContract] = []
|
|
375
|
+
for rel_path in candidates[:300]: # cap to prevent excessive extraction
|
|
376
|
+
abs_path = root / rel_path
|
|
377
|
+
contract = self._extractor.extract(abs_path, root)
|
|
378
|
+
if contract is None:
|
|
379
|
+
continue
|
|
380
|
+
contract.is_entrypoint = rel_path in entry_paths
|
|
381
|
+
contract.is_changed = rel_path in changed_files
|
|
382
|
+
contract.relevance_score = scorer.score(rel_path)
|
|
383
|
+
extra.append(contract)
|
|
384
|
+
|
|
385
|
+
return _filter_by_symbol(extra, symbol)
|
|
386
|
+
|
|
343
387
|
|
|
344
388
|
# ---------------------------------------------------------------------------
|
|
345
389
|
# Helpers
|
|
@@ -354,7 +398,6 @@ def _compress_contract_types(c: FileContract) -> None:
|
|
|
354
398
|
(r"React\.ReactNode", "ReactNode"),
|
|
355
399
|
(r"React\.ReactElement", "ReactElement"),
|
|
356
400
|
]
|
|
357
|
-
import re
|
|
358
401
|
for fn in c.functions:
|
|
359
402
|
for pattern, repl in _replacements:
|
|
360
403
|
fn.signature = re.sub(pattern, repl, fn.signature)
|
|
@@ -396,41 +439,155 @@ def _limit_symbols(contracts: list[FileContract], max_symbols: int) -> list[File
|
|
|
396
439
|
# ---------------------------------------------------------------------------
|
|
397
440
|
|
|
398
441
|
def _filter_by_symbol(contracts: list[FileContract], symbol: str) -> list[FileContract]:
|
|
399
|
-
"""Return contracts that define or
|
|
442
|
+
"""Return contracts that define, import, or structurally reference *symbol*.
|
|
400
443
|
|
|
401
|
-
|
|
402
|
-
1. Exact match
|
|
403
|
-
2. Case-insensitive
|
|
404
|
-
3.
|
|
444
|
+
Four tiers applied in order:
|
|
445
|
+
1. Exact name match — export/function/type names.
|
|
446
|
+
2. Case-insensitive name match when tier 1 yields nothing.
|
|
447
|
+
3. Import symbol match — name appears in import symbol list.
|
|
448
|
+
4. Type-reference match — symbol in extends clauses, field types, or
|
|
449
|
+
function signatures (word-boundary). Only used when tiers 1-3 fail.
|
|
405
450
|
|
|
406
|
-
Defining contracts are ranked first; importers follow.
|
|
451
|
+
Defining contracts are ranked first; importers and references follow.
|
|
407
452
|
"""
|
|
408
|
-
|
|
453
|
+
sym_l = symbol.lower()
|
|
454
|
+
word_re = re.compile(
|
|
455
|
+
r"(?<![A-Za-z0-9_])" + re.escape(symbol) + r"(?![A-Za-z0-9_])",
|
|
456
|
+
re.IGNORECASE,
|
|
457
|
+
)
|
|
458
|
+
|
|
459
|
+
def _defines(c: FileContract, case: bool) -> bool:
|
|
409
460
|
cmp = (lambda a, b: a.lower() == b.lower()) if case else (lambda a, b: a == b)
|
|
410
461
|
return (
|
|
411
|
-
any(cmp(e.name,
|
|
412
|
-
or any(cmp(f.name,
|
|
413
|
-
or any(cmp(t.name,
|
|
462
|
+
any(cmp(e.name, symbol) for e in c.exports)
|
|
463
|
+
or any(cmp(f.name, symbol) for f in c.functions)
|
|
464
|
+
or any(cmp(t.name, symbol) for t in c.types)
|
|
414
465
|
)
|
|
415
466
|
|
|
416
|
-
def
|
|
467
|
+
def _imports_sym(c: FileContract, case: bool) -> bool:
|
|
417
468
|
if case:
|
|
418
|
-
sym_l = sym.lower()
|
|
419
469
|
return any(sym_l == s.lower() for imp in c.imports for s in imp.symbols)
|
|
420
|
-
return any(
|
|
470
|
+
return any(symbol in imp.symbols for imp in c.imports)
|
|
471
|
+
|
|
472
|
+
def _references_type(c: FileContract) -> bool:
|
|
473
|
+
"""Tier 4: symbol appears in extends clauses, field types, or signatures."""
|
|
474
|
+
for t in c.types:
|
|
475
|
+
if any(sym_l in ext.lower() for ext in t.extends):
|
|
476
|
+
return True
|
|
477
|
+
for field in t.fields:
|
|
478
|
+
if sym_l in field.type.lower():
|
|
479
|
+
return True
|
|
480
|
+
for f in c.functions:
|
|
481
|
+
if word_re.search(f.signature):
|
|
482
|
+
return True
|
|
483
|
+
return False
|
|
421
484
|
|
|
422
|
-
#
|
|
423
|
-
defining = [c for c in contracts if _defines(c,
|
|
485
|
+
# Tier 1: exact name match
|
|
486
|
+
defining = [c for c in contracts if _defines(c, case=False)]
|
|
487
|
+
# Tier 2: case-insensitive name match
|
|
424
488
|
if not defining:
|
|
425
|
-
defining = [c for c in contracts if _defines(c,
|
|
489
|
+
defining = [c for c in contracts if _defines(c, case=True)]
|
|
426
490
|
|
|
427
|
-
importer_paths = {c.path for c in contracts if _imports(c, symbol, case=len(defining) == 0)}
|
|
428
|
-
# Exclude files already in defining set
|
|
429
491
|
defining_paths = {c.path for c in defining}
|
|
492
|
+
|
|
493
|
+
# Tier 3: import matching (case-insensitive when no definers found)
|
|
494
|
+
ci_imports = len(defining) == 0
|
|
495
|
+
importer_paths = {c.path for c in contracts if _imports_sym(c, case=ci_imports)}
|
|
430
496
|
importers = [c for c in contracts if c.path in importer_paths and c.path not in defining_paths]
|
|
431
497
|
|
|
432
|
-
|
|
433
|
-
|
|
498
|
+
# Tier 4: type-reference matching (only when tiers 1-3 yield nothing)
|
|
499
|
+
references: list[FileContract] = []
|
|
500
|
+
if not defining and not importers:
|
|
501
|
+
ref_paths = {c.path for c in contracts if _references_type(c)}
|
|
502
|
+
references = [c for c in contracts if c.path in ref_paths]
|
|
503
|
+
|
|
504
|
+
# Merge in priority order: defining > importers > type-references
|
|
505
|
+
seen: set[str] = set()
|
|
506
|
+
merged: list[FileContract] = []
|
|
507
|
+
for c in defining + importers + references:
|
|
508
|
+
if c.path not in seen:
|
|
509
|
+
seen.add(c.path)
|
|
510
|
+
merged.append(c)
|
|
511
|
+
|
|
512
|
+
return sorted(merged, key=lambda c: (
|
|
513
|
+
c.path not in defining_paths,
|
|
514
|
+
c.path not in importer_paths,
|
|
515
|
+
-c.relevance_score,
|
|
516
|
+
))
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
# ---------------------------------------------------------------------------
|
|
520
|
+
# Deep symbol scan — grep-based fallback for shallow-scanned repos
|
|
521
|
+
# ---------------------------------------------------------------------------
|
|
522
|
+
|
|
523
|
+
_DEEP_SCAN_NOISE_DIRS: frozenset[str] = frozenset({
|
|
524
|
+
"node_modules", ".git", "dist", "build", "__pycache__",
|
|
525
|
+
".venv", "venv", "target", ".next", ".nuxt", ".turbo", "coverage",
|
|
526
|
+
".nyc_output", ".mypy_cache", ".pytest_cache",
|
|
527
|
+
})
|
|
528
|
+
|
|
529
|
+
|
|
530
|
+
def _find_symbol_files(
|
|
531
|
+
root: Path,
|
|
532
|
+
symbol: str,
|
|
533
|
+
known_paths: set[str],
|
|
534
|
+
scorer: RelevanceScorer,
|
|
535
|
+
) -> list[str]:
|
|
536
|
+
"""Find source files outside *known_paths* that contain *symbol* as text.
|
|
537
|
+
|
|
538
|
+
Uses subprocess grep when available (fast); falls back to os.walk + read.
|
|
539
|
+
Returns repo-relative paths, noise-filtered.
|
|
540
|
+
"""
|
|
541
|
+
found: list[str] = []
|
|
542
|
+
|
|
543
|
+
# Try grep (fast, available on Linux/Mac)
|
|
544
|
+
try:
|
|
545
|
+
result = subprocess.run(
|
|
546
|
+
[
|
|
547
|
+
"grep", "-rl",
|
|
548
|
+
"--include=*.ts", "--include=*.tsx",
|
|
549
|
+
"--include=*.js", "--include=*.jsx",
|
|
550
|
+
"--include=*.py",
|
|
551
|
+
symbol, ".",
|
|
552
|
+
],
|
|
553
|
+
cwd=str(root),
|
|
554
|
+
capture_output=True,
|
|
555
|
+
text=True,
|
|
556
|
+
timeout=20,
|
|
557
|
+
)
|
|
558
|
+
for line in result.stdout.splitlines():
|
|
559
|
+
line = line.strip()
|
|
560
|
+
if line.startswith("./"):
|
|
561
|
+
line = line[2:]
|
|
562
|
+
line = line.replace("\\", "/")
|
|
563
|
+
if line and line not in known_paths and not scorer.is_noise(line):
|
|
564
|
+
found.append(line)
|
|
565
|
+
return found
|
|
566
|
+
except Exception:
|
|
567
|
+
pass
|
|
568
|
+
|
|
569
|
+
# Python fallback — os.walk + text search
|
|
570
|
+
for dirpath, dirnames, filenames in os.walk(str(root)):
|
|
571
|
+
dirnames[:] = sorted(d for d in dirnames if d not in _DEEP_SCAN_NOISE_DIRS)
|
|
572
|
+
for fname in filenames:
|
|
573
|
+
if Path(fname).suffix.lower() not in _SRC_EXTENSIONS:
|
|
574
|
+
continue
|
|
575
|
+
full = os.path.join(dirpath, fname)
|
|
576
|
+
try:
|
|
577
|
+
rel = Path(full).relative_to(root)
|
|
578
|
+
rel_str = str(rel).replace("\\", "/")
|
|
579
|
+
except ValueError:
|
|
580
|
+
continue
|
|
581
|
+
if rel_str in known_paths or scorer.is_noise(rel_str):
|
|
582
|
+
continue
|
|
583
|
+
try:
|
|
584
|
+
content = Path(full).read_text(encoding="utf-8", errors="replace")
|
|
585
|
+
if symbol in content:
|
|
586
|
+
found.append(rel_str)
|
|
587
|
+
except OSError:
|
|
588
|
+
pass
|
|
589
|
+
|
|
590
|
+
return found
|
|
434
591
|
|
|
435
592
|
|
|
436
593
|
# ---------------------------------------------------------------------------
|
sourcecode/git_analyzer.py
CHANGED
|
@@ -198,6 +198,13 @@ def _is_hotspot_admin(path: str) -> bool:
|
|
|
198
198
|
for suffix in _HOTSPOT_ADMIN_SUFFIXES:
|
|
199
199
|
if filename.endswith(suffix):
|
|
200
200
|
return True
|
|
201
|
+
# Localized changelogs: CHANGELOG.zh-CN.md, CHANGES.en-US.md, etc.
|
|
202
|
+
_lower = filename.lower()
|
|
203
|
+
if _lower.startswith("changelog.") or _lower.startswith("changes."):
|
|
204
|
+
return True
|
|
205
|
+
# lerna.json and root-level package.json are modified by version bumps, not dev work
|
|
206
|
+
if filename in ("lerna.json",):
|
|
207
|
+
return True
|
|
201
208
|
return False
|
|
202
209
|
|
|
203
210
|
|
sourcecode/metrics_analyzer.py
CHANGED
|
@@ -219,6 +219,16 @@ class MetricsAnalyzer:
|
|
|
219
219
|
if fm.language != "unknown":
|
|
220
220
|
languages.add(fm.language)
|
|
221
221
|
|
|
222
|
+
# Emit explicit limitation when JS/TS files are present but complexity is unavailable.
|
|
223
|
+
# This prevents agents from assuming null complexity means "no functions found".
|
|
224
|
+
_js_ts_count = sum(1 for r in records if r.language in ("javascript", "typescript") and r.complexity_availability == "unavailable")
|
|
225
|
+
if _js_ts_count > 0:
|
|
226
|
+
limitations.append(
|
|
227
|
+
f"cyclomatic_complexity_unavailable: {_js_ts_count} JS/TS file(s) — "
|
|
228
|
+
"complexity requires tree-sitter (pip install 'sourcecode[ast]'). "
|
|
229
|
+
"null complexity fields are expected, not an error."
|
|
230
|
+
)
|
|
231
|
+
|
|
222
232
|
summary = MetricsSummary(
|
|
223
233
|
requested=True,
|
|
224
234
|
file_count=len(records),
|
sourcecode/serializer.py
CHANGED
|
@@ -923,7 +923,7 @@ def _contract_view_minimal(
|
|
|
923
923
|
|
|
924
924
|
result: dict[str, Any] = {
|
|
925
925
|
"schema_version": sm.metadata.schema_version,
|
|
926
|
-
"mode": "
|
|
926
|
+
"mode": "contract",
|
|
927
927
|
"project": project,
|
|
928
928
|
}
|
|
929
929
|
|
|
@@ -949,9 +949,28 @@ def _contract_view_minimal(
|
|
|
949
949
|
|
|
950
950
|
if sm.env_summary is not None and sm.env_summary.requested:
|
|
951
951
|
result["env_summary"] = asdict(sm.env_summary)
|
|
952
|
+
if sm.env_map:
|
|
953
|
+
# Include top-20 env entries sorted by required first, then name.
|
|
954
|
+
# Agents read the summary count but need the actual keys to act on them.
|
|
955
|
+
_sorted_env = sorted(sm.env_map, key=lambda e: (not getattr(e, "required", False), getattr(e, "name", "")))
|
|
956
|
+
result["env_map"] = [
|
|
957
|
+
{k: v for k, v in asdict(e).items() if v is not None and v != ""}
|
|
958
|
+
for e in _sorted_env[:20]
|
|
959
|
+
]
|
|
952
960
|
|
|
953
961
|
if sm.code_notes_summary is not None and sm.code_notes_summary.requested:
|
|
954
962
|
result["code_notes_summary"] = asdict(sm.code_notes_summary)
|
|
963
|
+
if sm.code_notes:
|
|
964
|
+
# Include top-20 notes by severity: BUG > FIXME > DEPRECATED > TODO > others.
|
|
965
|
+
_SEVERITY_ORDER = {"BUG": 0, "FIXME": 1, "DEPRECATED": 2, "TODO": 3, "HACK": 4, "WARNING": 5}
|
|
966
|
+
_sorted_notes = sorted(
|
|
967
|
+
sm.code_notes,
|
|
968
|
+
key=lambda n: (_SEVERITY_ORDER.get(getattr(n, "kind", "").upper(), 9), getattr(n, "path", "")),
|
|
969
|
+
)
|
|
970
|
+
result["code_notes"] = [
|
|
971
|
+
{k: v for k, v in asdict(n).items() if v is not None and v != ""}
|
|
972
|
+
for n in _sorted_notes[:20]
|
|
973
|
+
]
|
|
955
974
|
|
|
956
975
|
if sm.git_context is not None and sm.git_context.requested:
|
|
957
976
|
result["git_context"] = asdict(sm.git_context)
|
|
@@ -1,23 +1,23 @@
|
|
|
1
|
-
sourcecode/__init__.py,sha256=
|
|
1
|
+
sourcecode/__init__.py,sha256=Xha8jq1XWD5Ze_B5mEne-d5fOfBVVwnX-Ieg7spvalk,103
|
|
2
2
|
sourcecode/architecture_analyzer.py,sha256=H6noGgVArUJ25z1qC0fFA0KvJJeHZYyhKvKSkOyWHUk,23096
|
|
3
3
|
sourcecode/architecture_summary.py,sha256=rSY5MRiaz4N1YdG0pqDTDuFjSN7PO_Zplx-dtNzv2Yo,19985
|
|
4
|
-
sourcecode/ast_extractor.py,sha256=
|
|
4
|
+
sourcecode/ast_extractor.py,sha256=0OHQwTUBBc9lmqPLryVeB1z8dGIC6NhLlar800CD9oI,41129
|
|
5
5
|
sourcecode/classifier.py,sha256=GKTMN8qKZX7ponSwDJfN08RrasI4CVpq1_gFBgEopps,7093
|
|
6
|
-
sourcecode/cli.py,sha256=
|
|
6
|
+
sourcecode/cli.py,sha256=LSGytpRlyMFdmHugrP3USDhPb0hiigHn0PL9Ppac3R4,64852
|
|
7
7
|
sourcecode/code_notes_analyzer.py,sha256=rRd8bFYV0krjlxxQV0wenwE9K7pVpUQSR7KvSvUQKw4,9226
|
|
8
8
|
sourcecode/confidence_analyzer.py,sha256=HxJMPLI5ulqtkncnv98W4iVO6yMbpQo87VuxiuNbDmY,12167
|
|
9
9
|
sourcecode/context_summarizer.py,sha256=CiQrfBEzun949bWvmLabWoj2HhPn6Lw62ofqnsy0FlQ,6503
|
|
10
10
|
sourcecode/contract_model.py,sha256=vr-9WHf0EBlbnuZGtTpUvSnbbjCBsm0q0tpLyxBJ-xI,3287
|
|
11
|
-
sourcecode/contract_pipeline.py,sha256=
|
|
11
|
+
sourcecode/contract_pipeline.py,sha256=Pu9SjgkngLgWoFaNj2ftKsk4lPngophW4840h0FvuEw,23187
|
|
12
12
|
sourcecode/coverage_parser.py,sha256=q0LeZJaX1bnntLu-ImksdBsMlpsVmk_iUfSaB4eaJGo,19702
|
|
13
13
|
sourcecode/dependency_analyzer.py,sha256=Exq0BfInvfS5iAg9xAr6WI2uPNuotkIudTKcYJcRhB8,52757
|
|
14
14
|
sourcecode/doc_analyzer.py,sha256=Ec3orx6vBKsh5cNM3-F4y2Got2KuKx8w3dErwtdtM-A,19891
|
|
15
15
|
sourcecode/entrypoint_classifier.py,sha256=a69dMGyxCTd_LOm3oqj-EXWpRmbmeujN7T1mr2eJ1as,3877
|
|
16
16
|
sourcecode/env_analyzer.py,sha256=slvq-eT24RVMNczLNDlZbe0hU8JXIIPxybqubvrrnSQ,14409
|
|
17
17
|
sourcecode/file_classifier.py,sha256=_KfFIIolharaIxbSTrCkaWauQIqNHCyor_n47RGyDh8,8577
|
|
18
|
-
sourcecode/git_analyzer.py,sha256=
|
|
18
|
+
sourcecode/git_analyzer.py,sha256=s7tJTd_GAczhrH7j9JhBNp7ozhkW3lzBN0TMNwFqJwE,9977
|
|
19
19
|
sourcecode/graph_analyzer.py,sha256=hMOsLLz9B0UnQ4xwbHdgr3bFvqpw0bQ8kN-xmEn3Krk,64156
|
|
20
|
-
sourcecode/metrics_analyzer.py,sha256=
|
|
20
|
+
sourcecode/metrics_analyzer.py,sha256=e2cFwB9XubFq_dIVsP2PLjpr4wX0N6ulb3ol3sGDUeo,20777
|
|
21
21
|
sourcecode/prepare_context.py,sha256=vxEzr8czS3MFbdTx4hBJQlJLrl9cuvbHdL3ZokxFkvo,31384
|
|
22
22
|
sourcecode/redactor.py,sha256=xuGcadGEHaPw4qZXlMDvzMCsr4VOkdp3oBQptHyJk8c,2884
|
|
23
23
|
sourcecode/relevance_scorer.py,sha256=ea7_7AHVgahVEWK3ebKOpG67agzG_pGICu5f2KgzrIA,8133
|
|
@@ -25,7 +25,7 @@ sourcecode/runtime_classifier.py,sha256=zWX3r3HCKHc-qtIobErOa8aKMmaoPYREtJKvPcBG
|
|
|
25
25
|
sourcecode/scanner.py,sha256=aM3h9-DCQ3xKpeHpHYdo2vX6T5P95HA_YwZbkAVNwmo,8288
|
|
26
26
|
sourcecode/schema.py,sha256=AShu_bcP30TYaw4Dl1nYy8aFnBCKxrUli3LhU3MZTjs,20739
|
|
27
27
|
sourcecode/semantic_analyzer.py,sha256=asQfJf-EhzYaOTA-iMuZsrVXtbW7SV2WEKCxgsxa88Y,79413
|
|
28
|
-
sourcecode/serializer.py,sha256=
|
|
28
|
+
sourcecode/serializer.py,sha256=uQGcytdaaM3qzxXcZ2NMjXYvzdvT9PP45960t-Thgqk,51128
|
|
29
29
|
sourcecode/summarizer.py,sha256=ZuzIdm3t8A-d5MuQL0TSNLrd-L0IQIuguIxeNXMNJf8,16070
|
|
30
30
|
sourcecode/tree_utils.py,sha256=Fj9OIuUksBvgibNd3feog0sMDjVypJzPexp5lvMoYWI,1424
|
|
31
31
|
sourcecode/workspace.py,sha256=fQlVoNx8S-fSHpKoJ0JBvEHCFkxszH0KZVJed1i3TRk,6845
|
|
@@ -56,8 +56,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
|
|
|
56
56
|
sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
|
|
57
57
|
sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
|
|
58
58
|
sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
|
|
59
|
-
sourcecode-0.
|
|
60
|
-
sourcecode-0.
|
|
61
|
-
sourcecode-0.
|
|
62
|
-
sourcecode-0.
|
|
63
|
-
sourcecode-0.
|
|
59
|
+
sourcecode-0.37.0.dist-info/METADATA,sha256=75XE0yybH_O7U8rxcP6ZY2MdvibRaxALg4io5V9RsU4,25209
|
|
60
|
+
sourcecode-0.37.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
61
|
+
sourcecode-0.37.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
|
|
62
|
+
sourcecode-0.37.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
|
|
63
|
+
sourcecode-0.37.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|