sourcecode 0.35.0__py3-none-any.whl → 0.36.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sourcecode/__init__.py +1 -1
- sourcecode/ast_extractor.py +21 -6
- sourcecode/contract_pipeline.py +178 -21
- {sourcecode-0.35.0.dist-info → sourcecode-0.36.0.dist-info}/METADATA +1 -1
- {sourcecode-0.35.0.dist-info → sourcecode-0.36.0.dist-info}/RECORD +8 -8
- {sourcecode-0.35.0.dist-info → sourcecode-0.36.0.dist-info}/WHEEL +0 -0
- {sourcecode-0.35.0.dist-info → sourcecode-0.36.0.dist-info}/entry_points.txt +0 -0
- {sourcecode-0.35.0.dist-info → sourcecode-0.36.0.dist-info}/licenses/LICENSE +0 -0
sourcecode/__init__.py
CHANGED
sourcecode/ast_extractor.py
CHANGED
|
@@ -296,11 +296,20 @@ def _ts_exports(root: Any, src: bytes) -> list[ExportRecord]:
|
|
|
296
296
|
handled = True
|
|
297
297
|
|
|
298
298
|
if not handled and is_default:
|
|
299
|
-
# export default <expression>
|
|
299
|
+
# export default <expression> — preserve local binding name when available
|
|
300
300
|
for child in node.children:
|
|
301
301
|
if child.type not in ("export", "default", ";") and not child.type.startswith("comment"):
|
|
302
|
-
|
|
303
|
-
|
|
302
|
+
if child.type in ("identifier", "type_identifier"):
|
|
303
|
+
# export default app → name="app"
|
|
304
|
+
name = _text(child, src)
|
|
305
|
+
elif child.type == "call_expression":
|
|
306
|
+
# export default defineConfig({}) → name="defineConfig"
|
|
307
|
+
fn_n = _find_child(child, "identifier")
|
|
308
|
+
name = _text(fn_n, src) if fn_n else "default"
|
|
309
|
+
else:
|
|
310
|
+
# object/array/other expression — look one level deep
|
|
311
|
+
name_n = _find_child(child, "identifier", "type_identifier")
|
|
312
|
+
name = _text(name_n, src) if name_n else "default"
|
|
304
313
|
records.append(ExportRecord(name=name, kind="default"))
|
|
305
314
|
break
|
|
306
315
|
|
|
@@ -692,7 +701,7 @@ def _heuristic_ts_types(source: str) -> list[TypeDefinition]:
|
|
|
692
701
|
return types
|
|
693
702
|
|
|
694
703
|
|
|
695
|
-
def _extract_ts_js_heuristic(path: str, source: str, language: str) -> FileContract:
|
|
704
|
+
def _extract_ts_js_heuristic(path: str, source: str, language: str, *, ts_installed: bool = False) -> FileContract:
|
|
696
705
|
imports = _heuristic_ts_imports(source)
|
|
697
706
|
exports = _heuristic_ts_exports(source)
|
|
698
707
|
exported_names = {e.name for e in exports}
|
|
@@ -711,6 +720,12 @@ def _extract_ts_js_heuristic(path: str, source: str, language: str) -> FileContr
|
|
|
711
720
|
if not imp.source.startswith(".") and not imp.source.startswith("/")
|
|
712
721
|
})
|
|
713
722
|
|
|
723
|
+
# Distinguish "tree-sitter absent" from "language parser not loaded"
|
|
724
|
+
if ts_installed:
|
|
725
|
+
lim = f"ts_lang_missing: tree-sitter parser for {language!r} not loaded; install sourcecode[ast]"
|
|
726
|
+
else:
|
|
727
|
+
lim = "tree_sitter_unavailable: install sourcecode[ast] for full TS/JS extraction"
|
|
728
|
+
|
|
714
729
|
return FileContract(
|
|
715
730
|
path=path,
|
|
716
731
|
language=language,
|
|
@@ -721,7 +736,7 @@ def _extract_ts_js_heuristic(path: str, source: str, language: str) -> FileContr
|
|
|
721
736
|
hooks_used=hooks_used,
|
|
722
737
|
dependencies=deps,
|
|
723
738
|
extraction_method="heuristic",
|
|
724
|
-
limitations=[
|
|
739
|
+
limitations=[lim],
|
|
725
740
|
)
|
|
726
741
|
|
|
727
742
|
|
|
@@ -1039,7 +1054,7 @@ class AstExtractor:
|
|
|
1039
1054
|
if lang_obj is not None:
|
|
1040
1055
|
contract = _extract_ts_js_tree_sitter(rel_path, source, lang_obj, language)
|
|
1041
1056
|
else:
|
|
1042
|
-
contract = _extract_ts_js_heuristic(rel_path, source, language)
|
|
1057
|
+
contract = _extract_ts_js_heuristic(rel_path, source, language, ts_installed=True)
|
|
1043
1058
|
else:
|
|
1044
1059
|
contract = _extract_ts_js_heuristic(rel_path, source, language)
|
|
1045
1060
|
|
sourcecode/contract_pipeline.py
CHANGED
|
@@ -8,6 +8,8 @@ Produces a list of FileContracts ranked by semantic importance,
|
|
|
8
8
|
with fan-in/fan-out computed from the import graph.
|
|
9
9
|
"""
|
|
10
10
|
|
|
11
|
+
import os
|
|
12
|
+
import re
|
|
11
13
|
import subprocess
|
|
12
14
|
from collections import Counter
|
|
13
15
|
from pathlib import Path
|
|
@@ -275,6 +277,16 @@ class ContractPipeline:
|
|
|
275
277
|
# 8. Symbol filter — keep files that define or import the symbol
|
|
276
278
|
if symbol:
|
|
277
279
|
contracts = _filter_by_symbol(contracts, symbol)
|
|
280
|
+
# When shallow scan missed the defining file (deep monorepo), fall back
|
|
281
|
+
# to a grep-based filesystem search over the full directory tree.
|
|
282
|
+
if not contracts:
|
|
283
|
+
contracts = self._symbol_deep_scan(
|
|
284
|
+
root, symbol,
|
|
285
|
+
known_paths=set(src_paths),
|
|
286
|
+
entry_paths=entry_paths,
|
|
287
|
+
changed_files=changed_files,
|
|
288
|
+
scorer=scorer,
|
|
289
|
+
)
|
|
278
290
|
|
|
279
291
|
# 9. Entrypoints-only filter
|
|
280
292
|
if entrypoints_only and not symbol:
|
|
@@ -340,6 +352,38 @@ class ContractPipeline:
|
|
|
340
352
|
# Default: relevance
|
|
341
353
|
return sorted(contracts, key=lambda c: (-c.is_entrypoint, -c.relevance_score))
|
|
342
354
|
|
|
355
|
+
def _symbol_deep_scan(
|
|
356
|
+
self,
|
|
357
|
+
root: Path,
|
|
358
|
+
symbol: str,
|
|
359
|
+
known_paths: set[str],
|
|
360
|
+
entry_paths: set[str],
|
|
361
|
+
changed_files: set[str],
|
|
362
|
+
scorer: RelevanceScorer,
|
|
363
|
+
) -> list[FileContract]:
|
|
364
|
+
"""Grep-based fallback when the shallow scan missed the defining files.
|
|
365
|
+
|
|
366
|
+
Searches the full directory tree for source files containing *symbol*,
|
|
367
|
+
extracts contracts for candidates not already processed, then re-applies
|
|
368
|
+
the symbol filter. Fan-in/fan-out are not computed for these contracts.
|
|
369
|
+
"""
|
|
370
|
+
candidates = _find_symbol_files(root, symbol, known_paths, scorer)
|
|
371
|
+
if not candidates:
|
|
372
|
+
return []
|
|
373
|
+
|
|
374
|
+
extra: list[FileContract] = []
|
|
375
|
+
for rel_path in candidates[:300]: # cap to prevent excessive extraction
|
|
376
|
+
abs_path = root / rel_path
|
|
377
|
+
contract = self._extractor.extract(abs_path, root)
|
|
378
|
+
if contract is None:
|
|
379
|
+
continue
|
|
380
|
+
contract.is_entrypoint = rel_path in entry_paths
|
|
381
|
+
contract.is_changed = rel_path in changed_files
|
|
382
|
+
contract.relevance_score = scorer.score(rel_path)
|
|
383
|
+
extra.append(contract)
|
|
384
|
+
|
|
385
|
+
return _filter_by_symbol(extra, symbol)
|
|
386
|
+
|
|
343
387
|
|
|
344
388
|
# ---------------------------------------------------------------------------
|
|
345
389
|
# Helpers
|
|
@@ -354,7 +398,6 @@ def _compress_contract_types(c: FileContract) -> None:
|
|
|
354
398
|
(r"React\.ReactNode", "ReactNode"),
|
|
355
399
|
(r"React\.ReactElement", "ReactElement"),
|
|
356
400
|
]
|
|
357
|
-
import re
|
|
358
401
|
for fn in c.functions:
|
|
359
402
|
for pattern, repl in _replacements:
|
|
360
403
|
fn.signature = re.sub(pattern, repl, fn.signature)
|
|
@@ -396,41 +439,155 @@ def _limit_symbols(contracts: list[FileContract], max_symbols: int) -> list[File
|
|
|
396
439
|
# ---------------------------------------------------------------------------
|
|
397
440
|
|
|
398
441
|
def _filter_by_symbol(contracts: list[FileContract], symbol: str) -> list[FileContract]:
|
|
399
|
-
"""Return contracts that define or
|
|
442
|
+
"""Return contracts that define, import, or structurally reference *symbol*.
|
|
400
443
|
|
|
401
|
-
|
|
402
|
-
1. Exact match
|
|
403
|
-
2. Case-insensitive
|
|
404
|
-
3.
|
|
444
|
+
Four tiers applied in order:
|
|
445
|
+
1. Exact name match — export/function/type names.
|
|
446
|
+
2. Case-insensitive name match when tier 1 yields nothing.
|
|
447
|
+
3. Import symbol match — name appears in import symbol list.
|
|
448
|
+
4. Type-reference match — symbol in extends clauses, field types, or
|
|
449
|
+
function signatures (word-boundary). Only used when tiers 1-3 fail.
|
|
405
450
|
|
|
406
|
-
Defining contracts are ranked first; importers follow.
|
|
451
|
+
Defining contracts are ranked first; importers and references follow.
|
|
407
452
|
"""
|
|
408
|
-
|
|
453
|
+
sym_l = symbol.lower()
|
|
454
|
+
word_re = re.compile(
|
|
455
|
+
r"(?<![A-Za-z0-9_])" + re.escape(symbol) + r"(?![A-Za-z0-9_])",
|
|
456
|
+
re.IGNORECASE,
|
|
457
|
+
)
|
|
458
|
+
|
|
459
|
+
def _defines(c: FileContract, case: bool) -> bool:
|
|
409
460
|
cmp = (lambda a, b: a.lower() == b.lower()) if case else (lambda a, b: a == b)
|
|
410
461
|
return (
|
|
411
|
-
any(cmp(e.name,
|
|
412
|
-
or any(cmp(f.name,
|
|
413
|
-
or any(cmp(t.name,
|
|
462
|
+
any(cmp(e.name, symbol) for e in c.exports)
|
|
463
|
+
or any(cmp(f.name, symbol) for f in c.functions)
|
|
464
|
+
or any(cmp(t.name, symbol) for t in c.types)
|
|
414
465
|
)
|
|
415
466
|
|
|
416
|
-
def
|
|
467
|
+
def _imports_sym(c: FileContract, case: bool) -> bool:
|
|
417
468
|
if case:
|
|
418
|
-
sym_l = sym.lower()
|
|
419
469
|
return any(sym_l == s.lower() for imp in c.imports for s in imp.symbols)
|
|
420
|
-
return any(
|
|
470
|
+
return any(symbol in imp.symbols for imp in c.imports)
|
|
471
|
+
|
|
472
|
+
def _references_type(c: FileContract) -> bool:
|
|
473
|
+
"""Tier 4: symbol appears in extends clauses, field types, or signatures."""
|
|
474
|
+
for t in c.types:
|
|
475
|
+
if any(sym_l in ext.lower() for ext in t.extends):
|
|
476
|
+
return True
|
|
477
|
+
for field in t.fields:
|
|
478
|
+
if sym_l in field.type.lower():
|
|
479
|
+
return True
|
|
480
|
+
for f in c.functions:
|
|
481
|
+
if word_re.search(f.signature):
|
|
482
|
+
return True
|
|
483
|
+
return False
|
|
421
484
|
|
|
422
|
-
#
|
|
423
|
-
defining = [c for c in contracts if _defines(c,
|
|
485
|
+
# Tier 1: exact name match
|
|
486
|
+
defining = [c for c in contracts if _defines(c, case=False)]
|
|
487
|
+
# Tier 2: case-insensitive name match
|
|
424
488
|
if not defining:
|
|
425
|
-
defining = [c for c in contracts if _defines(c,
|
|
489
|
+
defining = [c for c in contracts if _defines(c, case=True)]
|
|
426
490
|
|
|
427
|
-
importer_paths = {c.path for c in contracts if _imports(c, symbol, case=len(defining) == 0)}
|
|
428
|
-
# Exclude files already in defining set
|
|
429
491
|
defining_paths = {c.path for c in defining}
|
|
492
|
+
|
|
493
|
+
# Tier 3: import matching (case-insensitive when no definers found)
|
|
494
|
+
ci_imports = len(defining) == 0
|
|
495
|
+
importer_paths = {c.path for c in contracts if _imports_sym(c, case=ci_imports)}
|
|
430
496
|
importers = [c for c in contracts if c.path in importer_paths and c.path not in defining_paths]
|
|
431
497
|
|
|
432
|
-
|
|
433
|
-
|
|
498
|
+
# Tier 4: type-reference matching (only when tiers 1-3 yield nothing)
|
|
499
|
+
references: list[FileContract] = []
|
|
500
|
+
if not defining and not importers:
|
|
501
|
+
ref_paths = {c.path for c in contracts if _references_type(c)}
|
|
502
|
+
references = [c for c in contracts if c.path in ref_paths]
|
|
503
|
+
|
|
504
|
+
# Merge in priority order: defining > importers > type-references
|
|
505
|
+
seen: set[str] = set()
|
|
506
|
+
merged: list[FileContract] = []
|
|
507
|
+
for c in defining + importers + references:
|
|
508
|
+
if c.path not in seen:
|
|
509
|
+
seen.add(c.path)
|
|
510
|
+
merged.append(c)
|
|
511
|
+
|
|
512
|
+
return sorted(merged, key=lambda c: (
|
|
513
|
+
c.path not in defining_paths,
|
|
514
|
+
c.path not in importer_paths,
|
|
515
|
+
-c.relevance_score,
|
|
516
|
+
))
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
# ---------------------------------------------------------------------------
|
|
520
|
+
# Deep symbol scan — grep-based fallback for shallow-scanned repos
|
|
521
|
+
# ---------------------------------------------------------------------------
|
|
522
|
+
|
|
523
|
+
_DEEP_SCAN_NOISE_DIRS: frozenset[str] = frozenset({
|
|
524
|
+
"node_modules", ".git", "dist", "build", "__pycache__",
|
|
525
|
+
".venv", "venv", "target", ".next", ".nuxt", ".turbo", "coverage",
|
|
526
|
+
".nyc_output", ".mypy_cache", ".pytest_cache",
|
|
527
|
+
})
|
|
528
|
+
|
|
529
|
+
|
|
530
|
+
def _find_symbol_files(
|
|
531
|
+
root: Path,
|
|
532
|
+
symbol: str,
|
|
533
|
+
known_paths: set[str],
|
|
534
|
+
scorer: RelevanceScorer,
|
|
535
|
+
) -> list[str]:
|
|
536
|
+
"""Find source files outside *known_paths* that contain *symbol* as text.
|
|
537
|
+
|
|
538
|
+
Uses subprocess grep when available (fast); falls back to os.walk + read.
|
|
539
|
+
Returns repo-relative paths, noise-filtered.
|
|
540
|
+
"""
|
|
541
|
+
found: list[str] = []
|
|
542
|
+
|
|
543
|
+
# Try grep (fast, available on Linux/Mac)
|
|
544
|
+
try:
|
|
545
|
+
result = subprocess.run(
|
|
546
|
+
[
|
|
547
|
+
"grep", "-rl",
|
|
548
|
+
"--include=*.ts", "--include=*.tsx",
|
|
549
|
+
"--include=*.js", "--include=*.jsx",
|
|
550
|
+
"--include=*.py",
|
|
551
|
+
symbol, ".",
|
|
552
|
+
],
|
|
553
|
+
cwd=str(root),
|
|
554
|
+
capture_output=True,
|
|
555
|
+
text=True,
|
|
556
|
+
timeout=20,
|
|
557
|
+
)
|
|
558
|
+
for line in result.stdout.splitlines():
|
|
559
|
+
line = line.strip()
|
|
560
|
+
if line.startswith("./"):
|
|
561
|
+
line = line[2:]
|
|
562
|
+
line = line.replace("\\", "/")
|
|
563
|
+
if line and line not in known_paths and not scorer.is_noise(line):
|
|
564
|
+
found.append(line)
|
|
565
|
+
return found
|
|
566
|
+
except Exception:
|
|
567
|
+
pass
|
|
568
|
+
|
|
569
|
+
# Python fallback — os.walk + text search
|
|
570
|
+
for dirpath, dirnames, filenames in os.walk(str(root)):
|
|
571
|
+
dirnames[:] = sorted(d for d in dirnames if d not in _DEEP_SCAN_NOISE_DIRS)
|
|
572
|
+
for fname in filenames:
|
|
573
|
+
if Path(fname).suffix.lower() not in _SRC_EXTENSIONS:
|
|
574
|
+
continue
|
|
575
|
+
full = os.path.join(dirpath, fname)
|
|
576
|
+
try:
|
|
577
|
+
rel = Path(full).relative_to(root)
|
|
578
|
+
rel_str = str(rel).replace("\\", "/")
|
|
579
|
+
except ValueError:
|
|
580
|
+
continue
|
|
581
|
+
if rel_str in known_paths or scorer.is_noise(rel_str):
|
|
582
|
+
continue
|
|
583
|
+
try:
|
|
584
|
+
content = Path(full).read_text(encoding="utf-8", errors="replace")
|
|
585
|
+
if symbol in content:
|
|
586
|
+
found.append(rel_str)
|
|
587
|
+
except OSError:
|
|
588
|
+
pass
|
|
589
|
+
|
|
590
|
+
return found
|
|
434
591
|
|
|
435
592
|
|
|
436
593
|
# ---------------------------------------------------------------------------
|
|
@@ -1,14 +1,14 @@
|
|
|
1
|
-
sourcecode/__init__.py,sha256=
|
|
1
|
+
sourcecode/__init__.py,sha256=X_JznUsRmn3yELdS1V1zFgnDITiz-h3a451D5eLoTnY,103
|
|
2
2
|
sourcecode/architecture_analyzer.py,sha256=H6noGgVArUJ25z1qC0fFA0KvJJeHZYyhKvKSkOyWHUk,23096
|
|
3
3
|
sourcecode/architecture_summary.py,sha256=rSY5MRiaz4N1YdG0pqDTDuFjSN7PO_Zplx-dtNzv2Yo,19985
|
|
4
|
-
sourcecode/ast_extractor.py,sha256=
|
|
4
|
+
sourcecode/ast_extractor.py,sha256=0OHQwTUBBc9lmqPLryVeB1z8dGIC6NhLlar800CD9oI,41129
|
|
5
5
|
sourcecode/classifier.py,sha256=GKTMN8qKZX7ponSwDJfN08RrasI4CVpq1_gFBgEopps,7093
|
|
6
6
|
sourcecode/cli.py,sha256=O1ObfcxvhMYMXjd6otx6G0fE9ethIAX4qDUpUUjOxgY,63167
|
|
7
7
|
sourcecode/code_notes_analyzer.py,sha256=rRd8bFYV0krjlxxQV0wenwE9K7pVpUQSR7KvSvUQKw4,9226
|
|
8
8
|
sourcecode/confidence_analyzer.py,sha256=HxJMPLI5ulqtkncnv98W4iVO6yMbpQo87VuxiuNbDmY,12167
|
|
9
9
|
sourcecode/context_summarizer.py,sha256=CiQrfBEzun949bWvmLabWoj2HhPn6Lw62ofqnsy0FlQ,6503
|
|
10
10
|
sourcecode/contract_model.py,sha256=vr-9WHf0EBlbnuZGtTpUvSnbbjCBsm0q0tpLyxBJ-xI,3287
|
|
11
|
-
sourcecode/contract_pipeline.py,sha256=
|
|
11
|
+
sourcecode/contract_pipeline.py,sha256=Pu9SjgkngLgWoFaNj2ftKsk4lPngophW4840h0FvuEw,23187
|
|
12
12
|
sourcecode/coverage_parser.py,sha256=q0LeZJaX1bnntLu-ImksdBsMlpsVmk_iUfSaB4eaJGo,19702
|
|
13
13
|
sourcecode/dependency_analyzer.py,sha256=Exq0BfInvfS5iAg9xAr6WI2uPNuotkIudTKcYJcRhB8,52757
|
|
14
14
|
sourcecode/doc_analyzer.py,sha256=Ec3orx6vBKsh5cNM3-F4y2Got2KuKx8w3dErwtdtM-A,19891
|
|
@@ -56,8 +56,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
|
|
|
56
56
|
sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
|
|
57
57
|
sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
|
|
58
58
|
sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
|
|
59
|
-
sourcecode-0.
|
|
60
|
-
sourcecode-0.
|
|
61
|
-
sourcecode-0.
|
|
62
|
-
sourcecode-0.
|
|
63
|
-
sourcecode-0.
|
|
59
|
+
sourcecode-0.36.0.dist-info/METADATA,sha256=vQc2-46U5UyDGNaaVWbr5Jb7_eBOgJAYIPpYAD_aiwA,25209
|
|
60
|
+
sourcecode-0.36.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
61
|
+
sourcecode-0.36.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
|
|
62
|
+
sourcecode-0.36.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
|
|
63
|
+
sourcecode-0.36.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|