sourcecode 0.34.0__py3-none-any.whl → 0.35.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sourcecode/__init__.py +1 -1
- sourcecode/ast_extractor.py +23 -3
- sourcecode/contract_pipeline.py +67 -19
- sourcecode/relevance_scorer.py +18 -5
- sourcecode/serializer.py +3 -1
- {sourcecode-0.34.0.dist-info → sourcecode-0.35.0.dist-info}/METADATA +1 -1
- {sourcecode-0.34.0.dist-info → sourcecode-0.35.0.dist-info}/RECORD +10 -10
- {sourcecode-0.34.0.dist-info → sourcecode-0.35.0.dist-info}/WHEEL +0 -0
- {sourcecode-0.34.0.dist-info → sourcecode-0.35.0.dist-info}/entry_points.txt +0 -0
- {sourcecode-0.34.0.dist-info → sourcecode-0.35.0.dist-info}/licenses/LICENSE +0 -0
sourcecode/__init__.py
CHANGED
sourcecode/ast_extractor.py
CHANGED
|
@@ -373,7 +373,8 @@ def _ts_types(root: Any, src: bytes) -> list[TypeDefinition]:
|
|
|
373
373
|
continue
|
|
374
374
|
name = _text(name_n, src)
|
|
375
375
|
fields: list[TypeField] = []
|
|
376
|
-
|
|
376
|
+
# "interface_body" in tree-sitter-typescript >= 0.21; "object_type" in older builds
|
|
377
|
+
body_n = _find_child(node, "interface_body", "object_type")
|
|
377
378
|
if body_n:
|
|
378
379
|
for prop in _walk(body_n):
|
|
379
380
|
if prop.type in ("property_signature", "method_signature"):
|
|
@@ -385,7 +386,7 @@ def _ts_types(root: Any, src: bytes) -> list[TypeDefinition]:
|
|
|
385
386
|
required = not any(c.type == "?" for c in prop.children)
|
|
386
387
|
fields.append(TypeField(name=prop_name, type=type_text, required=required))
|
|
387
388
|
extends: list[str] = []
|
|
388
|
-
heritage_n = _find_child(node, "extends_type_clause", "class_heritage")
|
|
389
|
+
heritage_n = _find_child(node, "extends_type_clause", "extends_clause", "class_heritage")
|
|
389
390
|
if heritage_n:
|
|
390
391
|
for ext_n in _walk(heritage_n):
|
|
391
392
|
if ext_n.type == "type_identifier":
|
|
@@ -429,6 +430,25 @@ def _ts_hooks(root: Any, src: bytes) -> list[str]:
|
|
|
429
430
|
return sorted(used)
|
|
430
431
|
|
|
431
432
|
|
|
433
|
+
def _merge_imports(imports: list[ImportRecord]) -> list[ImportRecord]:
|
|
434
|
+
"""Merge multiple ImportRecords with the same source into one.
|
|
435
|
+
|
|
436
|
+
Tree-sitter correctly captures `import { A }` and `import type { B }` from
|
|
437
|
+
the same module as two separate statements. Merging them produces a compact,
|
|
438
|
+
predictable contract where each source appears exactly once.
|
|
439
|
+
"""
|
|
440
|
+
merged: dict[str, ImportRecord] = {}
|
|
441
|
+
for imp in imports:
|
|
442
|
+
if imp.source in merged:
|
|
443
|
+
existing = merged[imp.source]
|
|
444
|
+
combined_symbols = sorted(set(existing.symbols) | set(imp.symbols))
|
|
445
|
+
kind = existing.kind if existing.kind != "side_effect" else imp.kind
|
|
446
|
+
merged[imp.source] = ImportRecord(source=imp.source, symbols=combined_symbols, kind=kind)
|
|
447
|
+
else:
|
|
448
|
+
merged[imp.source] = imp
|
|
449
|
+
return list(merged.values())
|
|
450
|
+
|
|
451
|
+
|
|
432
452
|
def _extract_ts_js_tree_sitter(path: str, source: str, lang_obj: Any, language: str) -> FileContract:
|
|
433
453
|
try:
|
|
434
454
|
parser = _get_parser(lang_obj)
|
|
@@ -436,7 +456,7 @@ def _extract_ts_js_tree_sitter(path: str, source: str, lang_obj: Any, language:
|
|
|
436
456
|
tree = parser.parse(src_bytes)
|
|
437
457
|
root = tree.root_node
|
|
438
458
|
|
|
439
|
-
imports = _ts_imports(root, src_bytes)
|
|
459
|
+
imports = _merge_imports(_ts_imports(root, src_bytes))
|
|
440
460
|
exports = _ts_exports(root, src_bytes)
|
|
441
461
|
exported_names = {e.name for e in exports}
|
|
442
462
|
functions = _ts_functions(root, src_bytes, exported_names)
|
sourcecode/contract_pipeline.py
CHANGED
|
@@ -25,6 +25,23 @@ from sourcecode.schema import EntryPoint, MonorepoPackageInfo
|
|
|
25
25
|
_MAX_FILES = 500 # hard cap on files extracted per run
|
|
26
26
|
_SRC_EXTENSIONS: frozenset[str] = frozenset(_LANGUAGE_MAP.keys())
|
|
27
27
|
|
|
28
|
+
# Role-based score adjustments applied after contract extraction.
|
|
29
|
+
# Runtime roles get a boost; config/util are neutral or penalized.
|
|
30
|
+
_ROLE_SCORE: dict[str, float] = {
|
|
31
|
+
"entrypoint": 0.15,
|
|
32
|
+
"service": 0.10,
|
|
33
|
+
"route": 0.10,
|
|
34
|
+
"api": 0.08,
|
|
35
|
+
"middleware": 0.06,
|
|
36
|
+
"store": 0.05,
|
|
37
|
+
"model": 0.05,
|
|
38
|
+
"hook": 0.05,
|
|
39
|
+
"component": 0.03,
|
|
40
|
+
"util": 0.00,
|
|
41
|
+
"config": -0.10,
|
|
42
|
+
"unknown": 0.00,
|
|
43
|
+
}
|
|
44
|
+
|
|
28
45
|
RankStrategy = Literal["relevance", "centrality", "git-churn"]
|
|
29
46
|
|
|
30
47
|
|
|
@@ -206,9 +223,9 @@ class ContractPipeline:
|
|
|
206
223
|
if changed_only:
|
|
207
224
|
src_paths = [p for p in src_paths if p in changed_files]
|
|
208
225
|
|
|
209
|
-
# Apply max_files cap
|
|
210
|
-
|
|
211
|
-
|
|
226
|
+
# Apply max_files cap — bypass when symbol search to ensure defining files are found.
|
|
227
|
+
# A symbol query over a large repo needs all files; result set is small after filtering.
|
|
228
|
+
if symbol is None and len(src_paths) > self.max_files:
|
|
212
229
|
src_paths = sorted(
|
|
213
230
|
src_paths,
|
|
214
231
|
key=lambda p: (p in entry_paths, scorer.score(p)),
|
|
@@ -255,23 +272,9 @@ class ContractPipeline:
|
|
|
255
272
|
# 7. Rank
|
|
256
273
|
contracts = self._rank(contracts, rank_by)
|
|
257
274
|
|
|
258
|
-
# 8. Symbol filter — keep files that
|
|
275
|
+
# 8. Symbol filter — keep files that define or import the symbol
|
|
259
276
|
if symbol:
|
|
260
|
-
|
|
261
|
-
c for c in contracts
|
|
262
|
-
if any(e.name == symbol for e in c.exports)
|
|
263
|
-
or any(f.name == symbol for f in c.functions)
|
|
264
|
-
or symbol in {t.name for t in c.types}
|
|
265
|
-
]
|
|
266
|
-
# Also pull in direct importers (fan_in sourcing)
|
|
267
|
-
importer_paths = {
|
|
268
|
-
c.path for c in contracts
|
|
269
|
-
for imp in c.imports
|
|
270
|
-
if symbol in imp.symbols
|
|
271
|
-
}
|
|
272
|
-
importer_contracts = [c for c in contracts if c.path in importer_paths]
|
|
273
|
-
symbol_contracts = list({c.path: c for c in symbol_contracts + importer_contracts}.values())
|
|
274
|
-
contracts = sorted(symbol_contracts, key=lambda c: -c.relevance_score)
|
|
277
|
+
contracts = _filter_by_symbol(contracts, symbol)
|
|
275
278
|
|
|
276
279
|
# 9. Entrypoints-only filter
|
|
277
280
|
if entrypoints_only and not symbol:
|
|
@@ -323,6 +326,9 @@ class ContractPipeline:
|
|
|
323
326
|
churn_score = min(churn.get(c.path, 0) / 20.0, 0.1)
|
|
324
327
|
base += churn_score
|
|
325
328
|
|
|
329
|
+
# Role-based boost: runtime roles score higher than auxiliary
|
|
330
|
+
base += _ROLE_SCORE.get(c.role, 0.0)
|
|
331
|
+
|
|
326
332
|
return min(1.0, base)
|
|
327
333
|
|
|
328
334
|
def _rank(self, contracts: list[FileContract], rank_by: RankStrategy) -> list[FileContract]:
|
|
@@ -385,6 +391,48 @@ def _limit_symbols(contracts: list[FileContract], max_symbols: int) -> list[File
|
|
|
385
391
|
return result
|
|
386
392
|
|
|
387
393
|
|
|
394
|
+
# ---------------------------------------------------------------------------
|
|
395
|
+
# Symbol-aware filter
|
|
396
|
+
# ---------------------------------------------------------------------------
|
|
397
|
+
|
|
398
|
+
def _filter_by_symbol(contracts: list[FileContract], symbol: str) -> list[FileContract]:
|
|
399
|
+
"""Return contracts that define or import *symbol*.
|
|
400
|
+
|
|
401
|
+
Matching strategy:
|
|
402
|
+
1. Exact match on export/function/type names.
|
|
403
|
+
2. Case-insensitive fallback when exact match yields nothing.
|
|
404
|
+
3. Importer contracts: files that name the symbol in their imports.
|
|
405
|
+
|
|
406
|
+
Defining contracts are ranked first; importers follow.
|
|
407
|
+
"""
|
|
408
|
+
def _defines(c: FileContract, sym: str, case: bool) -> bool:
|
|
409
|
+
cmp = (lambda a, b: a.lower() == b.lower()) if case else (lambda a, b: a == b)
|
|
410
|
+
return (
|
|
411
|
+
any(cmp(e.name, sym) for e in c.exports)
|
|
412
|
+
or any(cmp(f.name, sym) for f in c.functions)
|
|
413
|
+
or any(cmp(t.name, sym) for t in c.types)
|
|
414
|
+
)
|
|
415
|
+
|
|
416
|
+
def _imports(c: FileContract, sym: str, case: bool) -> bool:
|
|
417
|
+
if case:
|
|
418
|
+
sym_l = sym.lower()
|
|
419
|
+
return any(sym_l == s.lower() for imp in c.imports for s in imp.symbols)
|
|
420
|
+
return any(sym in imp.symbols for imp in c.imports)
|
|
421
|
+
|
|
422
|
+
# Exact match first
|
|
423
|
+
defining = [c for c in contracts if _defines(c, symbol, case=False)]
|
|
424
|
+
if not defining:
|
|
425
|
+
defining = [c for c in contracts if _defines(c, symbol, case=True)]
|
|
426
|
+
|
|
427
|
+
importer_paths = {c.path for c in contracts if _imports(c, symbol, case=len(defining) == 0)}
|
|
428
|
+
# Exclude files already in defining set
|
|
429
|
+
defining_paths = {c.path for c in defining}
|
|
430
|
+
importers = [c for c in contracts if c.path in importer_paths and c.path not in defining_paths]
|
|
431
|
+
|
|
432
|
+
merged = list({c.path: c for c in defining + importers}.values())
|
|
433
|
+
return sorted(merged, key=lambda c: (c.path not in defining_paths, -c.relevance_score))
|
|
434
|
+
|
|
435
|
+
|
|
388
436
|
# ---------------------------------------------------------------------------
|
|
389
437
|
# Dependency graph emission
|
|
390
438
|
# ---------------------------------------------------------------------------
|
sourcecode/relevance_scorer.py
CHANGED
|
@@ -104,6 +104,18 @@ _LOW_RUNTIME_STEMS: frozenset[str] = frozenset({
|
|
|
104
104
|
"gruntfile", "gulpfile", "webpack.config", "vite.config",
|
|
105
105
|
"rollup.config", "babel.config", "jest.config", "vitest.config",
|
|
106
106
|
"tsconfig", "jsconfig", ".eslintrc", ".prettierrc", ".editorconfig",
|
|
107
|
+
# doc-site tooling
|
|
108
|
+
"rspress", "rspress.config", "docusaurus.config", "docusaurus",
|
|
109
|
+
"vuepress.config", "vuepress", "nextra.config",
|
|
110
|
+
"astro.config", "gatsby.config", "gatsby-config",
|
|
111
|
+
# build/workspace orchestration
|
|
112
|
+
"turbo", "turbo.config", "nx", "nx.config", "lerna",
|
|
113
|
+
"esbuild.config", "swc.config", "postcss.config",
|
|
114
|
+
"tailwind.config", "tailwind",
|
|
115
|
+
# storybook
|
|
116
|
+
"main.storybook", "preview.storybook",
|
|
117
|
+
# playwright / cypress / e2e
|
|
118
|
+
"playwright.config", "cypress.config",
|
|
107
119
|
})
|
|
108
120
|
|
|
109
121
|
_HIGH_VALUE_SUFFIXES: frozenset[str] = frozenset({
|
|
@@ -169,15 +181,16 @@ class RelevanceScorer:
|
|
|
169
181
|
if (any(m in f"/{norm}/" for m in _TEST_DIR_MARKERS)
|
|
170
182
|
or any(fname.startswith(p.strip(".")) or p in fname
|
|
171
183
|
for p in _TEST_FILE_PATTERNS)):
|
|
172
|
-
base -= 0.
|
|
184
|
+
base -= 0.30
|
|
173
185
|
|
|
174
|
-
# Config/tooling filename penalty
|
|
186
|
+
# Config/tooling filename penalty — stronger than before
|
|
175
187
|
if stem.lower() in _LOW_RUNTIME_STEMS:
|
|
176
|
-
base -= 0.
|
|
188
|
+
base -= 0.30
|
|
177
189
|
|
|
178
|
-
# Auxiliary dir penalty
|
|
190
|
+
# Auxiliary dir penalty (docs, examples, demos, fixtures, scripts…)
|
|
191
|
+
# Aggressive: these almost never belong in top-ranked agent context
|
|
179
192
|
if self._is_auxiliary(norm):
|
|
180
|
-
base -= 0.
|
|
193
|
+
base -= 0.40
|
|
181
194
|
|
|
182
195
|
return max(0.0, min(1.0, base))
|
|
183
196
|
|
sourcecode/serializer.py
CHANGED
|
@@ -964,7 +964,9 @@ def _contract_view_minimal(
|
|
|
964
964
|
# Compact summary
|
|
965
965
|
if sm.contract_summary is not None:
|
|
966
966
|
cs = sm.contract_summary
|
|
967
|
-
degraded
|
|
967
|
+
# degraded only when tree-sitter is actually unavailable — not when individual
|
|
968
|
+
# files fall back due to parse errors or size limits.
|
|
969
|
+
degraded = any("tree_sitter_unavailable" in lim for lim in cs.limitations)
|
|
968
970
|
summary: dict[str, Any] = {
|
|
969
971
|
"files": cs.extracted_files,
|
|
970
972
|
"total": cs.total_files,
|
|
@@ -1,14 +1,14 @@
|
|
|
1
|
-
sourcecode/__init__.py,sha256=
|
|
1
|
+
sourcecode/__init__.py,sha256=ubIjfhDW6QCF5e8x9JbuziDJAqU2SPxhNaBFiG1awNg,103
|
|
2
2
|
sourcecode/architecture_analyzer.py,sha256=H6noGgVArUJ25z1qC0fFA0KvJJeHZYyhKvKSkOyWHUk,23096
|
|
3
3
|
sourcecode/architecture_summary.py,sha256=rSY5MRiaz4N1YdG0pqDTDuFjSN7PO_Zplx-dtNzv2Yo,19985
|
|
4
|
-
sourcecode/ast_extractor.py,sha256=
|
|
4
|
+
sourcecode/ast_extractor.py,sha256=2OXxdoXrVqTf-sJZ7kggCO65zSoHd6yo2FEh-ZPddeo,40229
|
|
5
5
|
sourcecode/classifier.py,sha256=GKTMN8qKZX7ponSwDJfN08RrasI4CVpq1_gFBgEopps,7093
|
|
6
6
|
sourcecode/cli.py,sha256=O1ObfcxvhMYMXjd6otx6G0fE9ethIAX4qDUpUUjOxgY,63167
|
|
7
7
|
sourcecode/code_notes_analyzer.py,sha256=rRd8bFYV0krjlxxQV0wenwE9K7pVpUQSR7KvSvUQKw4,9226
|
|
8
8
|
sourcecode/confidence_analyzer.py,sha256=HxJMPLI5ulqtkncnv98W4iVO6yMbpQo87VuxiuNbDmY,12167
|
|
9
9
|
sourcecode/context_summarizer.py,sha256=CiQrfBEzun949bWvmLabWoj2HhPn6Lw62ofqnsy0FlQ,6503
|
|
10
10
|
sourcecode/contract_model.py,sha256=vr-9WHf0EBlbnuZGtTpUvSnbbjCBsm0q0tpLyxBJ-xI,3287
|
|
11
|
-
sourcecode/contract_pipeline.py,sha256=
|
|
11
|
+
sourcecode/contract_pipeline.py,sha256=3RmS6m13u7Rf1wVYxEXeorO0gvCL-6Lg0yBc0MB6vqg,17499
|
|
12
12
|
sourcecode/coverage_parser.py,sha256=q0LeZJaX1bnntLu-ImksdBsMlpsVmk_iUfSaB4eaJGo,19702
|
|
13
13
|
sourcecode/dependency_analyzer.py,sha256=Exq0BfInvfS5iAg9xAr6WI2uPNuotkIudTKcYJcRhB8,52757
|
|
14
14
|
sourcecode/doc_analyzer.py,sha256=Ec3orx6vBKsh5cNM3-F4y2Got2KuKx8w3dErwtdtM-A,19891
|
|
@@ -20,12 +20,12 @@ sourcecode/graph_analyzer.py,sha256=hMOsLLz9B0UnQ4xwbHdgr3bFvqpw0bQ8kN-xmEn3Krk,
|
|
|
20
20
|
sourcecode/metrics_analyzer.py,sha256=4uh11v-Q0gdrN87BOxuFWUym3N3AOkOuy21K5N8peB8,20126
|
|
21
21
|
sourcecode/prepare_context.py,sha256=vxEzr8czS3MFbdTx4hBJQlJLrl9cuvbHdL3ZokxFkvo,31384
|
|
22
22
|
sourcecode/redactor.py,sha256=xuGcadGEHaPw4qZXlMDvzMCsr4VOkdp3oBQptHyJk8c,2884
|
|
23
|
-
sourcecode/relevance_scorer.py,sha256=
|
|
23
|
+
sourcecode/relevance_scorer.py,sha256=ea7_7AHVgahVEWK3ebKOpG67agzG_pGICu5f2KgzrIA,8133
|
|
24
24
|
sourcecode/runtime_classifier.py,sha256=zWX3r3HCKHc-qtIobErOa8aKMmaoPYREtJKvPcBGPjQ,14792
|
|
25
25
|
sourcecode/scanner.py,sha256=aM3h9-DCQ3xKpeHpHYdo2vX6T5P95HA_YwZbkAVNwmo,8288
|
|
26
26
|
sourcecode/schema.py,sha256=AShu_bcP30TYaw4Dl1nYy8aFnBCKxrUli3LhU3MZTjs,20739
|
|
27
27
|
sourcecode/semantic_analyzer.py,sha256=asQfJf-EhzYaOTA-iMuZsrVXtbW7SV2WEKCxgsxa88Y,79413
|
|
28
|
-
sourcecode/serializer.py,sha256=
|
|
28
|
+
sourcecode/serializer.py,sha256=8ONhYuJ25AjMfZImwveYjVSHjIFDCuBRjCKNjs94bEA,50057
|
|
29
29
|
sourcecode/summarizer.py,sha256=ZuzIdm3t8A-d5MuQL0TSNLrd-L0IQIuguIxeNXMNJf8,16070
|
|
30
30
|
sourcecode/tree_utils.py,sha256=Fj9OIuUksBvgibNd3feog0sMDjVypJzPexp5lvMoYWI,1424
|
|
31
31
|
sourcecode/workspace.py,sha256=fQlVoNx8S-fSHpKoJ0JBvEHCFkxszH0KZVJed1i3TRk,6845
|
|
@@ -56,8 +56,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
|
|
|
56
56
|
sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
|
|
57
57
|
sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
|
|
58
58
|
sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
|
|
59
|
-
sourcecode-0.
|
|
60
|
-
sourcecode-0.
|
|
61
|
-
sourcecode-0.
|
|
62
|
-
sourcecode-0.
|
|
63
|
-
sourcecode-0.
|
|
59
|
+
sourcecode-0.35.0.dist-info/METADATA,sha256=XHjc7YfLYn-mGsIsArsK--sc8_xpylQ0sniBCO8qTv4,25209
|
|
60
|
+
sourcecode-0.35.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
61
|
+
sourcecode-0.35.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
|
|
62
|
+
sourcecode-0.35.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
|
|
63
|
+
sourcecode-0.35.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|