starforge-kernel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- starforge/__init__.py +86 -0
- starforge/core/__init__.py +0 -0
- starforge/core/checkpoints.py +178 -0
- starforge/core/figures.py +119 -0
- starforge/core/previews.py +109 -0
- starforge/core/provenance.py +192 -0
- starforge/core/runner.py +293 -0
- starforge/core/serializers.py +141 -0
- starforge/core/spec.py +126 -0
- starforge/index/__init__.py +9 -0
- starforge/index/scanner.py +487 -0
- starforge/kernel/__init__.py +0 -0
- starforge/kernel/__main__.py +3 -0
- starforge/kernel/server.py +351 -0
- starforge/kernel/worker.py +66 -0
- starforge/mcp.py +283 -0
- starforge_kernel-0.1.0.dist-info/METADATA +76 -0
- starforge_kernel-0.1.0.dist-info/RECORD +20 -0
- starforge_kernel-0.1.0.dist-info/WHEEL +5 -0
- starforge_kernel-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,487 @@
|
|
|
1
|
+
"""Static workspace indexer.
|
|
2
|
+
|
|
3
|
+
Discovers ``@block``-decorated functions and the module import graph by
|
|
4
|
+
parsing source with :mod:`ast`. User code is NEVER imported here — imports
|
|
5
|
+
execute side effects and load heavy libraries; the indexer must stay safe to
|
|
6
|
+
run on every keystroke-adjacent event. Execution-time imports happen only in
|
|
7
|
+
the run worker.
|
|
8
|
+
|
|
9
|
+
Incrementality: callers pass the previous scan's cache back in; files whose
|
|
10
|
+
(mtime_ns, size) match are not even re-read, files whose content hash matches
|
|
11
|
+
are not re-parsed.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import ast
|
|
17
|
+
from dataclasses import dataclass, field
|
|
18
|
+
import hashlib
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
from typing import Any, Iterator
|
|
21
|
+
|
|
22
|
+
#: Bump when the per-file cache entry shape changes; old entries re-parse.
#: scan_workspace() compares this against the "v" key of every cached entry.
CACHE_VERSION = 3

#: Directories never worth scanning. Dot-directories are skipped wholesale
#: (.git, .forge, .venv, ...) by the file walker, so this set only needs to
#: cover the common non-dotted offenders.
#: NOTE(review): ".eggs" is already excluded by the dot-directory rule; it is
#: harmless here but redundant.
SKIP_DIRS = {
    "__pycache__",
    "node_modules",
    "site-packages",
    "dist",
    "build",
    ".eggs",
}
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _sha(data: bytes | str) -> str:
    """Return the hex-encoded SHA-256 digest of ``data``.

    Text is encoded as UTF-8 before hashing; bytes are hashed as given.
    """
    payload = data.encode("utf-8") if isinstance(data, str) else data
    return hashlib.sha256(payload).hexdigest()
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass
class ParamInfo:
    """Description of a single parameter of a ``@block`` function."""

    name: str
    annotation: str | None = None
    default_repr: str | None = None
    has_default: bool = False
    keyword_only: bool = False
    #: ``T | None`` / ``Optional[T]`` annotations mark a parameter optional:
    #: when unconnected and given no literal, the worker injects None
    #: (DESIGN.md §5). Independent of has_default.
    optional: bool = False

    def to_dict(self) -> dict[str, Any]:
        """Return the fields as a plain dict, in declaration order."""
        keys = (
            "name",
            "annotation",
            "default_repr",
            "has_default",
            "keyword_only",
            "optional",
        )
        return {key: getattr(self, key) for key in keys}

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> "ParamInfo":
        """Rebuild an instance from a mapping shaped like ``to_dict`` output."""
        return cls(**d)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@dataclass
class BlockInfo:
    """Everything the index records about one ``@block``-decorated function."""

    block_id: str  # "dotted.module:qualname"
    module: str
    qualname: str
    file: str  # workspace-relative posix path
    lineno: int
    label: str
    category: str
    params: list[ParamInfo]
    outputs: list[str]
    returns: str | None
    doc: str | None
    source_hash: str
    #: Per-output annotation strings, aligned with ``outputs`` (None when
    #: unknown). Single output → [returns]; tuple returns → element types.
    #: Feeds the canvas's edge-compatibility warnings.
    output_annotations: list[str | None] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict form; each parameter is converted with its
        own ``to_dict``, every other field is stored as-is."""
        serialized_params = [param.to_dict() for param in self.params]
        return dict(
            block_id=self.block_id,
            module=self.module,
            qualname=self.qualname,
            file=self.file,
            lineno=self.lineno,
            label=self.label,
            category=self.category,
            params=serialized_params,
            outputs=self.outputs,
            returns=self.returns,
            doc=self.doc,
            source_hash=self.source_hash,
            output_annotations=self.output_annotations,
        )

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> "BlockInfo":
        """Rebuild an instance from ``to_dict`` output without mutating ``d``."""
        rebuilt = {**d, "params": [ParamInfo.from_dict(p) for p in d["params"]]}
        return cls(**rebuilt)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
@dataclass
class ModuleInfo:
    """Index record for one scanned module: its hashes, raw import targets,
    discovered blocks and any errors collected while parsing it."""

    module: str
    file: str
    file_hash: str
    #: AST-normalized content hash: whitespace/comment edits don't change it,
    #: so they don't invalidate importers via the Tier-2 closure. Falls back
    #: to file_hash for files that don't parse.
    ast_hash: str = ""
    #: Raw dotted import targets as written; resolved against the workspace
    #: module set at closure-hash time so cache entries stay valid as other
    #: files appear and disappear.
    imports: list[str] = field(default_factory=list)
    blocks: list[BlockInfo] = field(default_factory=list)
    errors: list[str] = field(default_factory=list)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
@dataclass
class WorkspaceIndex:
    """Result of one workspace scan: modules keyed by dotted name, plus
    derived views over their blocks, errors and import closures."""

    root: str
    modules: dict[str, ModuleInfo] = field(default_factory=dict)

    def __post_init__(self) -> None:
        # Per-instance memo for closure_hash(); not a dataclass field.
        self._closure_memo: dict[str, str] = {}

    @property
    def blocks(self) -> dict[str, BlockInfo]:
        """All blocks of all modules, keyed by ``block_id``."""
        found: dict[str, BlockInfo] = {}
        for info in self.modules.values():
            for blk in info.blocks:
                found[blk.block_id] = blk
        return found

    def errors(self) -> dict[str, list[str]]:
        """Error lists keyed by module name, for modules that have any."""
        report: dict[str, list[str]] = {}
        for info in self.modules.values():
            if info.errors:
                report[info.module] = info.errors
        return report

    def _resolve_imports(self, module: str) -> set[str]:
        """Map a module's raw import strings to workspace-internal modules.

        Importing a package conservatively pulls in every module under it:
        over-invalidation is the safe direction for staleness (DESIGN.md §7).
        """
        info = self.modules.get(module)
        if info is None:
            return set()
        known = self.modules
        resolved: set[str] = set()
        for target in info.imports:
            if target in known:
                resolved.add(target)
            dotted = f"{target}."
            for candidate in known:
                if candidate.startswith(dotted):
                    resolved.add(candidate)
        # A module never counts as its own dependency.
        resolved.discard(module)
        return resolved

    def closure_hash(self, module: str) -> str:
        """Tier-2 staleness input: hash of this module plus everything it
        (transitively) imports inside the workspace, order-independent."""
        try:
            return self._closure_memo[module]
        except KeyError:
            pass
        visited: set[str] = set()
        pending = [module]
        while pending:
            current = pending.pop()
            if current in self.modules and current not in visited:
                visited.add(current)
                pending.extend(self._resolve_imports(current))
        # Sorting the per-module hashes makes the digest independent of
        # traversal order; file_hash stands in when ast_hash is empty.
        hashes = sorted(
            self.modules[name].ast_hash or self.modules[name].file_hash
            for name in visited
        )
        digest = _sha("\n".join(hashes))
        self._closure_memo[module] = digest
        return digest
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def _module_name(relpath: Path) -> str:
    """Turn a workspace-relative file path into a dotted module name.

    The suffix is dropped and a trailing ``__init__`` component is removed,
    so a package's ``__init__.py`` maps to the package name itself. When no
    components remain, the placeholder ``"__root__"`` is returned.
    """
    segments = relpath.with_suffix("").parts
    if segments[-1:] == ("__init__",):
        segments = segments[:-1]
    return ".".join(segments) or "__root__"
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _iter_py_files(root: Path) -> Iterator[Path]:
    """Yield every ``.py`` file under ``root``, pruning foreign directories.

    Directories are visited depth-first from an explicit stack, with each
    directory's entries processed in sorted order. Directories that cannot
    be listed are skipped silently.
    """
    pending = [root]
    while pending:
        current = pending.pop()
        try:
            children = sorted(current.iterdir())
        except OSError:
            continue
        for child in children:
            if child.is_dir():
                foreign = (
                    child.name.startswith(".")
                    or child.name in SKIP_DIRS
                    or child.name.endswith(".egg-info")
                    # Treat any directory that contains a venv marker as foreign.
                    or (child / "pyvenv.cfg").exists()
                )
                if not foreign:
                    pending.append(child)
            elif child.is_file() and child.name.endswith(".py"):
                yield child
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
class _StarforgeAliases:
    """Names under which the @block decorator is reachable in one module."""

    def __init__(self) -> None:
        self.direct: set[str] = set()  # from starforge import block [as b]
        self.modules: set[str] = set()  # import starforge [as sf]

    def collect(self, node: ast.AST) -> None:
        """Record the local names bound by ``node`` if it imports starforge
        or its ``block`` decorator; any other node is ignored."""
        if isinstance(node, ast.Import):
            self.modules.update(
                alias.asname or alias.name
                for alias in node.names
                if alias.name == "starforge"
            )
            return
        if not isinstance(node, ast.ImportFrom):
            return
        # Only absolute `from starforge import ...` is considered.
        if node.level != 0 or node.module != "starforge":
            return
        self.direct.update(
            alias.asname or alias.name
            for alias in node.names
            if alias.name == "block"
        )

    def match(self, decorator: ast.expr) -> tuple[bool, dict[str, Any]]:
        """Return (is_block_decorator, literal_kwargs)."""
        if isinstance(decorator, ast.Call):
            target = decorator.func
            # `**mapping` arguments have no name and are skipped.
            kwargs = {
                kw.arg: _literal(kw.value)
                for kw in decorator.keywords
                if kw.arg is not None
            }
        else:
            target = decorator
            kwargs = {}

        if isinstance(target, ast.Name):
            hit = target.id in self.direct
        elif isinstance(target, ast.Attribute):
            hit = (
                target.attr == "block"
                and isinstance(target.value, ast.Name)
                and target.value.id in self.modules
            )
        else:
            hit = False
        return (True, kwargs) if hit else (False, {})
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _literal(node: ast.expr) -> Any:
    """Evaluate ``node`` as a Python literal, or return None if it is not one.

    ``ast.literal_eval`` is documented to raise ValueError, TypeError,
    SyntaxError, MemoryError or RecursionError depending on the input (for
    example ``{[1]: 2}`` fails with TypeError because the key is
    unhashable). All of them mean "not a usable literal" here, so none may
    escape and abort the scan of the surrounding file.
    """
    try:
        return ast.literal_eval(node)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        return None
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def _collect_imports(tree: ast.Module, module: str) -> list[str]:
    """Raw dotted import targets, with relative imports resolved against the
    importing module's package.

    For ``import a.b`` the target is ``a.b``. For ``from pkg import name``
    both ``pkg`` and ``pkg.name`` are recorded, because ``name`` may be a
    submodule rather than an attribute.

    Relative imports are resolved under two readings of ``module``, since the
    dotted name alone does not say whether the file is ``pkg/mod.py`` or
    ``pkg/mod/__init__.py``:

    * plain module: the enclosing package is ``module`` minus its last part;
    * package ``__init__``: the enclosing package is ``module`` itself.

    Both candidate sets are returned. Candidates that do not exist in the
    workspace are expected to be dropped when the caller resolves targets
    against the known modules, so the extra reading can only over-approximate
    the dependency set. A relative import that climbs above the top of the
    package under a given reading contributes nothing for that reading.

    Returns the targets as a sorted list without duplicates.
    """
    own_parts = [] if module == "__root__" else module.split(".")
    # Candidate "current package" paths for resolving relative imports.
    anchors: list[list[str]] = [own_parts[:-1]]
    if own_parts:
        anchors.append(own_parts)

    targets: set[str] = set()
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            targets.update(alias.name for alias in node.names)
        elif isinstance(node, ast.ImportFrom):
            if node.level == 0:
                if node.module:
                    targets.add(node.module)
                    # `from pkg import name` may bind a submodule, not an attr.
                    for alias in node.names:
                        targets.add(f"{node.module}.{alias.name}")
                continue
            suffix = node.module.split(".") if node.module else []
            for package in anchors:
                # level 1 is the current package, each extra dot one parent up.
                keep = len(package) - (node.level - 1)
                if keep < 0:
                    # Climbs above the top-level package: a negative slice
                    # bound would silently pick an unrelated prefix.
                    continue
                base = package[:keep] + suffix
                if not base:
                    continue
                targets.add(".".join(base))
                for alias in node.names:
                    targets.add(".".join(base + [alias.name]))
    return sorted(targets)
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
def _annotation_is_optional(node: ast.expr | None) -> bool:
    """True for ``T | None``, ``None | T``, ``Optional[T]``,
    ``Union[..., None]``, bare ``None``, and string annotations mentioning
    None.

    ``Optional`` and ``Union`` are recognised by name only, whether written
    bare or as an attribute (``typing.Optional``); no import resolution is
    attempted. A missing annotation is never optional.
    """
    if node is None:
        return False
    if isinstance(node, ast.Constant):
        if node.value is None:
            return True
        # String annotation: a substring check, not a parse.
        return isinstance(node.value, str) and "None" in node.value
    if isinstance(node, ast.BinOp) and isinstance(node.op, ast.BitOr):
        return _annotation_is_optional(node.left) or _annotation_is_optional(node.right)
    if isinstance(node, ast.Subscript):
        base = node.value
        name = base.attr if isinstance(base, ast.Attribute) else getattr(base, "id", None)
        if name == "Optional":
            return True
        if name == "Union":
            # Union[X, None] is the long spelling of Optional[X]; a union is
            # optional as soon as any member is.
            inner = node.slice
            members = inner.elts if isinstance(inner, ast.Tuple) else [inner]
            return any(_annotation_is_optional(member) for member in members)
    return False
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def _extract_params(fn: ast.FunctionDef) -> list[ParamInfo]:
    """Describe the parameters of ``fn`` in signature order.

    Positional-only and regular parameters come first, then keyword-only
    ones. Annotations and defaults are recorded as unparsed source text.
    """

    def describe(arg: ast.arg, default: ast.expr | None, *, keyword_only: bool) -> ParamInfo:
        annotation = arg.annotation
        return ParamInfo(
            name=arg.arg,
            annotation=ast.unparse(annotation) if annotation else None,
            default_repr=None if default is None else ast.unparse(default),
            has_default=default is not None,
            keyword_only=keyword_only,
            optional=_annotation_is_optional(annotation),
        )

    spec = fn.args
    positional = [*spec.posonlyargs, *spec.args]
    # `defaults` only covers the trailing positional parameters; left-pad
    # with None so it lines up one-to-one with `positional`.
    missing = len(positional) - len(spec.defaults)
    padded: list[ast.expr | None] = [None] * missing + list(spec.defaults)

    result = [
        describe(arg, default, keyword_only=False)
        for arg, default in zip(positional, padded)
    ]
    # `kw_defaults` is already aligned with `kwonlyargs` (None = no default).
    result += [
        describe(arg, default, keyword_only=True)
        for arg, default in zip(spec.kwonlyargs, spec.kw_defaults)
    ]
    # *args/**kwargs are intentionally not modeled in M0 (DESIGN.md §5).
    return result
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
def _tuple_elements(returns: ast.expr | None) -> list[ast.expr] | None:
    """The element annotations of a fixed-arity ``tuple[...]`` return.

    Returns None unless ``returns`` is a subscript of the bare name ``tuple``
    or ``Tuple`` whose subscript is itself a tuple of elements without an
    ellipsis.
    """
    if not isinstance(returns, ast.Subscript):
        return None
    container = returns.value
    if not (isinstance(container, ast.Name) and container.id in {"tuple", "Tuple"}):
        return None
    inner = returns.slice
    if not isinstance(inner, ast.Tuple):
        return None
    for element in inner.elts:
        # tuple[int, ...] is variadic — treat as a single opaque output.
        if isinstance(element, ast.Constant) and element.value is Ellipsis:
            return None
    return list(inner.elts)
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
def _infer_outputs(fn: ast.FunctionDef, decorator_outputs: Any) -> tuple[list[str], list[str | None]]:
    """Returns (output_names, per_output_annotations).

    ``decorator_outputs`` is the literal value of the decorator's
    ``outputs=`` keyword, or None when it is absent or not a literal. Because
    it can be any literal, it is normalised defensively:

    * a non-empty string names exactly one output (it is not split into
      characters);
    * any other iterable supplies one name per item, converted with ``str``;
    * a non-iterable or empty value is treated as "no names given".

    Names are paired with annotations as follows:

    * explicit names whose count matches a fixed-arity ``tuple[...]`` return
      get the tuple's element annotations;
    * a single explicit name otherwise gets the whole return annotation;
    * other explicit names get None each;
    * with no names, a fixed-arity tuple return yields ``output_0``,
      ``output_1``, ... and anything else a single ``output``.
    """
    elements = _tuple_elements(fn.returns)
    element_annotations = [ast.unparse(e) for e in elements] if elements else None
    whole_return = ast.unparse(fn.returns) if fn.returns else None

    names: list[str] = []
    if isinstance(decorator_outputs, str):
        # Iterating a str would yield one "output" per character.
        if decorator_outputs:
            names = [decorator_outputs]
    elif decorator_outputs:
        try:
            names = [str(n) for n in decorator_outputs]
        except TypeError:
            # e.g. outputs=3: not iterable, so fall back to inference rather
            # than failing the scan of the whole file.
            names = []

    if names:
        if element_annotations and len(element_annotations) == len(names):
            return names, list(element_annotations)
        if len(names) == 1:
            # One output carries the entire return value.
            return names, [whole_return]
        return names, [None] * len(names)
    if element_annotations:
        return [f"output_{i}" for i in range(len(element_annotations))], list(element_annotations)
    return ["output"], [whole_return]
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
def _default_label(name: str) -> str:
    """Derive a display label from an identifier: underscores become spaces,
    surrounding whitespace is trimmed, and the result is title-cased."""
    spaced = name.translate(str.maketrans("_", " "))
    return spaced.strip().title()
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
def _parse_module(
    text: str, module: str, relpath: str
) -> tuple[list[BlockInfo], list[str], list[str], str]:
    """Returns (blocks, raw_imports, errors, ast_hash) for one source file.

    The source is parsed with :mod:`ast` only; nothing is imported or run.

    Args:
        text: Source code of the file.
        module: Dotted module name used to build block ids and to resolve
            relative imports.
        relpath: Path stored on each discovered block.

    If the source cannot be parsed, the result is ``([], [], [message], "")``.
    That covers both ``SyntaxError`` and the ``ValueError`` that
    ``ast.parse`` raises for source containing null bytes on interpreters
    that do not report it as a syntax error.

    Decorated functions that cannot be registered (``async def``, methods,
    nested functions) are reported in ``errors`` and omitted from ``blocks``.
    """
    errors: list[str] = []
    try:
        tree = ast.parse(text)
    except SyntaxError as exc:
        return [], [], [f"syntax error: line {exc.lineno}: {exc.msg}"], ""
    except ValueError as exc:
        # No line information is available for this failure mode.
        return [], [], [f"syntax error: {exc}"], ""

    # First pass: learn under which local names @block is reachable.
    aliases = _StarforgeAliases()
    for node in ast.walk(tree):
        aliases.collect(node)

    blocks: list[BlockInfo] = []
    # Identity set of the module body's direct children, used to tell
    # module-level functions from methods and nested functions.
    top_level = {id(n) for n in tree.body}
    for node in ast.walk(tree):
        if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue
        # The first decorator that matches @block supplies the kwargs.
        matched_kwargs: dict[str, Any] | None = None
        for decorator in node.decorator_list:
            is_block, kwargs = aliases.match(decorator)
            if is_block:
                matched_kwargs = kwargs
                break
        if matched_kwargs is None:
            continue
        if isinstance(node, ast.AsyncFunctionDef):
            errors.append(f"{node.name}: async @block functions are not supported yet")
            continue
        if id(node) not in top_level:
            errors.append(
                f"{node.name}: @block only registers module-level functions "
                "(methods and nested functions are not supported yet)"
            )
            continue
        output_names, output_annotations = _infer_outputs(node, matched_kwargs.get("outputs"))
        blocks.append(
            BlockInfo(
                block_id=f"{module}:{node.name}",
                module=module,
                qualname=node.name,
                file=relpath,
                lineno=node.lineno,
                label=matched_kwargs.get("label") or _default_label(node.name),
                category=matched_kwargs.get("category") or module,
                params=_extract_params(node),
                outputs=output_names,
                returns=ast.unparse(node.returns) if node.returns else None,
                doc=ast.get_docstring(node),
                # Hash of the AST dump rather than the raw text, so edits
                # that leave the tree unchanged leave the hash unchanged.
                source_hash=_sha(ast.dump(node)),
                output_annotations=output_annotations,
            )
        )
    return blocks, _collect_imports(tree, module), errors, _sha(ast.dump(tree))
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
def scan_workspace(
    root: str | Path,
    cache: dict[str, Any] | None = None,
) -> tuple[WorkspaceIndex, dict[str, Any]]:
    """Scan a workspace; returns the index and a cache for the next scan.

    ``cache`` is the second return value of a previous call (typically
    persisted to ``.forge/cache/index.json``). Unchanged files are reused
    without re-reading (mtime+size fast path) or re-parsing (content hash).

    Because the cache may come from disk, it is not trusted: a cache whose
    ``files`` value is not a dict, or an entry that is not a dict, has a
    different ``v``, or lacks any key this function reads, is treated as a
    cache miss and the file is parsed again.

    Files that cannot be stat'ed or read are left out of both the index and
    the returned cache.
    """
    root = Path(root).resolve()
    prev_files = cache.get("files") if isinstance(cache, dict) else None
    if not isinstance(prev_files, dict):
        prev_files = {}
    next_files: dict[str, Any] = {}
    index = WorkspaceIndex(root=str(root))

    # Every key read from a cached entry below; a missing one means the
    # entry cannot be reused safely.
    required_keys = (
        "mtime_ns",
        "size",
        "sha",
        "ast_sha",
        "module",
        "imports",
        "blocks",
        "errors",
    )

    for path in _iter_py_files(root):
        relpath = path.relative_to(root).as_posix()
        try:
            stat = path.stat()
        except OSError:
            continue
        entry = prev_files.get(relpath)
        # Cache-format version gate: entries from older formats are re-parsed
        # rather than trusted. Bump CACHE_VERSION when entry shape changes.
        reusable = (
            isinstance(entry, dict)
            and entry.get("v") == CACHE_VERSION
            and all(key in entry for key in required_keys)
        )
        if reusable and entry["mtime_ns"] == stat.st_mtime_ns and entry["size"] == stat.st_size:
            # Fast path: same mtime and size, the file is not even opened.
            next_files[relpath] = entry
        else:
            try:
                text = path.read_text(encoding="utf-8", errors="replace")
            except OSError:
                continue
            file_hash = _sha(text)
            if reusable and entry["sha"] == file_hash:
                # Touched but unchanged: keep the parse results and refresh
                # the stat fields so the fast path applies next time.
                entry = {**entry, "mtime_ns": stat.st_mtime_ns, "size": stat.st_size}
            else:
                module = _module_name(Path(relpath))
                blocks, imports, errors, ast_hash = _parse_module(text, module, relpath)
                entry = {
                    "v": CACHE_VERSION,
                    "mtime_ns": stat.st_mtime_ns,
                    "size": stat.st_size,
                    "sha": file_hash,
                    "ast_sha": ast_hash,
                    "module": module,
                    "imports": imports,
                    "blocks": [b.to_dict() for b in blocks],
                    "errors": errors,
                }
            next_files[relpath] = entry

        # Fresh and cached entries are both rebuilt from the dict form, so
        # the index never shares its lists with the returned cache.
        entry = next_files[relpath]
        index.modules[entry["module"]] = ModuleInfo(
            module=entry["module"],
            file=relpath,
            file_hash=entry["sha"],
            ast_hash=entry["ast_sha"],
            imports=list(entry["imports"]),
            blocks=[BlockInfo.from_dict(b) for b in entry["blocks"]],
            errors=list(entry["errors"]),
        )

    return index, {"files": next_files}
|
|
File without changes
|