starforge-kernel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,487 @@
1
+ """Static workspace indexer.
2
+
3
+ Discovers ``@block``-decorated functions and the module import graph by
4
+ parsing source with :mod:`ast`. User code is NEVER imported here — imports
5
+ execute side effects and load heavy libraries; the indexer must stay safe to
6
+ run on every keystroke-adjacent event. Execution-time imports happen only in
7
+ the run worker.
8
+
9
+ Incrementality: callers pass the previous scan's cache back in; files whose
10
+ (mtime_ns, size) match are not even re-read, files whose content hash matches
11
+ are not re-parsed.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import ast
17
+ from dataclasses import dataclass, field
18
+ import hashlib
19
+ from pathlib import Path
20
+ from typing import Any, Iterator
21
+
22
#: Format stamp stored as ``"v"`` in every per-file cache entry. Entries whose
#: stamp differs are re-parsed instead of reused, so bump this whenever the
#: entry shape changes.
CACHE_VERSION = 3

#: Directory names the file walker never descends into. Dot-directories
#: (.git, .forge, .venv, ...) are already skipped wholesale by the walker;
#: this set covers the common offenders by exact name.
SKIP_DIRS = {"__pycache__", "node_modules", "site-packages", "dist", "build", ".eggs"}
35
+
36
+
37
+ def _sha(data: bytes | str) -> str:
38
+ if isinstance(data, str):
39
+ data = data.encode("utf-8")
40
+ return hashlib.sha256(data).hexdigest()
41
+
42
+
43
@dataclass
class ParamInfo:
    """One parameter of a ``@block`` function, as read from its signature."""

    name: str
    # Annotation and default are stored as source text, never evaluated.
    annotation: str | None = None
    default_repr: str | None = None
    has_default: bool = False
    keyword_only: bool = False
    #: Set for ``T | None`` / ``Optional[T]`` style annotations. An optional
    #: parameter that is unconnected and has no literal gets None injected by
    #: the worker (DESIGN.md §5). Tracked independently of has_default.
    optional: bool = False

    def to_dict(self) -> dict[str, Any]:
        """Return the plain-dict form stored in the index cache."""
        return dict(
            name=self.name,
            annotation=self.annotation,
            default_repr=self.default_repr,
            has_default=self.has_default,
            keyword_only=self.keyword_only,
            optional=self.optional,
        )

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> "ParamInfo":
        """Rebuild an instance from a mapping produced by :meth:`to_dict`."""
        return cls(**d)
68
+
69
+
70
@dataclass
class BlockInfo:
    """Static description of one ``@block``-decorated function."""

    block_id: str  # "dotted.module:qualname"
    module: str
    qualname: str
    file: str  # workspace-relative posix path
    lineno: int
    label: str
    category: str
    params: list[ParamInfo]
    outputs: list[str]
    returns: str | None
    doc: str | None
    source_hash: str
    #: Per-output annotation strings, aligned with ``outputs`` (None when
    #: unknown). Single output → [returns]; tuple returns → element types.
    #: Feeds the canvas's edge-compatibility warnings.
    output_annotations: list[str | None] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return the plain-dict form stored in the index cache.

        List-valued fields are copied so that later mutation of the returned
        dict (or of this object) cannot leak into the other.
        """
        return {
            "block_id": self.block_id,
            "module": self.module,
            "qualname": self.qualname,
            "file": self.file,
            "lineno": self.lineno,
            "label": self.label,
            "category": self.category,
            "params": [p.to_dict() for p in self.params],
            "outputs": list(self.outputs),
            "returns": self.returns,
            "doc": self.doc,
            "source_hash": self.source_hash,
            "output_annotations": list(self.output_annotations),
        }

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> "BlockInfo":
        """Rebuild an instance from a mapping produced by :meth:`to_dict`.

        The input mapping is left untouched: the top level is shallow-copied
        and the list-valued fields are copied too, so the new object never
        aliases lists owned by a cache entry.
        """
        d = dict(d)
        d["params"] = [ParamInfo.from_dict(p) for p in d["params"]]
        for key in ("outputs", "output_annotations"):
            if isinstance(d.get(key), list):
                d[key] = list(d[key])
        return cls(**d)
111
+
112
+
113
@dataclass
class ModuleInfo:
    """Index record for a single source file / module."""

    module: str
    file: str
    file_hash: str
    #: Hash of the parsed AST rather than the raw text, so whitespace- and
    #: comment-only edits leave it unchanged and do not invalidate importers
    #: through the Tier-2 closure. Empty for files that do not parse; the
    #: closure hash then falls back to file_hash.
    ast_hash: str = ""
    #: Dotted import targets exactly as collected from the source. They are
    #: matched against the workspace module set only when a closure hash is
    #: computed, which keeps cache entries valid while other files come and go.
    imports: list[str] = field(default_factory=list)
    blocks: list[BlockInfo] = field(default_factory=list)
    errors: list[str] = field(default_factory=list)
128
+
129
+
130
@dataclass
class WorkspaceIndex:
    """Result of one workspace scan: every indexed module keyed by dotted name.

    ``closure_hash`` results are memoized per instance and the memo is never
    cleared by this class, so ``modules`` should be fully populated before the
    first ``closure_hash`` call.
    """

    root: str
    modules: dict[str, ModuleInfo] = field(default_factory=dict)

    def __post_init__(self) -> None:
        # Plain attribute, not a dataclass field: it stays out of
        # __init__/__eq__/__repr__.
        self._closure_memo: dict[str, str] = {}

    @property
    def blocks(self) -> dict[str, BlockInfo]:
        """Every block in the workspace keyed by ``block_id`` (rebuilt per access)."""
        return {b.block_id: b for m in self.modules.values() for b in m.blocks}

    def errors(self) -> dict[str, list[str]]:
        """Map module name → error list, for modules that reported any error."""
        return {m.module: m.errors for m in self.modules.values() if m.errors}

    def _resolve_imports(self, module: str) -> set[str]:
        """Map a module's raw import strings to workspace-internal modules.

        For each raw target this adds:

        * the target itself and every ancestor package that exists in the
          workspace — importing ``a.b.c`` executes ``a`` and ``a.b`` first,
          so their contents can affect the importer;
        * every workspace module under the target, because importing a
          package conservatively pulls in everything beneath it.

        Over-invalidation is the safe direction for staleness (DESIGN.md §7).
        The module itself is never part of its own result. Unknown modules
        resolve to the empty set.
        """
        info = self.modules.get(module)
        if info is None:
            return set()
        resolved: set[str] = set()
        for target in info.imports:
            parts = target.split(".")
            # Walk "a", "a.b", "a.b.c": the target plus its ancestor packages.
            for depth in range(1, len(parts) + 1):
                candidate = ".".join(parts[:depth])
                if candidate in self.modules:
                    resolved.add(candidate)
            prefix = target + "."
            resolved.update(m for m in self.modules if m.startswith(prefix))
        resolved.discard(module)
        return resolved

    def closure_hash(self, module: str) -> str:
        """Tier-2 staleness input: hash of this module plus everything it
        (transitively) imports inside the workspace, order-independent.

        Each reachable module contributes its ``ast_hash``, or ``file_hash``
        when the AST hash is empty. The contributions are sorted before
        hashing, so traversal order does not matter. Results are cached in
        ``_closure_memo``.
        """
        if module in self._closure_memo:
            return self._closure_memo[module]
        seen: set[str] = set()
        frontier = [module]
        while frontier:
            current = frontier.pop()
            if current in seen or current not in self.modules:
                continue
            seen.add(current)
            frontier.extend(self._resolve_imports(current))
        digest = _sha("\n".join(sorted(self.modules[m].ast_hash or self.modules[m].file_hash for m in seen)))
        self._closure_memo[module] = digest
        return digest
179
+
180
+
181
+ def _module_name(relpath: Path) -> str:
182
+ parts = list(relpath.with_suffix("").parts)
183
+ if parts and parts[-1] == "__init__":
184
+ parts = parts[:-1]
185
+ return ".".join(parts) if parts else "__root__"
186
+
187
+
188
+ def _iter_py_files(root: Path) -> Iterator[Path]:
189
+ stack = [root]
190
+ while stack:
191
+ directory = stack.pop()
192
+ try:
193
+ entries = sorted(directory.iterdir())
194
+ except OSError:
195
+ continue
196
+ for entry in entries:
197
+ name = entry.name
198
+ if entry.is_dir():
199
+ if name.startswith(".") or name in SKIP_DIRS or name.endswith(".egg-info"):
200
+ continue
201
+ # Treat any directory that contains a venv marker as foreign.
202
+ if (entry / "pyvenv.cfg").exists():
203
+ continue
204
+ stack.append(entry)
205
+ elif entry.is_file() and name.endswith(".py"):
206
+ yield entry
207
+
208
+
209
+ class _StarforgeAliases:
210
+ """Names under which the @block decorator is reachable in one module."""
211
+
212
+ def __init__(self) -> None:
213
+ self.direct: set[str] = set() # from starforge import block [as b]
214
+ self.modules: set[str] = set() # import starforge [as sf]
215
+
216
+ def collect(self, node: ast.AST) -> None:
217
+ if isinstance(node, ast.Import):
218
+ for alias in node.names:
219
+ if alias.name == "starforge":
220
+ self.modules.add(alias.asname or alias.name)
221
+ elif isinstance(node, ast.ImportFrom):
222
+ if node.level == 0 and node.module == "starforge":
223
+ for alias in node.names:
224
+ if alias.name == "block":
225
+ self.direct.add(alias.asname or alias.name)
226
+
227
+ def match(self, decorator: ast.expr) -> tuple[bool, dict[str, Any]]:
228
+ """Return (is_block_decorator, literal_kwargs)."""
229
+ target = decorator
230
+ kwargs: dict[str, Any] = {}
231
+ if isinstance(decorator, ast.Call):
232
+ target = decorator.func
233
+ for kw in decorator.keywords:
234
+ if kw.arg is None:
235
+ continue
236
+ kwargs[kw.arg] = _literal(kw.value)
237
+ if isinstance(target, ast.Name) and target.id in self.direct:
238
+ return True, kwargs
239
+ if (
240
+ isinstance(target, ast.Attribute)
241
+ and target.attr == "block"
242
+ and isinstance(target.value, ast.Name)
243
+ and target.value.id in self.modules
244
+ ):
245
+ return True, kwargs
246
+ return False, {}
247
+
248
+
249
+ def _literal(node: ast.expr) -> Any:
250
+ try:
251
+ return ast.literal_eval(node)
252
+ except (ValueError, SyntaxError):
253
+ return None
254
+
255
+
256
+ def _collect_imports(tree: ast.Module, module: str) -> list[str]:
257
+ """Raw dotted import targets, with relative imports resolved against the
258
+ importing module's package."""
259
+ package_parts = module.split(".")[:-1] if module != "__root__" else []
260
+ targets: set[str] = set()
261
+ for node in ast.walk(tree):
262
+ if isinstance(node, ast.Import):
263
+ for alias in node.names:
264
+ targets.add(alias.name)
265
+ elif isinstance(node, ast.ImportFrom):
266
+ if node.level == 0:
267
+ if node.module:
268
+ targets.add(node.module)
269
+ # `from pkg import name` may bind a submodule, not an attr.
270
+ for alias in node.names:
271
+ targets.add(f"{node.module}.{alias.name}")
272
+ else:
273
+ base = package_parts[: len(package_parts) - (node.level - 1)]
274
+ if node.module:
275
+ base = base + node.module.split(".")
276
+ if base:
277
+ targets.add(".".join(base))
278
+ for alias in node.names:
279
+ if base:
280
+ targets.add(".".join(base + [alias.name]))
281
+ return sorted(targets)
282
+
283
+
284
+ def _annotation_is_optional(node: ast.expr | None) -> bool:
285
+ """True for ``T | None``, ``None | T``, ``Optional[T]``, bare ``None``,
286
+ and string annotations mentioning None."""
287
+ if node is None:
288
+ return False
289
+ if isinstance(node, ast.Constant):
290
+ if node.value is None:
291
+ return True
292
+ return isinstance(node.value, str) and "None" in node.value
293
+ if isinstance(node, ast.BinOp) and isinstance(node.op, ast.BitOr):
294
+ return _annotation_is_optional(node.left) or _annotation_is_optional(node.right)
295
+ if isinstance(node, ast.Subscript):
296
+ base = node.value
297
+ name = base.attr if isinstance(base, ast.Attribute) else getattr(base, "id", None)
298
+ return name == "Optional"
299
+ return False
300
+
301
+
302
def _extract_params(fn: ast.FunctionDef) -> list[ParamInfo]:
    """Describe ``fn``'s parameters in signature order.

    Positional-only and regular parameters come first, followed by
    keyword-only ones. Annotations and defaults are kept as unparsed source
    text. ``*args``/``**kwargs`` are intentionally not modeled in M0
    (DESIGN.md §5).
    """
    spec = fn.args
    positional = [*spec.posonlyargs, *spec.args]
    # Defaults align with the tail of the positional list; pad the front.
    padding: list[ast.expr | None] = [None] * (len(positional) - len(spec.defaults))
    positional_defaults = padding + list(spec.defaults)

    def describe(arg: ast.arg, default: ast.expr | None, keyword_only: bool) -> ParamInfo:
        return ParamInfo(
            name=arg.arg,
            annotation=ast.unparse(arg.annotation) if arg.annotation else None,
            default_repr=ast.unparse(default) if default is not None else None,
            has_default=default is not None,
            keyword_only=keyword_only,
            optional=_annotation_is_optional(arg.annotation),
        )

    described = [describe(arg, default, False) for arg, default in zip(positional, positional_defaults)]
    described.extend(
        describe(arg, default, True) for arg, default in zip(spec.kwonlyargs, spec.kw_defaults)
    )
    return described
331
+
332
+
333
+ def _tuple_elements(returns: ast.expr | None) -> list[ast.expr] | None:
334
+ """The element annotations of a fixed-arity ``tuple[...]`` return."""
335
+ if (
336
+ isinstance(returns, ast.Subscript)
337
+ and isinstance(returns.value, ast.Name)
338
+ and returns.value.id in {"tuple", "Tuple"}
339
+ and isinstance(returns.slice, ast.Tuple)
340
+ ):
341
+ elts = returns.slice.elts
342
+ # tuple[int, ...] is variadic — treat as a single opaque output.
343
+ if not any(isinstance(e, ast.Constant) and e.value is Ellipsis for e in elts):
344
+ return list(elts)
345
+ return None
346
+
347
+
348
def _infer_outputs(fn: ast.FunctionDef, decorator_outputs: Any) -> tuple[list[str], list[str | None]]:
    """Returns (output_names, per_output_annotations).

    Precedence:

    1. Names given by the decorator's ``outputs=`` literal. Element
       annotations from a fixed-arity tuple return are attached only when
       their count matches the number of names; otherwise every annotation
       is None.
    2. A fixed-arity tuple return → ``output_0``, ``output_1``, ...
    3. A single ``"output"`` carrying the return annotation, if any.

    ``decorator_outputs`` comes from user source and may be any literal. A
    lone string is treated as one output name rather than iterated
    character by character, and a non-iterable value is ignored instead of
    raising.
    """
    elements = _tuple_elements(fn.returns)
    element_annotations = [ast.unparse(e) for e in elements] if elements else None

    if isinstance(decorator_outputs, str):
        # outputs="result" means one output, not one per character.
        decorator_outputs = [decorator_outputs]
    if decorator_outputs:
        try:
            names = [str(n) for n in decorator_outputs]
        except TypeError:
            # e.g. outputs=3: not iterable, fall through to inference.
            names = []
        if names:
            if element_annotations and len(element_annotations) == len(names):
                return names, list(element_annotations)
            return names, [None] * len(names)
    if element_annotations:
        return [f"output_{i}" for i in range(len(element_annotations))], list(element_annotations)
    return ["output"], [ast.unparse(fn.returns) if fn.returns else None]
362
+
363
+
364
+ def _default_label(name: str) -> str:
365
+ return name.replace("_", " ").strip().title()
366
+
367
+
368
def _parse_module(
    text: str, module: str, relpath: str
) -> tuple[list[BlockInfo], list[str], list[str], str]:
    """Returns (blocks, raw_imports, errors, ast_hash) for one source file.

    ``text`` is parsed with :mod:`ast` and never executed. A file that does
    not parse yields no blocks, no imports, a single error string and an
    empty ``ast_hash``.

    Only module-level, synchronous functions carrying a recognised ``@block``
    decorator become blocks. Decorated async functions, methods and nested
    functions are reported in ``errors`` instead.
    """
    errors: list[str] = []
    try:
        tree = ast.parse(text)
    except SyntaxError as exc:
        return [], [], [f"syntax error: line {exc.lineno}: {exc.msg}"], ""
    except ValueError as exc:
        # Before Python 3.12, source containing null bytes raises ValueError
        # rather than SyntaxError; treat it as an unparseable file too.
        return [], [], [f"syntax error: {exc}"], ""

    aliases = _StarforgeAliases()
    for node in ast.walk(tree):
        aliases.collect(node)

    blocks: list[BlockInfo] = []
    # Identity set of the module body, used to tell top-level defs from nested ones.
    top_level = {id(n) for n in tree.body}
    for node in ast.walk(tree):
        if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue
        matched_kwargs: dict[str, Any] | None = None
        for decorator in node.decorator_list:
            is_block, kwargs = aliases.match(decorator)
            if is_block:
                matched_kwargs = kwargs
                break
        if matched_kwargs is None:
            continue
        if isinstance(node, ast.AsyncFunctionDef):
            errors.append(f"{node.name}: async @block functions are not supported yet")
            continue
        if id(node) not in top_level:
            errors.append(
                f"{node.name}: @block only registers module-level functions "
                "(methods and nested functions are not supported yet)"
            )
            continue
        output_names, output_annotations = _infer_outputs(node, matched_kwargs.get("outputs"))
        blocks.append(
            BlockInfo(
                block_id=f"{module}:{node.name}",
                module=module,
                qualname=node.name,
                file=relpath,
                lineno=node.lineno,
                label=matched_kwargs.get("label") or _default_label(node.name),
                category=matched_kwargs.get("category") or module,
                params=_extract_params(node),
                outputs=output_names,
                returns=ast.unparse(node.returns) if node.returns else None,
                doc=ast.get_docstring(node),
                source_hash=_sha(ast.dump(node)),
                output_annotations=output_annotations,
            )
        )

    # A package's __init__ file is named after the package itself ("pkg"),
    # but _collect_imports takes the package to be everything before the last
    # dotted component. Anchoring on "pkg.__init__" makes `from . import x`
    # inside pkg/__init__.py resolve to "pkg.x" instead of one level too high.
    # Cache entries written before this fix keep their old import list until
    # the file's content changes.
    is_package_init = relpath.rsplit("/", 1)[-1] == "__init__.py" and module != "__root__"
    import_anchor = f"{module}.__init__" if is_package_init else module
    return blocks, _collect_imports(tree, import_anchor), errors, _sha(ast.dump(tree))
423
+
424
+
425
def scan_workspace(
    root: str | Path,
    cache: dict[str, Any] | None = None,
) -> tuple[WorkspaceIndex, dict[str, Any]]:
    """Scan a workspace; returns the index and a cache for the next scan.

    ``cache`` is the second return value of a previous call (typically
    persisted to ``.forge/cache/index.json``). Per file, the cheapest
    sufficient check wins:

    * same ``(mtime_ns, size)`` → the cached entry is reused without reading;
    * same content hash → the entry is reused with refreshed stat fields;
    * otherwise the file is parsed and a new entry is built.

    Entries whose ``"v"`` differs from ``CACHE_VERSION`` are never reused.
    Files that cannot be stat'ed or read are left out of both results.
    """
    root = Path(root).resolve()
    previous: dict[str, Any] = (cache or {}).get("files", {})
    fresh: dict[str, Any] = {}
    index = WorkspaceIndex(root=str(root))

    for path in _iter_py_files(root):
        relpath = path.relative_to(root).as_posix()
        try:
            stat = path.stat()
        except OSError:
            continue

        entry = previous.get(relpath)
        # Only entries written in the current cache format may be trusted.
        reusable = entry and entry.get("v") == CACHE_VERSION
        untouched = reusable and entry["mtime_ns"] == stat.st_mtime_ns and entry["size"] == stat.st_size

        if not untouched:
            try:
                text = path.read_text(encoding="utf-8", errors="replace")
            except OSError:
                continue
            file_hash = _sha(text)
            if reusable and entry["sha"] == file_hash:
                # Same content, new stat: refresh the fast-path keys only.
                entry = {**entry, "mtime_ns": stat.st_mtime_ns, "size": stat.st_size}
            else:
                module = _module_name(Path(relpath))
                blocks, imports, errors, ast_hash = _parse_module(text, module, relpath)
                entry = {
                    "v": CACHE_VERSION,
                    "mtime_ns": stat.st_mtime_ns,
                    "size": stat.st_size,
                    "sha": file_hash,
                    "ast_sha": ast_hash,
                    "module": module,
                    "imports": imports,
                    "blocks": [b.to_dict() for b in blocks],
                    "errors": errors,
                }

        fresh[relpath] = entry
        module_name = entry["module"]
        index.modules[module_name] = ModuleInfo(
            module=module_name,
            file=relpath,
            file_hash=entry["sha"],
            ast_hash=entry["ast_sha"],
            imports=list(entry["imports"]),
            blocks=[BlockInfo.from_dict(b) for b in entry["blocks"]],
            errors=list(entry["errors"]),
        )

    return index, {"files": fresh}
File without changes
@@ -0,0 +1,3 @@
1
# Entry script: imports the server module's ``main`` and calls it
# unconditionally at module level (there is no ``__name__`` guard), so the
# call happens whenever this file is executed or imported.
from starforge.kernel.server import main

main()