tablambda 0.6.0.post30.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,207 @@
1
+ """The defunctionalized runtime: the minimal execution substrate for compiled code.
2
+
3
+ Generated code references three free names from this module: ``Closure``, ``Thunk``, and ``interned``.
4
+ A compiled ``Closure`` and a ``Thunk`` are both ``Node``s, so they share the interpreter's
5
+ ``weak_head_normal_form`` and interning and can run mixed with interpreted terms. The runtime holds NO
6
+ domain logic; all compilation decisions live in the pure-lambda compiler ``_defun_codegen``.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import hashlib
12
+ import struct
13
+ import sys
14
+ import threading
15
+ from abc import ABC, abstractmethod
16
+ from collections.abc import Callable, Iterator
17
+ from contextlib import contextmanager
18
+ from dataclasses import dataclass, fields as dataclass_fields
19
+ from typing import Any, TypeGuard, TypeVar, overload
20
+
21
+ from typing_extensions import dataclass_transform
22
+
23
+ from tablambda._ast import Node, WeakHeadBottom
24
+
25
+ _T = TypeVar("_T")
26
+
27
+ # The compiled runtime shares the interpreter's single bottom (``WeakHeadBottom``); ``_BOTTOM`` is the
28
+ # terse internal alias used at the forcing call sites.
29
+ _BOTTOM = WeakHeadBottom.BOTTOM
30
+
31
+
32
class Closure(Node, ABC):
    """Abstract base of every compiled closure: an opaque, closed, one-argument callable ``Node``.

    The compiler's generated closure classes all derive from this class (``interned`` enforces
    it), which is what lets a compiled value flow through the interpreter's
    ``weak_head_normal_form`` and interning alongside interpreted terms. A closure is already a
    weak-head value, and because it is closed it reports ``loose_bound == 0``, so
    ``shift``/``substitute`` have nothing to rewrite.
    """

    __slots__ = ()

    # No de Bruijn index escapes a closure, hence the constant bound.
    loose_bound = 0

    @abstractmethod
    def __call__(self, argument: Node) -> Node:
        """Apply the closure to ``argument``; concrete subclasses must provide this."""
47
+
48
+
49
def _intern(cls: type[_T], field_names: tuple[str, ...]) -> type[_T]:
    """Hash-cons ``cls``'s instances by ``(cls, field-values-by-identity)``.

    Two constructions of the same class whose field values are identical (by ``is``) yield the
    same object. The table is published on the class as ``__intern_pool__`` for introspection;
    it is the very dict the interner consults, so exposing it adds no behaviour.

    The key is built from the constructor arguments before any instance is allocated, so a
    cache hit allocates nothing. Keyword arguments are folded into positional order using
    ``field_names`` first, so ``C(a, b)``, ``C(a, second=b)`` and ``C(first=a, second=b)`` all
    resolve to one pooled instance. (Previously any keyword argument raised ``TypeError`` in
    ``__new__`` even though the installed no-op ``__init__`` accepted keywords.)

    Args:
        cls: The class to intern. Its ``__init__`` is captured and invoked exactly once per
            distinct key; afterwards ``cls.__init__`` is replaced by a no-op.
        field_names: Constructor parameter names in positional order, used to place keyword
            arguments.

    Returns:
        ``cls`` itself, mutated in place (``__new__``, ``__init__``, ``__intern_pool__``).

    Raises:
        TypeError: At construction time, when keyword arguments do not name the fields that
            directly follow the positional arguments (unknown, duplicated, or leaving a gap).
    """
    pool: dict[tuple, object] = {}
    original_init = cls.__init__

    def as_positional(klass, args, kwargs):
        # Keywords must continue the positional prefix without a gap, otherwise there is no
        # positional spelling of the call and therefore no canonical key.
        remaining = field_names[len(args):]
        tail = []
        for name in remaining:
            if name not in kwargs:
                break
            tail.append(kwargs[name])
        if len(tail) != len(kwargs):
            stray = sorted(set(kwargs) - set(remaining[: len(tail)]))
            raise TypeError(
                f"{klass.__name__}() cannot bind keyword argument(s) {stray} to fields {field_names}"
            )
        return args + tuple(tail)

    def __new__(klass, *args, **kwargs):
        if kwargs:
            args = as_positional(klass, args, kwargs)
        # Identity key. NOTE(review): this relies on the original ``__init__`` storing each
        # argument on the instance, so the ids stay live for as long as the pool entry does.
        key = (klass, *map(id, args))
        existing = pool.get(key)
        if existing is not None:
            return existing
        instance = object.__new__(klass)
        original_init(instance, *args)
        pool[key] = instance
        return instance

    cls_any: Any = cls
    cls_any.__new__ = __new__
    # ``type.__call__`` still calls ``__init__`` after ``__new__`` (also on a cache hit); it must
    # not re-run the real initialiser, so it becomes a no-op.
    cls_any.__init__ = lambda self, *args, **kwargs: None
    cls_any.__intern_pool__ = pool
    return cls
80
+
81
+
82
@overload
def interned(cls: type[_T], *, slots: bool = ...) -> type[_T]: ...


@overload
def interned(cls: None = ..., *, slots: bool = ...) -> Callable[[type[_T]], type[_T]]: ...


@dataclass_transform(eq_default=False)
def interned(cls=None, *, slots=True):
    """Class decorator turning a ``Closure`` subclass into a hash-consed, identity-equal dataclass.

    The class is first passed through ``dataclass(eq=False, slots=slots)`` (so generated code
    needs ``@interned`` alone) and then through ``_intern`` with its dataclass field names.
    ``eq=False`` keeps equality identity-based; ``slots`` defaults to ``True``.

    Works both bare (``@interned``) and parameterised (``@interned(slots=False)``): called
    without a class it returns a decorator that remembers ``slots``.
    """
    if cls is None:

        def decorate(klass):
            return interned(klass, slots=slots)

        return decorate

    assert issubclass(cls, Closure), f"@interned expects a Closure subclass, got {cls!r}"
    as_dataclass = dataclass(eq=False, slots=slots)(cls)
    names = tuple(field.name for field in dataclass_fields(as_dataclass))
    return _intern(as_dataclass, names)
110
+
111
+
112
def _deterministic_hash(*parts: int) -> int:
    """Hash a sequence of integers reproducibly, regardless of ``PYTHONHASHSEED``.

    The integers are packed as big-endian signed 64-bit values, digested with SHA-256, and the
    first eight digest bytes are read back as a big-endian unsigned integer.
    """
    packed = struct.pack(">%dq" % len(parts), *parts)
    leading = hashlib.sha256(packed).digest()[:8]
    return int.from_bytes(leading, byteorder="big")
116
+
117
+
118
class Thunk(Node):
    """A suspended application (redex) represented as a ``Node``.

    Thunks are interned (see the rebinding right after the class), so structurally equal redexes
    are one object and the ``weak_head_normal_form`` inherited from ``Node`` is computed once per
    distinct redex; that sharing is what enables tabling.

    ``weak_head_normal_form`` is deliberately NOT redeclared here (it would duplicate ``Node``'s
    fixpoint cache slot); ``_shape.compute_weak_head_normal_form`` dispatches a ``Thunk`` to
    ``force`` instead. A thunk is closed, so ``loose_bound`` is ``0``.
    """

    __slots__ = ("callee", "argument")

    # A redex built from closed compiled values exposes no de Bruijn index either.
    loose_bound = 0

    def __init__(self, callee: Node, argument: Node) -> None:
        self.callee, self.argument = callee, argument

    def __call__(self, a: Node) -> "Thunk":
        # Applying a thunk just suspends one more application.
        return Thunk(self, a)

    def force(self) -> Node | WeakHeadBottom:
        """Reduce this redex to weak head normal form, or to bottom.

        The callee is normalized, applied to the (unforced) argument via ``apply_value``, and
        the application result is normalized again. Bottom at either stage is returned as is.
        """
        # Imported lazily, as in the original (presumably to avoid an import cycle with _shape).
        from tablambda._shape import apply_value, weak_head_normalize

        head = weak_head_normalize(self.callee)
        if head is not _BOTTOM:
            applied = apply_value(head, self.argument)
            if applied is not _BOTTOM:
                return weak_head_normalize(applied)
        return _BOTTOM


Thunk = _intern(Thunk, ("callee", "argument"))
156
+
157
+
158
def _is_thunk(x: object) -> TypeGuard[Thunk]:
    """Type guard: report whether ``x`` is a ``Thunk``, narrowing its static type for callers."""
    return isinstance(x, Thunk)
160
+
161
+
162
+ # --- stack helpers ---------------------------------------------------------------------------------
163
+
164
# Recursion limit requested by ``recursion_headroom``.
_COMPILE_RECURSION_LIMIT = 16_000
# Recursion limit and thread stack size requested by ``run_in_large_stack``.
_RECURSION_LIMIT = 200_000
_STACK_SIZE = 1 << 30  # 1 GiB


@contextmanager
def recursion_headroom() -> Iterator[None]:
    """Context manager that raises the recursion limit to at least ``_COMPILE_RECURSION_LIMIT``.

    A limit that is already higher is left untouched; the limit observed on entry is restored
    on exit, whether or not the body raised.
    """
    saved_limit = sys.getrecursionlimit()
    raised_limit = max(saved_limit, _COMPILE_RECURSION_LIMIT)
    sys.setrecursionlimit(raised_limit)
    try:
        yield
    finally:
        sys.setrecursionlimit(saved_limit)
177
+
178
+
179
def _python_tag() -> str:
    """Return the running interpreter's version tag for generated-module filenames (``py313``).

    Generated modules are rendered with ``ast.unparse``, whose output can vary between Python
    versions, so an artifact produced under one interpreter must not be reused under another;
    tagging the filename keeps them apart.
    """
    major, minor = sys.version_info[:2]
    return f"py{major}{minor}"
185
+
186
+
187
def run_in_large_stack(thunk, *, stack_size=None, recursion_limit=None):
    """Run ``thunk`` in a fresh thread with a large C stack and a high recursion limit.

    The calling thread blocks until the worker finishes. An exception raised by ``thunk`` is
    captured in the worker and re-raised here, in the caller. (Previously it was only reported
    by the threading excepthook and the caller then failed with an unrelated ``ValueError``
    while unpacking an empty result list.)

    Args:
        thunk: Zero-argument callable to execute.
        stack_size: Thread stack size in bytes; defaults to ``_STACK_SIZE`` (1 GiB).
        recursion_limit: Lower bound for the recursion limit while ``thunk`` runs; defaults to
            ``_RECURSION_LIMIT``. A limit that is already higher is kept.

    Returns:
        Whatever ``thunk`` returns.

    Raises:
        BaseException: Whatever ``thunk`` raised, re-raised in the calling thread.
    """
    if stack_size is None:
        stack_size = _STACK_SIZE
    if recursion_limit is None:
        recursion_limit = _RECURSION_LIMIT

    # Exactly one ``(succeeded, payload)`` pair is appended by the worker.
    outcome: list = []

    def run() -> None:
        previous_limit = sys.getrecursionlimit()
        sys.setrecursionlimit(max(previous_limit, recursion_limit))
        try:
            outcome.append((True, thunk()))
        except BaseException as error:  # noqa: BLE001 - handed to the caller, not swallowed
            outcome.append((False, error))
        finally:
            sys.setrecursionlimit(previous_limit)

    # ``threading.stack_size`` applies to threads created afterwards and returns the old value,
    # which is restored once the worker is done.
    previous_stack_size = threading.stack_size(stack_size)
    try:
        worker = threading.Thread(target=run)
        worker.start()
        worker.join()
    finally:
        threading.stack_size(previous_stack_size)

    ((succeeded, payload),) = outcome
    if not succeeded:
        raise payload
    return payload
@@ -0,0 +1,455 @@
1
+ """The defunctionalization boundary: quote, compile, decode, canonicalize, load.
2
+
3
+ Thin Python layer analogous to ``_specialize`` but for the defunctionalization target. The lambda
4
+ compiler ``DEFUN`` produces the Scott-encoded ``ast.Module``; this module quotes the input, runs
5
+ the compiler in the interpreter, decodes the Scott AST to a real ``ast.Module``, deduplicates
6
+ class definitions by node identity (``memoized_decode``), renames every class by the Merkle hash of
7
+ its COMPILED body (``_canonicalize_classes``), and unparses to source. ``load`` execs the source
8
+ with the runtime globals (``Thunk``, ``interned``, ``Closure``) and returns the ``compiled``
9
+ value.
10
+
11
+ Content addressing happens on the compiled dataclass, not the source lambda term. Two source
12
+ closures of the same shape that capture variables at different de Bruijn depths compile to the same
13
+ dataclass (same arity, byte-identical ``__call__`` body over positional capture fields), so the
14
+ boundary collapses them to one class. This is coarser than the source's term equality and makes the
15
+ generated code smaller and more reusable.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import ast
21
+ import contextlib
22
+ import copy
23
+ import hashlib
24
+ from typing import TypeVar
25
+
26
+ from tablambda._ast import Node
27
+ from tablambda._codec import quote_binnat
28
+ from tablambda._defun_codegen import DEFUN
29
+ from tablambda._defun_runtime import (
30
+ Closure,
31
+ Thunk,
32
+ _BOTTOM,
33
+ _is_thunk,
34
+ interned,
35
+ run_in_large_stack,
36
+ )
37
+
38
+ _AstNode = TypeVar("_AstNode", bound=ast.AST)
39
+ from tablambda._dsl import app, build
40
+ from tablambda._pyast import SUPPORTED, _ARITY, _reset_gensym, decode, memoized_decode
41
+
42
+
43
class _RenameClasses(ast.NodeTransformer):
    """Node transformer that rewrites ``ast.Name`` identifiers found in ``mapping``.

    Only ``Name`` nodes are touched; a renamed node is a fresh ``ast.Name`` that keeps the
    original context and source location. Names absent from ``mapping`` are returned unchanged.
    """

    def __init__(self, mapping: "dict[str, str]") -> None:
        self._mapping = mapping

    def visit_Name(self, node: ast.Name) -> ast.Name:
        replacement = self._mapping.get(node.id)
        if replacement is None:
            return node
        fresh = ast.Name(id=replacement, ctx=node.ctx)
        return ast.copy_location(fresh, node)
54
+
55
+
56
def _rename_copy(node: _AstNode, mapping: "dict[str, str]") -> _AstNode:
    """Return a deep copy of ``node`` whose class-name references follow ``mapping``.

    ``node`` itself is never modified: ``_RenameClasses`` runs over the copy. The result is
    asserted to have the same node type as the input.
    """
    duplicate = copy.deepcopy(node)
    renamed = _RenameClasses(mapping).visit(duplicate)
    assert isinstance(renamed, type(node)), (
        f"_RenameClasses must preserve node type {type(node).__name__}, got {type(renamed).__name__}"
    )
    return renamed
63
+
64
+
65
class _RenameFields(ast.NodeTransformer):
    """Node transformer that renames a class's capture fields according to ``mapping``.

    Both bare names (e.g. ``AnnAssign`` targets) and attribute names (e.g. ``self.<field>``)
    are rewritten. Attribute matching looks only at the attribute name, not at the object the
    attribute is read from.
    """

    def __init__(self, mapping: "dict[str, str]") -> None:
        self._mapping = mapping

    def visit_Name(self, node: ast.Name) -> ast.Name:
        replacement = self._mapping.get(node.id)
        if replacement is None:
            return node
        fresh = ast.Name(id=replacement, ctx=node.ctx)
        return ast.copy_location(fresh, node)

    def visit_Attribute(self, node: ast.Attribute) -> ast.Attribute:
        # Children first, so a renamed name on the left of the dot is handled as well.
        self.generic_visit(node)
        if (replacement := self._mapping.get(node.attr)) is not None:
            node.attr = replacement  # attributes are patched in place
        return node
83
+
84
+
85
def _canonicalize_fields(classdef: ast.ClassDef) -> ast.ClassDef:
    """Return a copy of ``classdef`` with capture fields renamed ``cap_0``, ``cap_1``, ...

    A capture field is any class-body ``AnnAssign`` whose target is a plain name; positions
    follow definition order. The input node is left untouched (the rename runs on a deep copy).
    """
    mapping: "dict[str, str]" = {}
    position = 0
    for statement in classdef.body:
        if isinstance(statement, ast.AnnAssign) and isinstance(statement.target, ast.Name):
            mapping[statement.target.id] = f"cap_{position}"
            position += 1
    duplicate = copy.deepcopy(classdef)
    renamed = _RenameFields(mapping).visit(duplicate)
    assert isinstance(renamed, ast.ClassDef)
    return renamed
96
+
97
+
98
def _canonicalize_classes(module: ast.Module) -> ast.Module:
    """Rename every closure class by a content hash of its COMPILED dataclass and drop duplicates.

    Steps:

    1. Every ``ClassDef`` in ``module.body`` gets its capture fields renamed positionally
       (``cap_0``, ``cap_1``, ...). A repeated provisional name must denote an identical
       definition (asserted) and is kept once. Non-class statements are collected separately.
    2. A class's content hash is the first 8 bytes of the SHA-256 of ``ast.dump`` of its body,
       with references to OTHER classes replaced by their content hashes (computed bottom-up over
       the class-reference DAG) and its own name, including references to itself inside its
       body, replaced by the placeholder ``_SELF_``.
    3. Classes are renamed ``vg_<hex digest>``; classes with equal hashes collapse to one.
       Definitions are emitted sorted by name, followed by the renamed non-class statements.

    A class that names itself in its own body used to be fed back into the hash recursion and
    tripped the cycle assertion; self-references are now excluded from the recursion and handled
    by the ``_SELF_`` placeholder alone, as step 2 describes.

    Args:
        module: The decoded module. Its ``body`` is REPLACED in place.

    Returns:
        The same ``module`` object, with the canonical body.

    Raises:
        AssertionError: On conflicting definitions of one provisional name, or on a reference
            cycle between distinct classes.
    """
    classdefs: "dict[str, ast.ClassDef]" = {}
    others: "list[ast.stmt]" = []
    for statement in module.body:
        if isinstance(statement, ast.ClassDef):
            field_canonical = _canonicalize_fields(statement)
            kept = classdefs.get(field_canonical.name)
            if kept is not None:
                assert ast.dump(kept) == ast.dump(field_canonical), (
                    f"provisional class {field_canonical.name!r} has two non-identical definitions"
                )
                continue
            classdefs[field_canonical.name] = field_canonical
        else:
            others.append(statement)
    provisional = set(classdefs)

    def referenced(classdef: ast.ClassDef) -> "set[str]":
        # Every provisional class name mentioned anywhere inside the definition.
        return {n.id for n in ast.walk(classdef) if isinstance(n, ast.Name) and n.id in provisional}

    canonical: "dict[str, str]" = {}
    in_progress: "set[str]" = set()

    def canonical_name(name: str) -> str:
        cached = canonical.get(name)
        if cached is not None:
            return cached
        assert name not in in_progress, f"class reference cycle through {name!r}"
        in_progress.add(name)
        classdef = classdefs[name]
        # Recurse into OTHER classes only: a self-reference is not a cycle, it is covered by the
        # ``_SELF_`` placeholder installed just below.
        mapping = {
            reference: canonical_name(reference)
            for reference in referenced(classdef)
            if reference != name
        }
        mapping[name] = "_SELF_"
        key_node = _rename_copy(classdef, mapping)
        assert isinstance(key_node, ast.ClassDef)
        # ``ClassDef.name`` is a plain string, not a ``Name`` node, so it is patched separately.
        key_node.name = "_SELF_"
        digest = hashlib.sha256(ast.dump(key_node).encode()).digest()[:8]
        result = "vg_" + digest.hex()
        in_progress.discard(name)
        canonical[name] = result
        return result

    for name in classdefs:
        canonical_name(name)

    global_mapping = {name: canonical[name] for name in provisional}
    deduped: "dict[str, ast.ClassDef]" = {}
    for name, classdef in classdefs.items():
        renamed = _rename_copy(classdef, global_mapping)
        assert isinstance(renamed, ast.ClassDef)
        renamed.name = canonical[name]
        # Equal content hashes land on the same key, which is what collapses duplicates.
        deduped[renamed.name] = renamed

    sorted_defs: "list[ast.stmt]" = [deduped[key] for key in sorted(deduped)]
    new_others = [_rename_copy(statement, global_mapping) for statement in others]
    module.body = sorted_defs + new_others
    return module
163
+
164
+
165
+ # --- Direct defun decoder: decode Scott-encoded AST from defunctionalized values -----------------
166
+ # Mirrors ``_pyast.decode`` but operates directly on compiled values (``Thunk`` + ``Closure``),
167
+ # reflecting Scott structure out of them with marker handlers in the self-hosted compilation path.
168
+
169
+
170
+ # The decoder reflects Scott structure out of compiled values by feeding them marker handlers and
171
+ # reading which handler fires. Compiled code embeds these markers as ``Thunk`` callees/arguments and
172
+ # forces them, so each marker must be a ``Node`` (a ``Closure``): a marker's weak head normal form is
173
+ # itself, and applying a callable marker runs its Python ``__call__``.
174
+
175
+
176
class _TagMarker(Closure):
    """Handler marker used to read a Scott constructor out of a compiled value.

    One marker is supplied per constructor alternative, carrying that alternative's ``tag``.
    Each application of the marker records the argument in ``fields`` and returns the marker
    itself, so after the Scott value has applied its chosen handler to all fields, the marker
    holds both the selected tag and the field values in order.
    """

    __slots__ = ("tag", "fields")

    def __init__(self, tag: int) -> None:
        self.tag = tag
        # Filled by successive ``__call__`` invocations, in application order.
        self.fields: list[object] = []

    def __call__(self, argument: Node) -> Node:
        collected = self.fields
        collected.append(argument)
        return self
189
+
190
+
191
class _ChurchApp(Closure):
    """One link of a decoded Church-numeral spine: the successor marker applied to ``argument``.

    It is a plain spine value that the decoder follows through ``argument``; applying it is an
    error and raises ``TypeError``.
    """

    __slots__ = ("argument",)

    def __init__(self, argument: object) -> None:
        # The value the successor was applied to (the rest of the spine).
        self.argument = argument

    def __call__(self, argument: Node) -> Node:
        raise TypeError("_ChurchApp is a spine value, not applicable")
202
+
203
+
204
class _ChurchSucc(Closure):
    """Successor marker for Church-numeral decoding: each application yields a ``_ChurchApp``."""

    __slots__ = ()

    def __call__(self, argument: Node) -> Node:
        # Wrap the predecessor so the decoder can count the links afterwards.
        link = _ChurchApp(argument)
        return link
211
+
212
+
213
class _ChurchZero(Closure):
    """Zero marker for Church-numeral decoding: the base value that terminates the spine.

    It is never meant to be applied; doing so raises ``TypeError``.
    """

    __slots__ = ()

    def __call__(self, argument: Node) -> Node:
        raise TypeError("_ChurchZero is a base value, not applicable")
220
+
221
+
222
# Shared marker instances handed to compiled Church numerals by ``_church_to_int_defun``.
_CHURCH_SUCC_DEFUN = _ChurchSucc()
_CHURCH_ZERO_DEFUN = _ChurchZero()

# Decoder state, keyed by ``id(...)`` of the compiled value.
_church_int_cache: "dict[int, int]" = dict()  # id(numeral value) -> decoded int
_defun_gensym_ids: "dict[int, str]" = dict()  # id(gensym payload) -> provisional name
_defun_gensym_counter: int = 0  # next provisional-name serial number
# ``None`` outside ``_memoized_decode_defun``; inside it, id(value) -> decoded AST node.
_defun_decode_memo: "dict[int, ast.AST] | None" = None
229
+
230
+
231
def _reset_defun_gensym() -> None:
    """Forget all decoded Church numerals and gensym names and restart the gensym counter at 0."""
    global _defun_gensym_counter
    for table in (_church_int_cache, _defun_gensym_ids):
        table.clear()
    _defun_gensym_counter = 0
236
+
237
+
238
@contextlib.contextmanager
def _memoized_decode_defun():
    """Context manager enabling identity-keyed memoization in ``decode_defun``.

    A fresh memo table is installed on entry and removed on exit (also when the body raises).
    The context does not nest: entering it while a memo is active fails an assertion.
    """
    global _defun_decode_memo
    assert _defun_decode_memo is None, "memoized decode_defun does not nest"
    _defun_decode_memo = dict()
    try:
        yield
    finally:
        _defun_decode_memo = None
247
+
248
+
249
def _force_defun(value: object) -> object:
    """Return ``value`` forced to weak head normal form if it is a ``Thunk``, else unchanged.

    Forcing a thunk to bottom is treated as a decoder invariant violation (assertion).
    """
    if not _is_thunk(value):
        return value
    whnf = value.weak_head_normal_form
    assert whnf is not _BOTTOM, "hit bottom while forcing defun value"
    return whnf
255
+
256
+
257
def _extract_defun(value: object, arities: "tuple[int, ...]") -> "tuple[int, list[object]]":
    """Read the constructor tag and field values out of a Scott-encoded compiled value.

    The forced value is applied to one fresh ``_TagMarker`` per alternative (``len(arities)``
    of them, tagged ``0, 1, ...``), forcing after every application. The value that remains
    must be the marker of the selected alternative, which has accumulated the fields.

    Only the NUMBER of entries in ``arities`` is used here; the arity values are not checked.

    Returns:
        ``(tag, fields)`` of the selected alternative.
    """
    current = _force_defun(value)
    for tag in range(len(arities)):
        assert callable(current), f"expected callable during extraction, got {type(current).__name__}"
        handler = _TagMarker(tag)
        current = _force_defun(current(handler))
    assert isinstance(current, _TagMarker), (
        f"expected _TagMarker after extraction, got {type(current).__name__}"
    )
    return current.tag, current.fields
267
+
268
+
269
def _church_to_int_defun(value: object) -> int:
    """Decode a compiled Church numeral to a Python ``int``.

    The numeral is applied to the successor marker and then to the zero marker; the result is
    a chain of ``_ChurchApp`` links ending in the zero marker, and the number of links is the
    numeral's value. Results are cached by ``id(value)`` until ``_reset_defun_gensym`` runs.
    """
    key = id(value)
    cached = _church_int_cache.get(key)
    if cached is not None:
        return cached

    head = _force_defun(value)
    assert callable(head), f"church spine head must be callable, got {type(head).__name__}"
    with_succ = _force_defun(head(_CHURCH_SUCC_DEFUN))
    assert callable(with_succ), f"church spine successor result must be callable, got {type(with_succ).__name__}"
    node = _force_defun(with_succ(_CHURCH_ZERO_DEFUN))

    # Walk the spine: one ``_ChurchApp`` per successor application.
    count = 0
    while isinstance(node, _ChurchApp):
        count += 1
        node = _force_defun(node.argument)
    assert node is _CHURCH_ZERO_DEFUN, "church spine did not end at zero marker"

    _church_int_cache[key] = count
    return count
288
+
289
+
290
def _decode_scott_list_defun(value: object) -> "list[object]":
    """Decode a Scott-encoded list into a Python list of its (still encoded) elements.

    Tag ``0`` is cons (fields: head, tail) and tag ``1`` is nil; any other tag fails an
    assertion. The elements themselves are returned undecoded.
    """
    items: "list[object]" = []
    tag, fields = _extract_defun(value, (2, 0))
    while tag != 1:
        assert tag == 0, f"expected cons (0) or nil (1), got {tag}"
        items.append(fields[0])
        tag, fields = _extract_defun(fields[1], (2, 0))
    return items
300
+
301
+
302
def _gensym_name_defun(payload: object) -> str:
    """Return the provisional ``vg_<16 hex digits>`` name for ``payload``.

    Names are assigned by object identity (``id(payload)``): the first request for a payload
    takes the next counter value, later requests for the same object return the same name.
    """
    global _defun_gensym_counter
    key = id(payload)
    if key not in _defun_gensym_ids:
        _defun_gensym_ids[key] = f"vg_{_defun_gensym_counter:016x}"
        _defun_gensym_counter += 1
    return _defun_gensym_ids[key]
312
+
313
+
314
def _decode_field_defun(value: object) -> object:
    """Decode one encoded AST field: a ``(kind, payload)`` pair, with ``kind`` a Church numeral.

    Kinds handled here:

    * ``0``: nested AST node (``decode_defun``)
    * ``1``: list of fields
    * ``2``: integer (Church numeral)
    * ``3``: string, as a list of Church-numeral code points
    * ``5``: ``None``
    * ``7``: gensym name (``_gensym_name_defun``)

    Raises:
        ValueError: For any other kind.
    """
    _, fields = _extract_defun(value, (2,))
    kind_value, payload = fields
    kind = _church_to_int_defun(kind_value)
    if kind == 0:
        return decode_defun(payload)
    if kind == 1:
        return [_decode_field_defun(item) for item in _decode_scott_list_defun(payload)]
    if kind == 2:
        return _church_to_int_defun(payload)
    if kind == 3:
        return "".join(chr(_church_to_int_defun(code)) for code in _decode_scott_list_defun(payload))
    if kind == 5:
        return None
    if kind == 7:
        return _gensym_name_defun(payload)
    raise ValueError(f"defun decode: unsupported field kind {kind}")
333
+
334
+
335
def decode_defun(value: object) -> ast.AST:
    """Decode a Scott-encoded AST node straight from defunctionalized values.

    The constructor tag selects the node class from ``SUPPORTED`` and each field is decoded
    with ``_decode_field_defun``; no interpreter-tree readback is involved. Inside
    ``_memoized_decode_defun`` results are memoized by ``id(value)``, so each distinct value is
    decoded once and the same AST object is returned for repeats.
    """
    memo = _defun_decode_memo
    if memo is not None:
        cached = memo.get(id(value))
        if cached is not None:
            return cached
    tag, fields = _extract_defun(value, _ARITY)
    node_class = SUPPORTED[tag]
    decoded = node_class(*map(_decode_field_defun, fields))
    if memo is not None:
        memo[id(value)] = decoded
    return decoded
352
+
353
+
354
def defunctionalize(node: Node) -> str:
    """Compile a lambda term to defunctionalized Python source (a module of closure classes).

    Pipeline: quote ``node``, apply ``DEFUN`` to it, decode the resulting Scott-encoded module
    (memoized), canonicalize its classes, and unparse.

    All of this runs inside ``run_in_large_stack``: the interpreter's substitution recursion
    can be as deep as the term, which overflows the C stack on Python 3.12+ (C recursion is
    capped there regardless of ``setrecursionlimit``).
    """

    def work() -> str:
        scott_module = build(app(DEFUN, quote_binnat(node)))
        _reset_gensym()
        with memoized_decode():
            decoded = decode(scott_module)
        assert isinstance(decoded, ast.Module)
        canonical_module = _canonicalize_classes(decoded)
        located = ast.fix_missing_locations(canonical_module)
        return ast.unparse(located)

    return run_in_large_stack(work)
371
+
372
+
373
def _defun_globals() -> dict:
    """Return a fresh globals dict binding the runtime names that generated code references."""
    return dict(Thunk=Thunk, interned=interned, Closure=Closure)
379
+
380
+
381
def load_namespace(source: str) -> dict:
    """Execute defunctionalized source and return the resulting module namespace.

    The namespace starts as the runtime globals (``_defun_globals``) and, after execution,
    also holds every generated closure class (each carrying its ``__intern_pool__``) and the
    ``compiled`` value, so callers can run the program and inspect its tabled objects.

    NOTE: ``source`` is executed with ``exec``; it must come from a trusted producer.
    """
    namespace = _defun_globals()
    code = compile(source, "<defun>", "exec")
    exec(code, namespace)  # noqa: S102
    return namespace
390
+
391
+
392
def load(source: str) -> object:
    """Execute defunctionalized source and return the value it binds to ``compiled``."""
    namespace = load_namespace(source)
    return namespace["compiled"]
395
+
396
+
397
def defunctionalize_and_load(node: Node) -> object:
    """Compile a lambda term with ``defunctionalize`` and load the ``compiled`` value it defines."""
    source = defunctionalize(node)
    return load(source)
400
+
401
+
402
+ # A self-contained import header so a generated defunctionalized module runs on its own: it binds the
403
+ # runtime free names the generated code references (``Closure``, ``Thunk``, ``interned``). Every
404
+ # generated closure explicitly subclasses ``Closure`` and annotates its captures ``cap_i: Closure``;
405
+ # ``interned`` applies ``dataclass(eq=False)`` and hash-conses, so generated classes carry only
406
+ # ``@interned``.
407
# Header text, one entry per line; the trailing empty entry yields the final newline.
_DEFUN_MODULE_HEADER = "\n".join(
    (
        "# Generated, self-contained module: the import header is added at serialization time (see",
        "# tablambda._defunctionalize.runnable_defun_module); the body is emitted by the DEFUN lambda",
        "# term and content-addressed by compiled dataclass shape.",
        "from tablambda._defun_runtime import Closure, Thunk, interned",
        "",
    )
)


def runnable_defun_module(source: str) -> str:
    """Return ``source`` prefixed with the runtime import header and one blank line.

    The header imports ``Closure``, ``Thunk`` and ``interned``, so the resulting module can be
    imported on its own.
    """
    return f"{_DEFUN_MODULE_HEADER}\n{source}"
418
+
419
+
420
def defun_compiler_source() -> str:
    """Return ``DEFUN`` compiled by itself, as a runnable module with the import header.

    This is the dataclass-form "compiled compiler". Importing the returned source binds
    ``compiled`` to the defunctionalized ``DEFUN`` value; applying that value (through a
    ``Thunk``) to a quoted program yields the program's compiled Scott ``ast.Module`` as a
    defunctionalized value.
    """
    from tablambda._defun_codegen import DEFUN

    self_compiled = defunctionalize(build(DEFUN))
    return runnable_defun_module(self_compiled)
431
+
432
+
433
def compile_with_defun(engine: object, node: Node) -> str:
    """Compile ``node`` by RUNNING a defunctionalized ``DEFUN`` engine (self-hosting).

    ``engine`` is the ``compiled`` value of a ``defun_compiler_source`` module. ``node`` is
    quoted and itself defunctionalized and loaded, so the engine receives a defunctionalized
    Scott source value. The engine's result (a defunctionalized Scott ``ast.Module``) is
    decoded with ``decode_defun``, canonicalized and unparsed; the intent is output identical
    to the in-process ``defunctionalize``.

    The engine application and the decoding run inside ``run_in_large_stack``; the worker
    raises ``ValueError`` when applying the engine yields bottom.
    """
    quoted_argument = defunctionalize_and_load(build(quote_binnat(node)))

    def work() -> str:
        application = Thunk(engine, quoted_argument)
        result = application.weak_head_normal_form
        if result is _BOTTOM:
            raise ValueError("the defunctionalized compiler did not produce a module")
        _reset_defun_gensym()
        with _memoized_decode_defun():
            decoded = decode_defun(result)
        assert isinstance(decoded, ast.Module)
        canonical_module = _canonicalize_classes(decoded)
        located = ast.fix_missing_locations(canonical_module)
        return ast.unparse(located)

    return run_in_large_stack(work)
455
+