tablambda 0.6.0.post30.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tablambda/__init__.py +1 -0
- tablambda/_analysis.py +92 -0
- tablambda/_ast.py +289 -0
- tablambda/_binnat.py +170 -0
- tablambda/_codec.py +147 -0
- tablambda/_compiler_artifact.py +50 -0
- tablambda/_defun_codegen.py +324 -0
- tablambda/_defun_runtime.py +207 -0
- tablambda/_defunctionalize.py +455 -0
- tablambda/_dsl.py +148 -0
- tablambda/_generated/.gitattributes +5 -0
- tablambda/_generated/__init__.py +1 -0
- tablambda/_generated/_generated_defun_compiler_py311.py +7579 -0
- tablambda/_generated/_generated_defun_compiler_py312.py +7579 -0
- tablambda/_generated/_generated_defun_compiler_py313.py +7579 -0
- tablambda/_hoas_latex.py +144 -0
- tablambda/_latex.py +56 -0
- tablambda/_prelude.py +163 -0
- tablambda/_pyast.py +416 -0
- tablambda/_pybuild.py +315 -0
- tablambda/_reduce.py +145 -0
- tablambda/_shape.py +129 -0
- tablambda/_sugar.py +74 -0
- tablambda/_typecheck.py +370 -0
- tablambda-0.6.0.post30.dev0.dist-info/METADATA +45 -0
- tablambda-0.6.0.post30.dev0.dist-info/RECORD +28 -0
- tablambda-0.6.0.post30.dev0.dist-info/WHEEL +4 -0
- tablambda-0.6.0.post30.dev0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
"""The defunctionalized runtime: the minimal execution substrate for compiled code.
|
|
2
|
+
|
|
3
|
+
Generated code references three free names from this module: ``Closure``, ``Thunk``, and ``interned``.
|
|
4
|
+
A compiled ``Closure`` and a ``Thunk`` are both ``Node``s, so they share the interpreter's
|
|
5
|
+
``weak_head_normal_form`` and interning and can run mixed with interpreted terms. The runtime holds NO
|
|
6
|
+
domain logic; all compilation decisions live in the pure-lambda compiler ``_defun_codegen``.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import hashlib
|
|
12
|
+
import struct
|
|
13
|
+
import sys
|
|
14
|
+
import threading
|
|
15
|
+
from abc import ABC, abstractmethod
|
|
16
|
+
from collections.abc import Callable, Iterator
|
|
17
|
+
from contextlib import contextmanager
|
|
18
|
+
from dataclasses import dataclass, fields as dataclass_fields
|
|
19
|
+
from typing import Any, TypeGuard, TypeVar, overload
|
|
20
|
+
|
|
21
|
+
from typing_extensions import dataclass_transform
|
|
22
|
+
|
|
23
|
+
from tablambda._ast import Node, WeakHeadBottom
|
|
24
|
+
|
|
25
|
+
_T = TypeVar("_T")
|
|
26
|
+
|
|
27
|
+
# The compiled runtime shares the interpreter's single bottom (``WeakHeadBottom``); ``_BOTTOM`` is the
|
|
28
|
+
# terse internal alias used at the forcing call sites.
|
|
29
|
+
_BOTTOM = WeakHeadBottom.BOTTOM
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class Closure(Node, ABC):
    """Abstract base of every compiled closure: a closed, one-argument callable ``Node``.

    ``interned`` only accepts subclasses of this class, so every generated closure class is a
    ``Node`` and can share the interpreter's ``weak_head_normal_form`` and interning machinery.
    By design a closure is its own weak head normal form and exposes no free de Bruijn index,
    which is why ``loose_bound`` is pinned to ``0`` (``shift``/``substitute`` have nothing to do).
    """

    # No per-instance storage of its own; concrete subclasses declare their capture fields.
    __slots__ = ()

    # Closed term: there is no loose de Bruijn index to shift or substitute.
    loose_bound = 0

    @abstractmethod
    def __call__(self, argument: Node) -> Node:
        """Apply this closure to ``argument`` and return the resulting ``Node``."""
        ...
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _intern(cls: type[_T], field_names: tuple[str, ...]) -> type[_T]:
    """Hash-cons ``cls``'s instances by ``(cls, field-values-by-identity)``.

    Two constructions of the same class whose field values are the very same objects (``is``)
    yield one shared instance. The pool key is built from the constructor arguments directly, so
    a cache hit never allocates a throwaway instance. The table is exposed on the class as
    ``__intern_pool__`` for introspection; it is the same dict the interner uses internally.

    Keyword arguments are accepted and normalised onto their positional slots using
    ``field_names``, so ``C(x, y)``, ``C(x, b=y)`` and ``C(a=x, b=y)`` all resolve to one pool
    entry. Keyword construction must name every field not already given positionally.

    Args:
        cls: The class to intern. Its ``__new__`` and ``__init__`` are replaced in place.
        field_names: Constructor parameter names, in positional order.

    Returns:
        ``cls`` itself, now interning its instances.

    Raises:
        TypeError: (at construction time) if keyword arguments are unknown, duplicate a
            positional argument, or leave a remaining field unspecified.
    """
    pool: dict[tuple, object] = {}
    original_init = cls.__init__

    def __new__(klass, *args, **kwargs):
        if kwargs:
            # Map keywords to positions so the identity key is independent of call spelling.
            remaining = field_names[len(args):]
            missing = [name for name in remaining if name not in kwargs]
            unexpected = [name for name in kwargs if name not in remaining]
            if missing or unexpected:
                raise TypeError(
                    f"{klass.__name__}() keyword arguments do not match fields: "
                    f"missing {missing!r}, unexpected {unexpected!r}"
                )
            args += tuple(kwargs[name] for name in remaining)
        key = (klass,) + tuple(id(a) for a in args)
        existing = pool.get(key)
        if existing is not None:
            return existing
        instance = object.__new__(klass)
        # Initialise exactly once, here; the class-level ``__init__`` below is a no-op so that a
        # pool hit is not re-initialised when ``type.__call__`` invokes ``__init__`` afterwards.
        original_init(instance, *args)
        pool[key] = instance
        return instance

    cls_any: Any = cls
    cls_any.__new__ = __new__
    cls_any.__init__ = lambda self, *args, **kwargs: None
    cls_any.__intern_pool__ = pool
    return cls
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@overload
def interned(cls: type[_T], *, slots: bool = ...) -> type[_T]: ...


@overload
def interned(
    cls: None = ..., *, slots: bool = ...
) -> Callable[[type[_T]], type[_T]]: ...


@dataclass_transform(eq_default=False)
def interned(cls=None, *, slots=True):
    """Class decorator turning a ``Closure`` subclass into a hash-consed dataclass.

    The class is first passed through ``dataclass(eq=False, slots=slots)`` (so generated code
    needs only ``@interned``) and then through ``_intern`` with its dataclass field names.
    ``eq=False`` keeps equality identity-based. Works both bare (``@interned``) and
    parameterised (``@interned(slots=False)``).

    Args:
        cls: The class being decorated, or ``None`` when used with arguments.
        slots: Forwarded to ``dataclass``; defaults to ``True``.

    Returns:
        The interned class, or a decorator producing it when ``cls`` is ``None``.
    """
    if cls is None:
        # Parameterised use: hand back a decorator that remembers ``slots``.
        def decorate(klass):
            return interned(klass, slots=slots)

        return decorate

    assert issubclass(cls, Closure), f"@interned expects a Closure subclass, got {cls!r}"
    as_dataclass = dataclass(eq=False, slots=slots)(cls)
    names = tuple(entry.name for entry in dataclass_fields(as_dataclass))
    return _intern(as_dataclass, names)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _deterministic_hash(*parts: int) -> int:
    """Hash a sequence of integers reproducibly, independent of ``PYTHONHASHSEED``.

    The integers are packed as big-endian signed 64-bit values, hashed with SHA-256, and the
    first eight digest bytes are read back as a big-endian unsigned integer.
    """
    layout = ">" + str(len(parts)) + "q"
    digest = hashlib.sha256(struct.pack(layout, *parts)).digest()
    return int.from_bytes(digest[:8], "big")
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
class Thunk(Node):
    """A suspended application (redex) represented as a ``Node``.

    Instances are interned (see the rebinding right after the class), so redexes built from the
    same callee and argument objects are one object; this is what lets the weak head normal form
    inherited from ``Node`` be computed once per distinct redex.

    The class deliberately does not redeclare ``weak_head_normal_form``; per the runtime's design
    ``_shape.compute_weak_head_normal_form`` routes a ``Thunk`` to ``force``. A thunk is closed,
    hence ``loose_bound`` is ``0``.
    """

    __slots__ = ("callee", "argument")

    # Closed term: a redex over closed compiled values has no loose de Bruijn index.
    loose_bound = 0

    def __init__(self, callee: Node, argument: Node) -> None:
        self.callee, self.argument = callee, argument

    def __call__(self, a: Node) -> "Thunk":
        """Applying a thunk just suspends a further application on top of it."""
        return Thunk(self, a)

    def force(self) -> Node | WeakHeadBottom:
        """Reduce this redex to weak head normal form.

        The callee is normalised, applied to the argument via ``apply_value``, and the result is
        normalised again. Bottom at either step short-circuits to bottom.
        """
        # Imported lazily, as in the original, to avoid importing ``_shape`` at module load.
        from tablambda._shape import apply_value, weak_head_normalize

        head = weak_head_normalize(self.callee)
        if head is not _BOTTOM:
            applied = apply_value(head, self.argument)
            if applied is not _BOTTOM:
                return weak_head_normalize(applied)
        return _BOTTOM


# Rebind the name to the hash-consing version of the class.
Thunk = _intern(Thunk, ("callee", "argument"))
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _is_thunk(x: object) -> TypeGuard[Thunk]:
    """Type guard: report whether ``x`` is a ``Thunk`` instance."""
    is_redex = isinstance(x, Thunk)
    return is_redex
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
# --- stack helpers ---------------------------------------------------------------------------------
|
|
163
|
+
|
|
164
|
+
# Floor for the recursion limit inside ``recursion_headroom``.
_COMPILE_RECURSION_LIMIT = 16_000
# Floor for the recursion limit inside the worker thread of ``run_in_large_stack``.
_RECURSION_LIMIT = 200_000
# Stack size requested for that worker thread.
_STACK_SIZE = 1 << 30  # 1 GiB
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
@contextmanager
def recursion_headroom() -> Iterator[None]:
    """Temporarily raise the recursion limit to at least ``_COMPILE_RECURSION_LIMIT``.

    The limit in force on entry is restored on exit, whether or not the body raised. A limit
    that is already higher than the floor is left as it is.
    """
    saved_limit = sys.getrecursionlimit()
    raised_limit = max(saved_limit, _COMPILE_RECURSION_LIMIT)
    sys.setrecursionlimit(raised_limit)
    try:
        yield
    finally:
        sys.setrecursionlimit(saved_limit)
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def _python_tag() -> str:
    """Return the running interpreter's version tag for generated-module filenames, e.g. ``py313``.

    Generated modules are rendered with ``ast.unparse``, whose formatting may differ between
    Python versions, so artifacts are kept distinct per interpreter version.
    """
    major, minor = sys.version_info[:2]
    return f"py{major}{minor}"
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def run_in_large_stack(thunk):
    """Run ``thunk`` in a worker thread with a large stack and a high recursion limit.

    The worker is created with a stack of ``_STACK_SIZE`` bytes, and while it runs the recursion
    limit is raised to at least ``_RECURSION_LIMIT``. Both the thread stack size setting and the
    recursion limit are restored afterwards.

    Args:
        thunk: A zero-argument callable.

    Returns:
        Whatever ``thunk()`` returns.

    Raises:
        BaseException: Any exception raised by ``thunk`` is re-raised in the calling thread.
            (Previously it was lost in the worker and the caller saw an unrelated ``ValueError``
            from unpacking an empty result list.)
    """
    result: list = []
    failure: list = []

    def run() -> None:
        previous_limit = sys.getrecursionlimit()
        sys.setrecursionlimit(max(previous_limit, _RECURSION_LIMIT))
        try:
            result.append(thunk())
        except BaseException as error:  # noqa: BLE001 - transported to the caller below
            failure.append(error)
        finally:
            sys.setrecursionlimit(previous_limit)

    # ``threading.stack_size`` affects threads created afterwards, so set it only around the
    # creation of this worker and put the previous value back no matter what.
    previous_stack_size = threading.stack_size(_STACK_SIZE)
    try:
        worker = threading.Thread(target=run)
        worker.start()
        worker.join()
    finally:
        threading.stack_size(previous_stack_size)
    if failure:
        raise failure[0]
    (single_result,) = result
    return single_result
|
|
@@ -0,0 +1,455 @@
|
|
|
1
|
+
"""The defunctionalization boundary: quote, compile, decode, canonicalize, load.
|
|
2
|
+
|
|
3
|
+
Thin Python layer analogous to ``_specialize`` but for the defunctionalization target. The lambda
|
|
4
|
+
compiler ``DEFUN`` produces the Scott-encoded ``ast.Module``; this module quotes the input, runs
|
|
5
|
+
the compiler in the interpreter, decodes the Scott AST to a real ``ast.Module``, deduplicates
|
|
6
|
+
class definitions by node identity (``memoized_decode``), renames every class by the Merkle hash of
|
|
7
|
+
its COMPILED body (``_canonicalize_classes``), and unparses to source. ``load`` execs the source
|
|
8
|
+
with the runtime globals (``Closure``, ``Thunk``, ``interned``) and returns the ``compiled``
|
|
9
|
+
value.
|
|
10
|
+
|
|
11
|
+
Content addressing happens on the compiled dataclass, not the source lambda term. Two source
|
|
12
|
+
closures of the same shape that capture variables at different de Bruijn depths compile to the same
|
|
13
|
+
dataclass (same arity, byte-identical ``__call__`` body over positional capture fields), so the
|
|
14
|
+
boundary collapses them to one class. This is coarser than the source's term equality and makes the
|
|
15
|
+
generated code smaller and more reusable.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import ast
|
|
21
|
+
import contextlib
|
|
22
|
+
import copy
|
|
23
|
+
import hashlib
|
|
24
|
+
from typing import TypeVar
|
|
25
|
+
|
|
26
|
+
from tablambda._ast import Node
|
|
27
|
+
from tablambda._codec import quote_binnat
|
|
28
|
+
from tablambda._defun_codegen import DEFUN
|
|
29
|
+
from tablambda._defun_runtime import (
|
|
30
|
+
Closure,
|
|
31
|
+
Thunk,
|
|
32
|
+
_BOTTOM,
|
|
33
|
+
_is_thunk,
|
|
34
|
+
interned,
|
|
35
|
+
run_in_large_stack,
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
_AstNode = TypeVar("_AstNode", bound=ast.AST)
|
|
39
|
+
from tablambda._dsl import app, build
|
|
40
|
+
from tablambda._pyast import SUPPORTED, _ARITY, _reset_gensym, decode, memoized_decode
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class _RenameClasses(ast.NodeTransformer):
    """Node transformer that renames ``ast.Name`` identifiers found in ``mapping``."""

    def __init__(self, mapping: "dict[str, str]") -> None:
        # old identifier -> new identifier
        self._mapping = mapping

    def visit_Name(self, node: ast.Name) -> ast.Name:
        """Return a renamed copy of ``node`` when its id is mapped, else ``node`` unchanged."""
        if (target := self._mapping.get(node.id)) is None:
            return node
        replacement = ast.Name(id=target, ctx=node.ctx)
        return ast.copy_location(replacement, node)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _rename_copy(node: _AstNode, mapping: "dict[str, str]") -> _AstNode:
    """Return a deep copy of ``node`` in which mapped class-name references are renamed.

    ``node`` itself is not modified; the rewrite is applied to the copy.
    """
    clone = copy.deepcopy(node)
    renamed = _RenameClasses(mapping).visit(clone)
    assert isinstance(renamed, type(node)), (
        f"_RenameClasses must preserve node type {type(node).__name__}, got {type(renamed).__name__}"
    )
    return renamed
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class _RenameFields(ast.NodeTransformer):
    """Node transformer that renames capture fields: mapped ``Name`` ids and attribute names."""

    def __init__(self, mapping: "dict[str, str]") -> None:
        # old field name -> new field name
        self._mapping = mapping

    def visit_Name(self, node: ast.Name) -> ast.Name:
        """Replace a mapped name (e.g. an ``AnnAssign`` target) with a renamed copy."""
        target = self._mapping.get(node.id)
        if target is None:
            return node
        return ast.copy_location(ast.Name(id=target, ctx=node.ctx), node)

    def visit_Attribute(self, node: ast.Attribute) -> ast.Attribute:
        """Rename the attribute in place when mapped, after visiting its sub-expressions."""
        self.generic_visit(node)
        replacement = self._mapping.get(node.attr)
        if replacement is None:
            return node
        node.attr = replacement
        return node
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _canonicalize_fields(classdef: ast.ClassDef) -> ast.ClassDef:
    """Return a copy of ``classdef`` whose capture fields are named ``cap_0``, ``cap_1``, ...

    A capture field is an annotated assignment directly in the class body whose target is a
    plain name; positions follow definition order. The input node is left untouched.
    """
    mapping: "dict[str, str]" = {}
    position = 0
    for statement in classdef.body:
        if not isinstance(statement, ast.AnnAssign):
            continue
        target = statement.target
        if not isinstance(target, ast.Name):
            continue
        mapping[target.id] = f"cap_{position}"
        position += 1
    renamed = _RenameFields(mapping).visit(copy.deepcopy(classdef))
    assert isinstance(renamed, ast.ClassDef)
    return renamed
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _canonicalize_classes(module: ast.Module) -> ast.Module:
    """Rename every closure class by a content hash of its compiled body and drop duplicates.

    Steps:
      1. Each class has its capture fields renamed positionally (``cap_0``, ``cap_1``, ...).
      2. Each class gets a Merkle-style name: the SHA-256 (first 8 bytes, hex, ``vg_`` prefix)
         of its ``ast.dump`` after references to other classes are replaced by THEIR content
         names and its own name is replaced by the placeholder ``_SELF_``.
      3. Classes that end up with the same name collapse to one; definitions are emitted sorted
         by name, followed by the remaining statements with class references renamed.

    A class may reference itself: its own name is excluded from the recursive resolution and
    handled solely by the ``_SELF_`` placeholder. (Previously a self-reference recursed into the
    class being hashed and tripped the cycle assertion.) Cycles through two or more classes are
    still rejected.

    Args:
        module: The decoded module. Its ``body`` is replaced in place.

    Returns:
        The same ``module`` object, with the canonicalized body.

    Raises:
        AssertionError: If one provisional name has two different definitions, or if classes
            reference each other cyclically.
    """
    classdefs: "dict[str, ast.ClassDef]" = {}
    others: "list[ast.stmt]" = []
    for statement in module.body:
        if not isinstance(statement, ast.ClassDef):
            others.append(statement)
            continue
        field_canonical = _canonicalize_fields(statement)
        kept = classdefs.get(field_canonical.name)
        if kept is None:
            classdefs[field_canonical.name] = field_canonical
            continue
        # A provisional name may repeat, but only for an identical definition.
        assert ast.dump(kept) == ast.dump(field_canonical), (
            f"provisional class {field_canonical.name!r} has two non-identical definitions"
        )
    provisional = set(classdefs)

    def referenced(classdef: ast.ClassDef) -> "set[str]":
        """Provisional class names mentioned anywhere inside ``classdef``."""
        return {n.id for n in ast.walk(classdef) if isinstance(n, ast.Name) and n.id in provisional}

    canonical: "dict[str, str]" = {}
    in_progress: "set[str]" = set()

    def canonical_name(name: str) -> str:
        """Content-hash name of the provisional class ``name`` (memoized, computed bottom-up)."""
        cached = canonical.get(name)
        if cached is not None:
            return cached
        assert name not in in_progress, f"class reference cycle through {name!r}"
        in_progress.add(name)
        classdef = classdefs[name]
        # Resolve only OTHER classes recursively; a self-reference is covered by ``_SELF_``.
        mapping = {
            reference: canonical_name(reference)
            for reference in referenced(classdef)
            if reference != name
        }
        mapping[name] = "_SELF_"
        key_node = _rename_copy(classdef, mapping)
        assert isinstance(key_node, ast.ClassDef)
        key_node.name = "_SELF_"
        digest = hashlib.sha256(ast.dump(key_node).encode()).digest()[:8]
        result = "vg_" + digest.hex()
        in_progress.discard(name)
        canonical[name] = result
        return result

    for name in classdefs:
        canonical_name(name)

    global_mapping = {name: canonical[name] for name in provisional}
    deduped: "dict[str, ast.ClassDef]" = {}
    for name, classdef in classdefs.items():
        renamed = _rename_copy(classdef, global_mapping)
        assert isinstance(renamed, ast.ClassDef)
        renamed.name = canonical[name]
        # Equal content names mean equal compiled bodies, so the later copy simply replaces.
        deduped[renamed.name] = renamed

    sorted_defs: "list[ast.stmt]" = [deduped[key] for key in sorted(deduped)]
    new_others = [_rename_copy(statement, global_mapping) for statement in others]
    module.body = sorted_defs + new_others
    return module
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
# --- Direct defun decoder: decode Scott-encoded AST from defunctionalized values -----------------
|
|
166
|
+
# Mirrors ``_pyast.decode`` but operates directly on compiled values (``Thunk`` + ``Closure``),
|
|
167
|
+
# reflecting Scott structure out of them with marker handlers in the self-hosted compilation path.
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
# The decoder reflects Scott structure out of compiled values by feeding them marker handlers and
|
|
171
|
+
# reading which handler fires. Compiled code embeds these markers as ``Thunk`` callees/arguments and
|
|
172
|
+
# forces them, so each marker must be a ``Node`` (a ``Closure``): a marker's weak head normal form is
|
|
173
|
+
# itself, and applying a callable marker runs its Python ``__call__``.
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
class _TagMarker(Closure):
    """Callable marker used to read a Scott constructor out of a compiled value.

    Each application records its argument in ``fields`` and returns the marker itself, so after
    the Scott value has selected this handler the marker holds the constructor's field values
    and ``tag`` identifies which handler position fired.
    """

    __slots__ = ("tag", "fields")

    def __init__(self, tag: int) -> None:
        self.tag = tag
        # Field values collected so far, in application order.
        self.fields: list[object] = []

    def __call__(self, argument: Node) -> Node:
        collected = self.fields
        collected.append(argument)
        return self
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
class _ChurchApp(Closure):
    """One successor step in a Church-numeral spine; ``argument`` is the predecessor.

    It is a spine value that the decoder walks through ``argument``; applying it is an error.
    """

    __slots__ = ("argument",)

    def __init__(self, argument: object) -> None:
        # The rest of the spine (predecessor), possibly still unforced.
        self.argument = argument

    def __call__(self, argument: Node) -> Node:
        message = "_ChurchApp is a spine value, not applicable"
        raise TypeError(message)
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
class _ChurchSucc(Closure):
    """Successor marker for Church numerals: each application wraps its argument in ``_ChurchApp``."""

    __slots__ = ()

    def __call__(self, argument: Node) -> Node:
        successor_node = _ChurchApp(argument)
        return successor_node
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
class _ChurchZero(Closure):
    """Zero marker for Church numerals: the base value that terminates the spine.

    It is never meant to be applied; doing so raises ``TypeError``.
    """

    __slots__ = ()

    def __call__(self, argument: Node) -> Node:
        message = "_ChurchZero is a base value, not applicable"
        raise TypeError(message)
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
# Shared marker instances fed to compiled Church numerals by ``_church_to_int_defun``.
_CHURCH_SUCC_DEFUN = _ChurchSucc()
_CHURCH_ZERO_DEFUN = _ChurchZero()

# Decoder state. All tables are keyed by ``id(...)`` of the compiled value they describe.
# id(numeral) -> decoded integer; cleared by ``_reset_defun_gensym``.
_church_int_cache: "dict[int, int]" = {}
# id(payload) -> generated ``vg_...`` name; cleared by ``_reset_defun_gensym``.
_defun_gensym_ids: "dict[int, str]" = {}
# Next number handed out by ``_gensym_name_defun``.
_defun_gensym_counter: int = 0
# id(value) -> decoded AST node while ``_memoized_decode_defun`` is active, otherwise ``None``.
_defun_decode_memo: "dict[int, ast.AST] | None" = None
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def _reset_defun_gensym() -> None:
    """Clear the Church-integer cache and the gensym table, and restart the gensym counter at 0."""
    global _defun_gensym_counter
    for table in (_church_int_cache, _defun_gensym_ids):
        table.clear()
    _defun_gensym_counter = 0
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
@contextlib.contextmanager
def _memoized_decode_defun():
    """Context manager enabling the identity-keyed memo used by ``decode_defun``.

    A fresh, empty memo is installed on entry and removed on exit (even on error). Entering
    while a memo is already active is a programming error and fails the assertion.
    """
    global _defun_decode_memo
    assert _defun_decode_memo is None, "memoized decode_defun does not nest"
    fresh_memo: "dict[int, ast.AST]" = dict()
    _defun_decode_memo = fresh_memo
    try:
        yield
    finally:
        _defun_decode_memo = None
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _force_defun(value: object) -> object:
    """Return ``value`` forced to weak head normal form if it is a ``Thunk``, else unchanged.

    Forcing a thunk to bottom is treated as an internal error (assertion).
    """
    if not _is_thunk(value):
        return value
    whnf = value.weak_head_normal_form
    assert whnf is not _BOTTOM, "hit bottom while forcing defun value"
    return whnf
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
def _extract_defun(value: object, arities: "tuple[int, ...]") -> "tuple[int, list[object]]":
    """Reflect a Scott-encoded constructor out of a compiled value.

    The forced value is applied to one ``_TagMarker`` per entry of ``arities`` (only the number
    of entries is used), forcing after every application. The marker that remains identifies the
    selected constructor and carries the field values it was applied to.

    Returns:
        ``(tag, fields)`` of the marker that fired.
    """
    handler_count = len(arities)
    current = _force_defun(value)
    tag = 0
    while tag < handler_count:
        assert callable(current), f"expected callable during extraction, got {type(current).__name__}"
        result = current(_TagMarker(tag))
        current = _force_defun(result)
        tag += 1
    assert isinstance(current, _TagMarker), (
        f"expected _TagMarker after extraction, got {type(current).__name__}"
    )
    return current.tag, current.fields
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def _church_to_int_defun(value: object) -> int:
    """Decode a compiled Church numeral to a Python ``int``.

    The numeral is applied to the successor marker and then to the zero marker; the result is a
    chain of ``_ChurchApp`` nodes ending at the zero marker, and its length is the number.
    Results are cached in ``_church_int_cache`` under ``id(value)``.
    """
    key = id(value)
    cached = _church_int_cache.get(key)
    if cached is not None:
        return cached

    current = _force_defun(value)
    assert callable(current), f"church spine head must be callable, got {type(current).__name__}"
    current = _force_defun(current(_CHURCH_SUCC_DEFUN))
    assert callable(current), f"church spine successor result must be callable, got {type(current).__name__}"
    current = _force_defun(current(_CHURCH_ZERO_DEFUN))

    # Walk the spine, counting one per successor node.
    count = 0
    while isinstance(current, _ChurchApp):
        count += 1
        current = _force_defun(current.argument)
    assert current is _CHURCH_ZERO_DEFUN, "church spine did not end at zero marker"

    _church_int_cache[key] = count
    return count
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def _decode_scott_list_defun(value: object) -> "list[object]":
    """Decode a Scott-encoded list into a Python list of its (still encoded) elements.

    Tag ``0`` is cons (fields: head, tail) and tag ``1`` is nil; any other tag fails the
    assertion.
    """
    items: "list[object]" = []
    tag, fields = _extract_defun(value, (2, 0))
    while tag != 1:
        assert tag == 0, f"expected cons (0) or nil (1), got {tag}"
        items.append(fields[0])
        tag, fields = _extract_defun(fields[1], (2, 0))
    return items
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def _gensym_name_defun(payload: object) -> str:
    """Return the generated ``vg_<16 hex digits>`` name for ``payload``, allocating one if new.

    Names are keyed by ``id(payload)`` in ``_defun_gensym_ids`` and numbered by the module-level
    counter, so the same payload object always gets the same name until the tables are reset.
    """
    global _defun_gensym_counter
    identity = id(payload)
    if (known := _defun_gensym_ids.get(identity)) is not None:
        return known
    fresh = f"vg_{_defun_gensym_counter:016x}"
    _defun_gensym_counter += 1
    _defun_gensym_ids[identity] = fresh
    return fresh
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def _decode_field_defun(value: object) -> object:
    """Decode one encoded AST field, dispatching on its Church-numeral kind tag.

    Kinds handled here: ``0`` nested AST node, ``1`` list of fields, ``2`` integer, ``3`` string
    (list of code points), ``5`` ``None``, ``7`` generated symbol name.

    Raises:
        ValueError: For any other kind.
    """
    _, fields = _extract_defun(value, (2,))
    kind_value, payload = fields
    kind = _church_to_int_defun(kind_value)
    if kind == 0:
        return decode_defun(payload)
    if kind == 1:
        return [_decode_field_defun(item) for item in _decode_scott_list_defun(payload)]
    if kind == 2:
        return _church_to_int_defun(payload)
    if kind == 3:
        return "".join(chr(_church_to_int_defun(code)) for code in _decode_scott_list_defun(payload))
    if kind == 5:
        return None
    if kind == 7:
        return _gensym_name_defun(payload)
    raise ValueError(f"defun decode: unsupported field kind {kind}")
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
def decode_defun(value: object) -> ast.AST:
    """Decode a Scott-encoded AST node directly from a compiled value.

    The constructor tag selects the node class from ``SUPPORTED`` and each field is decoded with
    ``_decode_field_defun``. While ``_memoized_decode_defun`` is active, results are memoized
    under ``id(value)`` so each distinct value object is decoded once.
    """
    identity = id(value)
    if _defun_decode_memo is not None:
        cached = _defun_decode_memo.get(identity)
        if cached is not None:
            return cached
    tag, fields = _extract_defun(value, _ARITY)
    node_class = SUPPORTED[tag]
    decoded = node_class(*map(_decode_field_defun, fields))
    # Re-read the global: this mirrors the original, which checks it again after decoding.
    if _defun_decode_memo is not None:
        _defun_decode_memo[identity] = decoded
    return decoded
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
def defunctionalize(node: Node) -> str:
    """Compile a lambda term to defunctionalized Python source (a module of closure classes).

    The term is quoted, fed to ``DEFUN``, decoded to an ``ast.Module``, canonicalized and
    unparsed. The whole pipeline runs through ``run_in_large_stack`` because the interpreter's
    recursion can be as deep as the term.
    """

    def work() -> str:
        scott_module = build(app(DEFUN, quote_binnat(node)))
        _reset_gensym()
        with memoized_decode():
            decoded = decode(scott_module)
        assert isinstance(decoded, ast.Module)
        canonical_module = _canonicalize_classes(decoded)
        located = ast.fix_missing_locations(canonical_module)
        return ast.unparse(located)

    return run_in_large_stack(work)
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def _defun_globals() -> dict:
    """Build a fresh globals dict binding the runtime names generated code refers to."""
    return dict(Thunk=Thunk, interned=interned, Closure=Closure)
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
def load_namespace(source: str) -> dict:
    """Execute defunctionalized source and return the resulting module namespace.

    The namespace starts from ``_defun_globals()`` and, after execution, additionally holds
    whatever the source defines (the generated classes and the ``compiled`` value).
    """
    namespace = _defun_globals()
    code = compile(source, "<defun>", "exec")
    exec(code, namespace)  # noqa: S102
    return namespace
|
|
390
|
+
|
|
391
|
+
|
|
392
|
+
def load(source: str) -> object:
    """Execute defunctionalized source and return the value it binds to ``compiled``."""
    namespace = load_namespace(source)
    return namespace["compiled"]
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
def defunctionalize_and_load(node: Node) -> object:
    """Defunctionalize ``node`` to source, execute it, and return its ``compiled`` value."""
    source = defunctionalize(node)
    return load(source)
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
# A self-contained import header so a generated defunctionalized module runs on its own: it binds the
|
|
403
|
+
# runtime free names the generated code references (``Closure``, ``Thunk``, ``interned``). Every
|
|
404
|
+
# generated closure explicitly subclasses ``Closure`` and annotates its captures ``cap_i: Closure``;
|
|
405
|
+
# ``interned`` applies ``dataclass(eq=False)`` and hash-conses, so generated classes carry only
|
|
406
|
+
# ``@interned``.
|
|
407
|
+
# Three explanatory comment lines followed by the runtime import, each newline-terminated.
_DEFUN_MODULE_HEADER = "".join(
    line + "\n"
    for line in (
        "# Generated, self-contained module: the import header is added at serialization time (see",
        "# tablambda._defunctionalize.runnable_defun_module); the body is emitted by the DEFUN lambda",
        "# term and content-addressed by compiled dataclass shape.",
        "from tablambda._defun_runtime import Closure, Thunk, interned",
    )
)
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
def runnable_defun_module(source: str) -> str:
    """Return ``source`` prefixed by the runtime import header and one separating blank line."""
    pieces = (_DEFUN_MODULE_HEADER, source)
    return "\n".join(pieces)
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
def defun_compiler_source() -> str:
    """Return ``DEFUN`` defunctionalized by itself, as a standalone module's source.

    The result is the output of ``defunctionalize`` on the built ``DEFUN`` term with the runtime
    import header prepended; importing it binds ``compiled`` to the defunctionalized compiler.
    """
    from tablambda._defun_codegen import DEFUN

    compiler_term = build(DEFUN)
    body = defunctionalize(compiler_term)
    return runnable_defun_module(body)
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
def compile_with_defun(engine: object, node: Node) -> str:
    """Compile ``node`` by running a defunctionalized ``DEFUN`` engine (self-hosted path).

    ``node`` is quoted, then itself defunctionalized and loaded so the engine receives a compiled
    value. The engine's result is decoded with ``decode_defun``, canonicalized and unparsed.

    Args:
        engine: The ``compiled`` value of a ``defun_compiler_source`` module.
        node: The lambda term to compile.

    Returns:
        The generated Python source.

    Raises:
        ValueError: If applying the engine reduces to bottom.
    """
    quoted_term = build(quote_binnat(node))
    quoted_argument = defunctionalize_and_load(quoted_term)

    def work() -> str:
        application = Thunk(engine, quoted_argument)
        result = application.weak_head_normal_form
        if result is _BOTTOM:
            raise ValueError("the defunctionalized compiler did not produce a module")
        _reset_defun_gensym()
        with _memoized_decode_defun():
            decoded = decode_defun(result)
        assert isinstance(decoded, ast.Module)
        canonical_module = _canonicalize_classes(decoded)
        located = ast.fix_missing_locations(canonical_module)
        return ast.unparse(located)

    return run_in_large_stack(work)
|
|
455
|
+
|