vysort 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vysort-0.1.0/PKG-INFO +127 -0
- vysort-0.1.0/README.md +117 -0
- vysort-0.1.0/pyproject.toml +17 -0
- vysort-0.1.0/src/vysort/__init__.py +343 -0
- vysort-0.1.0/src/vysort/workers/check.py +142 -0
- vysort-0.1.0/src/vysort/workers/match.py +119 -0
- vysort-0.1.0/src/vysort/workers/preflight.py +42 -0
- vysort-0.1.0/src/vysort/workers/recover.py +178 -0
- vysort-0.1.0/src/vysort/workers/reorder.py +59 -0
vysort-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: vysort
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Make vyper 0.3.4-0.3.7 bytecode deterministic by forcing the internal-function layout
|
|
5
|
+
Author: banteg
|
|
6
|
+
Author-email: banteg <4562643+banteg@users.noreply.github.com>
|
|
7
|
+
Requires-Dist: uv
|
|
8
|
+
Requires-Python: >=3.10
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
|
|
11
|
+
# vysort
|
|
12
|
+
|
|
13
|
+
Make vyper 0.3.4–0.3.7 bytecode deterministic.
|
|
14
|
+
|
|
15
|
+
These compiler versions emit nondeterministic bytecode for any contract whose
|
|
16
|
+
call graph contains a *decision point* — a function calling ≥2 internal
|
|
17
|
+
functions defined later in the file ([vyper#3369](https://github.com/vyperlang/vyper/issues/3369)).
|
|
18
|
+
The internal-function sections get permuted per environment (and per run on
|
|
19
|
+
linux), which blocks byte-exact verification: the verifier's recompile may
|
|
20
|
+
never reproduce what the deployer's machine happened to emit.
|
|
21
|
+
|
|
22
|
+
vysort fixes this with no compiler modifications: it decodes the deployed
|
|
23
|
+
layout straight off the on-chain bytecode, then reorders the source so
|
|
24
|
+
internal function defs come first, in that exact order. The topsort then has
|
|
25
|
+
zero decision points and a stock compiler produces the deployed bytecode
|
|
26
|
+
everywhere, every run.
|
|
27
|
+
|
|
28
|
+
## Install
|
|
29
|
+
|
|
30
|
+
```sh
|
|
31
|
+
uv tool install vysort
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
Or run from a checkout: `uv run vysort ...`
|
|
35
|
+
|
|
36
|
+
vysort itself runs on any modern python and depends only on uv. The
|
|
37
|
+
vyper-touching work runs in an ephemeral `uv run` environment with the
|
|
38
|
+
matching compiler: the vyper version is auto-detected from the source's
|
|
39
|
+
version pragma (override with `--vyper`), on python 3.10 by default
|
|
40
|
+
(override with `--python`). No old python or vyper install needed.
|
|
41
|
+
|
|
42
|
+
## Verify your contract
|
|
43
|
+
|
|
44
|
+
If your vyper 0.3.x contract fails verification, this is the command:
|
|
45
|
+
|
|
46
|
+
```sh
|
|
47
|
+
vysort verify contract.vy --address 0x2cced4ff... --rpc-url https://eth.drpc.org
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
It fetches the deployed code and chain id from the RPC, recovers the deployed
|
|
51
|
+
internal-function layout from the on-chain bytes, rewrites the source to force
|
|
52
|
+
that layout, confirms the exact standard-json payload reproduces the runtime
|
|
53
|
+
byte-for-byte (a preflight compile through vyper's own std-json entry point —
|
|
54
|
+
the same path the verifier's binary takes), and submits it to sourcify's v2
|
|
55
|
+
API with a stock compiler version. No forks, no patched binaries, no special
|
|
56
|
+
verifier support.
|
|
57
|
+
|
|
58
|
+
Use `--dry-run` to inspect the submission payload without sending it,
|
|
59
|
+
`--creation-tx` to help the creation match, `--sourcify-url` to target
|
|
60
|
+
another server, and `-o` to keep the rewritten source.
|
|
61
|
+
|
|
62
|
+
Note: creation matches are only guaranteed when `__init__` calls ≤1 internal
|
|
63
|
+
function; the init-callee section of creation code is not forced by source
|
|
64
|
+
order. Runtime matches are always forceable.
|
|
65
|
+
|
|
66
|
+
## Match without submitting
|
|
67
|
+
|
|
68
|
+
To recover the layout and prove the match locally — against on-chain code or
|
|
69
|
+
a hex file — without involving a verifier:
|
|
70
|
+
|
|
71
|
+
```sh
|
|
72
|
+
vysort match contract.vy --address 0x2cced4ff... --rpc-url https://eth.drpc.org -o matched.vy
|
|
73
|
+
vysort match contract.vy --runtime runtime.hex -o matched.vy
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
The deployed layout is recovered in 2 compiles regardless of contract size:
|
|
77
|
+
one instrumented compile maps each internal function's section boundaries and
|
|
78
|
+
masks the layout-dependent address bytes, the deployed order is then decoded
|
|
79
|
+
straight off the on-chain bytes, and one reordered stock compile verifies it
|
|
80
|
+
byte-exactly — `exact`, or `prefix` when the deployed code carries an appended
|
|
81
|
+
immutable tail. If the decode hits an edge case, reachable layouts are
|
|
82
|
+
brute-forced one compile at a time as a fallback. For unaffected compiler
|
|
83
|
+
versions a single compile-and-compare runs instead. `--evm-version istanbul`
|
|
84
|
+
helps pre-berlin deployments whose nonreentrant lock constants differ.
|
|
85
|
+
|
|
86
|
+
The matched source written by `-o` is ordinary vyper that any stock compiler
|
|
87
|
+
of that version turns into the deployed bytecode — auxdata contains no source
|
|
88
|
+
hash, so the output is byte-identical to what the original source produces
|
|
89
|
+
under that ordering.
|
|
90
|
+
|
|
91
|
+
## Developer curiosities
|
|
92
|
+
|
|
93
|
+
The remaining subcommands expose the machinery.
|
|
94
|
+
|
|
95
|
+
Analyze a contract for ordering nondeterminism:
|
|
96
|
+
|
|
97
|
+
```sh
|
|
98
|
+
vysort check contract.vy
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
```json
|
|
102
|
+
{
|
|
103
|
+
"internal_fns": 2,
|
|
104
|
+
"decision_points": 1,
|
|
105
|
+
"reachable_layouts": 2,
|
|
106
|
+
"immune": false,
|
|
107
|
+
"env_layout": ["_triple", "_double"],
|
|
108
|
+
"layouts": [["_double", "_triple"], ["_triple", "_double"]]
|
|
109
|
+
}
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
`immune: true` means exactly one layout is reachable — the contract was never
|
|
113
|
+
at risk; this covers 93% of affected-band mainnet contracts. Otherwise
|
|
114
|
+
`layouts` (when small) enumerates every layout the deployer's heap could have
|
|
115
|
+
produced. The check is version-aware: sources targeting compilers outside the
|
|
116
|
+
affected 0.3.4–0.3.7 band short-circuit to `immune: true` without compiling.
|
|
117
|
+
|
|
118
|
+
Force an arbitrary layout by rewriting the source:
|
|
119
|
+
|
|
120
|
+
```sh
|
|
121
|
+
vysort reorder contract.vy _double,_triple -o forced.vy
|
|
122
|
+
vysort reorder contract.vy layout.json > forced.vy
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
The layout is a comma-separated list of internal function names or a JSON
|
|
126
|
+
file (`["_double", "_triple"]`). This is the forcing primitive `match` and
|
|
127
|
+
`verify` are built on.
|
vysort-0.1.0/README.md
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
# vysort
|
|
2
|
+
|
|
3
|
+
Make vyper 0.3.4–0.3.7 bytecode deterministic.
|
|
4
|
+
|
|
5
|
+
These compiler versions emit nondeterministic bytecode for any contract whose
|
|
6
|
+
call graph contains a *decision point* — a function calling ≥2 internal
|
|
7
|
+
functions defined later in the file ([vyper#3369](https://github.com/vyperlang/vyper/issues/3369)).
|
|
8
|
+
The internal-function sections get permuted per environment (and per run on
|
|
9
|
+
linux), which blocks byte-exact verification: the verifier's recompile may
|
|
10
|
+
never reproduce what the deployer's machine happened to emit.
|
|
11
|
+
|
|
12
|
+
vysort fixes this with no compiler modifications: it decodes the deployed
|
|
13
|
+
layout straight off the on-chain bytecode, then reorders the source so
|
|
14
|
+
internal function defs come first, in that exact order. The topsort then has
|
|
15
|
+
zero decision points and a stock compiler produces the deployed bytecode
|
|
16
|
+
everywhere, every run.
|
|
17
|
+
|
|
18
|
+
## Install
|
|
19
|
+
|
|
20
|
+
```sh
|
|
21
|
+
uv tool install vysort
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
Or run from a checkout: `uv run vysort ...`
|
|
25
|
+
|
|
26
|
+
vysort itself runs on any modern python and depends only on uv. The
|
|
27
|
+
vyper-touching work runs in an ephemeral `uv run` environment with the
|
|
28
|
+
matching compiler: the vyper version is auto-detected from the source's
|
|
29
|
+
version pragma (override with `--vyper`), on python 3.10 by default
|
|
30
|
+
(override with `--python`). No old python or vyper install needed.
|
|
31
|
+
|
|
32
|
+
## Verify your contract
|
|
33
|
+
|
|
34
|
+
If your vyper 0.3.x contract fails verification, this is the command:
|
|
35
|
+
|
|
36
|
+
```sh
|
|
37
|
+
vysort verify contract.vy --address 0x2cced4ff... --rpc-url https://eth.drpc.org
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
It fetches the deployed code and chain id from the RPC, recovers the deployed
|
|
41
|
+
internal-function layout from the on-chain bytes, rewrites the source to force
|
|
42
|
+
that layout, confirms the exact standard-json payload reproduces the runtime
|
|
43
|
+
byte-for-byte (a preflight compile through vyper's own std-json entry point —
|
|
44
|
+
the same path the verifier's binary takes), and submits it to sourcify's v2
|
|
45
|
+
API with a stock compiler version. No forks, no patched binaries, no special
|
|
46
|
+
verifier support.
|
|
47
|
+
|
|
48
|
+
Use `--dry-run` to inspect the submission payload without sending it,
|
|
49
|
+
`--creation-tx` to help the creation match, `--sourcify-url` to target
|
|
50
|
+
another server, and `-o` to keep the rewritten source.
|
|
51
|
+
|
|
52
|
+
Note: creation matches are only guaranteed when `__init__` calls ≤1 internal
|
|
53
|
+
function; the init-callee section of creation code is not forced by source
|
|
54
|
+
order. Runtime matches are always forceable.
|
|
55
|
+
|
|
56
|
+
## Match without submitting
|
|
57
|
+
|
|
58
|
+
To recover the layout and prove the match locally — against on-chain code or
|
|
59
|
+
a hex file — without involving a verifier:
|
|
60
|
+
|
|
61
|
+
```sh
|
|
62
|
+
vysort match contract.vy --address 0x2cced4ff... --rpc-url https://eth.drpc.org -o matched.vy
|
|
63
|
+
vysort match contract.vy --runtime runtime.hex -o matched.vy
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
The deployed layout is recovered in 2 compiles regardless of contract size:
|
|
67
|
+
one instrumented compile maps each internal function's section boundaries and
|
|
68
|
+
masks the layout-dependent address bytes, the deployed order is then decoded
|
|
69
|
+
straight off the on-chain bytes, and one reordered stock compile verifies it
|
|
70
|
+
byte-exactly — `exact`, or `prefix` when the deployed code carries an appended
|
|
71
|
+
immutable tail. If the decode hits an edge case, reachable layouts are
|
|
72
|
+
brute-forced one compile at a time as a fallback. For unaffected compiler
|
|
73
|
+
versions a single compile-and-compare runs instead. `--evm-version istanbul`
|
|
74
|
+
helps pre-berlin deployments whose nonreentrant lock constants differ.
|
|
75
|
+
|
|
76
|
+
The matched source written by `-o` is ordinary vyper that any stock compiler
|
|
77
|
+
of that version turns into the deployed bytecode — auxdata contains no source
|
|
78
|
+
hash, so the output is byte-identical to what the original source produces
|
|
79
|
+
under that ordering.
|
|
80
|
+
|
|
81
|
+
## Developer curiosities
|
|
82
|
+
|
|
83
|
+
The remaining subcommands expose the machinery.
|
|
84
|
+
|
|
85
|
+
Analyze a contract for ordering nondeterminism:
|
|
86
|
+
|
|
87
|
+
```sh
|
|
88
|
+
vysort check contract.vy
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
```json
|
|
92
|
+
{
|
|
93
|
+
"internal_fns": 2,
|
|
94
|
+
"decision_points": 1,
|
|
95
|
+
"reachable_layouts": 2,
|
|
96
|
+
"immune": false,
|
|
97
|
+
"env_layout": ["_triple", "_double"],
|
|
98
|
+
"layouts": [["_double", "_triple"], ["_triple", "_double"]]
|
|
99
|
+
}
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
`immune: true` means exactly one layout is reachable — the contract was never
|
|
103
|
+
at risk; this covers 93% of affected-band mainnet contracts. Otherwise
|
|
104
|
+
`layouts` (when small) enumerates every layout the deployer's heap could have
|
|
105
|
+
produced. The check is version-aware: sources targeting compilers outside the
|
|
106
|
+
affected 0.3.4–0.3.7 band short-circuit to `immune: true` without compiling.
|
|
107
|
+
|
|
108
|
+
Force an arbitrary layout by rewriting the source:
|
|
109
|
+
|
|
110
|
+
```sh
|
|
111
|
+
vysort reorder contract.vy _double,_triple -o forced.vy
|
|
112
|
+
vysort reorder contract.vy layout.json > forced.vy
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
The layout is a comma-separated list of internal function names or a JSON
|
|
116
|
+
file (`["_double", "_triple"]`). This is the forcing primitive `match` and
|
|
117
|
+
`verify` are built on.
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "vysort"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Make vyper 0.3.4-0.3.7 bytecode deterministic by forcing the internal-function layout"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
authors = [
|
|
7
|
+
{ name = "banteg", email = "4562643+banteg@users.noreply.github.com" }
|
|
8
|
+
]
|
|
9
|
+
requires-python = ">=3.10"
|
|
10
|
+
dependencies = ["uv"]
|
|
11
|
+
|
|
12
|
+
[project.scripts]
|
|
13
|
+
vysort = "vysort:main"
|
|
14
|
+
|
|
15
|
+
[build-system]
|
|
16
|
+
requires = ["uv_build>=0.11.17,<0.12.0"]
|
|
17
|
+
build-backend = "uv_build"
|
|
@@ -0,0 +1,343 @@
|
|
|
1
|
+
"""vysort: make vyper 0.3.4-0.3.7 bytecode deterministic by forcing the
|
|
2
|
+
internal-function layout through source definition reordering.
|
|
3
|
+
|
|
4
|
+
The CLI runs on any modern python; vyper-touching work is delegated to
|
|
5
|
+
self-contained worker scripts executed via `uv run` in an ephemeral
|
|
6
|
+
environment with the matching python and vyper versions.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import argparse
|
|
10
|
+
import json
|
|
11
|
+
import pathlib
|
|
12
|
+
import re
|
|
13
|
+
import subprocess
|
|
14
|
+
import sys
|
|
15
|
+
import tempfile
|
|
16
|
+
import time
|
|
17
|
+
import urllib.error
|
|
18
|
+
import urllib.request
|
|
19
|
+
|
|
20
|
+
from uv import find_uv_bin
|
|
21
|
+
|
|
22
|
+
WORKERS = pathlib.Path(__file__).parent / "workers"
|
|
23
|
+
|
|
24
|
+
# the whole affected band (vyper 0.3.4-0.3.7) supports python 3.10
|
|
25
|
+
DEFAULT_PYTHON = "3.10"
|
|
26
|
+
|
|
27
|
+
# call-graph ordering nondeterminism: introduced in 0.3.4, fixed in 0.3.8
|
|
28
|
+
AFFECTED_MIN, AFFECTED_MAX = (0, 3, 4), (0, 3, 7)
|
|
29
|
+
|
|
30
|
+
DEFAULT_SOURCIFY = "https://sourcify.dev/server"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def version_tuple(version: str) -> tuple:
    """Parse the leading ``major.minor.patch`` of *version* into a tuple of ints.

    Only the start of the string is examined, so any text following the patch
    number is ignored. Exits via ``SystemExit`` when the string does not begin
    with three dot-separated numbers.
    """
    parsed = re.match(r"(\d+)\.(\d+)\.(\d+)", version)
    if parsed is None:
        raise SystemExit(f"cannot parse vyper version: {version!r}")
    major, minor, patch = map(int, parsed.groups())
    return (major, minor, patch)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def is_affected(version: str) -> bool:
    """Return True when *version* falls inside AFFECTED_MIN..AFFECTED_MAX, inclusive."""
    parsed = version_tuple(version)
    at_or_above_min = AFFECTED_MIN <= parsed
    return at_or_above_min and parsed <= AFFECTED_MAX
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def detect_vyper_version(src: str) -> str:
    """Read the concrete compiler version from the source's version pragma.

    The first ``# @version ...`` or ``# pragma version ...`` comment line is
    used. The spec must be a single version, optionally prefixed with ``=`` or
    ``==``; anything else is treated as a range.

    Exits via ``SystemExit`` when no pragma is found or when the pragma is not
    a single concrete version.
    """
    pragma = re.search(r"^#\s*(?:@version|pragma\s+version)\s+(.+)$", src, re.M)
    if pragma is None:
        raise SystemExit("no version pragma in source; pass --vyper")

    spec = pragma.group(1).strip()
    exact = re.fullmatch(r"==?\s*(\d+\.\d+\.\d+\S*)|(\d+\.\d+\.\d+\S*)", spec)
    if exact is None:
        raise SystemExit(
            f"version pragma {spec!r} is a range, not a concrete compiler; "
            "pass --vyper with the version the contract was deployed with"
        )

    # exactly one alternative of the pattern can have matched
    pinned, bare = exact.groups()
    return pinned or bare
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def run_worker(worker: str, args, vyper_version: str, python: str) -> str:
    """Run ``workers/<worker>.py`` through ``uv run`` and return its stdout.

    The script is launched with ``--no-project``, the requested *python* and
    ``vyper==<vyper_version>`` added via ``--with``. Every item of *args* is
    converted with ``str`` and passed as a positional argument. Only stdout is
    captured. When the worker exits non-zero this process exits with the same
    return code.
    """
    script = WORKERS / f"{worker}.py"
    uv_options = [
        "run",
        "--quiet",
        "--no-project",
        "--python", python,
        "--with", f"vyper=={vyper_version}",
    ]
    command = [find_uv_bin(), *uv_options, str(script)]
    command.extend(str(arg) for arg in args)

    proc = subprocess.run(command, stdout=subprocess.PIPE, text=True)
    if proc.returncode != 0:
        sys.exit(proc.returncode)
    return proc.stdout
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def http_json(url: str, payload: dict | None = None, method: str = "POST") -> dict:
|
|
76
|
+
req = urllib.request.Request(
|
|
77
|
+
url,
|
|
78
|
+
data=json.dumps(payload).encode() if payload is not None else None,
|
|
79
|
+
method=method,
|
|
80
|
+
headers={"Content-Type": "application/json", "User-Agent": "vysort"},
|
|
81
|
+
)
|
|
82
|
+
try:
|
|
83
|
+
with urllib.request.urlopen(req, timeout=30) as r:
|
|
84
|
+
return json.load(r)
|
|
85
|
+
except urllib.error.HTTPError as e:
|
|
86
|
+
raise SystemExit(f"http {e.code} from {url}: {e.read().decode(errors='replace')}")
|
|
87
|
+
except (urllib.error.URLError, TimeoutError) as e:
|
|
88
|
+
raise SystemExit(f"cannot reach {url}: {e}")
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def eth_rpc(rpc_url: str, method: str, params: list):
    """POST a single JSON-RPC 2.0 request and return the reply's ``result``.

    Exits via ``SystemExit`` when the reply carries an ``error`` member.
    """
    request = {"jsonrpc": "2.0", "method": method, "params": params, "id": 1}
    reply = http_json(rpc_url, request)
    if "error" not in reply:
        return reply["result"]
    raise SystemExit(f"rpc error from {method}: {reply['error']}")
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def fetch_runtime(address: str, rpc_url: str) -> str:
    """Return the code at *address* as reported by ``eth_getCode`` at ``latest``.

    Exits via ``SystemExit`` when the node returns no code (``None`` or ``"0x"``).
    """
    code = eth_rpc(rpc_url, "eth_getCode", [address, "latest"])
    has_code = code is not None and code != "0x"
    if not has_code:
        raise SystemExit(f"no code at {address}")
    return code
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def resolve_runtime(args, parser) -> str:
    """Pick the target runtime hex from the parsed command-line arguments.

    ``--runtime`` takes precedence and is read from disk. Otherwise the code is
    fetched over RPC when both ``--address`` and ``--rpc-url`` were supplied.
    If neither source is available the problem is reported via ``parser.error``.
    """
    runtime_file = args.runtime
    if runtime_file:
        return runtime_file.read_text()

    have_endpoint = bool(args.address and args.rpc_url)
    if not have_endpoint:
        parser.error("supply --runtime or --address with --rpc-url")
        return None
    return fetch_runtime(args.address, args.rpc_url)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def run_match(
    source: pathlib.Path, runtime_hex: str, vyper_version: str, python: str, evm_version: str | None
) -> dict:
    """Run the ``match`` worker against a target runtime and return its JSON report.

    Args:
        source: path of the vyper source file handed to the worker.
        runtime_hex: target runtime bytecode as hex; whitespace and a leading
            ``0x`` are stripped before it is written out for the worker.
        vyper_version: compiler version for the worker environment; also
            selects the worker mode (``"brute"`` for affected versions,
            ``"single"`` otherwise).
        python: python version for the worker environment.
        evm_version: optional evm version, appended as a trailing worker
            argument when set.

    Returns:
        The worker's stdout parsed as JSON.
    """
    mode = "brute" if is_affected(vyper_version) else "single"
    clean = "".join(runtime_hex.split()).removeprefix("0x")
    # Write the hex into a closed file inside a temporary directory: a
    # NamedTemporaryFile that is still open cannot be reopened by name from
    # another process on Windows, so the worker would fail to read it there.
    with tempfile.TemporaryDirectory() as tmp:
        hex_path = pathlib.Path(tmp) / "runtime.hex"
        hex_path.write_text(clean)
        worker_args = [source, hex_path, mode]
        if evm_version:
            worker_args.append(evm_version)
        out = run_worker("match", worker_args, vyper_version, python)
    return json.loads(out)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def run_preflight(payload: dict, runtime_hex: str, vyper_version: str, python: str) -> dict:
    """Compile the exact submission payload via vyper's std-json path and
    compare against the deployed runtime.

    Args:
        payload: submission payload; its ``stdJsonInput`` is written to disk
            for the worker and its ``contractIdentifier`` is passed along.
        runtime_hex: deployed runtime bytecode as hex; whitespace and a
            leading ``0x`` are stripped before it is written out.
        vyper_version: compiler version for the worker environment.
        python: python version for the worker environment.

    Returns:
        The ``preflight`` worker's stdout parsed as JSON.
    """
    clean = "".join(runtime_hex.split()).removeprefix("0x")
    # Closed files inside a temporary directory instead of open
    # NamedTemporaryFiles: on Windows a still-open named temporary file
    # cannot be reopened by the worker subprocess.
    with tempfile.TemporaryDirectory() as tmp:
        workdir = pathlib.Path(tmp)
        json_path = workdir / "input.json"
        hex_path = workdir / "runtime.hex"
        json_path.write_text(json.dumps(payload["stdJsonInput"]))
        hex_path.write_text(clean)
        out = run_worker(
            "preflight", [json_path, hex_path, payload["contractIdentifier"]], vyper_version, python
        )
    return json.loads(out)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def compiler_version_string(vyper_version: str, python: str) -> str:
    """Full version with commit hash as sourcify expects it, e.g. 0.3.7+commit.6020b8bb.

    The canonical string is taken from the asset names of the official GitHub
    release (pip vyper reports a shorter git hash than the release assets
    carry). If the release lookup fails, or no asset name matches, the
    stripped output of ``vyper --version`` run through ``uv`` is returned
    instead.
    """
    asset_name = re.compile(rf"vyper\.({re.escape(vyper_version)}\+commit\.[0-9a-f]+)\.")
    try:
        release = http_json(
            f"https://api.github.com/repos/vyperlang/vyper/releases/tags/v{vyper_version}",
            method="GET",
        )
        for asset in release["assets"]:
            found = asset_name.match(asset["name"])
            if found:
                return found.group(1)
    except SystemExit:
        # best effort: http_json signals failure with SystemExit; fall back below
        pass

    command = [
        find_uv_bin(),
        "run",
        "--quiet",
        "--no-project",
        "--python", python,
        "--with", f"vyper=={vyper_version}",
        "vyper", "--version",
    ]
    completed = subprocess.run(command, stdout=subprocess.PIPE, text=True, check=True)
    return completed.stdout.strip()
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def build_std_json(filename: str, source: str, evm_version: str | None) -> dict:
|
|
172
|
+
settings = {"outputSelection": {"*": ["evm.bytecode", "evm.deployedBytecode", "abi"]}}
|
|
173
|
+
if evm_version:
|
|
174
|
+
settings["evmVersion"] = evm_version
|
|
175
|
+
return {"language": "Vyper", "sources": {filename: {"content": source}}, "settings": settings}
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def sourcify_verify(sourcify_url: str, chain_id: int, address: str, payload: dict) -> dict:
    """Submit *payload* to sourcify's v2 verify endpoint and wait for the job.

    The verification id from the submission reply is printed to stderr, then
    the job is polled every 2 seconds. The job document is returned as soon as
    it reports ``isJobCompleted``. Exits via ``SystemExit`` if that does not
    happen within 600 seconds.
    """
    submit = http_json(f"{sourcify_url}/v2/verify/{chain_id}/{address}", payload)
    vid = submit["verificationId"]
    print(f"submitted verification job {vid}", file=sys.stderr)

    give_up_at = time.monotonic() + 600
    while True:
        if time.monotonic() >= give_up_at:
            break
        job = http_json(f"{sourcify_url}/v2/verify/{vid}", method="GET")
        if job.get("isJobCompleted"):
            return job
        time.sleep(2)
    raise SystemExit(f"verification job {vid} did not complete within 10 minutes")
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def parse_layout(value: str) -> list:
    """Layout is a JSON file path or an inline comma-separated list of names.

    Args:
        value: path of an existing JSON file, or names separated by commas.

    Returns:
        The decoded JSON content when *value* names an existing path,
        otherwise the list of non-empty, whitespace-stripped names.
    """
    path = pathlib.Path(value)
    try:
        on_disk = path.exists()
    except (OSError, ValueError):
        # A long inline list is not a usable path: probing it can fail with
        # "File name too long" rather than reporting that it does not exist.
        on_disk = False
    if on_disk:
        return json.loads(path.read_text())
    return [n.strip() for n in value.split(",") if n.strip()]
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def _build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser with the check/reorder/match/verify subcommands."""
    parser = argparse.ArgumentParser(prog="vysort", description=__doc__)

    # options shared by every subcommand
    common = argparse.ArgumentParser(add_help=False)
    common.add_argument("source", type=pathlib.Path, help="vyper source file")
    common.add_argument("--vyper", help="vyper version (default: from source pragma)")
    common.add_argument(
        "--python", default=DEFAULT_PYTHON, help=f"python for the compiler env (default: {DEFAULT_PYTHON})"
    )

    # optional ways of supplying the target runtime (used by `match`)
    onchain = argparse.ArgumentParser(add_help=False)
    onchain.add_argument("--runtime", type=pathlib.Path, help="file with the target runtime bytecode hex")
    onchain.add_argument("--address", help="contract address to fetch the runtime code from")
    onchain.add_argument("--rpc-url", help="json-rpc endpoint for --address")

    sub = parser.add_subparsers(dest="command", required=True)

    sub.add_parser(
        "check",
        parents=[common],
        help="analyze a contract for ordering nondeterminism (decision points, reachable layouts)",
    )

    p_reorder = sub.add_parser(
        "reorder",
        parents=[common],
        help="rewrite source so a stock compiler deterministically produces the target layout",
    )
    p_reorder.add_argument(
        "layout",
        type=parse_layout,
        help='target internal layout: JSON file (["_f1", ...]) or comma-separated names (_f1,_f2)',
    )
    p_reorder.add_argument(
        "-o", "--output", type=pathlib.Path, help="write reordered source here (default: stdout)"
    )

    p_match = sub.add_parser(
        "match",
        parents=[common, onchain],
        help="find the deployed layout: compile reachable layouts until the runtime matches",
    )
    p_match.add_argument("--evm-version", help="evm version for compilation (e.g. istanbul)")
    p_match.add_argument(
        "-o", "--output", type=pathlib.Path, help="write the matched reordered source here"
    )

    p_verify = sub.add_parser(
        "verify",
        parents=[common],
        help="match the deployed layout and submit the source to sourcify",
    )
    p_verify.add_argument("--address", required=True, help="deployed contract address")
    p_verify.add_argument("--rpc-url", required=True, help="json-rpc endpoint (also provides the chain id)")
    p_verify.add_argument("--sourcify-url", default=DEFAULT_SOURCIFY, help=f"sourcify server (default: {DEFAULT_SOURCIFY})")
    p_verify.add_argument("--creation-tx", help="contract creation tx hash (helps the creation match)")
    p_verify.add_argument("--evm-version", help="evm version for compilation (e.g. istanbul)")
    p_verify.add_argument(
        "--dry-run", action="store_true", help="print the submission payload instead of submitting"
    )
    p_verify.add_argument(
        "-o", "--output", type=pathlib.Path, help="write the matched reordered source here"
    )
    return parser


def _cmd_check(args, parser, vyper_version: str) -> None:
    """`check`: print the nondeterminism report; unaffected versions skip the compile."""
    if not is_affected(vyper_version):
        report = {
            "vyper": vyper_version,
            "affected": False,
            "immune": True,
            "reason": "call-graph ordering bug only affects vyper 0.3.4-0.3.7",
        }
    else:
        report = {"vyper": vyper_version, "affected": True}
        report.update(json.loads(run_worker("check", [args.source], vyper_version, args.python)))
    print(json.dumps(report, indent=2))


def _cmd_reorder(args, parser, vyper_version: str) -> None:
    """`reorder`: write the worker's rewritten source to --output or stdout."""
    out = run_worker("reorder", [args.source, json.dumps(args.layout)], vyper_version, args.python)
    if args.output:
        args.output.write_text(out)
    else:
        sys.stdout.write(out)


def _cmd_match(args, parser, vyper_version: str) -> None:
    """`match`: print the match report; exit 1 unless the status is exact/prefix."""
    runtime_hex = resolve_runtime(args, parser)
    result = run_match(args.source, runtime_hex, vyper_version, args.python, args.evm_version)
    source = result.pop("source", None)
    if args.output and source:
        args.output.write_text(source)
        result["output"] = str(args.output)
    print(json.dumps({"vyper": vyper_version, "affected": is_affected(vyper_version), **result}, indent=2))
    if result["status"] not in ("exact", "prefix"):
        sys.exit(1)


def _cmd_verify(args, parser, vyper_version: str) -> None:
    """`verify`: match, preflight the std-json payload, then submit to sourcify."""
    runtime_hex = fetch_runtime(args.address, args.rpc_url)
    chain_id = int(eth_rpc(args.rpc_url, "eth_chainId", []), 16)
    result = run_match(args.source, runtime_hex, vyper_version, args.python, args.evm_version)
    source = result.pop("source", None)
    if result["status"] not in ("exact", "prefix"):
        print(json.dumps(result, indent=2))
        raise SystemExit(f"runtime matches no reachable layout (status: {result['status']}); not submitting")
    if not source:
        # The match report may come back without a "source" entry (`match`
        # already guards for that). Use the input source instead of passing
        # None on; the preflight below still has to reproduce the runtime from
        # this exact payload before anything is submitted.
        source = args.source.read_text()
    if args.output:
        args.output.write_text(source)

    filename = args.source.name
    payload = {
        "stdJsonInput": build_std_json(filename, source, args.evm_version),
        "compilerVersion": compiler_version_string(vyper_version, args.python),
        "contractIdentifier": f"{filename}:{args.source.stem}",
    }
    if args.creation_tx:
        payload["creationTransactionHash"] = args.creation_tx

    preflight = run_preflight(payload, runtime_hex, vyper_version, args.python)
    if preflight["status"] not in ("exact", "prefix"):
        print(json.dumps({"match": result, "preflight": preflight}, indent=2))
        raise SystemExit(
            f"preflight std-json compile does not reproduce the runtime "
            f"(status: {preflight['status']}); not submitting"
        )
    print(f"preflight: std-json payload reproduces the runtime ({preflight['status']})", file=sys.stderr)

    if args.dry_run:
        print(json.dumps(
            {
                "chain_id": chain_id,
                "address": args.address,
                "match": result,
                "preflight": preflight,
                "payload": payload,
            },
            indent=2,
        ))
        return

    job = sourcify_verify(args.sourcify_url, chain_id, args.address, payload)
    report = {
        "match": result["status"],
        "preflight": preflight["status"],
        "layout": result.get("layout"),
        "contract": job.get("contract"),
        "error": job.get("error"),
    }
    print(json.dumps(report, indent=2))
    if not (job.get("contract") or {}).get("match"):
        sys.exit(1)


def main() -> None:
    """Command-line entry point: parse arguments and run the chosen subcommand.

    The vyper version comes from ``--vyper`` or, failing that, from the version
    pragma of the source file. Results are printed as JSON on stdout; failures
    exit with a non-zero status.
    """
    parser = _build_parser()
    args = parser.parse_args()
    vyper_version = args.vyper or detect_vyper_version(args.source.read_text())

    handlers = {
        "check": _cmd_check,
        "reorder": _cmd_reorder,
        "match": _cmd_match,
        "verify": _cmd_verify,
    }
    handler = handlers.get(args.command)
    if handler is not None:
        handler(args, parser, vyper_version)
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""Decision-point analysis for vyper 0.3.4-0.3.7 ordering nondeterminism.
|
|
2
|
+
|
|
3
|
+
The bug requires multiple valid topsorts — i.e. some called_functions set
|
|
4
|
+
holding >=2 not-yet-placed internal functions at DFS time. Contracts without
|
|
5
|
+
such a decision point have exactly one reachable layout and are immune.
|
|
6
|
+
|
|
7
|
+
Captures the call graph + source order from a real compile, enumerates (or
|
|
8
|
+
samples) all per-set iteration orders, and counts distinct reachable internal
|
|
9
|
+
layouts.
|
|
10
|
+
|
|
11
|
+
Self-contained worker: runs in an ephemeral `uv run --with vyper==X` env and
|
|
12
|
+
imports nothing from vysort. argv: <source.vy>; JSON report on stdout.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import itertools
|
|
16
|
+
import json
|
|
17
|
+
import pathlib
|
|
18
|
+
import random
|
|
19
|
+
import sys
|
|
20
|
+
|
|
21
|
+
import vyper.codegen.module as module_mod
|
|
22
|
+
from vyper.compiler.phases import CompilerData
|
|
23
|
+
|
|
24
|
+
ENUMERATION_CAP = 20000
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def capture_call_graph(src: str) -> dict:
    """Compile once with an instrumented _topsort; return the call graph.

    ``vyper.codegen.module._topsort`` is temporarily replaced by a wrapper
    that, on its first call, records for every function its callee names (in
    the order this environment iterates them) and whether it is internal, plus
    the list of function names under the ``"__source_order__"`` key. The
    original ``_topsort`` is always restored afterwards.
    """
    graph = {}
    original_topsort = module_mod._topsort

    def describe(fn):
        fn_type = fn._metadata["type"]
        return {
            "callees": [callee.name for callee in fn_type.called_functions],  # env iteration order
            "internal": fn_type.is_internal,
        }

    def recording_topsort(functions):
        # record only once, the first time the compiler sorts the functions
        if not graph:
            graph.update({fn.name: describe(fn) for fn in functions})
            graph["__source_order__"] = [fn.name for fn in functions]
        return original_topsort(functions)

    module_mod._topsort = recording_topsort
    try:
        CompilerData(src).bytecode_runtime  # triggers codegen -> capture
    finally:
        module_mod._topsort = original_topsort
    return graph
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def simulate(source_order, callee_order, internal):
    """Replicate _topsort + dedup-first; return internal layout tuple.

    Functions are visited in *source_order*. Each one is placed after all of
    its callees (taken in the order given by ``callee_order[name]``) and is
    placed only once. The result keeps just the names flagged truthy in
    *internal*, in placement order.
    """
    order = []
    seen = set()

    def visit(name):
        if name in seen:
            return
        for callee in callee_order[name]:
            visit(callee)
        if name in seen:
            return
        seen.add(name)
        order.append(name)

    for name in source_order:
        visit(name)
    return tuple(filter(internal.get, order))
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def decision_points(source_order, callee_order, internal):
    """List the functions that face an ordering choice when first visited.

    Walks the same depth-first traversal as the topsort. A function is a
    decision point if, at its first visit, at least two of its not-yet-placed
    callees are internal (per `internal.get`).

    Returns a list of ``(function_name, number_of_unplaced_callees)`` tuples
    in visit order.
    """
    found = []
    done = set()

    def visit(name):
        if name in done:
            return
        callees = callee_order[name]
        pending = [callee for callee in callees if callee not in done]
        internal_pending = sum(1 for callee in pending if internal.get(callee))
        if internal_pending >= 2:
            found.append((name, len(pending)))
        for callee in callees:
            visit(callee)
        # a function counts as placed only after all of its callees
        done.add(name)

    for name in source_order:
        visit(name)
    return found
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def enumerate_layouts(source_order, callee_order, internal):
    """Collect the layouts reachable by permuting each multi-callee list.

    The choice space is the product of ``len(callees)!`` over every function
    with two or more callees. If it fits within ENUMERATION_CAP, every
    combination of permutations is simulated; otherwise ENUMERATION_CAP
    random shufflings are simulated using a fixed-seed RNG.

    Returns ``(layouts, exact, choice_space)`` where `layouts` is a set of
    layout tuples and `exact` says whether the enumeration was exhaustive.
    """
    branching = [name for name, callees in callee_order.items() if len(callees) >= 2]

    space = 1
    for name in branching:
        for factor in range(2, len(callee_order[name]) + 1):
            space *= factor

    seen = set()
    exact = space <= ENUMERATION_CAP
    if exact:
        pools = [list(itertools.permutations(callee_order[name])) for name in branching]
        for combo in itertools.product(*pools):
            trial = dict(callee_order)
            for name, perm in zip(branching, combo):
                trial[name] = list(perm)
            seen.add(simulate(source_order, trial, internal))
    else:
        # fixed seed keeps the sampled set reproducible between runs
        rng = random.Random(1)
        for _ in range(ENUMERATION_CAP):
            trial = dict(callee_order)
            for name in branching:
                shuffled = callee_order[name][:]
                rng.shuffle(shuffled)
                trial[name] = shuffled
            seen.add(simulate(source_order, trial, internal))
    return seen, exact, space
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def check(src: str) -> dict:
    """Build the determinism report for one source text.

    Captures the call graph from a single compile, simulates this
    environment's layout, finds the decision points and enumerates the
    reachable layouts. The result is a JSON-serialisable dict; `layouts` is
    only listed when the enumeration was exhaustive and produced at most 64
    layouts, otherwise it is None.
    """
    graph = capture_call_graph(src)
    order = graph.pop("__source_order__")
    internal = {name: info["internal"] for name, info in graph.items()}
    callees = {name: info["callees"] for name, info in graph.items()}

    current = simulate(order, callees, internal)
    points = decision_points(order, callees, internal)
    layouts, exact, space = enumerate_layouts(order, callees, internal)

    layout_count = len(layouts)
    if exact and layout_count <= 64:
        listing = sorted(layouts)
    else:
        listing = None

    return {
        "internal_fns": sum(internal.values()),
        "decision_points": len(points),
        "choice_space": space,
        "reachable_layouts": layout_count,
        "reachable_exact": exact,
        # immune: provably exactly one reachable layout
        "immune": layout_count == 1 and exact,
        "env_layout": list(current),
        "layouts": listing,
    }
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
if __name__ == "__main__":
    # argv[1] is the path of the source file; the report goes to stdout as JSON.
    report = check(pathlib.Path(sys.argv[1]).read_text())
    print(json.dumps(report, indent=2))
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""Brute-force the deployed internal-function layout: enumerate every layout
|
|
2
|
+
reachable by the 0.3.4-0.3.7 call-graph topsort, reorder the source per
|
|
3
|
+
candidate, stock-compile, and compare against the target runtime bytecode.
|
|
4
|
+
|
|
5
|
+
A `prefix` status means the compiled runtime is a prefix of the target and
|
|
6
|
+
the remainder is the appended immutable tail. In `single` mode (unaffected
|
|
7
|
+
compiler versions: exactly one layout is reachable) the original source is
|
|
8
|
+
compiled once and compared.
|
|
9
|
+
|
|
10
|
+
Self-contained worker: runs in an ephemeral `uv run --with vyper==X` env,
|
|
11
|
+
importing siblings from its own directory. argv: <source.vy>
|
|
12
|
+
<runtime-hex-file> <mode: brute|single> [evm-version]; JSON on stdout,
|
|
13
|
+
progress on stderr.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import json
|
|
17
|
+
import pathlib
|
|
18
|
+
import sys
|
|
19
|
+
|
|
20
|
+
import vyper
|
|
21
|
+
from check import capture_call_graph, enumerate_layouts, simulate
|
|
22
|
+
from recover import recover_layout
|
|
23
|
+
from reorder import reorder
|
|
24
|
+
|
|
25
|
+
# Optional fourth CLI argument; None when not supplied. Read at import time.
EVM_VERSION = sys.argv[4] if len(sys.argv) > 4 else None
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def compile_runtime(src: str) -> bytes:
    """Compile `src` with `vyper.compile_code` and return the runtime bytecode.

    The module-level EVM_VERSION is forwarded as `evm_version` only when it
    is set. Any leading "0x" on the hex output is dropped before decoding.
    """
    options = {}
    if EVM_VERSION:
        options["evm_version"] = EVM_VERSION
    compiled = vyper.compile_code(src, output_formats=["bytecode_runtime"], **options)
    runtime_hex = compiled["bytecode_runtime"]
    return bytes.fromhex(runtime_hex.removeprefix("0x"))
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def compare(target: bytes, runtime: bytes):
    """Classify how a compiled `runtime` relates to the `target` bytecode.

    Returns ``{"status": "exact"}`` when they are equal,
    ``{"status": "prefix", "immutable_tail_bytes": n}`` when `runtime` is a
    strict prefix of `target` (n is the number of extra target bytes), and
    None otherwise.
    """
    if target == runtime:
        return {"status": "exact"}
    extra = len(target) - len(runtime)
    if extra > 0 and target.startswith(runtime):
        return {"status": "prefix", "immutable_tail_bytes": extra}
    return None
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def match_single(src: str, target: bytes) -> dict:
    """Compile the unmodified source once and compare it with `target`.

    Returns the comparison verdict extended with ``attempts`` and
    ``reachable_layouts`` (both 1). On a match the source is included under
    ``source``; otherwise the status is ``"no_match"``.
    """
    verdict = compare(target, compile_runtime(src))
    if not verdict:
        verdict = {"status": "no_match"}
    result = dict(verdict)
    result["attempts"] = 1
    result["reachable_layouts"] = 1
    if verdict["status"] != "no_match":
        result["source"] = src
    return result
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def match_brute(src: str, target: bytes) -> dict:
    """Find a source ordering whose compiled runtime matches `target`.

    Strategy:
      1. Layout recovery: decode the deployed order from the bytecode and
         verify it with one reordered compile.
      2. If recovery is unavailable or does not verify, enumerate every
         reachable layout and compile each candidate until one matches.

    Returns a JSON-serialisable dict whose ``status`` is the comparison
    verdict on success, or one of ``unsupported_minimal_proxy``,
    ``layout_space_too_large`` or ``no_match``.
    """
    # A crash inside the decoder must not take the worker down: brute force
    # below can still succeed when recovery trips on an unknown edge case.
    try:
        recovery = recover_layout(src, target, EVM_VERSION)
    except Exception as exc:
        recovery = {"status": f"recover_error: {type(exc).__name__}: {exc}"}

    status = recovery["status"]
    if status == "unsupported_minimal_proxy":
        return recovery

    if status != "decoded":
        print(f"layout recovery unavailable ({status}), falling back to brute force", file=sys.stderr)
    else:
        reordered = reorder(src, recovery["proof_order"])
        verdict = compare(target, compile_runtime(reordered))
        if verdict:
            return {
                **verdict,
                "method": "recovery",
                "layout": recovery["proof_order"],
                # one instrumented compile for the decode plus one to verify
                "compiles": 2,
                "source": reordered,
            }
        print("recovered order did not verify, falling back to brute force", file=sys.stderr)

    graph = capture_call_graph(src)
    order = graph.pop("__source_order__")
    internal = {name: info["internal"] for name, info in graph.items()}
    callees = {name: info["callees"] for name, info in graph.items()}
    own_layout = simulate(order, callees, internal)
    layouts, exact, space = enumerate_layouts(order, callees, internal)
    if not exact:
        # a sampled set cannot prove a negative, so do not start compiling
        return {
            "status": "layout_space_too_large",
            "choice_space": space,
            "sampled_layouts": len(layouts),
        }

    def priority(candidate):
        # this environment's own layout sorts first, the rest lexicographically
        return (candidate != own_layout, candidate)

    candidates = sorted(layouts, key=priority)
    total = len(candidates)
    for attempt, layout in enumerate(candidates, 1):
        print(f"candidate {attempt}/{total}: {list(layout)}", file=sys.stderr)
        reordered = reorder(src, list(layout))
        verdict = compare(target, compile_runtime(reordered))
        if verdict:
            return {
                **verdict,
                "method": "brute",
                "layout": list(layout),
                "attempts": attempt,
                "reachable_layouts": total,
                "source": reordered,
            }
    return {
        "status": "no_match",
        "method": "brute",
        "attempts": total,
        "reachable_layouts": total,
    }
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
if __name__ == "__main__":
    # argv: <source.vy> <runtime-hex-file> <mode> [evm-version]
    source_text = pathlib.Path(sys.argv[1]).read_text()
    # strip all whitespace from the hex file, then any leading "0x"
    raw_hex = pathlib.Path(sys.argv[2]).read_text()
    target_bytes = bytes.fromhex("".join(raw_hex.split()).removeprefix("0x"))
    # any mode other than "single" selects the brute-force path
    if sys.argv[3] == "single":
        result = match_single(source_text, target_bytes)
    else:
        result = match_brute(source_text, target_bytes)
    print(json.dumps(result, indent=2))
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Compile the exact standard-json payload that will be submitted to the
|
|
2
|
+
verifier — through vyper's own std-json entry point, the same path the
|
|
3
|
+
official binary takes — and compare the produced deployedBytecode against
|
|
4
|
+
the target runtime. Catches anything that diverges between the in-process
|
|
5
|
+
match verification and the submitted payload: settings serialization,
|
|
6
|
+
std-json defaults, identifier resolution.
|
|
7
|
+
|
|
8
|
+
Self-contained worker: runs in an ephemeral `uv run --with vyper==X` env.
|
|
9
|
+
argv: <stdjson-file> <runtime-hex-file> <contract-identifier>; JSON on
|
|
10
|
+
stdout.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
import pathlib
|
|
15
|
+
import sys
|
|
16
|
+
|
|
17
|
+
from vyper.cli.vyper_json import compile_json, exc_handler_to_dict
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def preflight(input_json: dict, target: bytes, identifier: str) -> dict:
    """Compile a standard-json payload and compare its runtime with `target`.

    `identifier` is split on its last ":" into a filename and a contract
    name, which index into the compiler output's ``contracts`` mapping.

    Returns a dict with ``status`` one of:
      * ``compile_error`` - with the messages of all severity-"error" entries
      * ``exact``         - deployedBytecode equals `target`
      * ``prefix``        - deployedBytecode is a strict prefix of `target`
      * ``no_match``      - with the common prefix length and compiled size
    """
    filename, name = identifier.rsplit(":", 1)
    output = compile_json(input_json, exc_handler=exc_handler_to_dict)

    fatal = []
    for entry in output.get("errors", []):
        if entry.get("severity") == "error":
            fatal.append(entry)
    if fatal:
        return {"status": "compile_error", "errors": [entry.get("message") for entry in fatal]}

    evm = output["contracts"][filename][name]["evm"]
    runtime = bytes.fromhex(evm["deployedBytecode"]["object"].removeprefix("0x"))
    if runtime == target:
        return {"status": "exact"}

    extra = len(target) - len(runtime)
    if extra > 0 and target.startswith(runtime):
        return {"status": "prefix", "immutable_tail_bytes": extra}

    # length of the shared leading run: stops at the first differing byte or
    # at the end of the shorter of the two
    common = 0
    for want, got in zip(target, runtime):
        if want != got:
            break
        common += 1
    return {"status": "no_match", "common_prefix_bytes": common, "compiled_bytes": len(runtime)}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
if __name__ == "__main__":
    # argv: <stdjson-file> <runtime-hex-file> <contract-identifier>
    payload = json.loads(pathlib.Path(sys.argv[1]).read_text())
    # strip all whitespace from the hex file, then any leading "0x"
    raw_hex = pathlib.Path(sys.argv[2]).read_text()
    target_bytes = bytes.fromhex("".join(raw_hex.split()).removeprefix("0x"))
    report = preflight(payload, target_bytes, sys.argv[3])
    print(json.dumps(report, indent=2))
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
"""Recover the deployed internal-function layout from on-chain runtime
|
|
2
|
+
bytecode in 1 instrumented compile plus an offline decode (no search):
|
|
3
|
+
|
|
4
|
+
1. instrumented compile: walk the runtime assembly to get each internal
|
|
5
|
+
function's section boundaries (entry-label pcs) and all code-symbol
|
|
6
|
+
PUSH2 operand positions, masking only operands whose target lies in
|
|
7
|
+
the movable internal region;
|
|
8
|
+
2. decode the deployed section order by masked comparison (no compiles).
|
|
9
|
+
|
|
10
|
+
Section lengths are order-invariant, so the decode terminates with the full
|
|
11
|
+
deployed permutation regardless of how many layouts are reachable.
|
|
12
|
+
|
|
13
|
+
Handles: shared revert/postamble after the last internal function (excluded
|
|
14
|
+
from the movable region), cleanup-only internals (no emitted main label —
|
|
15
|
+
anchored to the host section containing their cleanup pc and re-inserted
|
|
16
|
+
into the proof order), and EIP-1167 minimal proxies (classified, not
|
|
17
|
+
decoded).
|
|
18
|
+
|
|
19
|
+
Self-contained worker module: runs in an ephemeral `uv run --with vyper==X`
|
|
20
|
+
env; used by match.py, not invoked directly.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
import re
|
|
24
|
+
|
|
25
|
+
import vyper.evm.opcodes as evm_ops
|
|
26
|
+
from vyper.compiler.phases import CompilerData
|
|
27
|
+
from vyper.ir.compile_ir import is_mem_sym, is_ofst, is_symbol
|
|
28
|
+
|
|
29
|
+
# Lowercase hex of an EIP-1167 minimal-proxy runtime; group 1 captures the
# 40 hex digits that recover_layout() reports as the implementation address.
EIP1167_RE = re.compile(r"^363d3d373d3d3d363d73([0-9a-f]{40})5af43d82803e903d91602b57fd5bf3$")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def scan(assembly):
    """Walk the assembly item by item, tracking the program counter.

    Mirrors the pc accounting of the assembler so the recorded positions
    line up with the emitted bytecode.

    Returns ``(labels, refs, code_end)``:
      * labels   - symbol -> pc, for symbols immediately followed by
                   "JUMPDEST" or "BLANK" (definitions)
      * refs     - list of ``(operand_pc, symbol)`` for every other symbol
                   occurrence (references)
      * code_end - the pc after the last item

    Raises AssertionError on a memory symbol or a nested list (subcode).
    """
    offset = 0
    label_pcs = {}
    references = []
    for index, item in enumerate(assembly):
        if item == "DEBUG":
            continue
        if is_symbol(item):
            if assembly[index + 1] in ("JUMPDEST", "BLANK"):
                # a definition: the symbol itself takes no space
                label_pcs[item] = offset
            else:
                # a reference: one opcode byte followed by a two-byte
                # operand, which starts at offset + 1
                references.append((offset + 1, item))
                offset += 3
            continue
        if is_mem_sym(item):
            raise AssertionError("mem symbol in runtime assembly")
        if is_ofst(item):
            # matches upstream: the following symbol overlaps by one byte
            offset -= 1
            continue
        if item == "BLANK":
            continue
        if isinstance(item, str) and item.startswith("_DEPLOY_MEM_OFST_"):
            continue
        if isinstance(item, list):
            raise AssertionError("subcode in runtime assembly")
        offset += 1
    return label_pcs, references, offset
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def recover_layout(src: str, onchain: bytes, evm_version=None) -> dict:
    """Decode the deployed internal-function order from `onchain`.

    Performs one compile of `src`, scans its runtime assembly for section
    boundaries and symbol references, then matches the compiled sections
    against `onchain` one after another with layout-dependent operand bytes
    masked out.

    When `evm_version` is given it is installed into
    `evm_ops.active_evm_version` before compiling (and is not restored).

    Returns a dict whose ``status`` is one of:
      * ``decoded``                   - with ``proof_order`` and diagnostics
      * ``unsupported_minimal_proxy`` - `onchain` is an EIP-1167 proxy
      * ``too_few_sections``          - fewer than two labelled internals
      * ``decode_failed``             - with a ``reason``

    Raises AssertionError if the assembly references an undefined symbol.
    """
    proxy = EIP1167_RE.match(onchain.hex())
    if proxy:
        return {"status": "unsupported_minimal_proxy", "implementation": "0x" + proxy.group(1)}

    if evm_version:
        # 0.3.x anchors the evm version in a module global (no kwarg on CompilerData)
        evm_ops.active_evm_version = evm_ops.EVM_VERSIONS[evm_version]
    data = CompilerData(src)
    bc = data.bytecode_runtime
    internal_sigs = {n: s for n, s in data.function_signatures.items() if s.internal}
    labels, refs, code_end = scan(data.assembly_runtime)

    # Split the internals into those with an emitted entry label (these own
    # a section) and those without one.
    starts = {}
    unlabelled = []
    for name, sig in internal_sigs.items():
        entry_pc = labels.get("_sym_" + sig.internal_function_label)
        if entry_pc is None:
            unlabelled.append((name, sig))
        else:
            starts[name] = entry_pc

    if len(starts) < 2:
        # nothing movable (cleanup-only internals can't reorder on their own)
        return {"status": "too_few_sections", "internal_sections": len(starts)}

    order = sorted(starts, key=starts.get)
    movable_start = starts[order[0]]
    last_start = starts[order[-1]]
    # The shared revert/postamble after the last function is NOT movable:
    # end the last section at the first _sym_revert* label past its start.
    revert_pcs = []
    for sym, pc in labels.items():
        if sym.startswith("_sym_revert") and pc > last_start:
            revert_pcs.append(pc)
    movable_end = min(revert_pcs) if revert_pcs else code_end
    bounds = [starts[f] for f in order]
    bounds.append(movable_end)
    sections = {f: (bounds[i], bounds[i + 1]) for i, f in enumerate(order)}

    # Cleanup-only internals have no entry label; their remnant code sits
    # inside another function's section. Anchor each to that host so the
    # proof order can re-insert it right after the host. Internals with
    # neither label are not emitted in the runtime at all, so they are left
    # out of the proof order; the final byte-exact comparison validates this.
    skipped = []
    absent = []
    for name, sig in unlabelled:
        cleanup_pc = labels.get("_sym_" + sig.exit_sequence_label)
        if cleanup_pc is None:
            absent.append(name)
            continue
        host = next((f for f, (lo, hi) in sections.items() if lo <= cleanup_pc < hi), None)
        if host is None:
            return {
                "status": "decode_failed",
                "reason": f"cleanup label of {name} at pc {cleanup_pc} falls outside every internal section",
            }
        skipped.append({"function": name, "attach_after": host, "cleanup_pc": cleanup_pc})

    # Mask only operands whose target label sits inside the movable region;
    # references to fixed targets must match as-is. _sym_code_end is defined
    # by the assembler outside its main loop, so scan() never records it; it
    # resolves to code_end.
    labels.setdefault("_sym_code_end", code_end)
    mask = set()
    for operand_pc, sym in refs:
        target = labels.get(sym)
        if target is None:
            raise AssertionError(f"reference to undefined symbol {sym}")
        if movable_start <= target < movable_end:
            # both bytes of the two-byte operand
            mask.add(operand_pc)
            mask.add(operand_pc + 1)

    def eq_masked(off, lo, hi):
        # Compare compiled bytes [lo, hi) with the on-chain bytes starting
        # at `off`, ignoring masked positions.
        if off + (hi - lo) > len(onchain):
            return False
        for i in range(lo, hi):
            if i in mask:
                continue
            if onchain[off + i - lo] != bc[i]:
                return False
        return True

    external_ok = eq_masked(0, 0, movable_start)

    # Greedy decode: at each cursor position take the first remaining
    # section (in name order) that matches, then advance by its length.
    cursor = movable_start
    deployed = []
    remaining = set(order)
    while remaining:
        matched = None
        for f in sorted(remaining):
            lo, hi = sections[f]
            if eq_masked(cursor, lo, hi):
                matched = f
                break
        if matched is None:
            return {
                "status": "decode_failed",
                "reason": f"no section matches at offset {cursor}",
                "recovered_so_far": deployed,
                "external_region_ok": external_ok,
            }
        lo, hi = sections[matched]
        deployed.append(matched)
        remaining.discard(matched)
        cursor += hi - lo

    # Re-insert each cleanup-only internal right after its host, ordered by
    # cleanup pc when a host has several.
    skipped_by_pc = sorted(skipped, key=lambda s: s["cleanup_pc"])
    proof_order = []
    for f in deployed:
        proof_order.append(f)
        for entry in skipped_by_pc:
            if entry["attach_after"] == f:
                proof_order.append(entry["function"])

    return {
        "status": "decoded",
        "recovered_order": deployed,
        "proof_order": proof_order,
        "skipped_internals": [s["function"] for s in skipped],
        "absent_internals": absent,
        "internal_sections": len(order),
        "external_region_ok": external_ok,
    }
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""Force a vanilla vyper 0.3.x compiler into a specific internal-function
|
|
2
|
+
layout by reordering source definitions: move the targeted internal function
|
|
3
|
+
defs to the front (before any other function def), ordered by the target
|
|
4
|
+
layout. With internals defined before every caller, the topsort has zero
|
|
5
|
+
decision points and the layout is deterministic in any environment.
|
|
6
|
+
|
|
7
|
+
Uses vyper's own AST for exact function spans (handles multi-line signatures).
|
|
8
|
+
|
|
9
|
+
Self-contained worker: runs in an ephemeral `uv run --with vyper==X` env and
|
|
10
|
+
imports nothing from vysort. argv: <source.vy> <layout-json>; reordered
|
|
11
|
+
source on stdout.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import json
|
|
15
|
+
import pathlib
|
|
16
|
+
import sys
|
|
17
|
+
|
|
18
|
+
import vyper.ast as vy_ast
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def reorder(src: str, layout: list) -> str:
    """Return `src` with the functions named in `layout` moved to the front.

    Every function's line span is taken from the parsed AST (earliest
    decorator line through `end_lineno`) and extended upwards over
    contiguous comment lines. The functions in `layout` are emitted, in
    that order and each followed by two newlines, at the position of the
    earliest function definition in the file; all other lines keep their
    relative order.

    Raises SystemExit if `layout` names a function not present in `src`.
    """
    source_lines = src.splitlines(keepends=True)
    module = vy_ast.parse_to_ast(src)

    spans = {}  # name -> (first line index, end line index exclusive)
    insert_at = None
    for fn in module.get_children(vy_ast.FunctionDef):
        first = min([dec.lineno for dec in fn.decorator_list] + [fn.lineno]) - 1
        # absorb contiguous leading comment lines so a moved function keeps
        # its banner comments (cosmetic; bytecode is unaffected)
        while first > 0 and source_lines[first - 1].strip().startswith("#"):
            first -= 1
        spans[fn.name] = (first, fn.end_lineno)
        if insert_at is None or first < insert_at:
            insert_at = first

    unknown = [name for name in layout if name not in spans]
    if unknown:
        raise SystemExit(f"functions not found in source: {unknown}")

    # text of the relocated functions, and the line indices they vacate
    relocated = []
    vacated = set()
    for name in layout:
        lo, hi = spans[name]
        relocated.extend(source_lines[lo:hi])
        relocated.append("\n\n")
        vacated.update(range(lo, hi))

    pieces = []
    for index, text in enumerate(source_lines):
        if index == insert_at:
            pieces.extend(relocated)
        if index not in vacated:
            pieces.append(text)
    return "".join(pieces)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
if __name__ == "__main__":
    # argv: <source.vy> <layout-json>; the reordered source goes to stdout.
    source_text = pathlib.Path(sys.argv[1]).read_text()
    wanted_order = json.loads(sys.argv[2])
    sys.stdout.write(reorder(source_text, wanted_order))
|