touchstone-prover 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- touchstone_prover-0.1.0/LICENSE +21 -0
- touchstone_prover-0.1.0/PKG-INFO +180 -0
- touchstone_prover-0.1.0/README.md +158 -0
- touchstone_prover-0.1.0/pyproject.toml +38 -0
- touchstone_prover-0.1.0/setup.cfg +4 -0
- touchstone_prover-0.1.0/touchstone/__init__.py +10 -0
- touchstone_prover-0.1.0/touchstone/__main__.py +11 -0
- touchstone_prover-0.1.0/touchstone/_impl.py +11 -0
- touchstone_prover-0.1.0/touchstone/audit.py +3789 -0
- touchstone_prover-0.1.0/touchstone/benchmark.py +1002 -0
- touchstone_prover-0.1.0/touchstone/ci.py +143 -0
- touchstone_prover-0.1.0/touchstone/cli.py +159 -0
- touchstone_prover-0.1.0/touchstone/core.py +3304 -0
- touchstone_prover-0.1.0/touchstone/domains.py +1259 -0
- touchstone_prover-0.1.0/touchstone/engines.py +3853 -0
- touchstone_prover-0.1.0/touchstone/examples.py +163 -0
- touchstone_prover-0.1.0/touchstone/pytest_plugin.py +81 -0
- touchstone_prover-0.1.0/touchstone/smtcoq_export.py +183 -0
- touchstone_prover-0.1.0/touchstone/soundinfer.py +413 -0
- touchstone_prover-0.1.0/touchstone/theories.py +1113 -0
- touchstone_prover-0.1.0/touchstone/typeinfer.py +3139 -0
- touchstone_prover-0.1.0/touchstone/vcgen.py +500 -0
- touchstone_prover-0.1.0/touchstone_prover.egg-info/PKG-INFO +180 -0
- touchstone_prover-0.1.0/touchstone_prover.egg-info/SOURCES.txt +26 -0
- touchstone_prover-0.1.0/touchstone_prover.egg-info/dependency_links.txt +1 -0
- touchstone_prover-0.1.0/touchstone_prover.egg-info/entry_points.txt +5 -0
- touchstone_prover-0.1.0/touchstone_prover.egg-info/requires.txt +2 -0
- touchstone_prover-0.1.0/touchstone_prover.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 CharlesCNorton
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: touchstone-prover
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: An SMT-based verifier for Python with a machine-checked trust base.
|
|
5
|
+
Author: CharlesCNorton
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Repository, https://github.com/CharlesCNorton/touchstone
|
|
8
|
+
Keywords: verification,smt,z3,cvc5,formal-methods,type-inference,contracts
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
15
|
+
Classifier: Topic :: Software Development :: Testing
|
|
16
|
+
Requires-Python: >=3.11
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
License-File: LICENSE
|
|
19
|
+
Requires-Dist: z3-solver==4.16.0
|
|
20
|
+
Requires-Dist: cvc5==1.3.4
|
|
21
|
+
Dynamic: license-file
|
|
22
|
+
|
|
23
|
+
# Touchstone
|
|
24
|
+
|
|
25
|
+
An SMT-based verifier for Python. Touchstone takes a function and a property and returns
|
|
26
|
+
**PROVED** (it holds for all inputs), **REFUTED** (with a counterexample), or **UNKNOWN**
|
|
27
|
+
(with a reason), by translating the code to Z3 rather than running it.
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
import touchstone as t
|
|
31
|
+
|
|
32
|
+
# state the property in Python, over the parameters and `result`
|
|
33
|
+
t.prove("def f(x):\n return x + x\n", "result == 2 * x").status # 'PROVED'
|
|
34
|
+
|
|
35
|
+
# or write the contract as decorators on the function itself
|
|
36
|
+
t.verify_contracts('''
|
|
37
|
+
@require("n >= 0")
|
|
38
|
+
@ensure("result == n")
|
|
39
|
+
def count(n):
|
|
40
|
+
    i = 0
|
|
41
|
+
    while i < n:
|
|
42
|
+
        i = i + 1
|
|
43
|
+
    return i
|
|
44
|
+
''').status # 'PROVED'
|
|
45
|
+
|
|
46
|
+
# or check two implementations agree on every input
|
|
47
|
+
t.verify_equiv("double", "f", "def f(a):\n return a + a\n",
|
|
48
|
+
"def g(a):\n return 2 * a\n", {}).status # 'PROVED'
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Command line
|
|
52
|
+
|
|
53
|
+
The same verbs run from the shell, with the process exit status mirroring the verdict
|
|
54
|
+
(0 PROVED, 1 REFUTED, 2 UNKNOWN) so they compose in CI:
|
|
55
|
+
|
|
56
|
+
```sh
|
|
57
|
+
touchstone verify count.py # the @require / @ensure contracts written in a file
|
|
58
|
+
touchstone prove f.py --ensures 'result == x' # a postcondition over the parameters and `result`
|
|
59
|
+
touchstone equiv impl.py spec.py --func f # two implementations agree on every input
|
|
60
|
+
touchstone check d.py # trap freedom (and any asserts) for all inputs
|
|
61
|
+
touchstone infer m.py # sound over-approximate types of a function's return value and its locals
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
A refutation comes back with the counterexample and the path it took:
|
|
65
|
+
|
|
66
|
+
```
|
|
67
|
+
$ touchstone prove f.py --ensures 'result == x'
|
|
68
|
+
REFUTED [property via verified VC generator (Rocq-extracted wpg)]
|
|
69
|
+
counterexample: x=0
|
|
70
|
+
trace:
|
|
71
|
+
line 2: return x + 1 [x=0]
|
|
72
|
+
=> returns 1
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## What it covers
|
|
76
|
+
|
|
77
|
+
Functional equivalence and predicates; whole-function and interprocedural reasoning over
|
|
78
|
+
control flow with multiple loops, arbitrary nesting, break and continue, statements after a
|
|
79
|
+
loop, and any step direction; self-recursion, mutual recursion, and recursion over lists;
|
|
80
|
+
deductive and synthesized loop invariants; abstract interpretation (interval, zone, octagon,
|
|
81
|
+
Karr, polyhedra, machine-integer); IEEE-754 floating point total over every double, with Inf
|
|
82
|
+
and NaN as first-class inputs, exact floor division and modulo, and sound over-approximations
|
|
83
|
+
of sin, cos, exp, and log; arrays with quantified specifications; termination of counted
|
|
84
|
+
loops, iteration over containers, and data-dependent loops, and cost; exceptions;
|
|
85
|
+
rely-guarantee concurrency for all schedules and all depths; and separation logic with the
|
|
86
|
+
frame rule, the magic wand, and inductive heap predicates.
|
|
87
|
+
|
|
88
|
+
Values carry their real types through the symbolic core; the heap models object identity,
|
|
89
|
+
aliasing, and mutation; and index-out-of-bounds, key errors, None in arithmetic, type
|
|
90
|
+
mismatches, and division by zero are traps that refute a totality claim. Fixed-width
|
|
91
|
+
wraparound is checked alongside every integer proof. A property can be stated in Python over
|
|
92
|
+
the parameters and `result` (`prove`), written as `@require` / `@ensure` decorators on the
|
|
93
|
+
function (`verify_contracts`, in the style of the contracts and icontract packages), mined from
|
|
94
|
+
the code's own assertions (`check`), or written directly as a Z3 predicate; a counterexample
|
|
95
|
+
comes back with the execution trace and the path taken (`explain`).
|
|
96
|
+
|
|
97
|
+
## Soundness
|
|
98
|
+
|
|
99
|
+
Every construct is either encoded soundly or returned as UNKNOWN with a reason, so an
|
|
100
|
+
unsupported feature is never silently skipped or assumed away. A PROVED is confirmed by a
|
|
101
|
+
second independent solver (cvc5) and withheld unless both agree, runs under a deterministic
|
|
102
|
+
resource bound so identical input yields an identical verdict on every machine, and carries a
|
|
103
|
+
reproducibility certificate.
|
|
104
|
+
|
|
105
|
+
The trust base is machine-checked in Rocq (`proofs/`): the operational semantics of the
|
|
106
|
+
modeled subset; the verification-condition generator over it, proven sound and complete for
|
|
107
|
+
straight-line assignment and conditionals and sound for while-loops carrying a syntactic
|
|
108
|
+
invariant, trap-aware so a reachable division or modulo by zero leaves the condition
|
|
109
|
+
undischarged; a fixed-width two's-complement integer model proven to agree with unbounded
|
|
110
|
+
arithmetic exactly when no operation overflows; the SMT-LIB division and modulo encoding
|
|
111
|
+
proven to refine the theory for every conforming solver; the abstract-domain transfers
|
|
112
|
+
(extracted to OCaml and run as the engine's operators); the translation as a
|
|
113
|
+
semantics-preserving functor; and the end-to-end theorem that a discharged verification
|
|
114
|
+
condition implies the property under the program's semantics. SMTCoq re-checks each integer
|
|
115
|
+
obligation's certificate inside Coq's kernel.
|
|
116
|
+
|
|
117
|
+
Both the straight-line and the loop verification-condition generators are extracted from that
|
|
118
|
+
proof to OCaml and transcribed in the engine, and a differential audit holds each transcription
|
|
119
|
+
byte-for-byte equal to its extraction on a random corpus, so the generator code that runs is the
|
|
120
|
+
one proven correct in Rocq, not a separate symbolic execution. The engine discharges the
|
|
121
|
+
loop-free integer fragment through the straight-line generator directly, trap-aware so a
|
|
122
|
+
reachable division by zero leaves the condition undischarged. With that core verified, the
|
|
123
|
+
random differential checks against CPython are a completeness regression that measures precision
|
|
124
|
+
rather than the barrier against an unsound verdict.
|
|
125
|
+
|
|
126
|
+
## Type inference
|
|
127
|
+
|
|
128
|
+
The same symbolic core infers the type of every function return, parameter, and variable in
|
|
129
|
+
unannotated code, in two modes. `infer_types` is over-approximating and sound: the reported
|
|
130
|
+
set of type names is guaranteed to contain the value's runtime type, or the location is left
|
|
131
|
+
UNKNOWN when no such bound can be established, so a stated type is never narrower than the
|
|
132
|
+
truth. `emit_facts` is best-effort exact in the TypeEvalPy schema and discovers its own
|
|
133
|
+
targets: it walks the module and emits a fact at every return, parameter, and binding it
|
|
134
|
+
finds, typing each by carrying an argument's type across the call boundary into the parameter
|
|
135
|
+
it reaches, following a value through reassignment and the narrowing of `is None` /
|
|
136
|
+
`isinstance` guards, and resolving container element types and dict keys through the call
|
|
137
|
+
graph, attributes set in a constructor, decorators, and generators.
|
|
138
|
+
|
|
139
|
+
Scored by TypeEvalPy's exact matcher at full source position, including column offset and with
|
|
140
|
+
the analysis discovering each location, name, and kind on its own, the emitted facts are matched
|
|
141
|
+
against the runtime-observed ground truth. As an independent check, the inference is run over
|
|
142
|
+
pure-Python standard-library modules and compared against the type CPython produces at runtime:
|
|
143
|
+
the sound mode's reported set is confirmed to contain the runtime type at every observed location,
|
|
144
|
+
and the exact mode is held to the same emit-and-match standard, discovering the locations itself.
|
|
145
|
+
|
|
146
|
+
| Evaluation | Result |
|
|
147
|
+
| --- | --- |
|
|
148
|
+
| TypeEvalPy micro-benchmark (emit-and-match) | 807 / 868 (92.97%) |
|
|
149
|
+
| TypeEvalPy autogen suite (emit-and-match) | 71,451 / 77,268 (92.47%) |
|
|
150
|
+
| CPython standard library (emit-and-match) | 770 / 958 (80.4%) |
|
|
151
|
+
|
|
152
|
+
## Run
|
|
153
|
+
|
|
154
|
+
```sh
|
|
155
|
+
pip install touchstone-prover # z3-solver and cvc5, pinned in pyproject.toml
|
|
156
|
+
python -m touchstone.ci # self-tests, soundness audits, completeness regressions -> "CI OK"
|
|
157
|
+
python -m touchstone.examples # one runnable example per capability, each verdict asserted
|
|
158
|
+
python -m touchstone # a demonstration
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
The machine-checked proofs run under the Rocq 9.0 opam switch; `verify_coq.sh` also invokes
|
|
162
|
+
the SMTCoq certificate check when its separate toolchain (see `proofs/toolchain.lock`) is
|
|
163
|
+
present, and skips it cleanly otherwise:
|
|
164
|
+
|
|
165
|
+
```sh
|
|
166
|
+
eval "$(opam env --switch=rocq9)" && cd proofs && bash verify_coq.sh
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
## Layout
|
|
170
|
+
|
|
171
|
+
```
|
|
172
|
+
touchstone/ package: core, domains, engines, theories, vcgen, audit, ci, examples (_impl is a compatibility aggregator that re-exports the implementation modules)
|
|
173
|
+
proofs/ Rocq + SMTCoq proofs, the extracted VC generators + interval operators, verify_coq.sh
|
|
174
|
+
.github/ continuous integration: the audits and the proof gate on every change
|
|
175
|
+
pyproject.toml package metadata and pinned Python dependencies
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
## License
|
|
179
|
+
|
|
180
|
+
MIT. See [LICENSE](LICENSE).
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
# Touchstone
|
|
2
|
+
|
|
3
|
+
An SMT-based verifier for Python. Touchstone takes a function and a property and returns
|
|
4
|
+
**PROVED** (it holds for all inputs), **REFUTED** (with a counterexample), or **UNKNOWN**
|
|
5
|
+
(with a reason), by translating the code to Z3 rather than running it.
|
|
6
|
+
|
|
7
|
+
```python
|
|
8
|
+
import touchstone as t
|
|
9
|
+
|
|
10
|
+
# state the property in Python, over the parameters and `result`
|
|
11
|
+
t.prove("def f(x):\n return x + x\n", "result == 2 * x").status # 'PROVED'
|
|
12
|
+
|
|
13
|
+
# or write the contract as decorators on the function itself
|
|
14
|
+
t.verify_contracts('''
|
|
15
|
+
@require("n >= 0")
|
|
16
|
+
@ensure("result == n")
|
|
17
|
+
def count(n):
|
|
18
|
+
    i = 0
|
|
19
|
+
    while i < n:
|
|
20
|
+
        i = i + 1
|
|
21
|
+
    return i
|
|
22
|
+
''').status # 'PROVED'
|
|
23
|
+
|
|
24
|
+
# or check two implementations agree on every input
|
|
25
|
+
t.verify_equiv("double", "f", "def f(a):\n return a + a\n",
|
|
26
|
+
"def g(a):\n return 2 * a\n", {}).status # 'PROVED'
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## Command line
|
|
30
|
+
|
|
31
|
+
The same verbs run from the shell, with the process exit status mirroring the verdict
|
|
32
|
+
(0 PROVED, 1 REFUTED, 2 UNKNOWN) so they compose in CI:
|
|
33
|
+
|
|
34
|
+
```sh
|
|
35
|
+
touchstone verify count.py # the @require / @ensure contracts written in a file
|
|
36
|
+
touchstone prove f.py --ensures 'result == x' # a postcondition over the parameters and `result`
|
|
37
|
+
touchstone equiv impl.py spec.py --func f # two implementations agree on every input
|
|
38
|
+
touchstone check d.py # trap freedom (and any asserts) for all inputs
|
|
39
|
+
touchstone infer m.py # sound over-approximate types of a function's return value and its locals
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
A refutation comes back with the counterexample and the path it took:
|
|
43
|
+
|
|
44
|
+
```
|
|
45
|
+
$ touchstone prove f.py --ensures 'result == x'
|
|
46
|
+
REFUTED [property via verified VC generator (Rocq-extracted wpg)]
|
|
47
|
+
counterexample: x=0
|
|
48
|
+
trace:
|
|
49
|
+
line 2: return x + 1 [x=0]
|
|
50
|
+
=> returns 1
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## What it covers
|
|
54
|
+
|
|
55
|
+
Functional equivalence and predicates; whole-function and interprocedural reasoning over
|
|
56
|
+
control flow with multiple loops, arbitrary nesting, break and continue, statements after a
|
|
57
|
+
loop, and any step direction; self-recursion, mutual recursion, and recursion over lists;
|
|
58
|
+
deductive and synthesized loop invariants; abstract interpretation (interval, zone, octagon,
|
|
59
|
+
Karr, polyhedra, machine-integer); IEEE-754 floating point total over every double, with Inf
|
|
60
|
+
and NaN as first-class inputs, exact floor division and modulo, and sound over-approximations
|
|
61
|
+
of sin, cos, exp, and log; arrays with quantified specifications; termination of counted
|
|
62
|
+
loops, iteration over containers, and data-dependent loops, and cost; exceptions;
|
|
63
|
+
rely-guarantee concurrency for all schedules and all depths; and separation logic with the
|
|
64
|
+
frame rule, the magic wand, and inductive heap predicates.
|
|
65
|
+
|
|
66
|
+
Values carry their real types through the symbolic core; the heap models object identity,
|
|
67
|
+
aliasing, and mutation; and index-out-of-bounds, key errors, None in arithmetic, type
|
|
68
|
+
mismatches, and division by zero are traps that refute a totality claim. Fixed-width
|
|
69
|
+
wraparound is checked alongside every integer proof. A property can be stated in Python over
|
|
70
|
+
the parameters and `result` (`prove`), written as `@require` / `@ensure` decorators on the
|
|
71
|
+
function (`verify_contracts`, in the style of the contracts and icontract packages), mined from
|
|
72
|
+
the code's own assertions (`check`), or written directly as a Z3 predicate; a counterexample
|
|
73
|
+
comes back with the execution trace and the path taken (`explain`).
|
|
74
|
+
|
|
75
|
+
## Soundness
|
|
76
|
+
|
|
77
|
+
Every construct is either encoded soundly or returned as UNKNOWN with a reason, so an
|
|
78
|
+
unsupported feature is never silently skipped or assumed away. A PROVED is confirmed by a
|
|
79
|
+
second independent solver (cvc5) and withheld unless both agree, runs under a deterministic
|
|
80
|
+
resource bound so identical input yields an identical verdict on every machine, and carries a
|
|
81
|
+
reproducibility certificate.
|
|
82
|
+
|
|
83
|
+
The trust base is machine-checked in Rocq (`proofs/`): the operational semantics of the
|
|
84
|
+
modeled subset; the verification-condition generator over it, proven sound and complete for
|
|
85
|
+
straight-line assignment and conditionals and sound for while-loops carrying a syntactic
|
|
86
|
+
invariant, trap-aware so a reachable division or modulo by zero leaves the condition
|
|
87
|
+
undischarged; a fixed-width two's-complement integer model proven to agree with unbounded
|
|
88
|
+
arithmetic exactly when no operation overflows; the SMT-LIB division and modulo encoding
|
|
89
|
+
proven to refine the theory for every conforming solver; the abstract-domain transfers
|
|
90
|
+
(extracted to OCaml and run as the engine's operators); the translation as a
|
|
91
|
+
semantics-preserving functor; and the end-to-end theorem that a discharged verification
|
|
92
|
+
condition implies the property under the program's semantics. SMTCoq re-checks each integer
|
|
93
|
+
obligation's certificate inside Coq's kernel.
|
|
94
|
+
|
|
95
|
+
Both the straight-line and the loop verification-condition generators are extracted from that
|
|
96
|
+
proof to OCaml and transcribed in the engine, and a differential audit holds each transcription
|
|
97
|
+
byte-for-byte equal to its extraction on a random corpus, so the generator code that runs is the
|
|
98
|
+
one proven correct in Rocq, not a separate symbolic execution. The engine discharges the
|
|
99
|
+
loop-free integer fragment through the straight-line generator directly, trap-aware so a
|
|
100
|
+
reachable division by zero leaves the condition undischarged. With that core verified, the
|
|
101
|
+
random differential checks against CPython are a completeness regression that measures precision
|
|
102
|
+
rather than the barrier against an unsound verdict.
|
|
103
|
+
|
|
104
|
+
## Type inference
|
|
105
|
+
|
|
106
|
+
The same symbolic core infers the type of every function return, parameter, and variable in
|
|
107
|
+
unannotated code, in two modes. `infer_types` is over-approximating and sound: the reported
|
|
108
|
+
set of type names is guaranteed to contain the value's runtime type, or the location is left
|
|
109
|
+
UNKNOWN when no such bound can be established, so a stated type is never narrower than the
|
|
110
|
+
truth. `emit_facts` is best-effort exact in the TypeEvalPy schema and discovers its own
|
|
111
|
+
targets: it walks the module and emits a fact at every return, parameter, and binding it
|
|
112
|
+
finds, typing each by carrying an argument's type across the call boundary into the parameter
|
|
113
|
+
it reaches, following a value through reassignment and the narrowing of `is None` /
|
|
114
|
+
`isinstance` guards, and resolving container element types and dict keys through the call
|
|
115
|
+
graph, attributes set in a constructor, decorators, and generators.
|
|
116
|
+
|
|
117
|
+
Scored by TypeEvalPy's exact matcher at full source position, including column offset and with
|
|
118
|
+
the analysis discovering each location, name, and kind on its own, the emitted facts are matched
|
|
119
|
+
against the runtime-observed ground truth. As an independent check, the inference is run over
|
|
120
|
+
pure-Python standard-library modules and compared against the type CPython produces at runtime:
|
|
121
|
+
the sound mode's reported set is confirmed to contain the runtime type at every observed location,
|
|
122
|
+
and the exact mode is held to the same emit-and-match standard, discovering the locations itself.
|
|
123
|
+
|
|
124
|
+
| Evaluation | Result |
|
|
125
|
+
| --- | --- |
|
|
126
|
+
| TypeEvalPy micro-benchmark (emit-and-match) | 807 / 868 (92.97%) |
|
|
127
|
+
| TypeEvalPy autogen suite (emit-and-match) | 71,451 / 77,268 (92.47%) |
|
|
128
|
+
| CPython standard library (emit-and-match) | 770 / 958 (80.4%) |
|
|
129
|
+
|
|
130
|
+
## Run
|
|
131
|
+
|
|
132
|
+
```sh
|
|
133
|
+
pip install touchstone-prover # z3-solver and cvc5, pinned in pyproject.toml
|
|
134
|
+
python -m touchstone.ci # self-tests, soundness audits, completeness regressions -> "CI OK"
|
|
135
|
+
python -m touchstone.examples # one runnable example per capability, each verdict asserted
|
|
136
|
+
python -m touchstone # a demonstration
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
The machine-checked proofs run under the Rocq 9.0 opam switch; `verify_coq.sh` also invokes
|
|
140
|
+
the SMTCoq certificate check when its separate toolchain (see `proofs/toolchain.lock`) is
|
|
141
|
+
present, and skips it cleanly otherwise:
|
|
142
|
+
|
|
143
|
+
```sh
|
|
144
|
+
eval "$(opam env --switch=rocq9)" && cd proofs && bash verify_coq.sh
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
## Layout
|
|
148
|
+
|
|
149
|
+
```
|
|
150
|
+
touchstone/ package: core, domains, engines, theories, vcgen, audit, ci, examples (_impl is a compatibility aggregator that re-exports the implementation modules)
|
|
151
|
+
proofs/ Rocq + SMTCoq proofs, the extracted VC generators + interval operators, verify_coq.sh
|
|
152
|
+
.github/ continuous integration: the audits and the proof gate on every change
|
|
153
|
+
pyproject.toml package metadata and pinned Python dependencies
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
## License
|
|
157
|
+
|
|
158
|
+
MIT. See [LICENSE](LICENSE).
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "touchstone-prover"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "An SMT-based verifier for Python with a machine-checked trust base."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "CharlesCNorton" }]
|
|
13
|
+
keywords = ["verification", "smt", "z3", "cvc5", "formal-methods", "type-inference", "contracts"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Programming Language :: Python :: 3.11",
|
|
16
|
+
"Programming Language :: Python :: 3.12",
|
|
17
|
+
"Programming Language :: Python :: 3.13",
|
|
18
|
+
"License :: OSI Approved :: MIT License",
|
|
19
|
+
"Intended Audience :: Developers",
|
|
20
|
+
"Topic :: Software Development :: Quality Assurance",
|
|
21
|
+
"Topic :: Software Development :: Testing",
|
|
22
|
+
]
|
|
23
|
+
dependencies = ["z3-solver==4.16.0", "cvc5==1.3.4"]
|
|
24
|
+
|
|
25
|
+
[project.scripts]
|
|
26
|
+
touchstone = "touchstone.cli:main"
|
|
27
|
+
|
|
28
|
+
[project.entry-points.pytest11]
|
|
29
|
+
touchstone = "touchstone.pytest_plugin"
|
|
30
|
+
|
|
31
|
+
[project.urls]
|
|
32
|
+
Repository = "https://github.com/CharlesCNorton/touchstone"
|
|
33
|
+
|
|
34
|
+
[tool.setuptools]
|
|
35
|
+
packages = ["touchstone"]
|
|
36
|
+
|
|
37
|
+
# The companion proofs in proofs/ use a separate, non-Python toolchain pinned in
|
|
38
|
+
# proofs/toolchain.lock (Rocq 9.0 for three files; Coq 8.20 + SMTCoq + cvc4 for the fourth).
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""Touchstone: an SMT-based verifier for a subset of Python.
|
|
2
|
+
|
|
3
|
+
The full API is available from the top level; the same names are grouped into
|
|
4
|
+
importable submodules (core, domains, engines, theories, vcgen, audit) for callers that
|
|
5
|
+
want one layer in isolation.
|
|
6
|
+
"""
|
|
7
|
+
from . import _impl
|
|
8
|
+
from ._impl import * # noqa: F401,F403
|
|
9
|
+
|
|
10
|
+
__all__ = [n for n in dir(_impl) if not n.startswith("_")]
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""`python -m touchstone` runs the self-tests and the demonstration; `python -m touchstone
|
|
2
|
+
<command> ...` runs a command-line verb (see `touchstone -h` or `python -m touchstone check -h`)."""
|
|
3
|
+
import sys
|
|
4
|
+
|
|
5
|
+
if __name__ == "__main__":
|
|
6
|
+
if len(sys.argv) > 1:
|
|
7
|
+
from .cli import main
|
|
8
|
+
raise SystemExit(main())
|
|
9
|
+
from ._impl import run_self_tests, demo
|
|
10
|
+
run_self_tests()
|
|
11
|
+
demo()
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""Aggregator: the implementation now lives in the focused modules core, engines, domains,
|
|
2
|
+
theories, vcgen, audit, and soundinfer. This module preserves the historical touchstone._impl import surface."""
|
|
3
|
+
from . import core, engines, domains, theories, vcgen, audit, soundinfer
|
|
4
|
+
from .core import *
|
|
5
|
+
from .engines import *
|
|
6
|
+
from .domains import *
|
|
7
|
+
from .theories import *
|
|
8
|
+
from .vcgen import *
|
|
9
|
+
from .audit import *
|
|
10
|
+
from .soundinfer import infer_return_type, infer_local_types, infer_types
|
|
11
|
+
from .core import ALLOW_SUBJECT_EXECUTION, REQUIRE_CORROBORATION, CROSS_VALIDATE_DOMAINS
|