unicode-fol-kit 0.5.0__tar.gz → 0.5.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/CHANGELOG.md +37 -0
  2. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/PKG-INFO +52 -2
  3. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/README.md +51 -1
  4. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/pyproject.toml +1 -1
  5. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/__init__.py +5 -1
  6. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/atp/__init__.py +2 -0
  7. unicode_fol_kit-0.5.2/unicode_fol_kit/atp/vampire_entailment.py +150 -0
  8. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/eval/__init__.py +10 -0
  9. unicode_fol_kit-0.5.2/unicode_fol_kit/eval/predicate_match.py +180 -0
  10. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/.gitignore +0 -0
  11. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/LICENSE +0 -0
  12. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/__main__.py +0 -0
  13. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/atp/prover9_entailment.py +0 -0
  14. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/atp/resolution.py +0 -0
  15. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/atp/z3_arith.py +0 -0
  16. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/atp/z3_equivalence.py +0 -0
  17. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/atp/z3_fuzzy.py +0 -0
  18. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/atp/z3_models.py +0 -0
  19. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/eval/canonical.py +0 -0
  20. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/eval/validate.py +0 -0
  21. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/fol/__init__.py +0 -0
  22. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/fol/_fol_nodes.py +0 -0
  23. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/fol/_modal_nodes.py +0 -0
  24. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/fol/_msfl_nodes.py +0 -0
  25. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/fol/_so_nodes.py +0 -0
  26. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/fol/grammars/fl.lark +0 -0
  27. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/fol/grammars/fol.lark +0 -0
  28. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/fol/grammars/modal.lark +0 -0
  29. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/fol/grammars/msfl.lark +0 -0
  30. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/fol/grammars/msfol.lark +0 -0
  31. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/fol/grammars/so.lark +0 -0
  32. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/fol/grammars/terminals.lark +0 -0
  33. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/fol/lambda_tools.py +0 -0
  34. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/fol/latex_input.py +0 -0
  35. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/fol/modal_translation.py +0 -0
  36. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/fol/msflparser.py +0 -0
  37. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/fol/naming.py +0 -0
  38. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/fol/nodes.py +0 -0
  39. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/fol/normalforms.py +0 -0
  40. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/fol/unification.py +0 -0
  41. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/py.typed +0 -0
  42. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/semantics/__init__.py +0 -0
  43. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/semantics/_modal_reject.py +0 -0
  44. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/semantics/fuzzy.py +0 -0
  45. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/semantics/kripke.py +0 -0
  46. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/semantics/manyvalued.py +0 -0
  47. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/semantics/secondorder.py +0 -0
  48. {unicode_fol_kit-0.5.0 → unicode_fol_kit-0.5.2}/unicode_fol_kit/semantics/tarski.py +0 -0
@@ -5,6 +5,43 @@ loosely based on [Keep a Changelog](https://keepachangelog.com/). Versioning is
5
5
  semantic, but the project is pre-1.0 (alpha): a **minor** release may contain
6
6
  breaking changes.
7
7
 
8
+ ## [0.5.2] - 2026-06-26
9
+
10
+ ### Added
11
+
12
+ - **Predicate-aligned string match** (`unicode_fol_kit.eval.predicate_match`) —
13
+ `match_predicates`, `formulas_are_matched_identical`, and
14
+ `formulas_are_identical`, re-exported at the package top level. A lexical
15
+ (string-level) evaluation notion for NL→FOL: `match_predicates` greedily
16
+ renames each predicate/function symbol in a predicted formula to the
17
+ lexically-closest symbol in the reference (by **normalised Levenshtein
18
+ distance**, accepting matches at or below a `max_norm_distance` threshold,
19
+ default `0.6`), so a structurally-correct answer that merely chose different
20
+ predicate names is not penalised. `formulas_are_identical` is the plain
21
+ whitespace- and case-insensitive string equality; `formulas_are_matched_identical`
22
+ combines the two (realign predicates, then compare). This is **complementary**
23
+ to the AST-level `exact_match`: the canonical match quotients out α-renaming /
24
+ commutativity / associativity / double negation but treats different predicate
25
+ names as a mismatch, whereas this matcher quotients out predicate-name (and
26
+ whitespace/case) differences but not the structural rewrites — the two are
27
+ typically reported as separate metrics. The Levenshtein distance is computed in
28
+ pure Python, so **no new dependency** is introduced; the matcher is
29
+ parser-independent and also applies to raw, not-yet-parseable model output.
30
+
31
+ ## [0.5.1] - 2026-06-24
32
+
33
+ ### Added
34
+
35
+ - **`check_logical_entailment_vampire`** — entailment checking via the
36
+ [Vampire](https://vprover.github.io/) theorem prover, a TPTP-based companion to
37
+ the existing Prover9 backend. Premises are emitted as TPTP `axiom`s and the
38
+ conclusion as a `conjecture`; the path to the Vampire executable is passed as
39
+ the `vampire_path` argument, and a `SZS status Theorem` result means the
40
+ entailment holds. Classical FOL only (the same fragment `to_tptp` supports).
41
+ Pass `use_wsl=True` to drive a Linux Vampire installed in WSL from a Windows
42
+ host (Vampire is launched via `wsl.exe`, with automatic `wslpath` translation of
43
+ the temp-file path).
44
+
8
45
  ## [0.5.0] - 2026-06-24
9
46
 
10
47
  Adds an NL→FOL **evaluation** toolkit and broad **non-classical logic** coverage —
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: unicode-fol-kit
3
- Version: 0.5.0
3
+ Version: 0.5.2
4
4
  Summary: Parser and toolkit for first-order logic formulas using Unicode operators
5
5
  Project-URL: Repository, https://github.com/fvossel/unicode-fol-kit
6
6
  Project-URL: Issues, https://github.com/fvossel/unicode-fol-kit/issues
@@ -48,7 +48,7 @@ A Python toolkit for parsing and working with first-order logic (FOL) formulas w
48
48
  - **Prover9 export** — translate formulas to Prover9 syntax for automated theorem proving
49
49
  - **TPTP export** — translate formulas to TPTP syntax
50
50
  - **Equivalence checking** — check if two formulas are logically equivalent via Z3
51
- - **Entailment checking** — check if a conclusion follows from premises via Prover9
51
+ - **Entailment checking** — check if a conclusion follows from premises via Prover9 (`check_logical_entailment`) or Vampire (`check_logical_entailment_vampire`), each taking the prover's executable path as an argument
52
52
  - **Built-in resolution prover** — `prove()` and `is_valid_resolution()` decide entailment/validity in-process (sound first-order resolution, no external solver needed)
53
53
  - **Canonical form & exact match** — `canonicalize()` normalises bound-variable renaming, commutativity/associativity, operand duplication, and double negation; `exact_match()` gives a fair NL→FOL comparison stricter than logical equivalence but more forgiving than raw equality
54
54
  - **Formula validation** — `validate()` / `is_wellformed()` / `validate_text()` report free variables, inconsistent predicate/function arity, leftover lambda nodes, and parseability of raw model output
@@ -472,6 +472,32 @@ conclusion = parser.parse("Mortal(socrates)")
472
472
  check_logical_entailment(premises, conclusion, prover9_path="/usr/bin/prover9") # True
473
473
  ```
474
474
 
475
+ ### Entailment checking (Vampire)
476
+
477
+ The same check backed by [Vampire](https://vprover.github.io/) instead of Prover9: the premises are emitted as TPTP `axiom`s and the conclusion as a `conjecture`, and the path to the Vampire executable is passed as an argument (Vampire reports `SZS status Theorem` when the entailment holds).
478
+
479
+ ```python
480
+ from unicode_fol_kit import MSFLParser, check_logical_entailment_vampire # doctest: +SKIP (needs an installed Vampire)
481
+
482
+ parser = MSFLParser()
483
+ premises = [
484
+ parser.parse("∀x (Human(x) → Mortal(x))"),
485
+ parser.parse("Human(socrates)"),
486
+ ]
487
+ conclusion = parser.parse("Mortal(socrates)")
488
+
489
+ check_logical_entailment_vampire(premises, conclusion, vampire_path="/usr/bin/vampire") # True
490
+ ```
491
+
492
+ On Windows you can drive a Linux Vampire installed in **WSL** with `use_wsl=True`: Vampire is launched through `wsl.exe` and the temporary problem file's path is translated to its `/mnt/...` form automatically. Here `vampire_path` is the command/path *inside* WSL (e.g. `"vampire"` if it is on the WSL `PATH`).
493
+
494
+ ```python
495
+ # Windows host, Vampire installed in WSL: # doctest: +SKIP (needs WSL + Vampire)
496
+ check_logical_entailment_vampire(premises, conclusion, vampire_path="vampire", use_wsl=True) # True
497
+ ```
498
+
499
+ Note that every premise and the conclusion must be a closed sentence: Vampire rejects unquantified (free) variables. Recall that a single lowercase letter such as `x` is a *variable*, so an individual constant needs a multi-character name (`socrates`) or the `c_` prefix.
500
+
475
501
  ### Entailment and validity (built-in resolution prover)
476
502
 
477
503
  For entailment and validity **without** an external prover, the package ships a self-contained first-order **resolution** prover. It clausifies the input (skolemise → drop ∀ prefix → CNF → clauses), then refutes `premises ∧ ¬conclusion` by binary resolution and factoring, deriving the empty clause iff the entailment holds.
@@ -679,6 +705,30 @@ validate_text("∀x (P(x)").parseable # False (unbalanced parenthesis)
679
705
 
680
706
  The `ValidationReport` also exposes `has_lambdas`, and `predicates` / `functions` / `constants` / `sorts_used` inventories. Built-in comparison (`= ≠ < > ≤ ≥`) and arithmetic (`+ - * /`) symbols are excluded from the arity checks and inventories.
681
707
 
708
+ ### Predicate-aligned string match
709
+
710
+ `canonicalize` / `exact_match` forgive *structural* differences (α-renaming, commutativity/associativity, …) but treat two **different predicate names** as a genuine mismatch. The complementary, lexical notion is `match_predicates`: it greedily renames each predicate/function symbol in a predicted formula to the closest symbol in the reference — by **normalised Levenshtein distance**, accepting a match at or below a threshold (`max_norm_distance`, default `0.6`) — so a structurally-correct answer that merely chose different predicate names is not penalised. `formulas_are_identical` is the plain whitespace- and case-insensitive string equality; `formulas_are_matched_identical` realigns predicates and then compares.
711
+
712
+ ```python
713
+ from unicode_fol_kit import (
714
+ match_predicates,
715
+ formulas_are_identical,
716
+ formulas_are_matched_identical,
717
+ )
718
+
719
+ pred = "∀x (Wins(x) → Happy(x))"
720
+ ref = "∀x (Win(x) → Happy(x))" # same shape; "Wins" vs "Win"
721
+
722
+ formulas_are_identical(pred, ref) # False (raw strings differ)
723
+ match_predicates(pred, ref) # '∀x (Win(x) → Happy(x))'
724
+ formulas_are_matched_identical(pred, ref) # True (Wins → Win is a close match)
725
+
726
+ # A symbol with no sufficiently close reference counterpart is left untouched:
727
+ match_predicates("Red(x)", "Tall(x)") # 'Red(x)' (normalised distance 1.0 > 0.6)
728
+ ```
729
+
730
+ Unlike the `canonicalize`/`exact_match` pair, this matcher is purely **lexical** (string-level), so it also applies to raw model output that does not yet parse, and the two notions are typically reported as separate metrics (e.g. `EXACT_MATCH` vs `PREDICATE_MATCHED_EXACT_MATCH`). The Levenshtein distance is computed in pure Python, so no extra dependency is required.
731
+
682
732
  ## Modal, temporal, and epistemic logic
683
733
 
684
734
  Natural language is full of constructs classical FOL can't express directly — necessity/possibility, knowledge and belief, and time. `MSFLParser(modal=True)` adds a modal mode (classical unsorted FOL extended with modal operators) and the toolkit ships Kripke-model semantics plus a standard translation back to FOL.
@@ -24,7 +24,7 @@ A Python toolkit for parsing and working with first-order logic (FOL) formulas w
24
24
  - **Prover9 export** — translate formulas to Prover9 syntax for automated theorem proving
25
25
  - **TPTP export** — translate formulas to TPTP syntax
26
26
  - **Equivalence checking** — check if two formulas are logically equivalent via Z3
27
- - **Entailment checking** — check if a conclusion follows from premises via Prover9
27
+ - **Entailment checking** — check if a conclusion follows from premises via Prover9 (`check_logical_entailment`) or Vampire (`check_logical_entailment_vampire`), each taking the prover's executable path as an argument
28
28
  - **Built-in resolution prover** — `prove()` and `is_valid_resolution()` decide entailment/validity in-process (sound first-order resolution, no external solver needed)
29
29
  - **Canonical form & exact match** — `canonicalize()` normalises bound-variable renaming, commutativity/associativity, operand duplication, and double negation; `exact_match()` gives a fair NL→FOL comparison stricter than logical equivalence but more forgiving than raw equality
30
30
  - **Formula validation** — `validate()` / `is_wellformed()` / `validate_text()` report free variables, inconsistent predicate/function arity, leftover lambda nodes, and parseability of raw model output
@@ -448,6 +448,32 @@ conclusion = parser.parse("Mortal(socrates)")
448
448
  check_logical_entailment(premises, conclusion, prover9_path="/usr/bin/prover9") # True
449
449
  ```
450
450
 
451
+ ### Entailment checking (Vampire)
452
+
453
+ The same check backed by [Vampire](https://vprover.github.io/) instead of Prover9: the premises are emitted as TPTP `axiom`s and the conclusion as a `conjecture`, and the path to the Vampire executable is passed as an argument (Vampire reports `SZS status Theorem` when the entailment holds).
454
+
455
+ ```python
456
+ from unicode_fol_kit import MSFLParser, check_logical_entailment_vampire # doctest: +SKIP (needs an installed Vampire)
457
+
458
+ parser = MSFLParser()
459
+ premises = [
460
+ parser.parse("∀x (Human(x) → Mortal(x))"),
461
+ parser.parse("Human(socrates)"),
462
+ ]
463
+ conclusion = parser.parse("Mortal(socrates)")
464
+
465
+ check_logical_entailment_vampire(premises, conclusion, vampire_path="/usr/bin/vampire") # True
466
+ ```
467
+
468
+ On Windows you can drive a Linux Vampire installed in **WSL** with `use_wsl=True`: Vampire is launched through `wsl.exe` and the temporary problem file's path is translated to its `/mnt/...` form automatically. Here `vampire_path` is the command/path *inside* WSL (e.g. `"vampire"` if it is on the WSL `PATH`).
469
+
470
+ ```python
471
+ # Windows host, Vampire installed in WSL: # doctest: +SKIP (needs WSL + Vampire)
472
+ check_logical_entailment_vampire(premises, conclusion, vampire_path="vampire", use_wsl=True) # True
473
+ ```
474
+
475
+ Note that every premise and the conclusion must be a closed sentence: Vampire rejects unquantified (free) variables. Recall that a single lowercase letter such as `x` is a *variable*, so an individual constant needs a multi-character name (`socrates`) or the `c_` prefix.
476
+
451
477
  ### Entailment and validity (built-in resolution prover)
452
478
 
453
479
  For entailment and validity **without** an external prover, the package ships a self-contained first-order **resolution** prover. It clausifies the input (skolemise → drop ∀ prefix → CNF → clauses), then refutes `premises ∧ ¬conclusion` by binary resolution and factoring, deriving the empty clause iff the entailment holds.
@@ -655,6 +681,30 @@ validate_text("∀x (P(x)").parseable # False (unbalanced parenthesis)
655
681
 
656
682
  The `ValidationReport` also exposes `has_lambdas`, and `predicates` / `functions` / `constants` / `sorts_used` inventories. Built-in comparison (`= ≠ < > ≤ ≥`) and arithmetic (`+ - * /`) symbols are excluded from the arity checks and inventories.
657
683
 
684
+ ### Predicate-aligned string match
685
+
686
+ `canonicalize` / `exact_match` forgive *structural* differences (α-renaming, commutativity/associativity, …) but treat two **different predicate names** as a genuine mismatch. The complementary, lexical notion is `match_predicates`: it greedily renames each predicate/function symbol in a predicted formula to the closest symbol in the reference — by **normalised Levenshtein distance**, accepting a match at or below a threshold (`max_norm_distance`, default `0.6`) — so a structurally-correct answer that merely chose different predicate names is not penalised. `formulas_are_identical` is the plain whitespace- and case-insensitive string equality; `formulas_are_matched_identical` realigns predicates and then compares.
687
+
688
+ ```python
689
+ from unicode_fol_kit import (
690
+ match_predicates,
691
+ formulas_are_identical,
692
+ formulas_are_matched_identical,
693
+ )
694
+
695
+ pred = "∀x (Wins(x) → Happy(x))"
696
+ ref = "∀x (Win(x) → Happy(x))" # same shape; "Wins" vs "Win"
697
+
698
+ formulas_are_identical(pred, ref) # False (raw strings differ)
699
+ match_predicates(pred, ref) # '∀x (Win(x) → Happy(x))'
700
+ formulas_are_matched_identical(pred, ref) # True (Wins → Win is a close match)
701
+
702
+ # A symbol with no sufficiently close reference counterpart is left untouched:
703
+ match_predicates("Red(x)", "Tall(x)") # 'Red(x)' (normalised distance 1.0 > 0.6)
704
+ ```
705
+
706
+ Unlike the `canonicalize`/`exact_match` pair, this matcher is purely **lexical** (string-level), so it also applies to raw model output that does not yet parse, and the two notions are typically reported as separate metrics (e.g. `EXACT_MATCH` vs `PREDICATE_MATCHED_EXACT_MATCH`). The Levenshtein distance is computed in pure Python, so no extra dependency is required.
707
+
658
708
  ## Modal, temporal, and epistemic logic
659
709
 
660
710
  Natural language is full of constructs classical FOL can't express directly — necessity/possibility, knowledge and belief, and time. `MSFLParser(modal=True)` adds a modal mode (classical unsorted FOL extended with modal operators) and the toolkit ships Kripke-model semantics plus a standard translation back to FOL.
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "unicode-fol-kit"
7
- version = "0.5.0"
7
+ version = "0.5.2"
8
8
  description = "Parser and toolkit for first-order logic formulas using Unicode operators"
9
9
  readme = "README.md"
10
10
  license = { text = "MIT" }
@@ -26,6 +26,7 @@ from .fol import (
26
26
  )
27
27
  from .atp import (
28
28
  formulas_are_equivalent, check_logical_entailment,
29
+ check_logical_entailment_vampire,
29
30
  is_satisfiable, is_valid, get_model,
30
31
  fuzzy_is_satisfiable, fuzzy_is_valid, fuzzy_get_model,
31
32
  to_z3_arith, is_satisfiable_arith, is_valid_arith, get_model_arith,
@@ -41,9 +42,10 @@ from .semantics import (
41
42
  from .eval import (
42
43
  canonicalize, exact_match,
43
44
  validate, is_wellformed, validate_text, ValidationReport,
45
+ formulas_are_identical, match_predicates, formulas_are_matched_identical,
44
46
  )
45
47
 
46
- __version__ = "0.5.0"
48
+ __version__ = "0.5.2"
47
49
 
48
50
  __all__ = [
49
51
  "MSFLParser",
@@ -52,6 +54,7 @@ __all__ = [
52
54
  "Z3Env",
53
55
  "NamingError", "ParsingError",
54
56
  "formulas_are_equivalent", "check_logical_entailment",
57
+ "check_logical_entailment_vampire",
55
58
  "SortedQuantifier", "SortedConstant",
56
59
  "WeakConjunction", "WeakDisjunction",
57
60
  "StrongConjunction", "StrongDisjunction",
@@ -81,4 +84,5 @@ __all__ = [
81
84
  "satisfies_so", "holds",
82
85
  "canonicalize", "exact_match",
83
86
  "validate", "is_wellformed", "validate_text", "ValidationReport",
87
+ "formulas_are_identical", "match_predicates", "formulas_are_matched_identical",
84
88
  ]
@@ -1,5 +1,6 @@
1
1
  from .z3_equivalence import formulas_are_equivalent
2
2
  from .prover9_entailment import check_logical_entailment
3
+ from .vampire_entailment import check_logical_entailment_vampire
3
4
  from .z3_models import is_satisfiable, is_valid, get_model
4
5
  from .z3_fuzzy import (
5
6
  fuzzy_is_satisfiable, fuzzy_is_valid, fuzzy_get_model, degree_expr,
@@ -12,6 +13,7 @@ from .resolution import to_clauses, refute, prove, is_valid_resolution
12
13
  __all__ = [
13
14
  "formulas_are_equivalent",
14
15
  "check_logical_entailment",
16
+ "check_logical_entailment_vampire",
15
17
  "is_satisfiable", "is_valid", "get_model",
16
18
  "fuzzy_is_satisfiable", "fuzzy_is_valid", "fuzzy_get_model", "degree_expr",
17
19
  "to_z3_arith", "is_satisfiable_arith", "is_valid_arith", "get_model_arith", "ArithEnv",
@@ -0,0 +1,150 @@
1
+ """Entailment checking via the Vampire theorem prover (TPTP backend).
2
+
3
+ The companion to :func:`prover9_entailment.check_logical_entailment`, but driving
4
+ `Vampire <https://vprover.github.io/>`_ instead of Prover9. The problem is emitted
5
+ in TPTP ``fof`` syntax — every premise as an ``axiom`` and the conclusion as a
6
+ ``conjecture`` — and handed to a Vampire binary whose path the caller supplies.
7
+ Vampire negates the conjecture internally and reports ``SZS status Theorem`` when
8
+ the premises entail the conclusion.
9
+
10
+ Only the classical FOL fragment is supported, exactly as far as ``Node.to_tptp``
11
+ reaches: a modal, second-order, Łukasiewicz, or lambda node raises
12
+ ``NotImplementedError`` from ``to_tptp`` and that error propagates here.
13
+
14
+ A Windows host can drive a Linux Vampire installed in WSL by passing
15
+ ``use_wsl=True``: Vampire is then launched through ``wsl.exe`` and the temporary
16
+ problem file's path is translated to its ``/mnt/...`` form with ``wslpath``.
17
+ """
18
+
19
+ import os
20
+ import subprocess
21
+ import tempfile
22
+ from typing import List
23
+
24
+ from ..fol.nodes import Node
25
+
26
+
27
+ def _generate_vampire_input(premises: List[Node], conclusion: Node) -> str:
28
+ """Build a TPTP ``fof`` problem string from premises and a conclusion.
29
+
30
+ Each premise becomes ``fof(premise_<i>, axiom, <tptp>).`` and the conclusion
31
+ becomes ``fof(goal, conjecture, <tptp>).``. The bodies come from
32
+ ``Node.to_tptp`` (so variables are upper-cased TPTP-style). Vampire treats the
33
+ single conjecture as the goal to prove from the axioms.
34
+ """
35
+ lines: List[str] = []
36
+ for i, premise in enumerate(premises, start=1):
37
+ lines.append(f"fof(premise_{i}, axiom, {premise.to_tptp()}).")
38
+ lines.append(f"fof(goal, conjecture, {conclusion.to_tptp()}).")
39
+ return "\n".join(lines) + "\n"
40
+
41
+
42
+ def _is_entailed_output(stdout: str) -> bool:
43
+ """Decide entailment from Vampire's stdout.
44
+
45
+ Vampire reports ``SZS status Theorem`` when it proves the conjecture from the
46
+ axioms; ``Refutation found`` is the equivalent message in its default proof
47
+ output (and also covers the vacuous case of inconsistent premises, which
48
+ entail anything). Either signal means the entailment holds. A
49
+ ``CounterSatisfiable`` / ``Satisfiable`` / ``Timeout`` status — or no proof at
50
+ all — means it does not.
51
+ """
52
+ return ("SZS status Theorem" in stdout) or ("Refutation found" in stdout)
53
+
54
+
55
+ def _to_wsl_path(windows_path: str) -> str:
56
+ """Translate a Windows path to its WSL ``/mnt/...`` form via ``wslpath``.
57
+
58
+ Backslashes are turned into forward slashes first: the WSL interop layer
59
+ swallows backslashes in arguments (``C:\\Users\\…`` reaches ``wslpath`` as
60
+ ``C:Users…`` with the separators gone), whereas ``wslpath`` accepts the
61
+ forward-slash spelling ``C:/Users/…`` directly.
62
+ """
63
+ result = subprocess.run(
64
+ ["wsl.exe", "wslpath", "-u", windows_path.replace("\\", "/")],
65
+ capture_output=True,
66
+ text=True,
67
+ timeout=20,
68
+ )
69
+ wsl_path = result.stdout.strip()
70
+ if not wsl_path:
71
+ raise RuntimeError(
72
+ f"wslpath could not translate {windows_path!r} (is WSL available?): "
73
+ f"{result.stderr.strip()}"
74
+ )
75
+ return wsl_path
76
+
77
+
78
+ def _run_vampire(input_str: str, vampire_path: str, timeout: int = 30,
79
+ use_wsl: bool = False) -> bool:
80
+ """Write the TPTP problem to a temp file and run Vampire on it.
81
+
82
+ Mirrors the Prover9 runner's contract: a subprocess timeout is swallowed and
83
+ reported as "not entailed" (Vampire could not finish), while any other error —
84
+ notably ``FileNotFoundError`` for a wrong ``vampire_path`` — propagates to the
85
+ caller. The temporary file is always removed, even when the subprocess raises.
86
+
87
+ With ``use_wsl=True`` Vampire is invoked inside WSL as
88
+ ``wsl.exe <vampire_path> <file>``, and the Windows temp-file path is first
89
+ translated to its ``/mnt/...`` form with ``wslpath`` so a Linux Vampire under
90
+ WSL can read the file the Windows side created.
91
+ """
92
+ with tempfile.NamedTemporaryFile(mode="w", suffix=".p", delete=False,
93
+ encoding="utf-8") as temp_file:
94
+ temp_file.write(input_str)
95
+ temp_filename = temp_file.name
96
+
97
+ try:
98
+ if use_wsl:
99
+ command = ["wsl.exe", vampire_path, _to_wsl_path(temp_filename)]
100
+ else:
101
+ command = [vampire_path, temp_filename]
102
+ result = subprocess.run(
103
+ command,
104
+ capture_output=True,
105
+ text=True,
106
+ timeout=timeout,
107
+ )
108
+ return _is_entailed_output(result.stdout)
109
+ except subprocess.TimeoutExpired:
110
+ return False
111
+ finally:
112
+ try:
113
+ os.unlink(temp_filename)
114
+ except OSError:
115
+ pass
116
+
117
+
118
+ def check_logical_entailment_vampire(premises: List[Node], conclusion: Node,
119
+ vampire_path: str, timeout: int = 30,
120
+ use_wsl: bool = False) -> bool:
121
+ """Return whether ``premises`` entail ``conclusion``, decided by Vampire.
122
+
123
+ Args:
124
+ premises: a list of classical FOL premise formulas.
125
+ conclusion: the classical FOL conclusion formula.
126
+ vampire_path: path to a Vampire executable (e.g. ``"/usr/bin/vampire"``).
127
+ With ``use_wsl=True`` this is the command/path INSIDE WSL — e.g.
128
+ ``"vampire"`` if it is on the WSL ``PATH``, or ``"/home/me/vampire"``.
129
+ timeout: seconds to allow the Vampire process before giving up and
130
+ returning ``False`` (default 30).
131
+ use_wsl: when True, run Vampire inside WSL via ``wsl.exe`` and translate
132
+ the temp-file path to its ``/mnt/...`` form, so a Windows host can
133
+ drive a Linux Vampire installed in WSL.
134
+
135
+ Returns:
136
+ ``True`` iff Vampire proves the conclusion follows from the premises.
137
+ Note that every premise and the conclusion must be a closed sentence:
138
+ Vampire rejects formulas with unquantified (free) variables, and such a
139
+ rejection is reported as ``False`` (no proof), not raised.
140
+
141
+ Raises:
142
+ FileNotFoundError: ``vampire_path`` does not point to an executable (or,
143
+ with ``use_wsl=True``, ``wsl.exe`` itself is not found).
144
+ NotImplementedError: a formula is outside the first-order fragment
145
+ (modal / second-order / Łukasiewicz / lambda), surfaced by
146
+ ``to_tptp``.
147
+ """
148
+ vampire_input = _generate_vampire_input(premises, conclusion)
149
+ return _run_vampire(vampire_input, vampire_path, timeout=timeout,
150
+ use_wsl=use_wsl)
@@ -9,12 +9,22 @@
9
9
  - :func:`validate` / :func:`is_wellformed` / :func:`validate_text` report the
10
10
  common defects in a generated formula (free variables, inconsistent predicate
11
11
  or function arity, leftover lambda nodes, unparseable text).
12
+ - :func:`match_predicates` / :func:`formulas_are_matched_identical` /
13
+ :func:`formulas_are_identical` provide a lexical, predicate-aligned string
14
+ match (Levenshtein-based predicate renaming) — complementary to the AST-level
15
+ :func:`exact_match`, which instead quotients out the structural rewrites.
12
16
  """
13
17
 
14
18
  from .canonical import canonicalize, exact_match
15
19
  from .validate import validate, is_wellformed, validate_text, ValidationReport
20
+ from .predicate_match import (
21
+ formulas_are_identical,
22
+ match_predicates,
23
+ formulas_are_matched_identical,
24
+ )
16
25
 
17
26
  __all__ = [
18
27
  "canonicalize", "exact_match",
19
28
  "validate", "is_wellformed", "validate_text", "ValidationReport",
29
+ "formulas_are_identical", "match_predicates", "formulas_are_matched_identical",
20
30
  ]
@@ -0,0 +1,180 @@
1
+ """Predicate-aligned string matching for NL→FOL evaluation.
2
+
3
+ When scoring a model that translates natural language to FOL, two formulas may
4
+ denote the same thing while using *different predicate names* — a model might
5
+ write ``Wins(x)`` where the reference writes ``IsWinner(x)``. A plain string
6
+ comparison (or even a structural one) counts that as wrong, even though the
7
+ logical *shape* is identical and only the lexical choice of predicate symbol
8
+ differs. ``match_predicates`` closes that gap: it greedily renames each
9
+ predicate/function symbol in the prediction to the closest reference symbol
10
+ (by **normalised Levenshtein distance**, accepting a match at or below a
11
+ distance threshold) and returns the rewritten string, so a subsequent string
12
+ comparison rewards a structurally-correct answer that merely renamed its
13
+ predicates.
14
+
15
+ This is a deliberately **lexical / string-level** notion, complementary to the
16
+ AST-level :func:`unicode_fol_kit.eval.canonical.exact_match`:
17
+
18
+ * :func:`exact_match` (canonical) quotients out α-renaming, commutativity /
19
+ associativity, operand duplication, and double negation, but treats two
20
+ *different predicate names* as a genuine mismatch.
21
+ * :func:`match_predicates` / :func:`formulas_are_matched_identical` quotient out
22
+ *predicate-name* differences (and, via :func:`formulas_are_identical`,
23
+ whitespace and case), but not the structural rewrites above.
24
+
25
+ The two are orthogonal and are typically reported as separate metrics
26
+ (``EXACT_MATCH`` vs ``PREDICATE_MATCHED_EXACT_MATCH``). The matcher is
27
+ parser-independent: it operates directly on the surface strings, so it also
28
+ applies to raw model output that does not (yet) parse.
29
+
30
+ The Levenshtein distance is computed in pure Python (classical unit-cost
31
+ insertion / deletion / substitution dynamic program), so this module adds no
32
+ third-party dependency.
33
+ """
34
+
35
+ import re
36
+
37
+ __all__ = [
38
+ "formulas_are_identical",
39
+ "match_predicates",
40
+ "formulas_are_matched_identical",
41
+ ]
42
+
43
+ # A predicate or function symbol is a maximal word immediately followed by an
44
+ # opening parenthesis, e.g. the ``P`` in ``P(x)`` or the ``loves`` in
45
+ # ``loves(a, b)``. Nullary predicates written without parentheses are not
46
+ # captured (there is nothing lexical to realign), and neither are bare terms.
47
+ _SYMBOL_BEFORE_PAREN = re.compile(r"\b\w+(?=\()")  # the lookahead keeps the "(" itself out of the match
48
+
49
+ _WHITESPACE = re.compile(r"\s+")  # one or more consecutive whitespace characters
50
+
51
+
52
+ def _levenshtein(a: str, b: str) -> int:
53
+ """Return the Levenshtein edit distance between ``a`` and ``b``.
54
+
55
+ Classical unit-cost dynamic program (insertion, deletion, and substitution
56
+ each cost 1), computed with a single rolling row in O(len(a)·len(b)) time
57
+ and O(len(b)) space. Matches the value of ``Levenshtein.distance`` for the
58
+ same inputs, so results are identical whether or not the optional
59
+ ``python-Levenshtein`` C extension is installed.
60
+ """
61
+ if a == b:
62
+ return 0
63
+ if not a:
64
+ return len(b)
65
+ if not b:
66
+ return len(a)
67
+
68
+ previous = list(range(len(b) + 1))
69
+ for i, ca in enumerate(a, start=1):
70
+ current = [i]
71
+ for j, cb in enumerate(b, start=1):
72
+ insertion = current[j - 1] + 1
73
+ deletion = previous[j] + 1
74
+ substitution = previous[j - 1] + (ca != cb)
75
+ current.append(min(insertion, deletion, substitution))
76
+ previous = current
77
+ return previous[len(b)]
78
+
79
+
80
+ def _normalised_distance(a: str, b: str) -> float:
81
+ """Levenshtein distance scaled by the longer string's length, in [0, 1].
82
+
83
+ Normalising by ``max(len(a), len(b))`` makes the threshold length-agnostic:
84
+ a one-character edit weighs more between two short names than between two
85
+ long ones. Both names are predicate/function symbols matched by
86
+ :data:`_SYMBOL_BEFORE_PAREN`, hence always non-empty, so the denominator is
87
+ never zero.
88
+ """
89
+ return _levenshtein(a, b) / max(len(a), len(b))
90
+
91
+
92
def formulas_are_identical(prediction: str, reference: str) -> bool:
    """Compare two formula strings, ignoring all whitespace and letter case.

    Each string has every run of whitespace removed and is then lower-cased;
    the results are compared for equality.  For example ``"∀x P(x)"`` and
    ``"∀x p( x )"`` compare equal.  Predicate names are **not** realigned
    here — see :func:`formulas_are_matched_identical` for that.
    """
    def _squash(text: str) -> str:
        # Drop whitespace first, then fold case.
        return _WHITESPACE.sub("", text).lower()

    return _squash(prediction) == _squash(reference)
103
+
104
+
105
+ def _map_predicates(
106
+ prediction_symbols: list,
107
+ reference_symbols: list,
108
+ max_norm_distance: float = 0.6,
109
+ ) -> list:
110
+ """Map each prediction symbol to its nearest reference symbol, or keep it.
111
+
112
+ For every symbol in ``prediction_symbols`` the closest symbol in
113
+ ``reference_symbols`` (smallest normalised Levenshtein distance) is found.
114
+ If that distance is at or below ``max_norm_distance`` the reference symbol is
115
+ used; otherwise the original prediction symbol is kept unchanged (the match
116
+ is too weak to trust). Ties are broken by the reference symbol's position,
117
+ matching ``min``'s first-minimum semantics.
118
+ """
119
+ mapped = []
120
+ for symbol in prediction_symbols:
121
+ best_match = min(
122
+ reference_symbols,
123
+ key=lambda candidate: _normalised_distance(symbol, candidate),
124
+ )
125
+ if _normalised_distance(symbol, best_match) <= max_norm_distance:
126
+ mapped.append(best_match)
127
+ else:
128
+ mapped.append(symbol)
129
+ return mapped
130
+
131
+
132
def match_predicates(
    prediction: str,
    reference: str,
    max_norm_distance: float = 0.6,
) -> str:
    """Rewrite ``prediction``'s predicate/function names toward ``reference``.

    Every symbol that appears immediately before a ``(`` in ``prediction`` is
    realigned to the lexically-closest such symbol in ``reference`` (see
    :func:`_map_predicates`).  Symbols with no sufficiently close reference
    counterpart (normalised distance above ``max_norm_distance``) are left as
    they are.  If either side has no parenthesised symbols, the prediction is
    returned unchanged.

    The rewrite is a single left-to-right pass over the same token pattern
    that was used to collect the symbols, so each occurrence is renamed
    exactly once and only as a whole word.  A chain of plain
    ``str.replace(old + "(", new + "(")`` calls would not be safe: renaming
    ``Cat`` to ``Cats`` would also turn ``BigCat(`` into ``BigCats(``, and a
    later replacement could touch text produced by an earlier one.

    Returns:
        A string in the same surface syntax as the input, suitable for a
        subsequent :func:`formulas_are_identical` comparison or for
        re-parsing.
    """
    prediction_symbols = _SYMBOL_BEFORE_PAREN.findall(prediction)
    reference_symbols = _SYMBOL_BEFORE_PAREN.findall(reference)
    if not prediction_symbols or not reference_symbols:
        return prediction

    # Deduplicate while preserving first-occurrence order: each distinct
    # symbol is aligned once, however many times it occurs in the formula.
    distinct_symbols = list(dict.fromkeys(prediction_symbols))
    mapped_symbols = _map_predicates(
        distinct_symbols, reference_symbols, max_norm_distance
    )
    renaming = dict(zip(distinct_symbols, mapped_symbols))

    # ``sub`` visits exactly the matches ``findall`` returned above, so every
    # matched token is a key of ``renaming``.  A callable replacement also
    # inserts the new name literally (no backslash-escape processing).
    return _SYMBOL_BEFORE_PAREN.sub(
        lambda match: renaming[match.group(0)], prediction
    )
164
+
165
+
166
def formulas_are_matched_identical(
    prediction: str,
    reference: str,
    max_norm_distance: float = 0.6,
) -> bool:
    """Tell whether ``prediction`` equals ``reference`` once predicates are realigned.

    The prediction's predicate/function names are first rewritten toward the
    reference's with :func:`match_predicates` (using ``max_norm_distance`` as
    the acceptance threshold); the rewritten string is then compared to the
    reference with :func:`formulas_are_identical`, i.e. ignoring whitespace
    and case.  This is the ``PREDICATE_MATCHED_EXACT_MATCH`` notion: an answer
    with the right shape is not penalised merely for choosing different
    predicate symbol names.
    """
    return formulas_are_identical(
        match_predicates(prediction, reference, max_norm_distance),
        reference,
    )
File without changes