xhail 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. xhail-0.1.0/LICENSE +21 -0
  2. xhail-0.1.0/MANIFEST.in +12 -0
  3. xhail-0.1.0/PKG-INFO +412 -0
  4. xhail-0.1.0/README.md +381 -0
  5. xhail-0.1.0/experiments/benchmarks/animals.lp +43 -0
  6. xhail-0.1.0/experiments/benchmarks/blocks.lp +43 -0
  7. xhail-0.1.0/experiments/benchmarks/epidemic.lp +65 -0
  8. xhail-0.1.0/experiments/benchmarks/event_calculus.lp +55 -0
  9. xhail-0.1.0/experiments/benchmarks/grandfather.lp +39 -0
  10. xhail-0.1.0/experiments/benchmarks/penguins.lp +24 -0
  11. xhail-0.1.0/experiments/benchmarks/propositional.lp +29 -0
  12. xhail-0.1.0/experiments/benchmarks/sugar.lp +53 -0
  13. xhail-0.1.0/experiments/benchmarks/traffic.lp +31 -0
  14. xhail-0.1.0/experiments/benchmarks/trains.lp +53 -0
  15. xhail-0.1.0/pyproject.toml +109 -0
  16. xhail-0.1.0/setup.cfg +4 -0
  17. xhail-0.1.0/tests/test_benchmarks.py +243 -0
  18. xhail-0.1.0/tests/test_cli.py +201 -0
  19. xhail-0.1.0/tests/test_internals.py +361 -0
  20. xhail-0.1.0/tests/test_language.py +246 -0
  21. xhail-0.1.0/tests/test_phase0_regression.py +367 -0
  22. xhail-0.1.0/tests/test_pipeline_edge_cases.py +199 -0
  23. xhail-0.1.0/tests/test_public_api.py +276 -0
  24. xhail-0.1.0/xhail/__init__.py +34 -0
  25. xhail-0.1.0/xhail/cli.py +144 -0
  26. xhail-0.1.0/xhail/core.py +398 -0
  27. xhail-0.1.0/xhail/language/__init__.py +1 -0
  28. xhail-0.1.0/xhail/language/structures.py +204 -0
  29. xhail-0.1.0/xhail/language/terms.py +199 -0
  30. xhail-0.1.0/xhail/parser/__init__.py +1 -0
  31. xhail-0.1.0/xhail/parser/parser.py +329 -0
  32. xhail-0.1.0/xhail/reasoning/__init__.py +1 -0
  33. xhail-0.1.0/xhail/reasoning/abduction.py +49 -0
  34. xhail-0.1.0/xhail/reasoning/deduction.py +336 -0
  35. xhail-0.1.0/xhail/reasoning/induction.py +228 -0
  36. xhail-0.1.0/xhail/reasoning/model.py +238 -0
  37. xhail-0.1.0/xhail/reasoning/utils.py +23 -0
  38. xhail-0.1.0/xhail.egg-info/PKG-INFO +412 -0
  39. xhail-0.1.0/xhail.egg-info/SOURCES.txt +41 -0
  40. xhail-0.1.0/xhail.egg-info/dependency_links.txt +1 -0
  41. xhail-0.1.0/xhail.egg-info/entry_points.txt +2 -0
  42. xhail-0.1.0/xhail.egg-info/requires.txt +13 -0
  43. xhail-0.1.0/xhail.egg-info/top_level.txt +1 -0
xhail-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 td22885
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
xhail-0.1.0/MANIFEST.in ADDED
@@ -0,0 +1,12 @@
1
+ # Files to include in the source distribution (sdist) beyond the defaults.
2
+ include LICENSE README.md pyproject.toml
3
+
4
+ # Benchmark examples — included in the sdist for developers; not shipped in the wheel.
5
+ recursive-include experiments *.lp
6
+
7
+ # Exclude runtime-generated files that should never be in a release.
8
+ exclude xhail/parser/parsetab.py
9
+ exclude xhail/output/*.lp
10
+ global-exclude __pycache__
11
+ global-exclude *.py[cod]
12
+ global-exclude .DS_Store
xhail-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,412 @@
1
+ Metadata-Version: 2.4
2
+ Name: xhail
3
+ Version: 0.1.0
4
+ Summary: XHAIL: eXtended Hybrid Abductive Inductive Learning — a symbolic ILP system built on Answer Set Programming
5
+ License-Expression: MIT
6
+ Project-URL: Repository, https://github.com/everettmakes/xhail
7
+ Project-URL: Bug Tracker, https://github.com/everettmakes/xhail/issues
8
+ Keywords: inductive-logic-programming,answer-set-programming,symbolic-ai,machine-learning,ilp,asp
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.10
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: Operating System :: OS Independent
16
+ Requires-Python: >=3.10
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE
19
+ Requires-Dist: clingo>=5.6
20
+ Requires-Dist: ply>=3.11
21
+ Provides-Extra: dev
22
+ Requires-Dist: pytest>=8.0; extra == "dev"
23
+ Requires-Dist: pytest-cov>=5.0; extra == "dev"
24
+ Requires-Dist: ruff>=0.4; extra == "dev"
25
+ Requires-Dist: mypy>=1.0; extra == "dev"
26
+ Requires-Dist: pre-commit>=3.0; extra == "dev"
27
+ Provides-Extra: publish
28
+ Requires-Dist: build>=1.0; extra == "publish"
29
+ Requires-Dist: twine>=5.0; extra == "publish"
30
+ Dynamic: license-file
31
+
32
+ # XHAIL — eXtended Hybrid Abductive Inductive Learning
33
+
34
+ [![CI](https://github.com/everettmakes/xhail/actions/workflows/ci.yml/badge.svg)](https://github.com/everettmakes/xhail/actions/workflows/ci.yml)
35
+ [![codecov](https://codecov.io/gh/everettmakes/xhail/branch/main/graph/badge.svg)](https://codecov.io/gh/everettmakes/xhail)
36
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/)
37
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
38
+
39
+ A research-grade Python implementation of the XHAIL Inductive Logic Programming framework, built on [clingo](https://potassco.org/clingo/) 5.6+.
40
+
41
+ XHAIL learns **interpretable logic-program rules** from background knowledge and examples. Given observations, it produces human-readable hypotheses that are sound with respect to the background knowledge — no gradient descent, no black-box parameters. The learned rules can be read, verified, and extended by domain experts.
42
+
43
+ ```prolog
44
+ % Input: background knowledge + examples
45
+ bird(a). bird(b). bird(c). penguin(d).
46
+ bird(X) :- penguin(X).
47
+ #modeh flies(+bird).
48
+ #modeb not penguin(+bird).
49
+ #example flies(a). #example not flies(d).
50
+
51
+ % Output: learned hypothesis (< 5 ms)
52
+ flies(V1) :- not penguin(V1).
53
+ ```
54
+
55
+ ---
56
+
57
+ ## Contents
58
+
59
+ - [Motivation](#motivation)
60
+ - [How it works](#how-it-works)
61
+ - [Benchmark results](#benchmark-results)
62
+ - [Installation](#installation)
63
+ - [Usage](#usage)
64
+ - [Input format](#input-format)
65
+ - [Repository layout](#repository-layout)
66
+ - [Engineering notes](#engineering-notes)
67
+ - [Comparison to other ILP systems](#comparison-to-other-ilp-systems)
68
+ - [Roadmap](#roadmap)
69
+ - [Citation](#citation)
70
+
71
+ ---
72
+
73
+ ## Motivation
74
+
75
+ Statistical ML excels at pattern recognition but produces models that are opaque, brittle under distribution shift, and unable to incorporate prior symbolic knowledge. Inductive Logic Programming (ILP) takes the complementary path: it searches the space of logic programs and returns the simplest hypothesis that is logically consistent with the observations.
76
+
77
+ XHAIL's specific approach — **abduction followed by deduction followed by induction**, all implemented as Answer Set Programming (ASP) solve calls — gives it three properties that matter for real research:
78
+
79
+ 1. **Transparency.** Each intermediate result (the abduced atom set Δ, the kernel set K, the final hypothesis H) is a first-class artefact. You can inspect, checkpoint, and debug each phase independently.
80
+ 2. **Expressiveness.** Because the language is Answer Set Programming, XHAIL handles negation-as-failure, integrity constraints, and non-monotonic reasoning natively — things that classical ILP systems (Aleph, Metagol) cannot express.
81
+ 3. **Correctness guarantees.** The hypothesis is guaranteed to cover every positive example and violate no negative example, by construction.
82
+
83
+ ---
84
+
85
+ ## How it works
86
+
87
+ XHAIL decomposes hypothesis search into three focused solve calls:
88
+
89
+ ```
90
+ Input (.lp file)
91
+
92
+ ├─ Background knowledge (BG) — domain axioms, type facts, integrity constraints
93
+ ├─ Mode declarations (#modeh / #modeb) — define the hypothesis language
94
+ └─ Examples (#example) — positive and negative observations
95
+
96
+
97
+ ╔══════════════════════════════════════════════════════════════╗
98
+ ║ Phase 1 · Abduction ║
99
+ ║ ║
100
+ ║ Find the minimal set of ground atoms Δ (consistent with BG) ║
101
+ ║ such that BG ∪ Δ satisfies every example. ║
102
+ ║ ║
103
+ ║ Δ = { happens(infect(bob),1), happens(infect(carol),2) } ║
104
+ ╚══════════════════════════════════════════════════════════════╝
105
+
106
+
107
+ ╔══════════════════════════════════════════════════════════════╗
108
+ ║ Phase 2 · Deduction — BFS kernel construction ║
109
+ ║ ║
110
+ ║ Build the kernel set K: one maximally-specific clause per ║
111
+ ║ leaf of a BFS over (abduced atoms × mode schemas). ║
112
+ ║ Parent-pointer reconstruction gives O(depth) chain recall. ║
113
+ ║ Full ancestor tracking prevents BFS cycles in O(1) per step.║
114
+ ║ ║
115
+ ║ K = { happens(infect(bob),T) :- holdsAt(ill(alice),T), ║
116
+ ║ happens(infect(carol),T) :- holdsAt(ill(bob),T), … } ║
117
+ ╚══════════════════════════════════════════════════════════════╝
118
+
119
+
120
+ ╔══════════════════════════════════════════════════════════════╗
121
+ ║ Phase 3 · Induction ║
122
+ ║ ║
123
+ ║ Search for the smallest subset H ⊆ K (by literal count) ║
124
+ ║ such that BG ∪ H covers every positive example and ║
125
+ ║ violates no negative example. Solved with a clingo ║
126
+ ║ optimisation program (minimize{use(I,J)}). ║
127
+ ║ ║
128
+ ║ H = { happens(infect(bob),V1) :- holdsAt(ill(alice),V1). ║
129
+ ║ happens(infect(carol),V1) :- holdsAt(ill(bob),V1). }║
130
+ ╚══════════════════════════════════════════════════════════════╝
131
+
132
+
133
+ Learned hypothesis — printed to stdout or returned via Python API
134
+ ```
135
+
136
+ ---
137
+
138
+ ## Benchmark results
139
+
140
+ The suite covers ten canonical ILP tasks across classical logic, Event Calculus, negation-as-failure, and multi-body reasoning. Timings were measured on Python 3.11 with clingo 5.7 on a 4-core laptop; per-benchmark figures are CPU time, and the total wall time was measured with `--jobs 4`.
141
+
142
+ | Benchmark | Domain | Rules | CPU time | Notes |
143
+ |---|---|:---:|---:|---|
144
+ | `animals` | Classification | 1 | 6 ms | `mammal(V1) :- produces_milk(V1).` |
145
+ | `blocks` | Event Calculus | 2 | 8 ms | pick_up / put_down rules |
146
+ | `epidemic` | Event Calculus | 2 | 9 ms | Chained infection cascade, NAF temporal negatives |
147
+ | `event_calculus` | Event Calculus | 1 | 8 ms | `happens(work(alice),V1) :- holdsAt(awake(alice),V1).` |
148
+ | `grandfather` | Recursive relations | 1 | 41 ms | 2-literal chain; predicate-indexed BFS |
149
+ | `penguins` | NAF / exceptions | 1 | 2 ms | `flies(V1) :- not penguin(V1).` |
150
+ | `propositional` | Propositional | 1 | 4 ms | `output :- .` (zero-arity rule) |
151
+ | `sugar` | Event Calculus | 2 | 7 ms | Priority-ordered resource consumption, NAF |
152
+ | `traffic` | Rules | 1 | 4 ms | `stop(V1) :- red(V1).` |
153
+ | `trains` | Structural | 1 | 28 ms | 3-body rule; induction selects subset of kernel |
154
+ | **Total** | | **13** | **117 ms CPU / 58 ms wall** | 10 / 10 solved |
155
+
156
+ Reproduce with:
157
+
158
+ ```bash
159
+ python experiments/run_benchmarks.py # parallel (default: all cores)
160
+ python experiments/run_benchmarks.py --jobs 1 # sequential, for profiling
161
+ ```
162
+
163
+ ---
164
+
165
+ ## Installation
166
+
167
+ Requires **Python ≥ 3.10**. The `clingo` ASP solver is installed automatically as a Python wheel.
168
+
169
+ ```bash
170
+ git clone https://github.com/everettmakes/xhail.git
171
+ cd xhail
172
+ pip install -e ".[dev]"
173
+ ```
174
+
175
+ Verify:
176
+
177
+ ```bash
178
+ xhail --version
179
+ xhail run experiments/benchmarks/penguins.lp
180
+ # flies(V1) :- not penguin(V1).
181
+ ```
182
+
183
+ ---
184
+
185
+ ## Usage
186
+
187
+ ### Command line
188
+
189
+ ```bash
190
+ # Run the learner on any .lp file
191
+ xhail run myfile.lp
192
+
193
+ # Increase deduction depth (default 10) for deeper rule bodies
194
+ xhail run myfile.lp --depth 15
195
+
196
+ # Show phase-by-phase progress
197
+ xhail run myfile.lp --verbose
198
+
199
+ # Write intermediate ASP programs to disk for debugging
200
+ xhail run myfile.lp --debug --debug-output ./debug_out/
201
+ ```
202
+
203
+ ### Python API
204
+
205
+ ```python
206
+ from xhail import learn
207
+
208
+ result = learn("experiments/benchmarks/trains.lp", depth=10)
209
+
210
+ print(result.success) # True
211
+ print(result.n_rules) # 1
212
+ for rule in result.hypothesis:
213
+ print(rule)
214
+ # eastbound(V1) :- has_car(V1,V2), triangle_load(V2), rectangle(V2).
215
+
216
+ print(repr(result))
217
+ # LearningResult(success=True, n_rules=1, source='experiments/benchmarks/trains.lp')
218
+ ```
219
+
220
+ The `learn()` function is the single public entry point. It is thread-safe: each call creates a fresh `Model` instance with no shared mutable state.
221
+
222
+ ---
223
+
224
+ ## Input format
225
+
226
+ XHAIL input files are Answer Set Programs (`.lp`) with three additional directives:
227
+
228
+ ```prolog
229
+ % ── Background knowledge ──────────────────────────────────────
230
+ % Any valid ASP rules, facts, and integrity constraints.
231
+ bird(a). bird(b). bird(c).
232
+ penguin(d).
233
+ bird(X) :- penguin(X).
234
+
235
+ % ── Mode declarations ─────────────────────────────────────────
236
+ % #modeh defines allowed head predicates.
237
+ % #modeb defines allowed body predicates.
238
+ % Placemarkers:
239
+ % +type input variable — must be grounded by a prior term
240
+ % -type output variable — introduced by this literal
241
+ % #type ground constant — appears literally in the hypothesis
242
+ #modeh flies(+bird).
243
+ #modeb penguin(+bird).
244
+ #modeb not penguin(+bird). % negation-as-failure body literal
245
+
246
+ % ── Examples ──────────────────────────────────────────────────
247
+ #example flies(a). % positive: must be entailed by BG ∪ H
248
+ #example flies(b).
249
+ #example flies(c).
250
+ #example not flies(d). % negative: must NOT be entailed by BG ∪ H
251
+ ```
252
+
253
+ ### Placemarker reference
254
+
255
+ | Marker | Role | Effect |
256
+ |--------|------|--------|
257
+ | `+type` | Input variable | Must be grounded by the head or a prior body literal. Introduces a typed existential variable `V1`, `V2`, … |
258
+ | `-type` | Output variable | Introduced by this literal; can be used downstream. |
259
+ | `#type` | Ground constant | The actual constant (e.g. `alice`) appears in the learned rule, not a variable. Useful for domain-specific rules. |
260
+
261
+ ---
262
+
263
+ ## Repository layout
264
+
265
+ ```
266
+ xhail/
267
+ ├── xhail/ Core Python package
268
+ │ ├── __init__.py Public API — learn(), LearningResult
269
+ │ ├── cli.py xhail CLI (argparse, logging setup)
270
+ │ ├── core.py Pipeline orchestrator
271
+ │ ├── language/
272
+ │ │ ├── terms.py Atom, Clause, Literal, Normal, PlaceMarker, Fact
273
+ │ │ └── structures.py Mode declarations
274
+ │ ├── parser/
275
+ │ │ └── parser.py PLY-based .lp parser (tokeniser + grammar)
276
+ │ └── reasoning/
277
+ │ ├── abduction.py Phase 1 — ASP abduction, builds Δ
278
+ │ ├── deduction.py Phase 2 — BFS kernel construction
279
+ │ ├── induction.py Phase 3 — ASP minimisation, builds H
280
+ │ ├── model.py Shared state (clingo bridge, subsumption cache)
281
+ │ └── utils.py ASP serialisation helpers
282
+
283
+ ├── experiments/
284
+ │ ├── benchmarks/ 10 canonical .lp benchmarks
285
+ │ ├── run_benchmarks.py Benchmark runner (timing, memory, hypothesis)
286
+ │ ├── plot_results.py Matplotlib visualisation
287
+ │ └── results/ CSV / JSON metrics (git-ignored)
288
+
289
+ ├── tests/
290
+ │ ├── conftest.py Shared fixtures
291
+ │ ├── test_benchmarks.py Integration tests — one class per benchmark
292
+ │ ├── test_language.py Unit tests — term / clause data structures
293
+ │ ├── test_phase0_regression.py Regression tests for 14 fixed defects
294
+ │ └── test_pipeline_edge_cases.py Edge cases — UNSAT, empty kernel, timeout
295
+
296
+ ├── .github/workflows/ci.yml GitHub Actions — lint, type-check, test, benchmark
297
+ ├── pyproject.toml Build config, Ruff, mypy, pytest settings
298
+ ├── RELATED_WORK.md Full comparison: Aleph, Metagol, ILASP, FastLAS
299
+ └── RESEARCH_FRAMING.md Research questions, hypotheses, known limitations
300
+ ```
301
+
302
+ ---
303
+
304
+ ## Engineering notes
305
+
306
+ Several non-obvious engineering decisions are worth documenting:
307
+
308
+ **BFS kernel construction (deduction phase).** The original implementation used a parent-key string to track the immediate predecessor of each BFS node, causing O(depth × level_size) chain reconstruction and allowing A→B→A cycles. The rewrite stores a `frozenset` of all ancestor keys on each node (O(1) cycle detection) and a direct `parent_node` pointer for O(depth) chain reconstruction. Chains terminate when the ancestor set blocks all further extensions — for typical benchmarks (5–15 unique matching atoms), this keeps the BFS polynomial.
309
+
310
+ **Leaf-node kernel collection.** The kernel is collected from BFS *leaf nodes* — nodes that generated no children — rather than from the deepest BFS level. This correctly handles benchmarks where different head atoms produce chains of different depths (e.g. the epidemic benchmark: the bob-rule terminates at depth 1 while the carol-rule extends to depth 2; collecting only from `levels[top]` silently discarded the bob-rule).
311
+
312
+ **Type membership caching.** The subsumption check `isSubsumed(atom, mode)` requires verifying that each ground constant belongs to the correct type (e.g. `alice` ∈ `person`). The original implementation called `getMatches` on the entire model for every subsumption check — quadratic in model size. The rewrite builds a `dict[type_name, frozenset[str]]` once per abduced model from unary facts, reducing subsumption to a frozenset lookup.
313
+
314
+ **Predicate-indexed BFS.** During deduction, the inner loop previously cross-joined every (schema, fact) pair — O(|schemas| × |all_facts|) per BFS level. A `{predicate → [Atom]}` index built once from the abduced model reduces this to O(|schemas| × |bucket_size|). On `grandfather` (20+ parent facts, 5 grandparent targets) this cuts the BFS cross-join by ~10×, dropping per-run time from ~270 ms to ~41 ms. On `trains` (100+ car facts) the same index gives a ~3× improvement (81 ms → 28 ms).
315
+
316
+ **Parallel clingo.** Both `call()` (abduction) and `getBestModel()` (induction) pass `--parallel-mode=N` (N = min(4, cpu_count)) to clingo, engaging its built-in thread-pool at no extra implementation cost.
317
+
318
+ **Induction kernel cap.** After generalisation and deduplication, the size of the induction ASP program scales linearly with `|K| × max_body_size`. Empirical profiling across all 10 benchmarks shows that abstract clauses collapse to a single body length after generalisation (typically 5 literals). A default cap of **10 shortest abstract clauses** (configurable via `XHAIL_MAX_KERNEL`) is therefore sufficient — it gives induction full selectional flexibility while making the full benchmark run ~4× faster than with the former cap of 50:
319
+
320
+ ```
321
+ cap=5 → 90 ms total, 10/10 solved
322
+ cap=10 → 97 ms total, 10/10 solved ← default (safety margin)
323
+ cap=50 → 375 ms total, 10/10 solved ← former default, 4× slower
324
+ ```
325
+
326
+ **Parallel benchmark runner.** `run_benchmarks.py` uses `concurrent.futures.ProcessPoolExecutor` to run all benchmarks concurrently (default: all CPU cores, configurable via `--jobs N`). All 10 benchmarks complete in ~58 ms wall time, versus ~117 ms when run sequentially; the summary footer reports the speedup of wall time relative to total CPU time.
327
+
328
+ ---
329
+
330
+ ## Running the tests
331
+
332
+ ```bash
333
+ # Unit tests only — no clingo, runs in < 1 s
334
+ pytest -m "not integration"
335
+
336
+ # Full suite — unit + integration + edge cases
337
+ pytest
338
+
339
+ # With coverage
340
+ pytest --cov=xhail --cov-report=term-missing
341
+
342
+ # Specific benchmark
343
+ pytest tests/test_benchmarks.py::TestTrainsBenchmark -v
344
+ ```
345
+
346
+ All 147 tests pass on Python 3.10, 3.11, and 3.12 (94% line coverage).
347
+
348
+ ---
349
+
350
+ ## Comparison to other ILP systems
351
+
352
+ | System | Hypothesis language | NAF | Solver | Intermediate artefacts | Recursive programs |
353
+ |--------|---------------------|:---:|--------|:----------------------:|:------------------:|
354
+ | [Aleph](https://www.cs.ox.ac.uk/activities/programinduction/Aleph/aleph.html) | Horn clauses | ✗ | Prolog | ✗ | Limited |
355
+ | [Metagol](https://github.com/metagol/metagol) | Metarule instances | ✗ | Prolog | ✗ | ✓ |
356
+ | [ILASP](https://doc.ilasp.com/) | Full ASP | ✓ | clingo | ✗ | Limited |
357
+ | [FastLAS](https://spike-imperial.github.io/FastLAS/) | Normal + choice rules | ✓ | clingo | ✗ | Limited |
358
+ | **XHAIL** (this work) | Normal rules with NAF | ✓ | clingo | ✓ (Δ, K, H) | ✗ |
359
+
360
+ **vs Aleph / Metagol.** XHAIL supports negation-as-failure, which is essential for defeasible rules ("flies unless penguin"). Classical ILP systems based on definite Horn clauses cannot express this.
361
+
362
+ **vs ILASP.** Both use clingo and support NAF. The key difference is architecture: ILASP treats hypothesis search as a single, monolithic optimisation; XHAIL exposes Δ (abduced atoms) and K (kernel clauses) as checkpointable intermediate results. This makes it possible to inspect *why* a particular hypothesis was found — or wasn't.
363
+
364
+ **vs FastLAS.** FastLAS optimises for scalability via a faster partial evaluation strategy. XHAIL prioritises legibility of the learning process, making it better suited to research contexts where understanding *how* a hypothesis was derived matters as much as the hypothesis itself.
365
+
366
+ See [`RELATED_WORK.md`](RELATED_WORK.md) for a detailed technical comparison and [`RESEARCH_FRAMING.md`](RESEARCH_FRAMING.md) for open research questions.
367
+
368
+ ---
369
+
370
+ ## Roadmap
371
+
372
+ | Phase | Description | Status |
373
+ |-------|-------------|--------|
374
+ | 0 | Correctness & stabilisation — 14 defects fixed, 105 regression tests | ✅ Done |
375
+ | 1 | Repository professionalisation — packaging, public API, CLI | ✅ Done |
376
+ | 2 | Testing & CI — GitHub Actions (lint + type-check + test + benchmark), Codecov | ✅ Done |
377
+ | 3 | Experimental framework — 10 benchmarks, metrics runner, timing | ✅ Done |
378
+ | 4 | Performance engineering — BFS leaf collection, type-member cache, predicate-indexed BFS, parallel clingo, parallel benchmark runner | ✅ Done |
379
+ | 5 | Research positioning — related-work comparison, research framing | ✅ Done |
380
+ | 6 | Technical report / mini-paper | 🔲 Next |
381
+ | 7 | Extensions — noisy examples, neuro-symbolic integration, LLM-guided rule synthesis | 🔲 Planned |
382
+
383
+ ---
384
+
385
+ ## Citation
386
+
387
+ If you use this software in research, please cite both this implementation and the original XHAIL paper:
388
+
389
+ ```bibtex
390
+ @software{everett2025xhail,
391
+ author = {Everett, Josh},
392
+ title = {{XHAIL}: eXtended Hybrid Abductive Inductive Learning},
393
+ url = {https://github.com/everettmakes/xhail},
394
+ year = {2025}
395
+ }
396
+
397
+ @article{ray2009xhail,
398
+ author = {Ray, Oliver},
399
+ title = {Nonmonotonic abductive inductive learning},
400
+ journal = {Journal of Applied Logic},
401
+ volume = {7},
402
+ number = {3},
403
+ pages = {329--340},
404
+ year = {2009}
405
+ }
406
+ ```
407
+
408
+ ---
409
+
410
+ ## License
411
+
412
+ MIT — see [LICENSE](LICENSE).