xains 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xains-0.0.1/.gitignore +47 -0
- xains-0.0.1/CHANGELOG.md +220 -0
- xains-0.0.1/LICENSE +21 -0
- xains-0.0.1/PKG-INFO +226 -0
- xains-0.0.1/README.md +164 -0
- xains-0.0.1/pyproject.toml +183 -0
- xains-0.0.1/src/xains/__init__.py +143 -0
- xains-0.0.1/src/xains/_substitution.py +31 -0
- xains-0.0.1/src/xains/config.py +50 -0
- xains-0.0.1/src/xains/explainer.py +141 -0
- xains-0.0.1/src/xains/generation/__init__.py +18 -0
- xains-0.0.1/src/xains/generation/base.py +47 -0
- xains-0.0.1/src/xains/generation/llm.py +58 -0
- xains-0.0.1/src/xains/generation/templated.py +118 -0
- xains-0.0.1/src/xains/guardrails/__init__.py +17 -0
- xains-0.0.1/src/xains/guardrails/extraction.py +234 -0
- xains-0.0.1/src/xains/guardrails/rules.py +29 -0
- xains-0.0.1/src/xains/guardrails/types.py +91 -0
- xains-0.0.1/src/xains/integrations/__init__.py +5 -0
- xains-0.0.1/src/xains/integrations/feature_importance.py +77 -0
- xains-0.0.1/src/xains/metrics/__init__.py +56 -0
- xains-0.0.1/src/xains/metrics/_internal/__init__.py +0 -0
- xains-0.0.1/src/xains/metrics/_internal/curve_fit.py +65 -0
- xains-0.0.1/src/xains/metrics/_internal/data/cause_effect_markers.json +25 -0
- xains-0.0.1/src/xains/metrics/_internal/data/connectives.json +149 -0
- xains-0.0.1/src/xains/metrics/_internal/lexicons.py +73 -0
- xains-0.0.1/src/xains/metrics/_internal/perplexity_utils.py +20 -0
- xains-0.0.1/src/xains/metrics/_internal/tokenize.py +66 -0
- xains-0.0.1/src/xains/metrics/coverage.py +31 -0
- xains-0.0.1/src/xains/metrics/fidelity.py +152 -0
- xains-0.0.1/src/xains/metrics/grader.py +65 -0
- xains-0.0.1/src/xains/metrics/narrativity.py +372 -0
- xains-0.0.1/src/xains/metrics/perplexity.py +34 -0
- xains-0.0.1/src/xains/metrics/perplexity_api.py +146 -0
- xains-0.0.1/src/xains/metrics/perplexity_hf.py +112 -0
- xains-0.0.1/src/xains/prompts/__init__.py +17 -0
- xains-0.0.1/src/xains/prompts/base.py +27 -0
- xains-0.0.1/src/xains/prompts/echo.py +37 -0
- xains-0.0.1/src/xains/prompts/feature_importance_tabular.py +133 -0
- xains-0.0.1/src/xains/providers/__init__.py +20 -0
- xains-0.0.1/src/xains/providers/anthropic.py +67 -0
- xains-0.0.1/src/xains/providers/base.py +39 -0
- xains-0.0.1/src/xains/providers/mock.py +40 -0
- xains-0.0.1/src/xains/providers/openai_compatible.py +164 -0
- xains-0.0.1/src/xains/py.typed +0 -0
- xains-0.0.1/src/xains/schema.py +133 -0
- xains-0.0.1/src/xains/types.py +215 -0
xains-0.0.1/.gitignore
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# Distribution / packaging
|
|
7
|
+
build/
|
|
8
|
+
dist/
|
|
9
|
+
*.egg-info/
|
|
10
|
+
*.egg
|
|
11
|
+
.eggs/
|
|
12
|
+
|
|
13
|
+
# Virtual envs
|
|
14
|
+
.venv/
|
|
15
|
+
venv/
|
|
16
|
+
env/
|
|
17
|
+
.env
|
|
18
|
+
|
|
19
|
+
# Testing / coverage
|
|
20
|
+
.pytest_cache/
|
|
21
|
+
.coverage
|
|
22
|
+
.coverage.*
|
|
23
|
+
htmlcov/
|
|
24
|
+
.tox/
|
|
25
|
+
.nox/
|
|
26
|
+
|
|
27
|
+
# Type checkers
|
|
28
|
+
.mypy_cache/
|
|
29
|
+
.pyright/
|
|
30
|
+
.pytype/
|
|
31
|
+
|
|
32
|
+
# Ruff
|
|
33
|
+
.ruff_cache/
|
|
34
|
+
|
|
35
|
+
# IDE
|
|
36
|
+
.vscode/
|
|
37
|
+
.idea/
|
|
38
|
+
*.swp
|
|
39
|
+
*~
|
|
40
|
+
|
|
41
|
+
# OS
|
|
42
|
+
.DS_Store
|
|
43
|
+
Thumbs.db
|
|
44
|
+
|
|
45
|
+
# Dotenv files (local environment variables)
|
|
46
|
+
.env
|
|
47
|
+
.env.local
|
xains-0.0.1/CHANGELOG.md
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project are documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
While `0.y.z`, minor versions may contain breaking changes.
|
|
9
|
+
|
|
10
|
+
## [Unreleased]
|
|
11
|
+
|
|
12
|
+
### Changed (BREAKING)
|
|
13
|
+
|
|
14
|
+
- `ExplanationConfig.mode` is now a required field with no default.
|
|
15
|
+
Construction without `mode=` raises `ValidationError`. Previously
|
|
16
|
+
defaulted to `"auto"`.
|
|
17
|
+
- Mode vocabulary is now `"feature_importance"`, `"counterfactual"`,
|
|
18
|
+
`"feature_importance_counterfactual"`. The previous values `"auto"` and
|
|
19
|
+
`"contrastive"` are removed. `"contrastive"` is renamed and redefined
|
|
20
|
+
as `"feature_importance_counterfactual"` (a narrative weaving both factual
|
|
21
|
+
contributions and counterfactual(s)).
|
|
22
|
+
- `Explainer._resolve_mode` renamed to `_validate_mode` — the method no
|
|
23
|
+
longer infers mode from the request shape; it validates the explicit
|
|
24
|
+
mode and returns it.
|
|
25
|
+
- ADR 0012: explanation-mode vocabulary finalized (supersedes the mode
|
|
26
|
+
portion of ADR 0003).
|
|
27
|
+
- Removed `include_confidence` and `include_caveats` fields from
|
|
28
|
+
`ExplanationConfig`. They had no consumer in the library and never
|
|
29
|
+
affected behavior. Setting them now raises `ValidationError`.
|
|
30
|
+
ADR 0013.
|
|
31
|
+
- Scoring API renamed for lexical consistency (the word "score" reads
|
|
32
|
+
like a prediction/confidence score, which is not what these functions
|
|
33
|
+
produce):
|
|
34
|
+
`score_extraction` → `grade_extraction`,
|
|
35
|
+
`score_narrativity` → `grade_narrativity`,
|
|
36
|
+
`ExtractionScores` → `ExtractionGrades`,
|
|
37
|
+
`NarrativityScores` → `NarrativityGrades`.
|
|
38
|
+
The source module `xainarratives.metrics.scorer` is renamed to
|
|
39
|
+
`xainarratives.metrics.grader`; top-level re-exports
|
|
40
|
+
(`xainarratives.ExtractionGrades`, etc.) keep the same import path
|
|
41
|
+
modulo the new name. ADR 0014.
|
|
42
|
+
- `OpenAICompatibleEchoProvider` API key is now optional and keyword-only.
|
|
43
|
+
It resolves from `api_key=` if passed, else from the environment variable
|
|
44
|
+
named by `api_key_env_var` (default `OPENAI_API_KEY`), raising
|
|
45
|
+
`ValueError` if neither is set. The constructor is now keyword-only past
|
|
46
|
+
`base_url`. ADR 0015.
|
|
47
|
+
- Mode vocabulary renamed for semantic accuracy (the prior `"factual"`
|
|
48
|
+
collided with the counterfactual-explanation literature's meaning —
|
|
49
|
+
"factual" is the input datapoint, not an explanation style; the mode
|
|
50
|
+
actually means "explain via feature-importance contributions"):
|
|
51
|
+
`"factual"` → `"feature_importance"`,
|
|
52
|
+
`"factual_counterfactual"` → `"feature_importance_counterfactual"`.
|
|
53
|
+
`"counterfactual"` unchanged. `FactualTabularPromptTemplate` →
|
|
54
|
+
`FeatureImportanceTabularPromptTemplate`; module path
|
|
55
|
+
`xainarratives.prompts.factual_tabular` →
|
|
56
|
+
`xainarratives.prompts.feature_importance_tabular`. The CF-literature
|
|
57
|
+
use of "factual" in `Explainer._warn_if_counterfactual_does_not_flip`
|
|
58
|
+
(`factual_class` local var + warning prose) is deliberately preserved
|
|
59
|
+
as the now-unambiguous "input datapoint" sense. ADR 0016 (supersedes
|
|
60
|
+
the mode naming in ADR 0012).
|
|
61
|
+
- `Explainer` now takes `generator=` (a `NarrativeGenerator`:
|
|
62
|
+
`LLMNarrativeGenerator` or a future templated generator) instead of
|
|
63
|
+
`prompt_template=` / `llm=`. `judge_llm` is required when
|
|
64
|
+
`extract_narrative=True` — the silent `self.llm` fallback is removed;
|
|
65
|
+
`explain()` raises `ValueError` otherwise.
|
|
66
|
+
`ExplanationResult.{prompt, model_name, raw_llm_response}` widened
|
|
67
|
+
to `str | None` (templated generators produce no LLM metadata).
|
|
68
|
+
ADR 0018.
|
|
69
|
+
- Renamed the package from `xainarratives` to `xain`.
|
|
70
|
+
`from xainarratives import ...` becomes `from xain import ...`;
|
|
71
|
+
`pip install "xainarratives[extra]"` becomes
|
|
72
|
+
`pip install "xain[extra]"`. ADR 0021.
|
|
73
|
+
- Renamed the package from `xain` to `xains` (the PyPI name `xain` was unavailable). `from xain import ...` becomes `from xains import ...`; the distribution is `xains`. ADR 0022.
|
|
74
|
+
|
|
75
|
+
### Added
|
|
76
|
+
|
|
77
|
+
- `xainarratives.metrics` subpackage: `sign_faithfulness`,
|
|
78
|
+
`value_faithfulness`, `rank_correlation`, `coverage`,
|
|
79
|
+
`hallucination_count`, `readability`. All pure functions; degenerate
|
|
80
|
+
inputs return `None` rather than raising.
|
|
81
|
+
- `ExtractionScores` model + `score_extraction(extraction, request,
|
|
82
|
+
schema, narrative_text, k=10, perplexity_provider=None)` integration
|
|
83
|
+
function.
|
|
84
|
+
- `PerplexityProvider` Protocol (`@runtime_checkable`) with two
|
|
85
|
+
concretes: `DisabledProvider` (always returns `None`; default) and
|
|
86
|
+
`APIPerplexityProvider` (wraps a caller-supplied callable).
|
|
87
|
+
- `FeatureClaim.narrative_name` (required) and
|
|
88
|
+
`FeatureClaim.resolved_to: str | None` fields, capturing the LLM's
|
|
89
|
+
resolution from narrative mention to schema feature name.
|
|
90
|
+
- `NarrativeExtraction.hallucinations: list[FeatureClaim]` channel for
|
|
91
|
+
unresolved narrative mentions.
|
|
92
|
+
- Three new pydantic validators on `NarrativeExtraction`: rank
|
|
93
|
+
permutation over features and hallucinations together; key /
|
|
94
|
+
`resolved_to` consistency for resolved features; `resolved_to is None`
|
|
95
|
+
for every hallucination.
|
|
96
|
+
- `textstat` as an optional dependency
|
|
97
|
+
(`pip install "xainarratives[textstat]"`) for the readability metric.
|
|
98
|
+
- ADR 0007: resolution at extraction time.
|
|
99
|
+
- Seven paper narrativity metrics from Cedro & Martens 2026
|
|
100
|
+
(arXiv:2604.18311): `csr`, `dcpr`, `ccpr`, `cecpr`, `fdr`, `ttcpr`,
|
|
101
|
+
`vcpr`. All pure `(text, provider) -> float | None`; degrade to
|
|
102
|
+
`None` on degenerate inputs.
|
|
103
|
+
- `NarrativityScores` model + `score_narrativity(text, provider)`
|
|
104
|
+
orchestrator. Captures the 7 derived metrics plus 9 auxiliary
|
|
105
|
+
primitives (`ppl_ordered`, `ppl_shuffled`, `decay_constant`,
|
|
106
|
+
`dist2`, `ttr`, `vr`, `cr`, `cer`, `n_sentences`) for paper
|
|
107
|
+
replication.
|
|
108
|
+
- `xainarratives.metrics._internal/` private subpackage:
|
|
109
|
+
`tokenize` (NLTK sentence/POS, regex word tokenizer), `curve_fit`
|
|
110
|
+
(scipy exponential decay fitter), `lexicons` (vendored JSON loaders
|
|
111
|
+
+ greedy phrase counter), `perplexity_utils` (cumulative perplexity
|
|
112
|
+
over sentence prefixes).
|
|
113
|
+
- Vendored lexicons under
|
|
114
|
+
`src/xainarratives/metrics/_internal/data/`: 142-entry connectives
|
|
115
|
+
(Das et al. 2018, ACL W18-5042) and 19-entry cause-effect markers
|
|
116
|
+
(paper Appendix A). Loaders assert expected counts at load time.
|
|
117
|
+
- `narrativity` optional dependency
|
|
118
|
+
(`pip install "xainarratives[narrativity]"`) bundling
|
|
119
|
+
`nltk>=3.9,<4` and `scipy>=1.13,<2`.
|
|
120
|
+
- ADR 0008: narrativity metrics — paper-faithful composition over
|
|
121
|
+
Protocol changes.
|
|
122
|
+
- `HuggingFacePerplexityProvider` — local autoregressive perplexity via
|
|
123
|
+
`transformers` + `torch`. Eager-loads model + tokenizer in `__init__`,
|
|
124
|
+
auto-detects CUDA, truncates oversize inputs with one `UserWarning` per
|
|
125
|
+
provider instance. Default `model_name="gpt2"` (~500 MB cached
|
|
126
|
+
download on first use); paper replication wants
|
|
127
|
+
`meta-llama/Llama-3.1-8B`.
|
|
128
|
+
- `OpenAICompatibleEchoProvider` — hits any OpenAI-compatible
|
|
129
|
+
`/v1/completions` endpoint with `echo=True, logprobs=1, max_tokens=1`
|
|
130
|
+
(Together.ai, vLLM, TGI's OpenAI shim, OpenAI's legacy completions).
|
|
131
|
+
Dual-shape response parser handles both `choices[0].logprobs` and
|
|
132
|
+
`prompt[0].logprobs`. Catches `openai.OpenAIError` and returns `None`
|
|
133
|
+
per Protocol contract.
|
|
134
|
+
- `perplexity-hf` optional dependency
|
|
135
|
+
(`pip install "xainarratives[perplexity-hf]"`) bundling
|
|
136
|
+
`transformers>=4.40,<5` and `torch>=2.0,<3`.
|
|
137
|
+
- `perplexity-api` optional dependency
|
|
138
|
+
(`pip install "xainarratives[perplexity-api]"`) bundling
|
|
139
|
+
`openai>=1.30,<2`.
|
|
140
|
+
- ADR 0009: perplexity providers — two concretes, no shared base.
|
|
141
|
+
- Executable quickstart notebook at `notebooks/01_quickstart.ipynb`:
|
|
142
|
+
end-to-end pipeline on a 30-row OpenML German Credit slice (load,
|
|
143
|
+
one-hot encode, RF + SHAP, build request, generate + extract, score
|
|
144
|
+
extraction + narrativity). Outputs committed for GitHub rendering.
|
|
145
|
+
- Vendored `notebooks/data/german_credit_sample.csv` and its
|
|
146
|
+
deterministic regenerator `scripts/generate_german_credit_sample.py`
|
|
147
|
+
(seed 42; runs once before the notebook ever executes).
|
|
148
|
+
- `notebook` optional dependency
|
|
149
|
+
(`pip install "xainarratives[notebook]"`) bundling `jupyter`, `shap`,
|
|
150
|
+
`scikit-learn`, `pandas`.
|
|
151
|
+
- ADR 0010: ship a quickstart Jupyter notebook.
|
|
152
|
+
- Configurable narrative-generation rules: `ExplanationConfig.narrative_rules`
|
|
153
|
+
(a string field, default `DEFAULT_NARRATIVE_RULES`) is injected into the
|
|
154
|
+
system prompt by `FeatureImportanceTabularPromptTemplate`. The default is the
|
|
155
|
+
four-rule operational definition of an XAI Narrative from Cedro & Martens
|
|
156
|
+
2026; users override it by passing a custom value. Applies to all
|
|
157
|
+
narrative-generating templates by convention.
|
|
158
|
+
- ADR 0011: configurable narrative-generation rules.
|
|
159
|
+
- `FeatureImportanceTabularPromptTemplate` now accepts `system_template`,
|
|
160
|
+
`user_template`, and `extra_placeholders` (all keyword-only, defaulted)
|
|
161
|
+
for editable prompts with `{placeholder}` substitution.
|
|
162
|
+
`DEFAULT_SYSTEM_TEMPLATE` and `DEFAULT_USER_TEMPLATE` are exported from
|
|
163
|
+
`xainarratives.prompts`. The quickstart notebook prints the rendered
|
|
164
|
+
prompt before sending. ADR 0017.
|
|
165
|
+
- `TemplatedNarrativeGenerator` — LLM-free feature-importance
|
|
166
|
+
narratives. Verbalizes ranked contributions as prose with no LLM
|
|
167
|
+
call; method-agnostic by default (`method="SHAP"` reproduces
|
|
168
|
+
Cedro 2026's templated-baseline wording); editable lead/clause
|
|
169
|
+
templates; raw values from `request.features`; tabular-only. Slots
|
|
170
|
+
into `Explainer(generator=)` and flows through the same extraction
|
|
171
|
+
+ grading path as LLM narratives (LLM-free generation, LLM-graded).
|
|
172
|
+
The shared `substitute()` helper now has its second user. ADR 0019.
|
|
173
|
+
- OpenAI and OpenRouter narrative-generation providers:
|
|
174
|
+
`OpenAIProvider` (reads `OPENAI_API_KEY`) and `OpenRouterProvider`
|
|
175
|
+
(reads `OPENROUTER_API_KEY`, optional `HTTP-Referer`/`X-Title`
|
|
176
|
+
headers), both thin presets over a new public
|
|
177
|
+
`OpenAICompatibleProvider` base usable directly for any
|
|
178
|
+
OpenAI-compatible endpoint (Together, Groq, vLLM, ...). Eager key
|
|
179
|
+
resolution, lazy SDK import. New `openai` pip extra. All providers
|
|
180
|
+
now top-level importable: `from xainarratives import
|
|
181
|
+
AnthropicProvider, OpenAIProvider, OpenRouterProvider,
|
|
182
|
+
OpenAICompatibleProvider, ...`. ADR 0020.
|
|
183
|
+
|
|
184
|
+
### Changed
|
|
185
|
+
|
|
186
|
+
- `_EXTRACTION_PROMPT_VERSION` bumped from `"1"` to `"2"`. The wire
|
|
187
|
+
format adds `narrative_name` per feature claim and a separate
|
|
188
|
+
`hallucinations` array.
|
|
189
|
+
- `NarrativeExtraction.features` is now keyed by **schema feature
|
|
190
|
+
name** rather than the narrative's name for the feature. Resolution
|
|
191
|
+
happens at extraction time, not at scoring time.
|
|
192
|
+
- `extract_narrative_claims` now rejects `features` keys not in the
|
|
193
|
+
schema's resolution vocabulary as a parse failure (advisory
|
|
194
|
+
`GuardrailResult`, no exception).
|
|
195
|
+
- `grade_extraction` swallows `ImportError` from `readability` so a
|
|
196
|
+
missing `textstat` install degrades to `readability=None` rather than
|
|
197
|
+
cascading to the whole grader. `readability()` itself keeps the
|
|
198
|
+
strict `ImportError` contract for direct callers.
|
|
199
|
+
- ADR 0006 status updated to "superseded in part by 0007".
|
|
200
|
+
|
|
201
|
+
### Removed
|
|
202
|
+
|
|
203
|
+
- Support for prompt-version `"1"` extractions. Hard cutover; pre-1.0
|
|
204
|
+
project, no external users, no compatibility shim.
|
|
205
|
+
- `APIPerplexityProvider` (abstract callable-wrapper placeholder). Zero
|
|
206
|
+
callers in the codebase, failed the CLAUDE.md "abstractions need ≥2
|
|
207
|
+
implementations" rule. Replaced by `HuggingFacePerplexityProvider` and
|
|
208
|
+
`OpenAICompatibleEchoProvider`.
|
|
209
|
+
|
|
210
|
+
## [0.0.1] - 2026-04-23
|
|
211
|
+
|
|
212
|
+
### Added
|
|
213
|
+
|
|
214
|
+
- Initial skeleton: pydantic schema / types / config for all four modalities
|
|
215
|
+
(tabular, text, image, graph).
|
|
216
|
+
- `LLMProvider` Protocol + `MockLLMProvider`.
|
|
217
|
+
- `PromptTemplate` ABC + `EchoPromptTemplate`.
|
|
218
|
+
- `Explainer` orchestrator with sync `explain()`.
|
|
219
|
+
- ADRs 0001–0004 recording scope, API style, data-model, and counterfactual-payload
|
|
220
|
+
decisions.
|
xains-0.0.1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 University of Antwerp
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
xains-0.0.1/PKG-INFO
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: xains
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Natural-language verbalization of ML model predictions from pre-computed attributions.
|
|
5
|
+
Project-URL: Repository, https://github.com/ADMAntwerp/xains
|
|
6
|
+
Project-URL: Issues, https://github.com/ADMAntwerp/xains/issues
|
|
7
|
+
Author-email: Mateusz Cedro <mateusz.cedro@uantwerpen.be>, David Martens <david.martens@uantwerpen.be>
|
|
8
|
+
License: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: explainability,llm,nlg,shap,xai
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Classifier: Typing :: Typed
|
|
21
|
+
Requires-Python: >=3.11
|
|
22
|
+
Requires-Dist: pydantic>=2.6
|
|
23
|
+
Provides-Extra: anthropic
|
|
24
|
+
Requires-Dist: anthropic<1.0,>=0.96; extra == 'anthropic'
|
|
25
|
+
Provides-Extra: dev
|
|
26
|
+
Requires-Dist: anthropic<1.0,>=0.96; extra == 'dev'
|
|
27
|
+
Requires-Dist: jupyter<2,>=1.1; extra == 'dev'
|
|
28
|
+
Requires-Dist: mypy>=1.8; extra == 'dev'
|
|
29
|
+
Requires-Dist: nltk<4,>=3.9; extra == 'dev'
|
|
30
|
+
Requires-Dist: openai<2,>=1.30; extra == 'dev'
|
|
31
|
+
Requires-Dist: pandas<3,>=2.2; extra == 'dev'
|
|
32
|
+
Requires-Dist: pre-commit>=3.6; extra == 'dev'
|
|
33
|
+
Requires-Dist: pytest-cov>=4.1; extra == 'dev'
|
|
34
|
+
Requires-Dist: pytest-recording<0.14,>=0.13; extra == 'dev'
|
|
35
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
36
|
+
Requires-Dist: ruff>=0.3; extra == 'dev'
|
|
37
|
+
Requires-Dist: scikit-learn<2,>=1.5; extra == 'dev'
|
|
38
|
+
Requires-Dist: scipy<2,>=1.13; extra == 'dev'
|
|
39
|
+
Requires-Dist: shap<1,>=0.46; extra == 'dev'
|
|
40
|
+
Requires-Dist: textstat<1,>=0.7; extra == 'dev'
|
|
41
|
+
Requires-Dist: torch<3,>=2.0; extra == 'dev'
|
|
42
|
+
Requires-Dist: transformers<5,>=4.40; extra == 'dev'
|
|
43
|
+
Requires-Dist: vcrpy<9,>=8.1; extra == 'dev'
|
|
44
|
+
Provides-Extra: narrativity
|
|
45
|
+
Requires-Dist: nltk<4,>=3.9; extra == 'narrativity'
|
|
46
|
+
Requires-Dist: scipy<2,>=1.13; extra == 'narrativity'
|
|
47
|
+
Provides-Extra: notebook
|
|
48
|
+
Requires-Dist: jupyter<2,>=1.1; extra == 'notebook'
|
|
49
|
+
Requires-Dist: pandas<3,>=2.2; extra == 'notebook'
|
|
50
|
+
Requires-Dist: scikit-learn<2,>=1.5; extra == 'notebook'
|
|
51
|
+
Requires-Dist: shap<1,>=0.46; extra == 'notebook'
|
|
52
|
+
Provides-Extra: openai
|
|
53
|
+
Requires-Dist: openai<2,>=1.30; extra == 'openai'
|
|
54
|
+
Provides-Extra: perplexity-api
|
|
55
|
+
Requires-Dist: openai<2,>=1.30; extra == 'perplexity-api'
|
|
56
|
+
Provides-Extra: perplexity-hf
|
|
57
|
+
Requires-Dist: torch<3,>=2.0; extra == 'perplexity-hf'
|
|
58
|
+
Requires-Dist: transformers<5,>=4.40; extra == 'perplexity-hf'
|
|
59
|
+
Provides-Extra: textstat
|
|
60
|
+
Requires-Dist: textstat<1,>=0.7; extra == 'textstat'
|
|
61
|
+
Description-Content-Type: text/markdown
|
|
62
|
+
|
|
63
|
+
# xains
|
|
64
|
+
|
|
65
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
66
|
+
[Python 3.11+](https://www.python.org)
|
|
67
|
+
|
|
68
|
+
<!-- TODO (add as each service comes online for ADMAntwerp/xains):
|
|
69
|
+
[PyPI](https://pypi.org/project/xains/)
|
|
70
|
+
[CI](https://github.com/ADMAntwerp/xains/actions/workflows/ci.yml)
|
|
71
|
+
[Coverage](https://codecov.io/gh/ADMAntwerp/xains)
|
|
72
|
+
[Documentation](https://xains.readthedocs.io/en/latest/)
|
|
73
|
+
-->
|
|
74
|
+
|
|
75
|
+
xains generates explainable AI (XAI) narratives - hence the name. It turns technical XAI outputs such as SHAP attributions and counterfactuals into clear natural-language explanations that make model decisions understandable to a broad audience.
|
|
76
|
+
|
|
77
|
+
> **Scope.** This library generates natural-language XAI narratives from technical outputs like SHAP attributions or counterfactual explanations, making the explanations more transparent and understandable.
|
|
78
|
+
|
|
79
|
+
## Install
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
git clone https://github.com/ADMAntwerp/xains.git
|
|
83
|
+
cd xains
|
|
84
|
+
pip install -e .
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Minimal example
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
import xains
|
|
91
|
+
import xains.prompts
|
|
92
|
+
|
|
93
|
+
schema = xains.DatasetSchema(
|
|
94
|
+
modality=xains.Modality.TABULAR,
|
|
95
|
+
name="credit_risk",
|
|
96
|
+
description="Predicts 24-month default on personal loans.",
|
|
97
|
+
target=xains.TargetSchema(
|
|
98
|
+
name="default",
|
|
99
|
+
description="Whether the applicant defaulted.",
|
|
100
|
+
classes={0: "Repaid", 1: "Defaulted"},
|
|
101
|
+
),
|
|
102
|
+
features=[
|
|
103
|
+
xains.FeatureSchema(name="age", dtype="numeric", unit="years",
|
|
104
|
+
description="Applicant age at application."),
|
|
105
|
+
xains.FeatureSchema(name="salary", dtype="numeric", unit="EUR",
|
|
106
|
+
description="Annual gross salary."),
|
|
107
|
+
xains.FeatureSchema(name="debt_to_income", dtype="numeric",
|
|
108
|
+
description="Debt-to-income ratio."),
|
|
109
|
+
],
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
request = xains.TabularExplanationRequest(
|
|
113
|
+
features={"age": 29, "salary": 52000, "debt_to_income": 0.41},
|
|
114
|
+
prediction=xains.Prediction(predicted_class=1, probabilities={0: 0.2, 1: 0.8}),
|
|
115
|
+
contributions=[
|
|
116
|
+
xains.TabularContribution(name="debt_to_income", value=0.41, importance=0.37),
|
|
117
|
+
xains.TabularContribution(name="salary", value=52000, importance=-0.21),
|
|
118
|
+
xains.TabularContribution(name="age", value=29, importance=-0.12),
|
|
119
|
+
],
|
|
120
|
+
)
|
|
121
|
+
|
|
122
|
+
llm = xains.AnthropicProvider(model="claude-haiku-4-5", max_tokens=512)
|
|
123
|
+
explainer = xains.Explainer(
|
|
124
|
+
schema=schema,
|
|
125
|
+
generator=xains.LLMNarrativeGenerator(
|
|
126
|
+
prompt_template=xains.prompts.FeatureImportanceTabularPromptTemplate(),
|
|
127
|
+
llm=llm,
|
|
128
|
+
),
|
|
129
|
+
config=xains.ExplanationConfig(
|
|
130
|
+
mode="feature_importance", audience="end_user",
|
|
131
|
+
max_length_words=40, extract_narrative=True,
|
|
132
|
+
),
|
|
133
|
+
judge_llm=llm, # required when extract_narrative=True
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
result = explainer.explain(request)
|
|
137
|
+
print(result.text)
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
Output is illustrative; LLM responses vary run-to-run:
|
|
141
|
+
|
|
142
|
+
```text
|
|
143
|
+
Your profile indicates elevated default risk. A debt-to-income ratio of 0.41
|
|
144
|
+
substantially increases this concern, signaling that your debt obligations
|
|
145
|
+
consume a meaningful portion of earnings. Although your salary of EUR 52,000
|
|
146
|
+
and relatively young age of 29 provide some protective factors that work
|
|
147
|
+
against default, they ultimately prove insufficient to offset the debt burden
|
|
148
|
+
weighing on your financial stability.
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
### Scoring the narrative
|
|
152
|
+
|
|
153
|
+
A narrative is only useful if it is faithful to the attributions and reads well. xains scores both. `grade_extraction` checks the claims the narrative makes against the input attributions - sign, value, and rank fidelity, coverage, hallucination count, and readability (perplexity is added when a perplexity provider is supplied):
|
|
154
|
+
|
|
155
|
+
```python
|
|
156
|
+
grades = xains.grade_extraction(
|
|
157
|
+
extraction=result.narrative_extraction,
|
|
158
|
+
request=request,
|
|
159
|
+
schema=schema,
|
|
160
|
+
narrative_text=result.text,
|
|
161
|
+
k=5,
|
|
162
|
+
)
|
|
163
|
+
print(grades)
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
```text
|
|
167
|
+
sign_faithfulness=1.0 value_faithfulness=1.0 rank_correlation=1.0 coverage=1.0
|
|
168
|
+
hallucination_count=0 readability=30.09 perplexity=None prompt_version='2'
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
`grade_narrativity` scores how well the text reads as a narrative, using the metrics from Cedro & Martens 2026. It needs a perplexity provider (any OpenAI-compatible endpoint that returns logprobs):
|
|
172
|
+
|
|
173
|
+
```python
|
|
174
|
+
from xains.metrics import OpenAICompatibleEchoProvider
|
|
175
|
+
|
|
176
|
+
ppl = OpenAICompatibleEchoProvider(
|
|
177
|
+
base_url="https://api.together.xyz/v1",
|
|
178
|
+
model="meta-llama/Meta-Llama-3-8B-Instruct-Lite",
|
|
179
|
+
api_key_env_var="TOGETHER_API_KEY",
|
|
180
|
+
)
|
|
181
|
+
narrativity = xains.grade_narrativity(result.text, ppl)
|
|
182
|
+
print(narrativity.fdr, narrativity.csr)
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
```text
|
|
186
|
+
0.29 0.11
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
The two values are Fluency-Diversity Rate (FDR) and Continuous Structure Rate (CSR), both higher-is-better - two of the seven Cedro & Martens 2026 narrativity metrics; the notebook computes all seven plus auxiliary primitives.
|
|
190
|
+
|
|
191
|
+
### End-to-end notebook
|
|
192
|
+
|
|
193
|
+
For the full pipeline (load German Credit, train a RandomForest, compute SHAP, generate the narrative, extract structured claims, and score on faithfulness and narrativity), see the tutorial in [`notebooks/01_quickstart.ipynb`](notebooks/01_quickstart.ipynb).
|
|
194
|
+
|
|
195
|
+
See `docs/design.md` for the full design and `docs/decisions/` for recorded architecture decisions.
|
|
196
|
+
|
|
197
|
+
## Choosing a model
|
|
198
|
+
|
|
199
|
+
Any `LLMProvider` drops into `xains.LLMNarrativeGenerator(llm=...)` - pick the provider for the model you want:
|
|
200
|
+
|
|
201
|
+
```python
|
|
202
|
+
import xains
|
|
203
|
+
|
|
204
|
+
# Anthropic (reads ANTHROPIC_API_KEY)
|
|
205
|
+
llm = xains.AnthropicProvider(model="claude-haiku-4-5", max_tokens=512)
|
|
206
|
+
|
|
207
|
+
# OpenAI (reads OPENAI_API_KEY)
|
|
208
|
+
llm = xains.OpenAIProvider(model="gpt-4o-mini", max_tokens=512)
|
|
209
|
+
|
|
210
|
+
# OpenRouter - Llama, and many others (reads OPENROUTER_API_KEY)
|
|
211
|
+
llm = xains.OpenRouterProvider(model="meta-llama/llama-3.3-70b-instruct", max_tokens=512)
|
|
212
|
+
|
|
213
|
+
# Any OpenAI-compatible endpoint (Together, Groq, vLLM, ...) - set base_url + the env var to read
|
|
214
|
+
llm = xains.OpenAICompatibleProvider(
|
|
215
|
+
base_url="https://api.together.xyz/v1",
|
|
216
|
+
api_key_env_var="TOGETHER_API_KEY",
|
|
217
|
+
model="meta-llama/Llama-3.3-70B-Instruct-Turbo",
|
|
218
|
+
max_tokens=512,
|
|
219
|
+
)
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
Each reads its API key from the named env var (or pass `api_key=` explicitly); drop any of them into `xains.LLMNarrativeGenerator(llm=...)` exactly as the Minimal example does.
|
|
223
|
+
|
|
224
|
+
## License
|
|
225
|
+
|
|
226
|
+
MIT - see [`LICENSE`](LICENSE).
|
xains-0.0.1/README.md
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
# xains
|
|
2
|
+
|
|
3
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
4
|
+
[Python 3.11+](https://www.python.org)
|
|
5
|
+
|
|
6
|
+
<!-- TODO (add as each service comes online for ADMAntwerp/xains):
|
|
7
|
+
[PyPI](https://pypi.org/project/xains/)
|
|
8
|
+
[CI](https://github.com/ADMAntwerp/xains/actions/workflows/ci.yml)
|
|
9
|
+
[Coverage](https://codecov.io/gh/ADMAntwerp/xains)
|
|
10
|
+
[Documentation](https://xains.readthedocs.io/en/latest/)
|
|
11
|
+
-->
|
|
12
|
+
|
|
13
|
+
xains generates explainable AI (XAI) narratives - hence the name. It turns technical XAI outputs such as SHAP attributions and counterfactuals into clear natural-language explanations that make model decisions understandable to a broad audience.
|
|
14
|
+
|
|
15
|
+
> **Scope.** This library generates natural-language XAI narratives from technical outputs like SHAP attributions or counterfactual explanations, making the explanations more transparent and understandable.
|
|
16
|
+
|
|
17
|
+
## Install
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
git clone https://github.com/ADMAntwerp/xains.git
|
|
21
|
+
cd xains
|
|
22
|
+
pip install -e .
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Minimal example
|
|
26
|
+
|
|
27
|
+
```python
|
|
28
|
+
import xains
|
|
29
|
+
import xains.prompts
|
|
30
|
+
|
|
31
|
+
schema = xains.DatasetSchema(
|
|
32
|
+
modality=xains.Modality.TABULAR,
|
|
33
|
+
name="credit_risk",
|
|
34
|
+
description="Predicts 24-month default on personal loans.",
|
|
35
|
+
target=xains.TargetSchema(
|
|
36
|
+
name="default",
|
|
37
|
+
description="Whether the applicant defaulted.",
|
|
38
|
+
classes={0: "Repaid", 1: "Defaulted"},
|
|
39
|
+
),
|
|
40
|
+
features=[
|
|
41
|
+
xains.FeatureSchema(name="age", dtype="numeric", unit="years",
|
|
42
|
+
description="Applicant age at application."),
|
|
43
|
+
xains.FeatureSchema(name="salary", dtype="numeric", unit="EUR",
|
|
44
|
+
description="Annual gross salary."),
|
|
45
|
+
xains.FeatureSchema(name="debt_to_income", dtype="numeric",
|
|
46
|
+
description="Debt-to-income ratio."),
|
|
47
|
+
],
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
request = xains.TabularExplanationRequest(
|
|
51
|
+
features={"age": 29, "salary": 52000, "debt_to_income": 0.41},
|
|
52
|
+
prediction=xains.Prediction(predicted_class=1, probabilities={0: 0.2, 1: 0.8}),
|
|
53
|
+
contributions=[
|
|
54
|
+
xains.TabularContribution(name="debt_to_income", value=0.41, importance=0.37),
|
|
55
|
+
xains.TabularContribution(name="salary", value=52000, importance=-0.21),
|
|
56
|
+
xains.TabularContribution(name="age", value=29, importance=-0.12),
|
|
57
|
+
],
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
llm = xains.AnthropicProvider(model="claude-haiku-4-5", max_tokens=512)
|
|
61
|
+
explainer = xains.Explainer(
|
|
62
|
+
schema=schema,
|
|
63
|
+
generator=xains.LLMNarrativeGenerator(
|
|
64
|
+
prompt_template=xains.prompts.FeatureImportanceTabularPromptTemplate(),
|
|
65
|
+
llm=llm,
|
|
66
|
+
),
|
|
67
|
+
config=xains.ExplanationConfig(
|
|
68
|
+
mode="feature_importance", audience="end_user",
|
|
69
|
+
max_length_words=40, extract_narrative=True,
|
|
70
|
+
),
|
|
71
|
+
judge_llm=llm, # required when extract_narrative=True
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
result = explainer.explain(request)
|
|
75
|
+
print(result.text)
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
The output below is illustrative; LLM responses vary from run to run:
|
|
79
|
+
|
|
80
|
+
```text
|
|
81
|
+
Your profile indicates elevated default risk. A debt-to-income ratio of 0.41
|
|
82
|
+
substantially increases this concern, signaling that your debt obligations
|
|
83
|
+
consume a meaningful portion of earnings. Although your salary of EUR 52,000
|
|
84
|
+
and relatively young age of 29 provide some protective factors that work
|
|
85
|
+
against default, they ultimately prove insufficient to offset the debt burden
|
|
86
|
+
weighing on your financial stability.
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### Scoring the narrative
|
|
90
|
+
|
|
91
|
+
A narrative is only useful if it is faithful to the attributions and reads well; xains scores both. `grade_extraction` checks the claims the narrative makes against the input attributions and reports sign faithfulness, value faithfulness, rank correlation, coverage, hallucination count, and readability. Perplexity is reported only when a perplexity provider is supplied; otherwise it is `None`:
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
grades = xains.grade_extraction(
|
|
95
|
+
extraction=result.narrative_extraction,
|
|
96
|
+
request=request,
|
|
97
|
+
schema=schema,
|
|
98
|
+
narrative_text=result.text,
|
|
99
|
+
k=5,
|
|
100
|
+
)
|
|
101
|
+
print(grades)
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
```text
|
|
105
|
+
sign_faithfulness=1.0 value_faithfulness=1.0 rank_correlation=1.0 coverage=1.0
|
|
106
|
+
hallucination_count=0 readability=30.09 perplexity=None prompt_version='2'
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
`grade_narrativity` scores how well the text reads as a narrative, using the metrics from Cedro & Martens 2026. It needs a perplexity provider (any OpenAI-compatible endpoint that returns logprobs):
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
from xains.metrics import OpenAICompatibleEchoProvider
|
|
113
|
+
|
|
114
|
+
ppl = OpenAICompatibleEchoProvider(
|
|
115
|
+
base_url="https://api.together.xyz/v1",
|
|
116
|
+
model="meta-llama/Meta-Llama-3-8B-Instruct-Lite",
|
|
117
|
+
api_key_env_var="TOGETHER_API_KEY",
|
|
118
|
+
)
|
|
119
|
+
narrativity = xains.grade_narrativity(result.text, ppl)
|
|
120
|
+
print(narrativity.fdr, narrativity.csr)
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
```text
|
|
124
|
+
0.29 0.11
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
The two values are the Fluency-Diversity Rate (FDR) and the Continuous Structure Rate (CSR); higher is better for both. They are two of the seven narrativity metrics from Cedro & Martens 2026; the quickstart notebook (see "End-to-end notebook" below) computes all seven, plus auxiliary primitives.
|
|
128
|
+
|
|
129
|
+
### End-to-end notebook
|
|
130
|
+
|
|
131
|
+
For the full pipeline (load German Credit, train a RandomForest, compute SHAP, generate the narrative, extract structured claims, and score on faithfulness and narrativity), see the tutorial in [`notebooks/01_quickstart.ipynb`](notebooks/01_quickstart.ipynb).
|
|
132
|
+
|
|
133
|
+
See `docs/design.md` for the full design and `docs/decisions/` for recorded architecture decisions.
|
|
134
|
+
|
|
135
|
+
## Choosing a model
|
|
136
|
+
|
|
137
|
+
Any `LLMProvider` drops into `xains.LLMNarrativeGenerator(llm=...)` - pick the provider for the model you want:
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
import xains
|
|
141
|
+
|
|
142
|
+
# Anthropic (reads ANTHROPIC_API_KEY)
|
|
143
|
+
llm = xains.AnthropicProvider(model="claude-haiku-4-5", max_tokens=512)
|
|
144
|
+
|
|
145
|
+
# OpenAI (reads OPENAI_API_KEY)
|
|
146
|
+
llm = xains.OpenAIProvider(model="gpt-4o-mini", max_tokens=512)
|
|
147
|
+
|
|
148
|
+
# OpenRouter - Llama, and many others (reads OPENROUTER_API_KEY)
|
|
149
|
+
llm = xains.OpenRouterProvider(model="meta-llama/llama-3.3-70b-instruct", max_tokens=512)
|
|
150
|
+
|
|
151
|
+
# Any OpenAI-compatible endpoint (Together, Groq, vLLM, ...) - set base_url + the env var to read
|
|
152
|
+
llm = xains.OpenAICompatibleProvider(
|
|
153
|
+
base_url="https://api.together.xyz/v1",
|
|
154
|
+
api_key_env_var="TOGETHER_API_KEY",
|
|
155
|
+
model="meta-llama/Llama-3.3-70B-Instruct-Turbo",
|
|
156
|
+
max_tokens=512,
|
|
157
|
+
)
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
Each provider reads its API key from the environment variable noted above (or accepts it explicitly via `api_key=`); pass any of them to `xains.LLMNarrativeGenerator(llm=...)` exactly as the Minimal example does.
|
|
161
|
+
|
|
162
|
+
## License
|
|
163
|
+
|
|
164
|
+
MIT - see [`LICENSE`](LICENSE).
|