spell-exploder 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spell_exploder-0.1.0/LICENSE +21 -0
- spell_exploder-0.1.0/PKG-INFO +221 -0
- spell_exploder-0.1.0/README.md +178 -0
- spell_exploder-0.1.0/pyproject.toml +60 -0
- spell_exploder-0.1.0/setup.cfg +4 -0
- spell_exploder-0.1.0/spell_exploder/__init__.py +205 -0
- spell_exploder-0.1.0/spell_exploder/_version.py +1 -0
- spell_exploder-0.1.0/spell_exploder/analyzers/__init__.py +18 -0
- spell_exploder-0.1.0/spell_exploder/analyzers/adaptive_evolution.py +453 -0
- spell_exploder-0.1.0/spell_exploder/analyzers/complexity_index.py +224 -0
- spell_exploder-0.1.0/spell_exploder/analyzers/keyword_erp.py +477 -0
- spell_exploder-0.1.0/spell_exploder/analyzers/valence_model.py +523 -0
- spell_exploder-0.1.0/spell_exploder/core/__init__.py +45 -0
- spell_exploder-0.1.0/spell_exploder/core/compression.py +103 -0
- spell_exploder-0.1.0/spell_exploder/core/entropy.py +203 -0
- spell_exploder-0.1.0/spell_exploder/core/information.py +179 -0
- spell_exploder-0.1.0/spell_exploder/core/nlp.py +107 -0
- spell_exploder-0.1.0/spell_exploder/exceptions.py +25 -0
- spell_exploder-0.1.0/spell_exploder/extractors/__init__.py +35 -0
- spell_exploder-0.1.0/spell_exploder/extractors/action_frames.py +133 -0
- spell_exploder-0.1.0/spell_exploder/extractors/noun_dependencies.py +96 -0
- spell_exploder-0.1.0/spell_exploder/extractors/sentence_parser.py +168 -0
- spell_exploder-0.1.0/spell_exploder/graphs/__init__.py +0 -0
- spell_exploder-0.1.0/spell_exploder/io/__init__.py +14 -0
- spell_exploder-0.1.0/spell_exploder/io/exporters.py +94 -0
- spell_exploder-0.1.0/spell_exploder/io/readers.py +117 -0
- spell_exploder-0.1.0/spell_exploder/results/__init__.py +44 -0
- spell_exploder-0.1.0/spell_exploder/results/complexity.py +111 -0
- spell_exploder-0.1.0/spell_exploder/results/evolution.py +136 -0
- spell_exploder-0.1.0/spell_exploder/results/keyword.py +139 -0
- spell_exploder-0.1.0/spell_exploder/results/valence.py +134 -0
- spell_exploder-0.1.0/spell_exploder/utils/__init__.py +11 -0
- spell_exploder-0.1.0/spell_exploder/utils/imports.py +48 -0
- spell_exploder-0.1.0/spell_exploder/utils/smoothing.py +42 -0
- spell_exploder-0.1.0/spell_exploder/utils/statistics.py +54 -0
- spell_exploder-0.1.0/spell_exploder/visualization/__init__.py +27 -0
- spell_exploder-0.1.0/spell_exploder/visualization/plots.py +562 -0
- spell_exploder-0.1.0/spell_exploder.egg-info/PKG-INFO +221 -0
- spell_exploder-0.1.0/spell_exploder.egg-info/SOURCES.txt +54 -0
- spell_exploder-0.1.0/spell_exploder.egg-info/dependency_links.txt +1 -0
- spell_exploder-0.1.0/spell_exploder.egg-info/requires.txt +23 -0
- spell_exploder-0.1.0/spell_exploder.egg-info/top_level.txt +1 -0
- spell_exploder-0.1.0/tests/test_adaptive_evolution.py +289 -0
- spell_exploder-0.1.0/tests/test_api_and_exporters.py +174 -0
- spell_exploder-0.1.0/tests/test_complexity_index.py +283 -0
- spell_exploder-0.1.0/tests/test_core_compression.py +87 -0
- spell_exploder-0.1.0/tests/test_core_entropy.py +138 -0
- spell_exploder-0.1.0/tests/test_core_information.py +166 -0
- spell_exploder-0.1.0/tests/test_core_nlp.py +80 -0
- spell_exploder-0.1.0/tests/test_extractors.py +104 -0
- spell_exploder-0.1.0/tests/test_io.py +85 -0
- spell_exploder-0.1.0/tests/test_keyword_erp.py +286 -0
- spell_exploder-0.1.0/tests/test_results.py +220 -0
- spell_exploder-0.1.0/tests/test_utils.py +97 -0
- spell_exploder-0.1.0/tests/test_valence_model.py +315 -0
- spell_exploder-0.1.0/tests/test_visualization.py +247 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Spellcaster Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: spell-exploder
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Analyze natural language through complex systems science, information theory, and evolutionary game theory.
|
|
5
|
+
Author: Spellcaster Contributors
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/inc-research/spell-exploder
|
|
8
|
+
Project-URL: Documentation, https://github.com/inc-research/spell-exploder#readme
|
|
9
|
+
Project-URL: Repository, https://github.com/inc-research/spell-exploder
|
|
10
|
+
Keywords: nlp,information-theory,complex-systems,text-analysis,evolutionary-game-theory,linguistics
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
20
|
+
Classifier: Topic :: Text Processing :: Linguistic
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: numpy>=1.23
|
|
25
|
+
Requires-Dist: pandas>=1.5
|
|
26
|
+
Requires-Dist: spacy>=3.4
|
|
27
|
+
Requires-Dist: scipy>=1.9
|
|
28
|
+
Requires-Dist: python-Levenshtein>=0.20
|
|
29
|
+
Provides-Extra: viz
|
|
30
|
+
Requires-Dist: matplotlib>=3.5; extra == "viz"
|
|
31
|
+
Requires-Dist: seaborn>=0.12; extra == "viz"
|
|
32
|
+
Provides-Extra: graphs
|
|
33
|
+
Requires-Dist: networkx>=3.0; extra == "graphs"
|
|
34
|
+
Provides-Extra: ml
|
|
35
|
+
Requires-Dist: sentence-transformers>=2.0; extra == "ml"
|
|
36
|
+
Requires-Dist: scikit-learn>=1.0; extra == "ml"
|
|
37
|
+
Provides-Extra: all
|
|
38
|
+
Requires-Dist: spell-exploder[graphs,ml,viz]; extra == "all"
|
|
39
|
+
Provides-Extra: dev
|
|
40
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
41
|
+
Requires-Dist: spell-exploder[all]; extra == "dev"
|
|
42
|
+
Dynamic: license-file
|
|
43
|
+
|
|
44
|
+
# Spell Exploder
|
|
45
|
+
|
|
46
|
+
Analyze natural language text through complex systems science, information theory, information-theoretic physics analogues, and evolutionary game theory.
|
|
47
|
+
|
|
48
|
+
Spell Exploder provides four complementary analyzers that reveal the hidden structural, informational, and evolutionary properties of text — from sentence-level compression dynamics to document-scale syntactic evolution.
|
|
49
|
+
|
|
50
|
+
## Installation
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
pip install spell-exploder
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
**Required dependencies** (installed automatically): `numpy`, `pandas`, `spacy`, `scipy`, `python-Levenshtein`
|
|
57
|
+
|
|
58
|
+
**Optional dependencies:**
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
# Sentence embeddings for APE hybrid clustering
|
|
62
|
+
pip install spell-exploder[ml] # sentence-transformers, scikit-learn
|
|
63
|
+
|
|
64
|
+
# Visualization (convenience plotting)
|
|
65
|
+
pip install spell-exploder[viz] # matplotlib, seaborn
|
|
66
|
+
|
|
67
|
+
# Everything
|
|
68
|
+
pip install spell-exploder[all]
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
**spaCy model** (required):
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
python -m spacy download en_core_web_sm
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Quick Start
|
|
78
|
+
|
|
79
|
+
### One-liner API
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
import spell_exploder
|
|
83
|
+
|
|
84
|
+
# Complexity flow analysis (LCX)
|
|
85
|
+
result = spell_exploder.analyze_complexity("essay_a.txt", "essay_b.txt")
|
|
86
|
+
df = result.to_dataframe()
|
|
87
|
+
|
|
88
|
+
# Valence model (LCVM) — entropy, MI, action frames, multiscale collapse
|
|
89
|
+
result = spell_exploder.analyze_valence("essay_a.txt", "essay_b.txt")
|
|
90
|
+
profile = spell_exploder.ValenceModelAnalyzer().build_complexity_profile(result)
|
|
91
|
+
|
|
92
|
+
# Adaptive evolution (APE) — syntactic species dynamics
|
|
93
|
+
result = spell_exploder.analyze_evolution("early_draft.txt", "final_draft.txt")
|
|
94
|
+
print(result.to_dataframe())
|
|
95
|
+
|
|
96
|
+
# Keyword structural coherence (KEPM)
|
|
97
|
+
result = spell_exploder.analyze_keywords(
|
|
98
|
+
"essay.txt",
|
|
99
|
+
keywords=["information", "network"],
|
|
100
|
+
)
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### Full-control API
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
from spell_exploder.analyzers import TextComplexityAnalyzer
|
|
107
|
+
|
|
108
|
+
analyzer = TextComplexityAnalyzer()
|
|
109
|
+
result = analyzer.compare(
|
|
110
|
+
["Human text here.", "AI text here."],
|
|
111
|
+
labels=["Human", "AI"],
|
|
112
|
+
from_files=False,
|
|
113
|
+
)
|
|
114
|
+
for flow in result.flows:
|
|
115
|
+
print(f"{flow.label}: {len(flow.sentences)} sentences")
|
|
116
|
+
print(f" Final k_hist: {flow.sentences[-1].k_hist}")
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## Analyzers
|
|
120
|
+
|
|
121
|
+
### TextComplexityAnalyzer (LCX)
|
|
122
|
+
|
|
123
|
+
Sentence-by-sentence complexity flow via compression (zlib), Levenshtein volatility, and synergy ratios.
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
from spell_exploder.analyzers import TextComplexityAnalyzer
|
|
127
|
+
|
|
128
|
+
lcx = TextComplexityAnalyzer()
|
|
129
|
+
result = lcx.compare(["file_a.txt", "file_b.txt"])
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
**Key metrics:** cumulative compressed size (`k_hist`), edit distance (`volatility`), volatility/marginal-info ratio (`synergy`)
|
|
133
|
+
|
|
134
|
+
### ValenceModelAnalyzer (LCVM)
|
|
135
|
+
|
|
136
|
+
The most comprehensive analyzer — ~30 metrics per document across five dimensions:
|
|
137
|
+
|
|
138
|
+
| Dimension | Metrics |
|
|
139
|
+
|-----------|---------|
|
|
140
|
+
| **Variation** | Shannon entropy of token distributions |
|
|
141
|
+
| **Redundancy** | Multiscale entropy-collapse curves |
|
|
142
|
+
| **Organisation** | MI(Verb; Subject), MI(Verb; Object), coupling strength |
|
|
143
|
+
| **Repertoire** | Action-frame density, verb diversity |
|
|
144
|
+
| **Semantic breadth** | Schema-keyword concentration, valence entropy |
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
from spell_exploder.analyzers import ValenceModelAnalyzer
|
|
148
|
+
|
|
149
|
+
vm = ValenceModelAnalyzer()
|
|
150
|
+
result = vm.analyze(["essay_a.txt", "essay_b.txt"])
|
|
151
|
+
profile = vm.build_complexity_profile(result)
|
|
152
|
+
print(vm.profile_for_print(profile))
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
### AdaptiveEvolutionAnalyzer (APE)
|
|
156
|
+
|
|
157
|
+
Treats syntactic structures as biological species competing for "cognitive market share" across document revisions.
|
|
158
|
+
|
|
159
|
+
```python
|
|
160
|
+
from spell_exploder.analyzers import AdaptiveEvolutionAnalyzer
|
|
161
|
+
|
|
162
|
+
ape = AdaptiveEvolutionAnalyzer(use_embeddings=False) # NCD-only mode
|
|
163
|
+
result = ape.analyze(["draft_v1.txt", "draft_v2.txt"])
|
|
164
|
+
|
|
165
|
+
for species in result.species[:5]:
|
|
166
|
+
print(f" Group {species.cluster_id}: {species.status.value} "
|
|
167
|
+
f"(Δ={species.delta:+.3f})")
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
### KeywordERPAnalyzer (KEPM)
|
|
171
|
+
|
|
172
|
+
Analyses structural coherence of keyword usage through POS co-occurrence spectral entropy and NCD-based structural similarity.
|
|
173
|
+
|
|
174
|
+
```python
|
|
175
|
+
from spell_exploder.analyzers import KeywordERPAnalyzer
|
|
176
|
+
|
|
177
|
+
kw = KeywordERPAnalyzer(keywords=["information", "network"])
|
|
178
|
+
result = kw.analyze(["essay.txt"])
|
|
179
|
+
df = result.to_dataframe()
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
## Export
|
|
183
|
+
|
|
184
|
+
All results can be exported to CSV or JSON:
|
|
185
|
+
|
|
186
|
+
```python
|
|
187
|
+
from spell_exploder.io import export_csv, export_json
|
|
188
|
+
|
|
189
|
+
export_csv(result, "output.csv")
|
|
190
|
+
export_json(result, "output.json")
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
## Result Objects
|
|
194
|
+
|
|
195
|
+
Every analyzer returns a structured result with:
|
|
196
|
+
|
|
197
|
+
- **`.to_dataframe()`** — flat pandas DataFrame for analysis
|
|
198
|
+
- **Direct attribute access** — full nested data (e.g., `result.posts[0].schema_valence_entropy`)
|
|
199
|
+
- **NumPy array properties** — e.g., `flow.k_hist_array`, `flow.volatility_array`
|
|
200
|
+
|
|
201
|
+
## Architecture
|
|
202
|
+
|
|
203
|
+
```
|
|
204
|
+
spell_exploder/
|
|
205
|
+
├── analyzers/ # 4 analyzer classes (LCX, LCVM, APE, KEPM)
|
|
206
|
+
├── core/ # Shared math: entropy, compression, MI, JS divergence
|
|
207
|
+
├── extractors/ # NLP extraction: action frames, noun deps, sentences
|
|
208
|
+
├── results/ # Structured dataclass results with .to_dataframe()
|
|
209
|
+
├── io/ # Text loading and result export (CSV, JSON)
|
|
210
|
+
├── utils/ # Smoothing, statistics, lazy imports
|
|
211
|
+
└── visualization/ # Optional convenience plotting (requires matplotlib)
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
## Requirements
|
|
215
|
+
|
|
216
|
+
- Python ≥ 3.10
|
|
217
|
+
- spaCy with `en_core_web_sm` (or another English model)
|
|
218
|
+
|
|
219
|
+
## License
|
|
220
|
+
|
|
221
|
+
MIT
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
# Spell Exploder
|
|
2
|
+
|
|
3
|
+
Analyze natural language text through complex systems science, information theory, information-theoretic physics analogues, and evolutionary game theory.
|
|
4
|
+
|
|
5
|
+
Spell Exploder provides four complementary analyzers that reveal the hidden structural, informational, and evolutionary properties of text — from sentence-level compression dynamics to document-scale syntactic evolution.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install spell-exploder
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
**Required dependencies** (installed automatically): `numpy`, `pandas`, `spacy`, `scipy`, `python-Levenshtein`
|
|
14
|
+
|
|
15
|
+
**Optional dependencies:**
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
# Sentence embeddings for APE hybrid clustering
|
|
19
|
+
pip install spell-exploder[ml] # sentence-transformers, scikit-learn
|
|
20
|
+
|
|
21
|
+
# Visualization (convenience plotting)
|
|
22
|
+
pip install spell-exploder[viz] # matplotlib, seaborn
|
|
23
|
+
|
|
24
|
+
# Everything
|
|
25
|
+
pip install spell-exploder[all]
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
**spaCy model** (required):
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
python -m spacy download en_core_web_sm
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Quick Start
|
|
35
|
+
|
|
36
|
+
### One-liner API
|
|
37
|
+
|
|
38
|
+
```python
|
|
39
|
+
import spell_exploder
|
|
40
|
+
|
|
41
|
+
# Complexity flow analysis (LCX)
|
|
42
|
+
result = spell_exploder.analyze_complexity("essay_a.txt", "essay_b.txt")
|
|
43
|
+
df = result.to_dataframe()
|
|
44
|
+
|
|
45
|
+
# Valence model (LCVM) — entropy, MI, action frames, multiscale collapse
|
|
46
|
+
result = spell_exploder.analyze_valence("essay_a.txt", "essay_b.txt")
|
|
47
|
+
profile = spell_exploder.ValenceModelAnalyzer().build_complexity_profile(result)
|
|
48
|
+
|
|
49
|
+
# Adaptive evolution (APE) — syntactic species dynamics
|
|
50
|
+
result = spell_exploder.analyze_evolution("early_draft.txt", "final_draft.txt")
|
|
51
|
+
print(result.to_dataframe())
|
|
52
|
+
|
|
53
|
+
# Keyword structural coherence (KEPM)
|
|
54
|
+
result = spell_exploder.analyze_keywords(
|
|
55
|
+
"essay.txt",
|
|
56
|
+
keywords=["information", "network"],
|
|
57
|
+
)
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### Full-control API
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
from spell_exploder.analyzers import TextComplexityAnalyzer
|
|
64
|
+
|
|
65
|
+
analyzer = TextComplexityAnalyzer()
|
|
66
|
+
result = analyzer.compare(
|
|
67
|
+
["Human text here.", "AI text here."],
|
|
68
|
+
labels=["Human", "AI"],
|
|
69
|
+
from_files=False,
|
|
70
|
+
)
|
|
71
|
+
for flow in result.flows:
|
|
72
|
+
print(f"{flow.label}: {len(flow.sentences)} sentences")
|
|
73
|
+
print(f" Final k_hist: {flow.sentences[-1].k_hist}")
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## Analyzers
|
|
77
|
+
|
|
78
|
+
### TextComplexityAnalyzer (LCX)
|
|
79
|
+
|
|
80
|
+
Sentence-by-sentence complexity flow via compression (zlib), Levenshtein volatility, and synergy ratios.
|
|
81
|
+
|
|
82
|
+
```python
|
|
83
|
+
from spell_exploder.analyzers import TextComplexityAnalyzer
|
|
84
|
+
|
|
85
|
+
lcx = TextComplexityAnalyzer()
|
|
86
|
+
result = lcx.compare(["file_a.txt", "file_b.txt"])
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
**Key metrics:** cumulative compressed size (`k_hist`), edit distance (`volatility`), volatility/marginal-info ratio (`synergy`)
|
|
90
|
+
|
|
91
|
+
### ValenceModelAnalyzer (LCVM)
|
|
92
|
+
|
|
93
|
+
The most comprehensive analyzer — ~30 metrics per document across five dimensions:
|
|
94
|
+
|
|
95
|
+
| Dimension | Metrics |
|
|
96
|
+
|-----------|---------|
|
|
97
|
+
| **Variation** | Shannon entropy of token distributions |
|
|
98
|
+
| **Redundancy** | Multiscale entropy-collapse curves |
|
|
99
|
+
| **Organisation** | MI(Verb; Subject), MI(Verb; Object), coupling strength |
|
|
100
|
+
| **Repertoire** | Action-frame density, verb diversity |
|
|
101
|
+
| **Semantic breadth** | Schema-keyword concentration, valence entropy |
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
from spell_exploder.analyzers import ValenceModelAnalyzer
|
|
105
|
+
|
|
106
|
+
vm = ValenceModelAnalyzer()
|
|
107
|
+
result = vm.analyze(["essay_a.txt", "essay_b.txt"])
|
|
108
|
+
profile = vm.build_complexity_profile(result)
|
|
109
|
+
print(vm.profile_for_print(profile))
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### AdaptiveEvolutionAnalyzer (APE)
|
|
113
|
+
|
|
114
|
+
Treats syntactic structures as biological species competing for "cognitive market share" across document revisions.
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
from spell_exploder.analyzers import AdaptiveEvolutionAnalyzer
|
|
118
|
+
|
|
119
|
+
ape = AdaptiveEvolutionAnalyzer(use_embeddings=False) # NCD-only mode
|
|
120
|
+
result = ape.analyze(["draft_v1.txt", "draft_v2.txt"])
|
|
121
|
+
|
|
122
|
+
for species in result.species[:5]:
|
|
123
|
+
print(f" Group {species.cluster_id}: {species.status.value} "
|
|
124
|
+
f"(Δ={species.delta:+.3f})")
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
### KeywordERPAnalyzer (KEPM)
|
|
128
|
+
|
|
129
|
+
Analyses structural coherence of keyword usage through POS co-occurrence spectral entropy and NCD-based structural similarity.
|
|
130
|
+
|
|
131
|
+
```python
|
|
132
|
+
from spell_exploder.analyzers import KeywordERPAnalyzer
|
|
133
|
+
|
|
134
|
+
kw = KeywordERPAnalyzer(keywords=["information", "network"])
|
|
135
|
+
result = kw.analyze(["essay.txt"])
|
|
136
|
+
df = result.to_dataframe()
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
## Export
|
|
140
|
+
|
|
141
|
+
All results can be exported to CSV or JSON:
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
from spell_exploder.io import export_csv, export_json
|
|
145
|
+
|
|
146
|
+
export_csv(result, "output.csv")
|
|
147
|
+
export_json(result, "output.json")
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
## Result Objects
|
|
151
|
+
|
|
152
|
+
Every analyzer returns a structured result with:
|
|
153
|
+
|
|
154
|
+
- **`.to_dataframe()`** — flat pandas DataFrame for analysis
|
|
155
|
+
- **Direct attribute access** — full nested data (e.g., `result.posts[0].schema_valence_entropy`)
|
|
156
|
+
- **NumPy array properties** — e.g., `flow.k_hist_array`, `flow.volatility_array`
|
|
157
|
+
|
|
158
|
+
## Architecture
|
|
159
|
+
|
|
160
|
+
```
|
|
161
|
+
spell_exploder/
|
|
162
|
+
├── analyzers/ # 4 analyzer classes (LCX, LCVM, APE, KEPM)
|
|
163
|
+
├── core/ # Shared math: entropy, compression, MI, JS divergence
|
|
164
|
+
├── extractors/ # NLP extraction: action frames, noun deps, sentences
|
|
165
|
+
├── results/ # Structured dataclass results with .to_dataframe()
|
|
166
|
+
├── io/ # Text loading and result export (CSV, JSON)
|
|
167
|
+
├── utils/ # Smoothing, statistics, lazy imports
|
|
168
|
+
└── visualization/ # Optional convenience plotting (requires matplotlib)
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
## Requirements
|
|
172
|
+
|
|
173
|
+
- Python ≥ 3.10
|
|
174
|
+
- spaCy with `en_core_web_sm` (or another English model)
|
|
175
|
+
|
|
176
|
+
## License
|
|
177
|
+
|
|
178
|
+
MIT
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "spell-exploder"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Analyze natural language through complex systems science, information theory, and evolutionary game theory."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = {text = "MIT"}
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
authors = [
|
|
13
|
+
{name = "Spellcaster Contributors"},
|
|
14
|
+
]
|
|
15
|
+
keywords = [
|
|
16
|
+
"nlp",
|
|
17
|
+
"information-theory",
|
|
18
|
+
"complex-systems",
|
|
19
|
+
"text-analysis",
|
|
20
|
+
"evolutionary-game-theory",
|
|
21
|
+
"linguistics",
|
|
22
|
+
]
|
|
23
|
+
classifiers = [
|
|
24
|
+
"Development Status :: 3 - Alpha",
|
|
25
|
+
"Intended Audience :: Science/Research",
|
|
26
|
+
"License :: OSI Approved :: MIT License",
|
|
27
|
+
"Programming Language :: Python :: 3",
|
|
28
|
+
"Programming Language :: Python :: 3.10",
|
|
29
|
+
"Programming Language :: Python :: 3.11",
|
|
30
|
+
"Programming Language :: Python :: 3.12",
|
|
31
|
+
"Programming Language :: Python :: 3.13",
|
|
32
|
+
"Topic :: Scientific/Engineering :: Information Analysis",
|
|
33
|
+
"Topic :: Text Processing :: Linguistic",
|
|
34
|
+
]
|
|
35
|
+
dependencies = [
|
|
36
|
+
"numpy>=1.23",
|
|
37
|
+
"pandas>=1.5",
|
|
38
|
+
"spacy>=3.4",
|
|
39
|
+
"scipy>=1.9",
|
|
40
|
+
"python-Levenshtein>=0.20",
|
|
41
|
+
]
|
|
42
|
+
|
|
43
|
+
[project.optional-dependencies]
|
|
44
|
+
viz = ["matplotlib>=3.5", "seaborn>=0.12"]
|
|
45
|
+
graphs = ["networkx>=3.0"]
|
|
46
|
+
ml = ["sentence-transformers>=2.0", "scikit-learn>=1.0"]
|
|
47
|
+
all = ["spell-exploder[viz,graphs,ml]"]
|
|
48
|
+
dev = ["pytest>=7.0", "spell-exploder[all]"]
|
|
49
|
+
|
|
50
|
+
[project.urls]
|
|
51
|
+
Homepage = "https://github.com/inc-research/spell-exploder"
|
|
52
|
+
Documentation = "https://github.com/inc-research/spell-exploder#readme"
|
|
53
|
+
Repository = "https://github.com/inc-research/spell-exploder"
|
|
54
|
+
|
|
55
|
+
[tool.setuptools.packages.find]
|
|
56
|
+
include = ["spell_exploder*"]
|
|
57
|
+
|
|
58
|
+
[tool.pytest.ini_options]
|
|
59
|
+
testpaths = ["tests"]
|
|
60
|
+
pythonpath = ["."]
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Spell Exploder
|
|
3
|
+
==============
|
|
4
|
+
|
|
5
|
+
Analyze natural language text through complex systems science,
|
|
6
|
+
information theory, information-theoretic physics analogues,
|
|
7
|
+
and evolutionary game theory.
|
|
8
|
+
|
|
9
|
+
Quick Start
|
|
10
|
+
-----------
|
|
11
|
+
>>> import spell_exploder
|
|
12
|
+
>>> result = spell_exploder.analyze_complexity("draft_a.txt", "draft_b.txt")
|
|
13
|
+
>>> result.to_dataframe()
|
|
14
|
+
|
|
15
|
+
Analyzers
|
|
16
|
+
---------
|
|
17
|
+
For full control, import the analyzer classes directly::
|
|
18
|
+
|
|
19
|
+
from spell_exploder.analyzers import (
|
|
20
|
+
TextComplexityAnalyzer, # LCX — compression, volatility, synergy
|
|
21
|
+
ValenceModelAnalyzer, # LCVM — entropy, MI, action frames, collapse
|
|
22
|
+
AdaptiveEvolutionAnalyzer,# APE — POS clustering, evolutionary dynamics
|
|
23
|
+
KeywordERPAnalyzer, # KEPM — keyword structural coherence
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
Results
|
|
27
|
+
-------
|
|
28
|
+
All analyzers return structured dataclass results with ``.to_dataframe()``
|
|
29
|
+
methods for easy integration with pandas, matplotlib, or any other tooling.
|
|
30
|
+
|
|
31
|
+
Export
|
|
32
|
+
------
|
|
33
|
+
>>> from spell_exploder.io import export_csv, export_json
|
|
34
|
+
>>> export_csv(result, "output.csv")
|
|
35
|
+
>>> export_json(result, "output.json")
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
from spell_exploder._version import __version__
|
|
39
|
+
|
|
40
|
+
from spell_exploder.analyzers.complexity_index import TextComplexityAnalyzer
|
|
41
|
+
from spell_exploder.analyzers.valence_model import ValenceModelAnalyzer
|
|
42
|
+
from spell_exploder.analyzers.adaptive_evolution import AdaptiveEvolutionAnalyzer
|
|
43
|
+
from spell_exploder.analyzers.keyword_erp import KeywordERPAnalyzer
|
|
44
|
+
|
|
45
|
+
from spell_exploder.io.readers import load_texts, texts_from_strings
|
|
46
|
+
from spell_exploder.io.exporters import export_csv, export_json
|
|
47
|
+
|
|
48
|
+
from spell_exploder.results.complexity import (
|
|
49
|
+
ComplexityComparisonResult,
|
|
50
|
+
ComplexityFlowResult,
|
|
51
|
+
)
|
|
52
|
+
from spell_exploder.results.valence import ValenceModelResult
|
|
53
|
+
from spell_exploder.results.evolution import EvolutionResult
|
|
54
|
+
from spell_exploder.results.keyword import KeywordERPResult
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
# ── Convenience functions ────────────────────────────────────────────────────
|
|
58
|
+
|
|
59
|
+
def analyze_complexity(
    *texts_or_paths: str,
    labels: list[str] | None = None,
    from_files: bool = True,
) -> ComplexityComparisonResult:
    """
    Run an LCX complexity comparison in a single call.

    Thin convenience wrapper around ``TextComplexityAnalyzer.compare``.

    Parameters
    ----------
    *texts_or_paths : str
        File paths (when *from_files* is True) or raw text strings.
    labels : list[str] or None
        Human-readable labels.
    from_files : bool
        Whether to read from files or treat as raw strings.

    Returns
    -------
    ComplexityComparisonResult
    """
    analyzer = TextComplexityAnalyzer()
    return analyzer.compare(
        list(texts_or_paths),
        labels=labels,
        from_files=from_files,
    )
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def analyze_valence(
    *texts_or_paths: str,
    labels: list[str] | None = None,
    from_files: bool = True,
    window_sizes: tuple[int, ...] = (25, 50, 100, 250, 500),
) -> ValenceModelResult:
    """
    Run an LCVM valence-model analysis in a single call.

    Thin convenience wrapper around ``ValenceModelAnalyzer.analyze``.

    Parameters
    ----------
    *texts_or_paths : str
        File paths or raw text strings.
    labels : list[str] or None
        Human-readable labels.
    from_files : bool
        Whether to read from files.
    window_sizes : tuple[int, ...]
        Window sizes for multiscale collapse.

    Returns
    -------
    ValenceModelResult
    """
    analyzer = ValenceModelAnalyzer(window_sizes=window_sizes)
    return analyzer.analyze(
        list(texts_or_paths),
        labels=labels,
        from_files=from_files,
    )
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def analyze_evolution(
    *texts_or_paths: str,
    labels: list[str] | None = None,
    from_files: bool = True,
    use_embeddings: bool = True,
    alpha_semantic: float = 0.5,
) -> EvolutionResult:
    """
    Run an APE adaptive-evolution analysis in a single call.

    Thin convenience wrapper around ``AdaptiveEvolutionAnalyzer.analyze``.
    Documents should be in chronological order (earliest first).

    Parameters
    ----------
    *texts_or_paths : str
        File paths or raw text strings.
    labels : list[str] or None
        Human-readable labels.
    from_files : bool
        Whether to read from files.
    use_embeddings : bool
        Whether to use sentence-transformer embeddings.
    alpha_semantic : float
        Blend weight for semantic vs. structural distance.

    Returns
    -------
    EvolutionResult
    """
    analyzer = AdaptiveEvolutionAnalyzer(
        use_embeddings=use_embeddings,
        alpha_semantic=alpha_semantic,
    )
    return analyzer.analyze(
        list(texts_or_paths),
        labels=labels,
        from_files=from_files,
    )
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def analyze_keywords(
    *texts_or_paths: str,
    keywords: list[str],
    labels: list[str] | None = None,
    from_files: bool = True,
    context_window: int = 25,
) -> KeywordERPResult:
    """
    Run a KEPM keyword ERP analysis in a single call.

    Thin convenience wrapper around ``KeywordERPAnalyzer.analyze``.

    Parameters
    ----------
    *texts_or_paths : str
        File paths or raw text strings.
    keywords : list[str]
        Keywords to analyse.
    labels : list[str] or None
        Human-readable labels.
    from_files : bool
        Whether to read from files.
    context_window : int
        ±N sentences around each keyword mention.

    Returns
    -------
    KeywordERPResult
    """
    analyzer = KeywordERPAnalyzer(
        keywords=keywords,
        context_window=context_window,
    )
    return analyzer.analyze(
        list(texts_or_paths),
        labels=labels,
        from_files=from_files,
    )
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
# Explicit public API surface: names exported by ``from spell_exploder import *``
# and used by documentation tooling. Keep in sync with the imports and the
# convenience functions defined above.
__all__ = [
    "__version__",
    "analyze_complexity",
    "analyze_valence",
    "analyze_evolution",
    "analyze_keywords",
    "TextComplexityAnalyzer",
    "ValenceModelAnalyzer",
    "AdaptiveEvolutionAnalyzer",
    "KeywordERPAnalyzer",
    "load_texts",
    "texts_from_strings",
    "export_csv",
    "export_json",
    "ComplexityComparisonResult",
    "ComplexityFlowResult",
    "ValenceModelResult",
    "EvolutionResult",
    "KeywordERPResult",
]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Single source of truth for the package version; mirrored by the
# ``version`` field in pyproject.toml (keep both in sync when releasing).
__version__ = "0.1.0"
|