spell_exploder-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. spell_exploder/__init__.py +205 -0
  2. spell_exploder/_version.py +1 -0
  3. spell_exploder/analyzers/__init__.py +18 -0
  4. spell_exploder/analyzers/adaptive_evolution.py +453 -0
  5. spell_exploder/analyzers/complexity_index.py +224 -0
  6. spell_exploder/analyzers/keyword_erp.py +477 -0
  7. spell_exploder/analyzers/valence_model.py +523 -0
  8. spell_exploder/core/__init__.py +45 -0
  9. spell_exploder/core/compression.py +103 -0
  10. spell_exploder/core/entropy.py +203 -0
  11. spell_exploder/core/information.py +179 -0
  12. spell_exploder/core/nlp.py +107 -0
  13. spell_exploder/exceptions.py +25 -0
  14. spell_exploder/extractors/__init__.py +35 -0
  15. spell_exploder/extractors/action_frames.py +133 -0
  16. spell_exploder/extractors/noun_dependencies.py +96 -0
  17. spell_exploder/extractors/sentence_parser.py +168 -0
  18. spell_exploder/graphs/__init__.py +0 -0
  19. spell_exploder/io/__init__.py +14 -0
  20. spell_exploder/io/exporters.py +94 -0
  21. spell_exploder/io/readers.py +117 -0
  22. spell_exploder/results/__init__.py +44 -0
  23. spell_exploder/results/complexity.py +111 -0
  24. spell_exploder/results/evolution.py +136 -0
  25. spell_exploder/results/keyword.py +139 -0
  26. spell_exploder/results/valence.py +134 -0
  27. spell_exploder/utils/__init__.py +11 -0
  28. spell_exploder/utils/imports.py +48 -0
  29. spell_exploder/utils/smoothing.py +42 -0
  30. spell_exploder/utils/statistics.py +54 -0
  31. spell_exploder/visualization/__init__.py +27 -0
  32. spell_exploder/visualization/plots.py +562 -0
  33. spell_exploder-0.1.0.dist-info/METADATA +221 -0
  34. spell_exploder-0.1.0.dist-info/RECORD +37 -0
  35. spell_exploder-0.1.0.dist-info/WHEEL +5 -0
  36. spell_exploder-0.1.0.dist-info/licenses/LICENSE +21 -0
  37. spell_exploder-0.1.0.dist-info/top_level.txt +1 -0
spell_exploder/extractors/noun_dependencies.py
@@ -0,0 +1,96 @@
+ """
+ Noun-dependency extraction (schema–valence pairs).
+
+ Extracts structured relationships between nouns and their modifiers
+ or governing verbs. In Spellcaster's terminology:
+
+ * **Schema keyword** — the noun lemma (the concept being modified).
+ * **Valence keyword** — the adjective or verb lemma that colours the
+   noun's meaning in context.
+ * **Dependency type** — the syntactic relation (``amod``, ``nsubj``,
+   ``dobj``).
+
+ These triples power the *valence entropy* and *semantic breadth*
+ metrics in the Valence Model analyzer.
+ """
+
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING
+
+ if TYPE_CHECKING:
+     import spacy
+
+ from spell_exploder.core.nlp import get_nlp
+
+
+ # Type alias for a single dependency triple
+ NounDependency = tuple[str, str, str]
+ """``(schema_keyword, valence_keyword, dep_type)``"""
+
+
+ def extract_noun_dependencies(
+     text: str,
+     *,
+     nlp: spacy.Language | None = None,
+     model_name: str = "en_core_web_sm",
+ ) -> list[NounDependency]:
+     """
+     Extract noun–adjective and noun–verb dependency triples from *text*.
+
+     Three dependency patterns are captured:
+
+     1. **amod** — An adjective modifying a noun
+        (e.g. *"quick fox"* → ``("fox", "quick", "amod")``).
+     2. **nsubj** — A noun serving as subject of a verb
+        (e.g. *"The fox jumps"* → ``("fox", "jump", "nsubj")``).
+     3. **dobj** — A noun serving as direct object of a verb
+        (e.g. *"chase the mouse"* → ``("mouse", "chase", "dobj")``).
+
+     Stop-word valence keywords are filtered out.
+
+     Parameters
+     ----------
+     text : str
+         Raw input text.
+     nlp : spacy.Language or None
+         Pre-loaded pipeline (must include ``parser``).
+         When ``None``, loaded via :func:`~spell_exploder.core.nlp.get_nlp`.
+     model_name : str
+         spaCy model name (used only when *nlp* is ``None``).
+
+     Returns
+     -------
+     list[NounDependency]
+         List of ``(schema_keyword, valence_keyword, dep_type)`` tuples.
+     """
+     if nlp is None:
+         nlp = get_nlp(model_name, disable=["ner"])
+
+     doc = nlp(text)
+     dependencies: list[NounDependency] = []
+
+     for tok in doc:
+         # --- Pattern 1: adjective modifier of a noun ---
+         if tok.pos_ in {"NOUN", "PROPN"}:
+             schema = tok.lemma_
+             for child in tok.children:
+                 if child.dep_ == "amod":
+                     valence = child.lemma_
+                     if not nlp.vocab[valence].is_stop:
+                         dependencies.append((schema, valence, "amod"))
+
+         # --- Patterns 2 & 3: noun as subject or object of a verb ---
+         if tok.pos_ == "VERB":
+             valence = tok.lemma_
+             if nlp.vocab[valence].is_stop:
+                 continue
+             for child in tok.children:
+                 if child.pos_ in {"NOUN", "PROPN"}:
+                     schema = child.lemma_
+                     if child.dep_ == "nsubj":
+                         dependencies.append((schema, valence, "nsubj"))
+                     elif child.dep_ == "dobj":
+                         dependencies.append((schema, valence, "dobj"))
+
+     return dependencies
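
A minimal usage sketch for this module, assuming spell_exploder and the en_core_web_sm model are installed (the sample sentence is illustrative, and exact lemmas and triple order depend on the spaCy model version):

from spell_exploder.extractors.noun_dependencies import extract_noun_dependencies

triples = extract_noun_dependencies("The quick fox chased the grey mouse.")
# Plausible output with en_core_web_sm, in token order:
# [('fox', 'quick', 'amod'), ('fox', 'chase', 'nsubj'),
#  ('mouse', 'chase', 'dobj'), ('mouse', 'grey', 'amod')]
for schema, valence, dep in triples:
    print(f"{schema} --{dep}--> {valence}")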
spell_exploder/extractors/sentence_parser.py
@@ -0,0 +1,168 @@
+ """
+ Sentence parsing with POS-tag extraction.
+
+ Provides sentence segmentation (with a custom abbreviation-aware
+ boundary detector) and per-sentence POS-tag sequences, which feed
+ into the Adaptive Evolution (APE) and Keyword ERP (KEPM) analyzers.
+ """
+
+ from __future__ import annotations
+
+ import re
+ from dataclasses import dataclass, field
+ from typing import TYPE_CHECKING
+
+ if TYPE_CHECKING:
+     import spacy
+
+ from spell_exploder.core.nlp import get_nlp
+
+
+ # ---------------------------------------------------------------------------
+ # Default abbreviation set for sentence boundary detection
+ # ---------------------------------------------------------------------------
+
+ DEFAULT_ABBREVIATIONS: frozenset[str] = frozenset({
+     "mr.", "mrs.", "ms.", "dr.", "prof.", "rev.", "col.",
+     "gen.", "maj.", "capt.", "lt.", "sgt.", "pvt.",
+     "jr.", "sr.", "etc.", "e.g.", "i.e.",
+ })
+
+
+ # ---------------------------------------------------------------------------
+ # Result dataclass
+ # ---------------------------------------------------------------------------
+
+ @dataclass(frozen=True)
+ class ParsedSentence:
+     """A single sentence with its POS tag sequence."""
+
+     text: str
+     """The raw sentence text (whitespace-stripped)."""
+
+     pos_tags: list[str] = field(default_factory=list)
+     """POS tags for every token in the sentence (e.g. ``['DET', 'NOUN', 'VERB', ...]``)."""
+
+
+ # ---------------------------------------------------------------------------
+ # Core parsing functions
+ # ---------------------------------------------------------------------------
+
+ def _register_boundary_detector(
+     nlp: spacy.Language,
+     abbreviations: frozenset[str],
+ ) -> spacy.Language:
+     """
+     Register (once) a custom sentence-boundary component on *nlp* that
+     respects abbreviations and newline boundaries.
+
+     The component is inserted **before** the parser; the parser respects
+     these pre-set boundaries and may add further ones where none are set.
+     """
+     from spacy.language import Language as SpacyLanguage
+
+     component_name = "spell_exploder_sent_boundary"
+
+     if component_name in nlp.pipe_names:
+         return nlp
+
+     @SpacyLanguage.component(component_name)
+     def _boundary_detector(doc):
+         if len(doc) == 0:
+             return doc
+         doc[0].is_sent_start = True
+
+         for i, token in enumerate(doc[:-1]):
+             nxt = doc[i + 1]
+             # Token ending in a period, followed by a capitalised word.
+             # ``endswith`` (rather than ``== "."``) also matches
+             # abbreviation tokens such as "Mr.", which spaCy keeps as
+             # single tokens, so the abbreviation check can suppress
+             # the boundary.
+             if (
+                 token.text.endswith(".")
+                 and token.text.lower() not in abbreviations
+                 and nxt.text
+                 and nxt.text[0].isupper()
+                 and not nxt.is_space
+                 and not nxt.is_punct
+             ):
+                 nxt.is_sent_start = True
+             # Newline triggers a new sentence. spaCy merges consecutive
+             # whitespace into one token (e.g. "\n\n"), so test for
+             # containment rather than equality.
+             if "\n" in token.text or "\r" in token.text:
+                 nxt.is_sent_start = True
+
+         return doc
+
+     nlp.add_pipe(component_name, before="parser")
+     return nlp
+
+
+ def parse_sentences(
+     text: str,
+     *,
+     nlp: spacy.Language | None = None,
+     model_name: str = "en_core_web_sm",
+     abbreviations: frozenset[str] | None = None,
+     use_custom_boundaries: bool = True,
+ ) -> list[ParsedSentence]:
+     """
+     Segment *text* into sentences with per-token POS tags.
+
+     Parameters
+     ----------
+     text : str
+         Raw input text.
+     nlp : spacy.Language or None
+         Pre-loaded pipeline. When ``None``, loaded via
+         :func:`~spell_exploder.core.nlp.get_nlp`.
+     model_name : str
+         spaCy model name (used only when *nlp* is ``None``).
+     abbreviations : frozenset[str] or None
+         Abbreviations that should **not** trigger sentence boundaries
+         (e.g. ``"dr."``). Defaults to :data:`DEFAULT_ABBREVIATIONS`.
+     use_custom_boundaries : bool
+         If ``True``, register a custom boundary detector component
+         on the pipeline.
+
+     Returns
+     -------
+     list[ParsedSentence]
+         Ordered list of non-empty sentences with POS tags.
+     """
+     if nlp is None:
+         nlp = get_nlp(model_name)
+
+     if use_custom_boundaries:
+         abbrevs = abbreviations if abbreviations is not None else DEFAULT_ABBREVIATIONS
+         nlp = _register_boundary_detector(nlp, abbrevs)
+
+     doc = nlp(text)
+     results: list[ParsedSentence] = []
+
+     for sent in doc.sents:
+         sent_text = sent.text.strip()
+         if sent_text:
+             pos_tags = [token.pos_ for token in sent]
+             results.append(ParsedSentence(text=sent_text, pos_tags=pos_tags))
+
+     return results
+
+
+ def split_sentences_simple(text: str) -> list[str]:
+     """
+     Lightweight sentence splitter using regex (no spaCy required).
+
+     Splits on periods, newlines, and bullet characters. Used by the
+     :class:`~spell_exploder.analyzers.complexity_index.TextComplexityAnalyzer`,
+     which does not need POS tags.
+
+     Parameters
+     ----------
+     text : str
+         Raw input text.
+
+     Returns
+     -------
+     list[str]
+         Non-empty, whitespace-stripped sentence strings.
+     """
+     clean = re.sub(r"<[^>]*>", "", text)
+     parts = re.split(r"[.\n•]+", clean)
+     return [s.strip() for s in parts if s.strip()]
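
A short usage sketch, assuming the en_core_web_sm model is available (the sample text is illustrative):

from spell_exploder.extractors.sentence_parser import (
    parse_sentences,
    split_sentences_simple,
)

text = "Dr. Smith arrived late. The meeting had already started."
for sent in parse_sentences(text):
    print(sent.text, sent.pos_tags)
# "Dr. Smith arrived late." should survive as one sentence because
# "dr." is in DEFAULT_ABBREVIATIONS.

# The regex fallback needs no spaCy model at all:
split_sentences_simple("First point. Second point.\n• Third point")
# -> ['First point', 'Second point', 'Third point']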
spell_exploder/graphs/__init__.py
File without changes
spell_exploder/io/__init__.py
@@ -0,0 +1,14 @@
+ """
+ I/O utilities: text loading and result export.
+ """
+
+ from spell_exploder.io.readers import TextDocument, load_texts, texts_from_strings
+ from spell_exploder.io.exporters import export_csv, export_json
+
+ __all__ = [
+     "TextDocument",
+     "load_texts",
+     "texts_from_strings",
+     "export_csv",
+     "export_json",
+ ]
spell_exploder/io/exporters.py
@@ -0,0 +1,94 @@
+ """
+ Export utilities for Spellcaster results.
+
+ Provides generic serialisation of any result object to JSON or CSV,
+ plus a specialised APE evolutionary-dynamics JSON report.
+ """
+
+ from __future__ import annotations
+
+ import json
+ import logging
+ from pathlib import Path
+
+ import pandas as pd
+
+ from spell_exploder.results.evolution import EvolutionResult
+
+ logger = logging.getLogger(__name__)
+
+
+ def export_csv(
+     result,
+     path: str,
+     **kwargs,
+ ) -> Path:
+     """
+     Export any result with a ``.to_dataframe()`` method to CSV.
+
+     Parameters
+     ----------
+     result
+         Any Spellcaster result object (``ComplexityComparisonResult``,
+         ``ValenceModelResult``, ``EvolutionResult``, ``KeywordERPResult``).
+     path : str
+         Output file path.
+     **kwargs
+         Forwarded to :meth:`pandas.DataFrame.to_csv`.
+
+     Returns
+     -------
+     Path
+         The written file path.
+     """
+     df = result.to_dataframe()
+     p = Path(path)
+     df.to_csv(p, index=kwargs.pop("index", False), **kwargs)
+     logger.info("Exported %d rows to %s", len(df), p)
+     return p
+
+
+ def export_json(
+     result,
+     path: str,
+     indent: int = 2,
+ ) -> Path:
+     """
+     Export a result to JSON.
+
+     For :class:`~spell_exploder.results.evolution.EvolutionResult`, uses the
+     structured ``to_json()`` format. For all other result types, converts
+     the ``.to_dataframe()`` output to JSON records.
+
+     Parameters
+     ----------
+     result
+         Any Spellcaster result object.
+     path : str
+         Output file path.
+     indent : int
+         JSON indentation.
+
+     Returns
+     -------
+     Path
+         The written file path.
+     """
+     p = Path(path)
+
+     if isinstance(result, EvolutionResult):
+         data = result.to_json()
+     elif hasattr(result, "to_dataframe"):
+         df = result.to_dataframe()
+         data = json.loads(df.to_json(orient="records", default_handler=str))
+     else:
+         raise TypeError(
+             f"Cannot export {type(result).__name__}: "
+             "expected a Spellcaster result with .to_dataframe() or .to_json()"
+         )
+
+     with open(p, "w", encoding="utf-8") as f:
+         json.dump(data, f, indent=indent, ensure_ascii=False, default=str)
+
+     logger.info("Exported JSON to %s", p)
+     return p
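
A usage sketch built only from classes defined elsewhere in this package; the metric values are made up for illustration:

from spell_exploder.io import export_csv, export_json
from spell_exploder.results import ComplexityFlowResult, SentenceMetrics

flow = ComplexityFlowResult(
    label="demo",
    sentences=[
        SentenceMetrics(text="Hello world.", index=0,
                        k_hist=42, volatility=0, synergy=0.0),
    ],
)
export_csv(flow, "demo.csv")    # one row per sentence, no index column
export_json(flow, "demo.json")  # JSON records derived via .to_dataframe()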
spell_exploder/io/readers.py
@@ -0,0 +1,117 @@
+ """
+ Text loading utilities.
+
+ Provides a uniform way to load documents from file paths or raw strings,
+ producing :class:`TextDocument` instances that all analyzers accept.
+ """
+
+ from __future__ import annotations
+
+ import pathlib
+ from dataclasses import dataclass
+
+
+ @dataclass
+ class TextDocument:
+     """
+     A loaded text document with provenance metadata.
+
+     Attributes
+     ----------
+     path : str
+         Original file path, or ``"<inline>"`` for text supplied directly.
+     label : str
+         Human-readable label (defaults to the filename stem).
+     text : str
+         Full text content.
+     """
+
+     path: str
+     label: str
+     text: str
+
+
+ def load_texts(
+     file_paths: list[str],
+     labels: list[str] | None = None,
+     encoding: str = "utf-8",
+ ) -> list[TextDocument]:
+     """
+     Load text files from disk and wrap them as :class:`TextDocument` objects.
+
+     Parameters
+     ----------
+     file_paths : list[str]
+         Paths to ``.txt`` files.
+     labels : list[str] or None
+         Human-readable labels, one per file. When ``None``, labels
+         default to the file-name stems (e.g. ``"essay1"`` for
+         ``"/data/essay1.txt"``).
+     encoding : str
+         Text encoding to use when reading files.
+
+     Returns
+     -------
+     list[TextDocument]
+
+     Raises
+     ------
+     FileNotFoundError
+         If any file does not exist.
+     ValueError
+         If *labels* is provided but its length differs from *file_paths*.
+     """
+     if labels is not None and len(labels) != len(file_paths):
+         raise ValueError(
+             f"labels length ({len(labels)}) must match "
+             f"file_paths length ({len(file_paths)})"
+         )
+
+     documents: list[TextDocument] = []
+     for i, fp in enumerate(file_paths):
+         p = pathlib.Path(fp)
+         if not p.exists():
+             raise FileNotFoundError(f"Text file not found: {fp}")
+
+         text = p.read_text(encoding=encoding, errors="ignore")
+         label = labels[i] if labels is not None else p.stem
+
+         documents.append(TextDocument(path=str(fp), label=label, text=text))
+
+     return documents
+
+
+ def texts_from_strings(
+     texts: list[str],
+     labels: list[str] | None = None,
+ ) -> list[TextDocument]:
+     """
+     Wrap raw strings as :class:`TextDocument` objects (no file I/O).
+
+     Useful when text is already in memory — for example, from a
+     database or an API response.
+
+     Parameters
+     ----------
+     texts : list[str]
+         Raw text strings.
+     labels : list[str] or None
+         Human-readable labels. When ``None``, defaults to
+         ``"text_0"``, ``"text_1"``, etc.
+
+     Returns
+     -------
+     list[TextDocument]
+     """
+     if labels is not None and len(labels) != len(texts):
+         raise ValueError(
+             f"labels length ({len(labels)}) must match "
+             f"texts length ({len(texts)})"
+         )
+
+     documents: list[TextDocument] = []
+     for i, text in enumerate(texts):
+         label = labels[i] if labels is not None else f"text_{i}"
+         documents.append(TextDocument(path="<inline>", label=label, text=text))
+
+     return documents
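
A brief usage sketch (the strings, labels, and file paths shown are hypothetical):

from spell_exploder.io import load_texts, texts_from_strings

docs = texts_from_strings(
    ["First document.", "Second document."],
    labels=["control", "treatment"],
)
print(docs[0].path, docs[0].label)  # <inline> control

# From disk, labels default to file-name stems ("essay1", "essay2"):
# docs = load_texts(["data/essay1.txt", "data/essay2.txt"])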
spell_exploder/results/__init__.py
@@ -0,0 +1,44 @@
+ """
+ Structured result objects for all Spellcaster analyzers.
+
+ Every result class provides a ``.to_dataframe()`` method that produces
+ a flat :class:`pandas.DataFrame` for exploratory analysis.
+ """
+
+ from spell_exploder.results.complexity import (
+     ComplexityComparisonResult,
+     ComplexityFlowResult,
+     SentenceMetrics,
+ )
+ from spell_exploder.results.evolution import (
+     EvolutionaryStatus,
+     EvolutionResult,
+     POSComposition,
+     SpeciesRecord,
+ )
+ from spell_exploder.results.keyword import (
+     CrossKeywordEntanglement,
+     FileKeywordResult,
+     KeywordERPResult,
+     KeywordMeasures,
+ )
+ from spell_exploder.results.valence import (
+     PostMetrics,
+     ValenceModelResult,
+ )
+
+ __all__ = [
+     "SentenceMetrics",
+     "ComplexityFlowResult",
+     "ComplexityComparisonResult",
+     "PostMetrics",
+     "ValenceModelResult",
+     "EvolutionaryStatus",
+     "SpeciesRecord",
+     "POSComposition",
+     "EvolutionResult",
+     "KeywordMeasures",
+     "CrossKeywordEntanglement",
+     "FileKeywordResult",
+     "KeywordERPResult",
+ ]
spell_exploder/results/complexity.py
@@ -0,0 +1,111 @@
+ """
+ Result dataclasses for the Complexity Index (LCX) analyzer.
+ """
+
+ from __future__ import annotations
+
+ from dataclasses import dataclass, field
+
+ import numpy as np
+ import pandas as pd
+
+
+ @dataclass
+ class SentenceMetrics:
+     """Metrics for a single sentence within a complexity flow."""
+
+     text: str
+     """Raw sentence text."""
+
+     index: int
+     """Zero-based position in the document."""
+
+     k_hist: int
+     """Cumulative compressed size of all text up to and including this sentence (bytes)."""
+
+     volatility: int
+     """Levenshtein edit distance from the previous sentence (0 for the first)."""
+
+     synergy: float
+     """Ratio of volatility to marginal compression cost (volatility / delta_k)."""
+
+
+ @dataclass
+ class ComplexityFlowResult:
+     """
+     Result of analysing a single text's complexity flow.
+
+     Returned by :meth:`~spell_exploder.analyzers.complexity_index.TextComplexityAnalyzer.analyze_flow`.
+     """
+
+     label: str
+     """User-supplied label for this text (e.g. file name or condition)."""
+
+     sentences: list[SentenceMetrics] = field(default_factory=list)
+     """Per-sentence metrics in document order."""
+
+     def to_dataframe(self) -> pd.DataFrame:
+         """
+         Flat DataFrame with one row per sentence.
+
+         Columns: ``index``, ``text``, ``k_hist``, ``volatility``, ``synergy``.
+         """
+         if not self.sentences:
+             return pd.DataFrame(
+                 columns=["index", "text", "k_hist", "volatility", "synergy"]
+             )
+         return pd.DataFrame(
+             {
+                 "index": [s.index for s in self.sentences],
+                 "text": [s.text for s in self.sentences],
+                 "k_hist": [s.k_hist for s in self.sentences],
+                 "volatility": [s.volatility for s in self.sentences],
+                 "synergy": [s.synergy for s in self.sentences],
+             }
+         )
+
+     @property
+     def k_hist_array(self) -> np.ndarray:
+         """Cumulative complexity as a NumPy array."""
+         return np.array([s.k_hist for s in self.sentences])
+
+     @property
+     def volatility_array(self) -> np.ndarray:
+         """Volatility sequence as a NumPy array."""
+         return np.array([s.volatility for s in self.sentences])
+
+     @property
+     def synergy_array(self) -> np.ndarray:
+         """Synergy sequence as a NumPy array."""
+         return np.array([s.synergy for s in self.sentences])
+
+
+ @dataclass
+ class ComplexityComparisonResult:
+     """
+     Result of comparing N texts via complexity flow analysis.
+
+     Returned by :meth:`~spell_exploder.analyzers.complexity_index.TextComplexityAnalyzer.compare`.
+     """
+
+     flows: list[ComplexityFlowResult] = field(default_factory=list)
+     """One :class:`ComplexityFlowResult` per input text."""
+
+     def to_dataframe(self) -> pd.DataFrame:
+         """
+         Combined DataFrame with a ``label`` column distinguishing texts.
+         """
+         frames = []
+         for flow in self.flows:
+             df = flow.to_dataframe()
+             df["label"] = flow.label
+             frames.append(df)
+         if not frames:
+             return pd.DataFrame(
+                 columns=["index", "text", "k_hist", "volatility", "synergy", "label"]
+             )
+         return pd.concat(frames, ignore_index=True)
+
+     @property
+     def labels(self) -> list[str]:
+         return [f.label for f in self.flows]
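
A sketch of how these dataclasses compose; the k_hist, volatility, and synergy values below are illustrative, not computed:

from spell_exploder.results import (
    ComplexityComparisonResult,
    ComplexityFlowResult,
    SentenceMetrics,
)

flow = ComplexityFlowResult(
    label="essay1",
    sentences=[
        SentenceMetrics(text="First.", index=0, k_hist=30, volatility=0, synergy=0.0),
        SentenceMetrics(text="Second.", index=1, k_hist=55, volatility=6, synergy=0.24),
    ],
)
comparison = ComplexityComparisonResult(flows=[flow])

df = comparison.to_dataframe()  # columns: index, text, k_hist, volatility, synergy, label
print(comparison.labels)        # ['essay1']
print(flow.k_hist_array)        # [30 55]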