structural-topic-model 0.2.2__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {structural_topic_model-0.2.2 → structural_topic_model-0.3.0}/.gitignore +1 -2
- {structural_topic_model-0.2.2 → structural_topic_model-0.3.0}/PKG-INFO +6 -6
- {structural_topic_model-0.2.2 → structural_topic_model-0.3.0}/README.md +5 -5
- {structural_topic_model-0.2.2 → structural_topic_model-0.3.0}/README_ja.md +5 -5
- {structural_topic_model-0.2.2 → structural_topic_model-0.3.0}/pyproject.toml +6 -3
- {structural_topic_model-0.2.2 → structural_topic_model-0.3.0}/scripts/validate_gadarian.py +4 -4
- {structural_topic_model-0.2.2 → structural_topic_model-0.3.0}/tests/test_features.py +1 -1
- {structural_topic_model-0.2.2 → structural_topic_model-0.3.0}/tests/test_stm.py +2 -2
- {structural_topic_model-0.2.2 → structural_topic_model-0.3.0}/LICENSE +0 -0
- {structural_topic_model-0.2.2 → structural_topic_model-0.3.0}/scripts/gadarian_prep.py +0 -0
- {structural_topic_model-0.2.2/pystm → structural_topic_model-0.3.0/stm}/__init__.py +0 -0
- {structural_topic_model-0.2.2/pystm → structural_topic_model-0.3.0/stm}/_estep.py +0 -0
- {structural_topic_model-0.2.2/pystm → structural_topic_model-0.3.0/stm}/_mnreg.py +0 -0
- {structural_topic_model-0.2.2/pystm → structural_topic_model-0.3.0/stm}/_mstep.py +0 -0
- {structural_topic_model-0.2.2/pystm → structural_topic_model-0.3.0/stm}/_spectral.py +0 -0
- {structural_topic_model-0.2.2/pystm → structural_topic_model-0.3.0/stm}/_utils.py +0 -0
- {structural_topic_model-0.2.2/pystm → structural_topic_model-0.3.0/stm}/diagnostics.py +0 -0
- {structural_topic_model-0.2.2/pystm → structural_topic_model-0.3.0/stm}/effects.py +0 -0
- {structural_topic_model-0.2.2/pystm → structural_topic_model-0.3.0/stm}/model_selection.py +0 -0
- {structural_topic_model-0.2.2/pystm → structural_topic_model-0.3.0/stm}/stm.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: structural-topic-model
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Python implementation of the Structural Topic Model (STM), a port of the R stm package with a scikit-learn style API
|
|
5
5
|
Project-URL: Homepage, https://github.com/hirata-keisuke/pystm
|
|
6
6
|
Project-URL: Repository, https://github.com/hirata-keisuke/pystm
|
|
@@ -22,7 +22,7 @@ Requires-Dist: scikit-learn>=1.9.0
|
|
|
22
22
|
Requires-Dist: scipy>=1.14
|
|
23
23
|
Description-Content-Type: text/markdown
|
|
24
24
|
|
|
25
|
-
# pystm — Structural Topic Model in Python
|
|
25
|
+
# stm — Structural Topic Model in Python
|
|
26
26
|
|
|
27
27
|
[](https://pypi.org/project/structural-topic-model/)
|
|
28
28
|
[](LICENSE)
|
|
@@ -45,7 +45,7 @@ pip install structural-topic-model
|
|
|
45
45
|
|
|
46
46
|
```python
|
|
47
47
|
import numpy as np
|
|
48
|
-
from pystm import StructuralTopicModel
|
|
48
|
+
from stm import StructuralTopicModel
|
|
49
49
|
|
|
50
50
|
# X: (n_docs, n_vocab) word count matrix (dense or scipy.sparse)
|
|
51
51
|
# covar: (n_docs, n_covariates) prevalence covariate matrix (intercept added automatically)
|
|
@@ -94,7 +94,7 @@ Estimated via Distributed Poisson regression (equivalent to the R package's defa
|
|
|
94
94
|
Regress topic proportions on covariates using method of composition, returning coefficients with measurement uncertainty:
|
|
95
95
|
|
|
96
96
|
```python
|
|
97
|
-
from pystm import estimate_effect
|
|
97
|
+
from stm import estimate_effect
|
|
98
98
|
|
|
99
99
|
eff = estimate_effect(model, covar, uncertainty="Global", nsims=25)
|
|
100
100
|
tables = eff.summary() # {topic: structured array with estimate/std_error/t_value/p_value}
|
|
@@ -104,7 +104,7 @@ tables[0]["estimate"] # coefficients for topic 0 (first entry is intercept
|
|
|
104
104
|
## Topic Selection (searchK)
|
|
105
105
|
|
|
106
106
|
```python
|
|
107
|
-
from pystm import search_k
|
|
107
|
+
from stm import search_k
|
|
108
108
|
|
|
109
109
|
res = search_k(X, K_values=[5, 10, 15], prevalence=covar,
|
|
110
110
|
model_params={"max_iter": 100})
|
|
@@ -117,7 +117,7 @@ res["exclus"] # exclusivity
|
|
|
117
117
|
## Diagnostics
|
|
118
118
|
|
|
119
119
|
```python
|
|
120
|
-
from pystm import topic_corr, semantic_coherence, exclusivity, check_residuals
|
|
120
|
+
from stm import topic_corr, semantic_coherence, exclusivity, check_residuals
|
|
121
121
|
|
|
122
122
|
tc = topic_corr(model, cutoff=0.01) # topic correlation graph (simple method)
|
|
123
123
|
tc.posadj # positive correlation adjacency matrix
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# pystm — Structural Topic Model in Python
|
|
1
|
+
# stm — Structural Topic Model in Python
|
|
2
2
|
|
|
3
3
|
[](https://pypi.org/project/structural-topic-model/)
|
|
4
4
|
[](LICENSE)
|
|
@@ -21,7 +21,7 @@ pip install structural-topic-model
|
|
|
21
21
|
|
|
22
22
|
```python
|
|
23
23
|
import numpy as np
|
|
24
|
-
from pystm import StructuralTopicModel
|
|
24
|
+
from stm import StructuralTopicModel
|
|
25
25
|
|
|
26
26
|
# X: (n_docs, n_vocab) word count matrix (dense or scipy.sparse)
|
|
27
27
|
# covar: (n_docs, n_covariates) prevalence covariate matrix (intercept added automatically)
|
|
@@ -70,7 +70,7 @@ Estimated via Distributed Poisson regression (equivalent to the R package's defa
|
|
|
70
70
|
Regress topic proportions on covariates using method of composition, returning coefficients with measurement uncertainty:
|
|
71
71
|
|
|
72
72
|
```python
|
|
73
|
-
from pystm import estimate_effect
|
|
73
|
+
from stm import estimate_effect
|
|
74
74
|
|
|
75
75
|
eff = estimate_effect(model, covar, uncertainty="Global", nsims=25)
|
|
76
76
|
tables = eff.summary() # {topic: structured array with estimate/std_error/t_value/p_value}
|
|
@@ -80,7 +80,7 @@ tables[0]["estimate"] # coefficients for topic 0 (first entry is intercept
|
|
|
80
80
|
## Topic Selection (searchK)
|
|
81
81
|
|
|
82
82
|
```python
|
|
83
|
-
from pystm import search_k
|
|
83
|
+
from stm import search_k
|
|
84
84
|
|
|
85
85
|
res = search_k(X, K_values=[5, 10, 15], prevalence=covar,
|
|
86
86
|
model_params={"max_iter": 100})
|
|
@@ -93,7 +93,7 @@ res["exclus"] # exclusivity
|
|
|
93
93
|
## Diagnostics
|
|
94
94
|
|
|
95
95
|
```python
|
|
96
|
-
from pystm import topic_corr, semantic_coherence, exclusivity, check_residuals
|
|
96
|
+
from stm import topic_corr, semantic_coherence, exclusivity, check_residuals
|
|
97
97
|
|
|
98
98
|
tc = topic_corr(model, cutoff=0.01) # topic correlation graph (simple method)
|
|
99
99
|
tc.posadj # positive correlation adjacency matrix
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# pystm — Python による構造的トピックモデル
|
|
1
|
+
# stm — Python による構造的トピックモデル
|
|
2
2
|
|
|
3
3
|
[](https://pypi.org/project/structural-topic-model/)
|
|
4
4
|
[](LICENSE)
|
|
@@ -21,7 +21,7 @@ pip install structural-topic-model
|
|
|
21
21
|
|
|
22
22
|
```python
|
|
23
23
|
import numpy as np
|
|
24
|
-
from pystm import StructuralTopicModel
|
|
24
|
+
from stm import StructuralTopicModel
|
|
25
25
|
|
|
26
26
|
# X: (n_docs, n_vocab) の単語カウント行列(dense / scipy.sparse どちらも可)
|
|
27
27
|
# covar: (n_docs, n_covariates) の prevalence 共変量(切片は自動付与)
|
|
@@ -70,7 +70,7 @@ R 版の `kappa.prior="L1"`(既定)に相当する Distributed Poisson 回
|
|
|
70
70
|
トピック比率を目的変数とする回帰を method of composition(変分事後分布からの θ サンプリング)で行い、測定不確実性込みの係数を返します:
|
|
71
71
|
|
|
72
72
|
```python
|
|
73
|
-
from pystm import estimate_effect
|
|
73
|
+
from stm import estimate_effect
|
|
74
74
|
|
|
75
75
|
eff = estimate_effect(model, covar, uncertainty="Global", nsims=25)
|
|
76
76
|
tables = eff.summary() # {topic: 構造化配列(estimate/std_error/t_value/p_value)}
|
|
@@ -80,7 +80,7 @@ tables[0]["estimate"] # トピック 0 の回帰係数(先頭が切片
|
|
|
80
80
|
## トピック数の選択(searchK 相当)
|
|
81
81
|
|
|
82
82
|
```python
|
|
83
|
-
from pystm import search_k
|
|
83
|
+
from stm import search_k
|
|
84
84
|
|
|
85
85
|
res = search_k(X, K_values=[5, 10, 15], prevalence=covar,
|
|
86
86
|
model_params={"max_iter": 100})
|
|
@@ -93,7 +93,7 @@ res["exclus"] # 排他性
|
|
|
93
93
|
## 診断
|
|
94
94
|
|
|
95
95
|
```python
|
|
96
|
-
from pystm import topic_corr, semantic_coherence, exclusivity, check_residuals
|
|
96
|
+
from stm import topic_corr, semantic_coherence, exclusivity, check_residuals
|
|
97
97
|
|
|
98
98
|
tc = topic_corr(model, cutoff=0.01) # トピック相関グラフ(simple 法)
|
|
99
99
|
tc.posadj # 正相関の隣接行列
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "structural-topic-model"
|
|
3
|
-
version = "0.2.2"
|
|
3
|
+
version = "0.3.0"
|
|
4
4
|
description = "Python implementation of the Structural Topic Model (STM), a port of the R stm package with a scikit-learn style API"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.12"
|
|
@@ -46,8 +46,11 @@ dev = [
|
|
|
46
46
|
requires = ["hatchling"]
|
|
47
47
|
build-backend = "hatchling.build"
|
|
48
48
|
|
|
49
|
+
[tool.pytest.ini_options]
|
|
50
|
+
pythonpath = ["."]
|
|
51
|
+
|
|
49
52
|
[tool.hatch.build.targets.wheel]
|
|
50
|
-
packages = ["pystm"]
|
|
53
|
+
packages = ["stm"]
|
|
51
54
|
|
|
52
55
|
[tool.hatch.build.targets.sdist]
|
|
53
|
-
only-include = ["
|
|
56
|
+
only-include = ["stm", "tests", "scripts", "README.md", "README_ja.md", "LICENSE"]
|
|
@@ -38,10 +38,10 @@ sys.path.insert(0, str(Path(__file__).parent))
|
|
|
38
38
|
from gadarian_prep import load_gadarian, prep_documents, text_processor
|
|
39
39
|
|
|
40
40
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
41
|
-
from pystm import StructuralTopicModel, estimate_effect
|
|
42
|
-
from pystm._estep import estep
|
|
43
|
-
from pystm._mstep import opt_beta, opt_mu, opt_sigma
|
|
44
|
-
from pystm._utils import row_softmax, to_doc_list
|
|
41
|
+
from stm import StructuralTopicModel, estimate_effect
|
|
42
|
+
from stm._estep import estep
|
|
43
|
+
from stm._mstep import opt_beta, opt_mu, opt_sigma
|
|
44
|
+
from stm._utils import row_softmax, to_doc_list
|
|
45
45
|
|
|
46
46
|
PASS = []
|
|
47
47
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""Tests for pystm.StructuralTopicModel.
|
|
1
|
+
"""Tests for stm.StructuralTopicModel.
|
|
2
2
|
|
|
3
3
|
Validation strategy (no R available for direct comparison):
|
|
4
4
|
- ELBO increases over EM iterations and converges,
|
|
@@ -11,7 +11,7 @@ import numpy as np
|
|
|
11
11
|
import pytest
|
|
12
12
|
from scipy.optimize import linear_sum_assignment
|
|
13
13
|
|
|
14
|
-
from pystm import StructuralTopicModel
|
|
14
|
+
from stm import StructuralTopicModel
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
def make_corpus(n_docs=200, n_topics=3, vocab_size=60, doc_len=80,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|