topica 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. topica-0.1.0/.github/workflows/CI.yml +217 -0
  2. topica-0.1.0/.github/workflows/docs.yml +44 -0
  3. topica-0.1.0/.github/workflows/release.yml +77 -0
  4. topica-0.1.0/.gitignore +49 -0
  5. topica-0.1.0/Cargo.lock +485 -0
  6. topica-0.1.0/Cargo.toml +57 -0
  7. topica-0.1.0/PKG-INFO +108 -0
  8. topica-0.1.0/README.md +84 -0
  9. topica-0.1.0/benchmarks/bench_stm.py +144 -0
  10. topica-0.1.0/docs/api/diagnostics.md +48 -0
  11. topica-0.1.0/docs/api/keywords.md +21 -0
  12. topica-0.1.0/docs/api/models.md +37 -0
  13. topica-0.1.0/docs/api/stm.md +13 -0
  14. topica-0.1.0/docs/can-do/index.md +44 -0
  15. topica-0.1.0/docs/examples/dubois.md +182 -0
  16. topica-0.1.0/docs/examples/gadarian.md +106 -0
  17. topica-0.1.0/docs/examples/poliblog.md +180 -0
  18. topica-0.1.0/docs/getting-started/installation.md +41 -0
  19. topica-0.1.0/docs/getting-started/quickstart.md +61 -0
  20. topica-0.1.0/docs/guides/covariates.md +61 -0
  21. topica-0.1.0/docs/guides/diagnostics.md +66 -0
  22. topica-0.1.0/docs/guides/guided.md +66 -0
  23. topica-0.1.0/docs/guides/keywords.md +44 -0
  24. topica-0.1.0/docs/guides/models.md +140 -0
  25. topica-0.1.0/docs/guides/preprocessing.md +62 -0
  26. topica-0.1.0/docs/guides/short-text.md +47 -0
  27. topica-0.1.0/docs/guides/transform.md +42 -0
  28. topica-0.1.0/docs/index.md +70 -0
  29. topica-0.1.0/docs/publishing/choosing-k.md +84 -0
  30. topica-0.1.0/docs/publishing/choosing-model.md +60 -0
  31. topica-0.1.0/docs/publishing/corpus.md +92 -0
  32. topica-0.1.0/docs/publishing/effects.md +91 -0
  33. topica-0.1.0/docs/publishing/index.md +88 -0
  34. topica-0.1.0/docs/publishing/reporting.md +88 -0
  35. topica-0.1.0/docs/publishing/validation.md +111 -0
  36. topica-0.1.0/docs/requirements.txt +3 -0
  37. topica-0.1.0/docs/stylesheets/extra.css +32 -0
  38. topica-0.1.0/examples/cs_cl.tsv +5000 -0
  39. topica-0.1.0/examples/dubois_crisis.csv +705 -0
  40. topica-0.1.0/examples/dubois_tutorial.ipynb +947 -0
  41. topica-0.1.0/examples/dubois_tutorial.py +372 -0
  42. topica-0.1.0/examples/english-stoplist.txt +118 -0
  43. topica-0.1.0/examples/gadarian.csv +502 -0
  44. topica-0.1.0/examples/poliblog.csv +2001 -0
  45. topica-0.1.0/examples/quickstart.py +140 -0
  46. topica-0.1.0/examples/sample-docs.txt +55 -0
  47. topica-0.1.0/examples/stm_vignette.py +120 -0
  48. topica-0.1.0/mkdocs.yml +106 -0
  49. topica-0.1.0/parity/DMRDriver.java +97 -0
  50. topica-0.1.0/parity/LabeledLDADriver.java +87 -0
  51. topica-0.1.0/parity/mallet_parity.py +357 -0
  52. topica-0.1.0/parity/stm_r_compare.py +252 -0
  53. topica-0.1.0/pyproject.toml +38 -0
  54. topica-0.1.0/python/topica/__init__.py +173 -0
  55. topica-0.1.0/python/topica/__init__.pyi +177 -0
  56. topica-0.1.0/python/topica/_topica.pyi +1185 -0
  57. topica-0.1.0/python/topica/coherence.py +498 -0
  58. topica-0.1.0/python/topica/diagnostics.py +801 -0
  59. topica-0.1.0/python/topica/keywords.py +96 -0
  60. topica-0.1.0/python/topica/phrases.py +364 -0
  61. topica-0.1.0/python/topica/preprocess.py +135 -0
  62. topica-0.1.0/python/topica/py.typed +0 -0
  63. topica-0.1.0/python/topica/stm.py +497 -0
  64. topica-0.1.0/src/bin/analyze.rs +249 -0
  65. topica-0.1.0/src/bin/preprocess.rs +175 -0
  66. topica-0.1.0/src/bin/show.rs +202 -0
  67. topica-0.1.0/src/bin/train.rs +269 -0
  68. topica-0.1.0/src/coherence.rs +155 -0
  69. topica-0.1.0/src/corpus.rs +389 -0
  70. topica-0.1.0/src/ctm.rs +876 -0
  71. topica-0.1.0/src/dmr.rs +431 -0
  72. topica-0.1.0/src/dtm.rs +710 -0
  73. topica-0.1.0/src/gsdmm.rs +462 -0
  74. topica-0.1.0/src/hdp.rs +424 -0
  75. topica-0.1.0/src/hlda.rs +743 -0
  76. topica-0.1.0/src/keyatm.rs +684 -0
  77. topica-0.1.0/src/labeled.rs +132 -0
  78. topica-0.1.0/src/lib.rs +25 -0
  79. topica-0.1.0/src/lightlda.rs +362 -0
  80. topica-0.1.0/src/linalg.rs +115 -0
  81. topica-0.1.0/src/model.rs +181 -0
  82. topica-0.1.0/src/optimize.rs +167 -0
  83. topica-0.1.0/src/output.rs +224 -0
  84. topica-0.1.0/src/pa.rs +433 -0
  85. topica-0.1.0/src/pt.rs +413 -0
  86. topica-0.1.0/src/python.rs +6270 -0
  87. topica-0.1.0/src/sage.rs +320 -0
  88. topica-0.1.0/src/sampler.rs +324 -0
  89. topica-0.1.0/src/seeded.rs +489 -0
  90. topica-0.1.0/src/slda.rs +362 -0
  91. topica-0.1.0/src/spectral.rs +369 -0
  92. topica-0.1.0/tests/conftest.py +36 -0
  93. topica-0.1.0/tests/test_cli_parity.py +131 -0
  94. topica-0.1.0/tests/test_coherence.py +105 -0
  95. topica-0.1.0/tests/test_coherence_fast.py +69 -0
  96. topica-0.1.0/tests/test_corpus.py +249 -0
  97. topica-0.1.0/tests/test_css_extras.py +204 -0
  98. topica-0.1.0/tests/test_ctm.py +461 -0
  99. topica-0.1.0/tests/test_determinism.py +154 -0
  100. topica-0.1.0/tests/test_diagnostics.py +143 -0
  101. topica-0.1.0/tests/test_dmr.py +462 -0
  102. topica-0.1.0/tests/test_dtm.py +158 -0
  103. topica-0.1.0/tests/test_dubois_tutorial.py +71 -0
  104. topica-0.1.0/tests/test_estimate_effect_moc.py +152 -0
  105. topica-0.1.0/tests/test_extra_models.py +181 -0
  106. topica-0.1.0/tests/test_fit_stats.py +440 -0
  107. topica-0.1.0/tests/test_gsdmm.py +84 -0
  108. topica-0.1.0/tests/test_guided_models.py +115 -0
  109. topica-0.1.0/tests/test_hdp.py +120 -0
  110. topica-0.1.0/tests/test_inference.py +382 -0
  111. topica-0.1.0/tests/test_labeled.py +385 -0
  112. topica-0.1.0/tests/test_lda.py +254 -0
  113. topica-0.1.0/tests/test_lightlda.py +117 -0
  114. topica-0.1.0/tests/test_mallet_parity.py +44 -0
  115. topica-0.1.0/tests/test_parallel.py +428 -0
  116. topica-0.1.0/tests/test_phrases.py +371 -0
  117. topica-0.1.0/tests/test_sage.py +525 -0
  118. topica-0.1.0/tests/test_save_load.py +102 -0
  119. topica-0.1.0/tests/test_slda.py +132 -0
  120. topica-0.1.0/tests/test_stm.py +520 -0
  121. topica-0.1.0/tests/test_stm_content.py +451 -0
  122. topica-0.1.0/tests/test_stm_model.py +515 -0
  123. topica-0.1.0/tests/test_stm_r_compare.py +29 -0
  124. topica-0.1.0/tests/test_transform.py +155 -0
  125. topica-0.1.0/tests/test_validation.py +128 -0
  126. topica-0.1.0/tests/test_vignette.py +70 -0
  127. topica-0.1.0/tests/test_vocab_filter.py +60 -0
@@ -0,0 +1,217 @@
1
+ # CI for topica — a PyO3/maturin package producing abi3 wheels.
2
+ #
3
+ # Jobs:
4
+ # test — build in dev mode & run pytest on Linux, macOS, Windows
5
+ # build-wheels — produce abi3 wheels for all target platforms
6
+ # sdist — produce a source distribution
7
+ # release — publish to PyPI on tag push v*
8
+ #
9
+ # Publishing uses PyPI Trusted Publishing (OIDC) — no API token / secret.
10
+ # One-time setup on PyPI: add a trusted publisher for project "topica"
11
+ # (owner nealcaren, repo topica, workflow CI.yml, environment pypi).
12
+
13
+ name: CI
14
+
15
+ on:
16
+ push:
17
+ branches: [main]
18
+ tags: ["v*"]
19
+ pull_request:
20
+ workflow_dispatch:
21
+
22
+ # Minimal permissions by default; individual jobs override as needed.
23
+ permissions:
24
+ contents: read
25
+
26
+ # ---------------------------------------------------------------------------
27
+ # Job: test
28
+ # Builds the extension in-place with `maturin develop` and runs pytest.
29
+ # Matrix covers the three major platforms; a single Python version suffices
30
+ # because the wheel is abi3 (CPython >=3.9 compatible).
31
+ # ---------------------------------------------------------------------------
32
+ jobs:
33
+ test:
34
+ name: test / ${{ matrix.os }}
35
+ runs-on: ${{ matrix.os }}
36
+ strategy:
37
+ fail-fast: false
38
+ matrix:
39
+ os: [ubuntu-latest, macos-latest, windows-latest]
40
+
41
+ steps:
42
+ - uses: actions/checkout@v4
43
+
44
+ # Install stable Rust toolchain (required for maturin/pyo3 compilation).
45
+ - name: Install Rust (stable)
46
+ uses: dtolnay/rust-toolchain@stable
47
+
48
+ - name: Set up Python 3.12
49
+ uses: actions/setup-python@v5
50
+ with:
51
+ python-version: "3.12"
52
+
53
+ # `maturin develop` requires an active virtualenv, so create one and use
54
+ # it for the build + test. A single bash step keeps the venv activated
55
+ # across the build and test commands (activation does not persist between
56
+ # steps). `shell: bash` selects Git bash on Windows, so the same
57
+ # activation path logic works on all three platforms.
58
+ - name: Build extension and run tests
59
+ shell: bash
60
+ run: |
61
+ python -m venv .venv
62
+ if [ -f .venv/bin/activate ]; then source .venv/bin/activate; else source .venv/Scripts/activate; fi
63
+ python -m pip install --upgrade pip
64
+ pip install maturin numpy pytest
65
+ maturin develop --features python
66
+ pytest -q
67
+
68
+ # ---------------------------------------------------------------------------
69
+ # Job: build-wheels
70
+ # Produces abi3 wheels. Because the wheel is abi3 (tagged cp39-abi3-*), one
71
+ # build per platform target is sufficient — no Python version matrix needed.
72
+ #
73
+ # maturin-action handles QEMU setup automatically for aarch64-linux and
74
+ # invokes maturin inside the appropriate manylinux container for Linux.
75
+ # ---------------------------------------------------------------------------
76
+ build-wheels:
77
+ name: wheel / ${{ matrix.target }}
78
+ runs-on: ${{ matrix.os }}
79
+ strategy:
80
+ fail-fast: false
81
+ matrix:
82
+ include:
83
+ # ---- Linux x86_64 (manylinux) ----
84
+ - os: ubuntu-latest
85
+ target: x86_64
86
+ manylinux: auto
87
+
88
+ # ---- Linux aarch64 (manylinux, cross-compiled via QEMU) ----
89
+ - os: ubuntu-latest
90
+ target: aarch64
91
+ manylinux: auto
92
+
93
+ # ---- macOS x86_64 ----
94
+ - os: macos-latest
95
+ target: x86_64
96
+ manylinux: ""
97
+
98
+ # ---- macOS aarch64 (Apple Silicon) ----
99
+ - os: macos-latest
100
+ target: aarch64
101
+ manylinux: ""
102
+
103
+ # ---- Windows x64 ----
104
+ - os: windows-latest
105
+ target: x64
106
+ manylinux: ""
107
+
108
+ steps:
109
+ - uses: actions/checkout@v4
110
+
111
+ # maturin-action provides Rust toolchain internally; explicit install not
112
+ # needed here, but setup-python IS needed so maturin can find an interpreter
113
+ # for the abi3 wheel stub.
114
+ - name: Set up Python 3.12
115
+ uses: actions/setup-python@v5
116
+ with:
117
+ python-version: "3.12"
118
+
119
+ - name: Build wheel
120
+ uses: PyO3/maturin-action@v1
121
+ with:
122
+ target: ${{ matrix.target }}
123
+ # manylinux is only meaningful on Linux legs; other legs set an empty string in the matrix, which the `||` below turns into 'auto'.
124
+ manylinux: ${{ matrix.manylinux || 'auto' }}
125
+ # --release: optimised build; --out dist: place wheel in dist/;
126
+ # --features python: enable pyo3/numpy deps (see Cargo.toml).
127
+ args: --release --out dist --features python
128
+ # Rust source is at the repo root.
129
+ working-directory: .
130
+
131
+ # Upload each leg's wheel as a separate artifact so the release job can
132
+ # download them all cleanly.
133
+ - name: Upload wheel artifact
134
+ uses: actions/upload-artifact@v4
135
+ with:
136
+ name: wheel-${{ matrix.target }}-${{ matrix.os }}
137
+ path: dist/*.whl
138
+ if-no-files-found: error
139
+
140
+ # ---------------------------------------------------------------------------
141
+ # Job: sdist
142
+ # Produces a source distribution (required for a complete PyPI release).
143
+ # ---------------------------------------------------------------------------
144
+ sdist:
145
+ name: sdist
146
+ runs-on: ubuntu-latest
147
+
148
+ steps:
149
+ - uses: actions/checkout@v4
150
+
151
+ - name: Install Rust (stable)
152
+ uses: dtolnay/rust-toolchain@stable
153
+
154
+ - name: Set up Python 3.12
155
+ uses: actions/setup-python@v5
156
+ with:
157
+ python-version: "3.12"
158
+
159
+ - name: Install maturin
160
+ run: pip install maturin
161
+
162
+ - name: Build sdist
163
+ run: maturin sdist --out dist
164
+
165
+ - name: Upload sdist artifact
166
+ uses: actions/upload-artifact@v4
167
+ with:
168
+ name: sdist
169
+ path: dist/*.tar.gz
170
+ if-no-files-found: error
171
+
172
+ # ---------------------------------------------------------------------------
173
+ # Job: release
174
+ # Runs only when a version tag (v*) is pushed.
175
+ # Downloads all wheel + sdist artifacts and publishes to PyPI via Trusted
176
+ # Publishing (OIDC) — no stored token. Requires the one-time PyPI setup noted
177
+ # at the top of this file.
178
+ # ---------------------------------------------------------------------------
179
+ release:
180
+ name: release (PyPI)
181
+ runs-on: ubuntu-latest
182
+ if: startsWith(github.ref, 'refs/tags/v')
183
+ needs: [build-wheels, sdist]
184
+
185
+ # PyPI Trusted Publishing (OIDC): no API token needed. The id-token
186
+ # permission lets GitHub mint a short-lived OIDC token that PyPI trades for
187
+ # an upload credential, based on the trusted-publisher entry you configure
188
+ # on PyPI (project: topica, owner: nealcaren, repo: topica, workflow:
189
+ # CI.yml, environment: pypi). See README / docs for the one-time setup.
190
+ permissions:
191
+ contents: read
192
+ id-token: write
193
+
194
+ # Binding the job to a named environment lets you add approval gates on
195
+ # PyPI publishes and is the environment referenced in the PyPI config.
196
+ environment:
197
+ name: pypi
198
+ url: https://pypi.org/p/topica
199
+
200
+ steps:
201
+ - name: Download all artifacts
202
+ uses: actions/download-artifact@v4
203
+ with:
204
+ # Download every artifact produced above into dist/.
205
+ path: dist
206
+ merge-multiple: true
207
+
208
+ - name: List dist contents
209
+ run: ls -lh dist/
210
+
211
+ # Trusted Publishing: the action detects the OIDC token from the
212
+ # id-token permission above and uploads without any stored secret.
213
+ - name: Publish to PyPI
214
+ uses: pypa/gh-action-pypi-publish@release/v1
215
+ with:
216
+ packages-dir: dist
217
+ skip-existing: true
@@ -0,0 +1,44 @@
1
+ name: docs
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ paths:
7
+ - "docs/**"
8
+ - "mkdocs.yml"
9
+ - "python/**"
10
+ - "src/**"
11
+ - ".github/workflows/docs.yml"
12
+ workflow_dispatch:
13
+
14
+ permissions:
15
+ contents: write
16
+
17
+ jobs:
18
+ build-and-deploy:
19
+ runs-on: ubuntu-latest
20
+ steps:
21
+ - uses: actions/checkout@v4
22
+
23
+ - uses: actions/setup-python@v5
24
+ with:
25
+ python-version: "3.11"
26
+
27
+ - name: Set up Rust
28
+ uses: dtolnay/rust-toolchain@stable
29
+
30
+ - name: Build the extension and deploy the docs
31
+ shell: bash
32
+ run: |
33
+ # maturin develop needs an activated virtualenv; create one and keep it
34
+ # active for the whole step so the build and mkdocs share it.
35
+ python -m venv .venv
36
+ source .venv/bin/activate
37
+ python -m pip install --upgrade pip
38
+ pip install maturin numpy
39
+ pip install -r docs/requirements.txt
40
+ # force_inspection imports the compiled module, so build it first.
41
+ maturin develop --release --features python
42
+ git config user.name "github-actions[bot]"
43
+ git config user.email "github-actions[bot]@users.noreply.github.com"
44
+ mkdocs gh-deploy --force --no-history
@@ -0,0 +1,77 @@
1
+ name: Release
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - 'v*'
7
+
8
+ jobs:
9
+ build:
10
+ name: ${{ matrix.target }}
11
+ runs-on: ${{ matrix.os }}
12
+ permissions:
13
+ contents: write
14
+ strategy:
15
+ fail-fast: false
16
+ matrix:
17
+ include:
18
+ - os: ubuntu-latest
19
+ target: x86_64-unknown-linux-musl
20
+ archive: tar.gz
21
+ - os: macos-13
22
+ target: x86_64-apple-darwin
23
+ archive: tar.gz
24
+ - os: macos-latest
25
+ target: aarch64-apple-darwin
26
+ archive: tar.gz
27
+ - os: windows-latest
28
+ target: x86_64-pc-windows-msvc
29
+ archive: zip
30
+
31
+ steps:
32
+ - uses: actions/checkout@v4
33
+
34
+ - name: Install Rust toolchain
35
+ uses: dtolnay/rust-toolchain@stable
36
+ with:
37
+ targets: ${{ matrix.target }}
38
+
39
+ - name: Install musl tools (Linux)
40
+ if: matrix.target == 'x86_64-unknown-linux-musl'
41
+ run: sudo apt-get update && sudo apt-get install -y musl-tools  # refresh the package index first; a stale index on the runner image can make the install fail
42
+
43
+ - name: Build
44
+ run: cargo build --release --target ${{ matrix.target }}
45
+
46
+ - name: Package (Unix)
47
+ if: matrix.os != 'windows-latest'
48
+ run: |
49
+ STAGING="rust-mallet-${{ github.ref_name }}-${{ matrix.target }}"
50
+ mkdir "$STAGING"
51
+ for bin in preprocess analyze train show; do
52
+ cp "target/${{ matrix.target }}/release/$bin" "$STAGING/"
53
+ done
54
+ cp README.md "$STAGING/"
55
+ cp -r examples "$STAGING/"
56
+ tar -czf "$STAGING.tar.gz" "$STAGING"
57
+ echo "ASSET=$STAGING.tar.gz" >> "$GITHUB_ENV"  # quoted so the env-file path can never word-split or glob
58
+
59
+ - name: Package (Windows)
60
+ if: matrix.os == 'windows-latest'
61
+ shell: pwsh
62
+ run: |
63
+ $staging = "rust-mallet-${{ github.ref_name }}-${{ matrix.target }}"
64
+ New-Item -ItemType Directory $staging
65
+ foreach ($bin in @('preprocess','analyze','train','show')) {
66
+ Copy-Item "target/${{ matrix.target }}/release/$bin.exe" "$staging/"
67
+ }
68
+ Copy-Item README.md "$staging/"
69
+ Copy-Item -Recurse examples "$staging/"
70
+ Compress-Archive -Path "$staging/*" -DestinationPath "$staging.zip"
71
+ echo "ASSET=$staging.zip" | Out-File -FilePath $env:GITHUB_ENV -Append
72
+
73
+ - name: Upload to GitHub Release
74
+ uses: softprops/action-gh-release@v2
75
+ with:
76
+ files: ${{ env.ASSET }}
77
+ generate_release_notes: true
@@ -0,0 +1,49 @@
1
+ # Rust build artifacts
2
+ /target/
3
+
4
+ # Java reference source (not part of the Rust project)
5
+ Mallet/
6
+
7
+ # Generated corpus and output files
8
+ *.corp
9
+ *.mallet
10
+ topic_word.tsv
11
+ doc_topic.tsv
12
+
13
+ # Local test and scratch files
14
+ aha.*
15
+
16
+ # Distribution / wheel output
17
+ dist/
18
+
19
+ # Compiled Python extension modules (dev builds)
20
+ *.so
21
+ *.pyd
22
+
23
+ # Python bytecode caches
24
+ __pycache__/
25
+ *.py[cod]
26
+
27
+ # Virtual environments
28
+ .venv/
29
+ .venv*/
30
+ venv/
31
+ env/
32
+
33
+ # Egg / setuptools / maturin dev-install artifacts
34
+ *.egg-info/
35
+ *.egg
36
+
37
+ # Test / type-checker caches
38
+ .pytest_cache/
39
+ .mypy_cache/
40
+
41
+ # Built documentation site
42
+ /site/
43
+
44
+ # macOS metadata
45
+ .DS_Store
46
+ parity/*.class
47
+
48
+ # Claude Code local dir (agent worktrees, etc.)
49
+ .claude/