speech-mine 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. speech_mine-0.1.0/.github/workflows/publish.yml +43 -0
  2. speech_mine-0.1.0/.gitignore +210 -0
  3. speech_mine-0.1.0/.python-version +1 -0
  4. speech_mine-0.1.0/PKG-INFO +76 -0
  5. speech_mine-0.1.0/README.md +53 -0
  6. speech_mine-0.1.0/docs/chunk.md +81 -0
  7. speech_mine-0.1.0/docs/extract.md +109 -0
  8. speech_mine-0.1.0/docs/format.md +100 -0
  9. speech_mine-0.1.0/docs/index.md +30 -0
  10. speech_mine-0.1.0/docs/installation.md +47 -0
  11. speech_mine-0.1.0/docs/models.md +33 -0
  12. speech_mine-0.1.0/docs/output-format.md +49 -0
  13. speech_mine-0.1.0/docs/search.md +97 -0
  14. speech_mine-0.1.0/docs/troubleshooting.md +46 -0
  15. speech_mine-0.1.0/examples/example_chunk_config.yaml +26 -0
  16. speech_mine-0.1.0/examples/example_extract_metadata.json +13 -0
  17. speech_mine-0.1.0/examples/example_extract_output.csv +454 -0
  18. speech_mine-0.1.0/examples/example_format_output.txt +63 -0
  19. speech_mine-0.1.0/mkdocs.yml +47 -0
  20. speech_mine-0.1.0/pyproject.toml +45 -0
  21. speech_mine-0.1.0/src/speech_mine/__init__.py +13 -0
  22. speech_mine-0.1.0/src/speech_mine/access.py +354 -0
  23. speech_mine-0.1.0/src/speech_mine/cli.py +409 -0
  24. speech_mine-0.1.0/src/speech_mine/diarizer/__init__.py +12 -0
  25. speech_mine-0.1.0/src/speech_mine/diarizer/cli.py +107 -0
  26. speech_mine-0.1.0/src/speech_mine/diarizer/cli_extract.py +159 -0
  27. speech_mine-0.1.0/src/speech_mine/diarizer/cli_format.py +107 -0
  28. speech_mine-0.1.0/src/speech_mine/diarizer/formatter.py +330 -0
  29. speech_mine-0.1.0/src/speech_mine/diarizer/models.py +14 -0
  30. speech_mine-0.1.0/src/speech_mine/diarizer/processor.py +414 -0
  31. speech_mine-0.1.0/src/speech_mine/fuzz.py +94 -0
  32. speech_mine-0.1.0/src/speech_mine/models.py +32 -0
  33. speech_mine-0.1.0/src/speech_mine/pickaxe/__init__.py +10 -0
  34. speech_mine-0.1.0/src/speech_mine/pickaxe/chunk.py +212 -0
  35. speech_mine-0.1.0/src/speech_mine/pickaxe/cli_chunk.py +137 -0
  36. speech_mine-0.1.0/tests/test_chunk.py +253 -0
  37. speech_mine-0.1.0/tests/test_diary_access.py +520 -0
  38. speech_mine-0.1.0/tests/test_speech_fuzz.py +221 -0
  39. speech_mine-0.1.0/uv.lock +2932 -0
@@ -0,0 +1,43 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ jobs:
8
+ build:
9
+ name: Build distribution
10
+ runs-on: ubuntu-latest
11
+
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+
15
+ - name: Install uv
16
+ uses: astral-sh/setup-uv@v5
17
+
18
+ - name: Build package
19
+ run: uv build
20
+
21
+ - name: Upload build artifacts
22
+ uses: actions/upload-artifact@v4
23
+ with:
24
+ name: dist
25
+ path: dist/
26
+
27
+ publish:
28
+ name: Publish to PyPI
29
+ needs: build
30
+ runs-on: ubuntu-latest
31
+ environment: pypi
32
+ permissions:
33
+ id-token: write # required for OIDC trusted publishing
34
+
35
+ steps:
36
+ - name: Download build artifacts
37
+ uses: actions/download-artifact@v4
38
+ with:
39
+ name: dist
40
+ path: dist/
41
+
42
+ - name: Publish to PyPI
43
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,210 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+ #poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ #pdm.lock
116
+ #pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ #pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # SageMath parsed files
135
+ *.sage.py
136
+
137
+ # Environments
138
+ .env
139
+ .envrc
140
+ .venv
141
+ env/
142
+ venv/
143
+ ENV/
144
+ env.bak/
145
+ venv.bak/
146
+
147
+ # Spyder project settings
148
+ .spyderproject
149
+ .spyproject
150
+
151
+ # Rope project settings
152
+ .ropeproject
153
+
154
+ # mkdocs documentation
155
+ /site
156
+
157
+ # mypy
158
+ .mypy_cache/
159
+ .dmypy.json
160
+ dmypy.json
161
+
162
+ # Pyre type checker
163
+ .pyre/
164
+
165
+ # pytype static type analyzer
166
+ .pytype/
167
+
168
+ # Cython debug symbols
169
+ cython_debug/
170
+
171
+ # PyCharm
172
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
173
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
174
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
175
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
176
+ #.idea/
177
+
178
+ # Abstra
179
+ # Abstra is an AI-powered process automation framework.
180
+ # Ignore directories containing user credentials, local state, and settings.
181
+ # Learn more at https://abstra.io/docs
182
+ .abstra/
183
+
184
+ # Visual Studio Code
185
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
186
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
187
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
188
+ # you could uncomment the following to ignore the entire vscode folder
189
+ # .vscode/
190
+
191
+ # Ruff stuff:
192
+ .ruff_cache/
193
+
194
+ # PyPI configuration file
195
+ .pypirc
196
+
197
+ # Cursor
198
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
199
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
200
+ # refer to https://docs.cursor.com/context/ignore-files
201
+ .cursorignore
202
+ .cursorindexingignore
203
+
204
+ # Marimo
205
+ marimo/_static/
206
+ marimo/_lsp/
207
+ __marimo__/
208
+
209
+ # macOS files
210
+ .DS_Store
211
+ site/
@@ -0,0 +1 @@
1
+ 3.11
@@ -0,0 +1,76 @@
1
+ Metadata-Version: 2.4
2
+ Name: speech-mine
3
+ Version: 0.1.0
4
+ Summary: A powerful tool for extracting and analyzing speech data from audio files with known speaker counts and contents.
5
+ Project-URL: Homepage, https://github.com/your-org/speech-mine
6
+ Project-URL: Repository, https://github.com/your-org/speech-mine
7
+ Project-URL: Issues, https://github.com/your-org/speech-mine/issues
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Intended Audience :: Science/Research
11
+ Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
12
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
13
+ Requires-Python: >=3.11
14
+ Requires-Dist: faster-whisper>=1.2.0
15
+ Requires-Dist: pandas>=2.3.2
16
+ Requires-Dist: pyannote-audio>=3.3.2
17
+ Requires-Dist: pydub>=0.25.1
18
+ Requires-Dist: pytest>=8.4.2
19
+ Requires-Dist: pyyaml>=6.0.0
20
+ Requires-Dist: rapidfuzz>=3.14.1
21
+ Requires-Dist: tqdm>=4.67.1
22
+ Description-Content-Type: text/markdown
23
+
24
+
25
+ # speech-mine
26
+
27
+ Speech diarization and transcript analysis toolkit. Extract speaker-labeled transcripts from audio, format them into readable scripts, search them with fuzzy matching, and pre-process audio with chunking.
28
+
29
+ ## Modules
30
+
31
+ | Module | Description | Docs |
32
+ |--------|-------------|------|
33
+ | `extract` | Transcribe audio with speaker diarization | [→](docs/extract.md) |
34
+ | `format` | Format CSV transcripts into readable scripts | [→](docs/format.md) |
35
+ | `chunk` | Split audio into segments via YAML config | [→](docs/chunk.md) |
36
+ | `search` | Fuzzy search transcripts by word or phrase | [→](docs/search.md) |
37
+
38
+ ## Installation
39
+
40
+ ```bash
41
+ curl -LsSf https://astral.sh/uv/install.sh | sh
42
+ git clone <repository-url>
43
+ cd speech-mine
44
+ uv sync
45
+ ```
46
+
47
+ See [docs/installation.md](docs/installation.md) for library dependency setup and HuggingFace token configuration.
48
+
49
+ ## Quick Start
50
+
51
+ ```bash
52
+ # 1. Extract a transcript
53
+ uv run speech-mine extract interview.mp3 output.csv \
54
+ --hf-token YOUR_TOKEN \
55
+ --num-speakers 2 \
56
+ --compute-type float32
57
+
58
+ # 2. Format into a readable script
59
+ uv run speech-mine format output.csv script.txt
60
+
61
+ # 3. Search it
62
+ uv run speech-mine search "topic of interest" output.csv --pretty
63
+ ```
64
+
65
+ ## Documentation
66
+
67
+ ```bash
68
+ # Serve docs locally
69
+ uv run mkdocs serve
70
+ ```
71
+
72
+ Or browse the `docs/` folder directly.
73
+
74
+ ## License
75
+
76
+ TBD
@@ -0,0 +1,53 @@
1
+
2
+ # speech-mine
3
+
4
+ Speech diarization and transcript analysis toolkit. Extract speaker-labeled transcripts from audio, format them into readable scripts, search them with fuzzy matching, and pre-process audio with chunking.
5
+
6
+ ## Modules
7
+
8
+ | Module | Description | Docs |
9
+ |--------|-------------|------|
10
+ | `extract` | Transcribe audio with speaker diarization | [→](docs/extract.md) |
11
+ | `format` | Format CSV transcripts into readable scripts | [→](docs/format.md) |
12
+ | `chunk` | Split audio into segments via YAML config | [→](docs/chunk.md) |
13
+ | `search` | Fuzzy search transcripts by word or phrase | [→](docs/search.md) |
14
+
15
+ ## Installation
16
+
17
+ ```bash
18
+ curl -LsSf https://astral.sh/uv/install.sh | sh
19
+ git clone <repository-url>
20
+ cd speech-mine
21
+ uv sync
22
+ ```
23
+
24
+ See [docs/installation.md](docs/installation.md) for library dependency setup and HuggingFace token configuration.
25
+
26
+ ## Quick Start
27
+
28
+ ```bash
29
+ # 1. Extract a transcript
30
+ uv run speech-mine extract interview.mp3 output.csv \
31
+ --hf-token YOUR_TOKEN \
32
+ --num-speakers 2 \
33
+ --compute-type float32
34
+
35
+ # 2. Format into a readable script
36
+ uv run speech-mine format output.csv script.txt
37
+
38
+ # 3. Search it
39
+ uv run speech-mine search "topic of interest" output.csv --pretty
40
+ ```
41
+
42
+ ## Documentation
43
+
44
+ ```bash
45
+ # Serve docs locally
46
+ uv run mkdocs serve
47
+ ```
48
+
49
+ Or browse the `docs/` folder directly.
50
+
51
+ ## License
52
+
53
+ TBD
@@ -0,0 +1,81 @@
1
+ # chunk — Audio Chunking
2
+
3
+ Splits a `.wav` file into smaller segments based on a YAML configuration defining time boundaries. Useful for pre-processing long recordings before running `extract`.
4
+
5
+ !!! note
6
+ Only `.wav` input files are supported. Output chunks are also `.wav`.
7
+
8
+ ## CLI
9
+
10
+ ```bash
11
+ uv run speech-mine chunk <audio.wav> <config.yaml> <output_dir/> [options]
12
+ ```
13
+
14
+ ### Options
15
+
16
+ | Flag | Default | Description |
17
+ |------|---------|-------------|
18
+ | `--fade-in MS` | `0` | Fade in duration in milliseconds |
19
+ | `--fade-out MS` | `0` | Fade out duration in milliseconds |
20
+ | `--padding MS` | `0` | Silence padding added to both ends (ms) |
21
+ | `--verbose` | — | Print file sizes for each chunk |
22
+
23
+ ### Examples
24
+
25
+ ```bash
26
+ # Basic chunking
27
+ uv run speech-mine chunk recording.wav config.yaml chunks/
28
+
29
+ # With fade effects and padding
30
+ uv run speech-mine chunk recording.wav config.yaml chunks/ \
31
+ --fade-in 500 \
32
+ --fade-out 500 \
33
+ --padding 100 \
34
+ --verbose
35
+ ```
36
+
37
+ ## Library
38
+
39
+ ```python
40
+ from speech_mine.pickaxe.chunk import chunk_audio_file, AudioChunker
41
+
42
+ # Convenience function
43
+ output_files = chunk_audio_file(
44
+ audio_path="recording.wav",
45
+ config_path="config.yaml",
46
+ output_dir="chunks/",
47
+ fade_in=500,
48
+ fade_out=500,
49
+ silence_padding=100,
50
+ )
51
+
52
+ # Or use the class directly
53
+ chunker = AudioChunker(fade_in_duration=500, fade_out_duration=500, silence_padding=100)
54
+ output_files = chunker.process_audio_file("recording.wav", "config.yaml", "chunks/")
55
+ ```
56
+
57
+ ## YAML config format
58
+
59
+ See [examples/example_chunk_config.yaml](https://github.com/your-org/speech-mine/blob/main/examples/example_chunk_config.yaml) for a full example.
60
+
61
+ ```yaml
62
+ chunks:
63
+ - start: 0.0
64
+ end: 30.0
65
+ name: "intro" # optional — included in output filename
66
+ - start: 30.0
67
+ end: 120.0
68
+ name: "discussion"
69
+ - start: 120.0
70
+ end: 300.0
71
+ # no name — output will be "2.wav"
72
+ ```
73
+
74
+ Output filenames follow the pattern `{index}.{name}.wav` or `{index}.wav` if no name is set. Chunks are sorted by start time before indexing.
75
+
76
+ Validation rules:
77
+
78
+ - `start` and `end` are required for every chunk
79
+ - `end` must be greater than `start`
80
+ - `end` cannot exceed the audio file duration
81
+ - Start times must be unique across chunks
@@ -0,0 +1,109 @@
1
+ # extract — Transcription + Speaker Diarization
2
+
3
+ Transcribes audio and labels each segment with the speaker who said it. Uses [faster-whisper](https://github.com/SYSTRAN/faster-whisper) for transcription and [pyannote](https://github.com/pyannote/pyannote-audio) for speaker diarization.
4
+
5
+ **Supported audio formats:** `.wav`, `.mp3`, `.ogg`, `.flac`
6
+
7
+ ## CLI
8
+
9
+ ```bash
10
+ uv run speech-mine extract <audio> <output.csv> --hf-token TOKEN [options]
11
+ ```
12
+
13
+ ### Options
14
+
15
+ | Flag | Default | Description |
16
+ |------|---------|-------------|
17
+ | `--hf-token TOKEN` | *(required)* | HuggingFace access token |
18
+ | `--model SIZE` | `large-v3` | Whisper model size |
19
+ | `--device` | `auto` | `auto`, `cpu`, or `cuda` |
20
+ | `--compute-type` | `float16` | `float16` (GPU), `float32` (CPU), `int8` |
21
+ | `--num-speakers N` | — | Exact speaker count (best accuracy when known) |
22
+ | `--min-speakers N` | `1` | Minimum expected speakers |
23
+ | `--max-speakers N` | — | Maximum expected speakers |
24
+ | `--verbose` | — | Enable verbose logging |
25
+
26
+ ### Examples
27
+
28
+ ```bash
29
+ # Basic (CPU)
30
+ uv run speech-mine extract interview.mp3 output.csv \
31
+ --hf-token YOUR_TOKEN \
32
+ --compute-type float32
33
+
34
+ # 2-person interview with known speaker count
35
+ uv run speech-mine extract interview.wav output.csv \
36
+ --hf-token YOUR_TOKEN \
37
+ --num-speakers 2 \
38
+ --compute-type float32
39
+
40
+ # GPU with best accuracy model
41
+ uv run speech-mine extract meeting.wav output.csv \
42
+ --hf-token YOUR_TOKEN \
43
+ --model large-v3 \
44
+ --device cuda \
45
+ --compute-type float16 \
46
+ --num-speakers 4
47
+
48
+ # Speaker range when count is unknown
49
+ uv run speech-mine extract conference.wav output.csv \
50
+ --hf-token YOUR_TOKEN \
51
+ --min-speakers 2 \
52
+ --max-speakers 8 \
53
+ --compute-type float32
54
+ ```
55
+
56
+ !!! warning
57
+ Always use `--compute-type float32` when running on CPU. The default (`float16`) requires a GPU and will raise an error on CPU.
58
+
59
+ ## Library
60
+
61
+ ```python
62
+ from speech_mine.diarizer.processor import SpeechDiarizationProcessor
63
+
64
+ processor = SpeechDiarizationProcessor(
65
+ hf_token="YOUR_TOKEN",
66
+ num_speakers=2,
67
+ whisper_model_size="large-v3",
68
+ )
69
+
70
+ # Full pipeline in one call
71
+ processor.process_audio_file("interview.mp3", "output.csv")
72
+ ```
73
+
74
+ ### Individual pipeline steps
75
+
76
+ ```python
77
+ # Step 1: Transcribe
78
+ segments, info = processor.transcribe_audio("interview.mp3")
79
+
80
+ # Step 2: Diarize
81
+ diarization = processor.perform_speaker_diarization("interview.mp3")
82
+
83
+ # Step 3: Align
84
+ aligned = processor.align_transcription_with_speakers(segments, diarization)
85
+
86
+ # Step 4: Save
87
+ processor.save_to_csv(aligned, "output.csv", info)
88
+ ```
89
+
90
+ ## Output
91
+
92
+ Two files are written:
93
+
94
+ - `output.csv` — segment and word-level transcript data ([see example](https://github.com/your-org/speech-mine/blob/main/examples/example_extract_output.csv))
95
+ - `output_metadata.json` — language, duration, speaker list, processing info ([see example](https://github.com/your-org/speech-mine/blob/main/examples/example_extract_metadata.json))
96
+
97
+ See [Output Format](output-format.md) for full column/field reference.
98
+
99
+ ## Speaker Count Tips
100
+
101
+ Specifying `--num-speakers` when you know the exact count improves diarization accuracy by 15–30%.
102
+
103
+ | Parameter | When to use |
104
+ |-----------|-------------|
105
+ | `--num-speakers N` | You know exactly how many people speak |
106
+ | `--min-speakers N` | You know there are at least N speakers |
107
+ | `--max-speakers N` | You want to cap false speaker detection |
108
+
109
+ See [Model Options](models.md) for whisper model and compute type guidance.
@@ -0,0 +1,100 @@
1
+ # format — Script Formatting
2
+
3
+ Converts the CSV output from `extract` into a human-readable, movie-style script.
4
+
5
+ ## CLI
6
+
7
+ ```bash
8
+ uv run speech-mine format <input.csv> <output.txt> [options]
9
+ ```
10
+
11
+ ### Options
12
+
13
+ | Flag | Description |
14
+ |------|-------------|
15
+ | `--speakers FILE` | JSON file mapping `SPEAKER_00` → custom name |
16
+ | `--create-template` | Generate a speaker names template JSON from the CSV |
17
+
18
+ ### Examples
19
+
20
+ ```bash
21
+ # Basic formatting
22
+ uv run speech-mine format output.csv script.txt
23
+
24
+ # Generate a speaker names template
25
+ uv run speech-mine format output.csv script.txt --create-template
26
+
27
+ # Format with custom speaker names
28
+ uv run speech-mine format output.csv script.txt --speakers output_speaker_names.json
29
+ ```
30
+
31
+ ### Custom speaker names workflow
32
+
33
+ ```bash
34
+ # 1. Generate template — creates output_speaker_names.json
35
+ uv run speech-mine format output.csv script.txt --create-template
36
+
37
+ # 2. Edit the template
38
+ # {"SPEAKER_00": "Alice", "SPEAKER_01": "Bob"}
39
+
40
+ # 3. Format with names applied
41
+ uv run speech-mine format output.csv final_script.txt --speakers output_speaker_names.json
42
+ ```
43
+
44
+ ## Library
45
+
46
+ ```python
47
+ from speech_mine.diarizer.formatter import ScriptFormatter
48
+
49
+ # Basic formatting
50
+ formatter = ScriptFormatter()
51
+ formatter.format_script("output.csv", "script.txt")
52
+
53
+ # With custom speaker names
54
+ formatter = ScriptFormatter(custom_speakers={"SPEAKER_00": "Alice", "SPEAKER_01": "Bob"})
55
+ formatter.format_script("output.csv", "script.txt")
56
+
57
+ # Generate a speaker names template from a CSV
58
+ template_path = ScriptFormatter.create_custom_speakers_template("output.csv")
59
+
60
+ # Load speaker names from a JSON file
61
+ speakers = ScriptFormatter.load_custom_speakers("names.json")
62
+ formatter = ScriptFormatter(custom_speakers=speakers)
63
+ ```
64
+
65
+ ## Output format
66
+
67
+ See [examples/example_format_output.txt](https://github.com/your-org/speech-mine/blob/main/examples/example_format_output.txt) for a full sample.
68
+
69
+ ```
70
+ ================================================================================
71
+ TRANSCRIPT
72
+ ================================================================================
73
+
74
+ RECORDING DETAILS:
75
+ ----------------------------------------
76
+ File: interview.mp3
77
+ Duration: 08:47
78
+ Language: EN (confidence: 99.0%)
79
+ Speakers: 2
80
+ Processed: 2026-03-05 22:00:00
81
+
82
+ CAST:
83
+ ----------------------------------------
84
+ SPEAKER A
85
+ SPEAKER B
86
+
87
+ TRANSCRIPT:
88
+ ----------------------------------------
89
+
90
+ [00:00 - 00:05] SPEAKER A:
91
+ So tell me about your background.
92
+
93
+ [00:06 - 00:12] SPEAKER B:
94
+ Sure, I started out in radio back in the eighties.
95
+
96
+ [...5:30 pause...]
97
+
98
+ [05:42 - 05:50] SPEAKER A:
99
+ And how did that shape your career?
100
+ ```
@@ -0,0 +1,30 @@
1
+ # speech-mine
2
+
3
+ Speech diarization and transcript analysis toolkit. Extract speaker-labeled transcripts from audio, format them into readable scripts, search them with fuzzy matching, and pre-process audio with chunking.
4
+
5
+ ## Modules
6
+
7
+ | Module | Description |
8
+ |--------|-------------|
9
+ | [`extract`](extract.md) | Transcribe audio with speaker diarization |
10
+ | [`format`](format.md) | Format CSV transcripts into readable scripts |
11
+ | [`chunk`](chunk.md) | Split audio into segments via YAML config |
12
+ | [`search`](search.md) | Fuzzy search transcripts by word or phrase |
13
+
14
+ ## Quick Start
15
+
16
+ ```bash
17
+ # 1. Extract a transcript
18
+ uv run speech-mine extract interview.mp3 output.csv \
19
+ --hf-token YOUR_TOKEN \
20
+ --num-speakers 2 \
21
+ --compute-type float32
22
+
23
+ # 2. Format it into a readable script
24
+ uv run speech-mine format output.csv script.txt
25
+
26
+ # 3. Search it
27
+ uv run speech-mine search "topic of interest" output.csv --pretty
28
+ ```
29
+
30
+ See [Installation](installation.md) to get started.