speech-prep 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,45 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [ main ]
6
+ pull_request:
7
+ branches: [ main ]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ python-version: ["3.9", "3.10", "3.11", "3.12"]
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Install uv
20
+ uses: astral-sh/setup-uv@v4
21
+ with:
22
+ version: "latest"
23
+
24
+ - name: Set up Python ${{ matrix.python-version }}
25
+ run: uv python install ${{ matrix.python-version }}
26
+
27
+ - name: Install FFmpeg
28
+ run: |
29
+ sudo apt-get update
30
+ sudo apt-get install -y ffmpeg
31
+
32
+ - name: Install dependencies
33
+ run: uv sync --group dev
34
+
35
+ - name: Run linting
36
+ run: uv run ruff check src/
37
+
38
+ - name: Run formatting check
39
+ run: uv run ruff format src/ --check
40
+
41
+ - name: Run type checking
42
+ run: uv run mypy src/
43
+
44
+ - name: Run tests
45
+ run: uv run pytest tests/
@@ -0,0 +1,18 @@
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
11
+
12
+ .mypy_cache/
13
+ .pytest_cache/
14
+ .ruff_cache/
15
+
16
+ # Tests
17
+ .coverage
18
+ .coverage.*
@@ -0,0 +1,31 @@
1
+ repos:
2
+ - repo: https://github.com/pre-commit/pre-commit-hooks
3
+ rev: v5.0.0
4
+ hooks:
5
+ - id: trailing-whitespace
6
+ - id: end-of-file-fixer
7
+ - id: check-yaml
8
+ - id: check-toml
9
+ - id: check-merge-conflict
10
+
11
+ - repo: https://github.com/astral-sh/ruff-pre-commit
12
+ rev: v0.3.0
13
+ hooks:
14
+ - id: ruff
15
+ args: [--fix]
16
+ - id: ruff-format
17
+
18
+ - repo: https://github.com/pre-commit/mirrors-mypy
19
+ rev: v1.8.0
20
+ hooks:
21
+ - id: mypy
22
+ additional_dependencies: []
23
+
24
+ - repo: local
25
+ hooks:
26
+ - id: pytest
27
+ name: pytest
28
+ entry: uv run pytest tests/
29
+ language: system
30
+ types: [python]
31
+ pass_filenames: false
@@ -0,0 +1 @@
1
+ 3.9
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Dim Kharitonov
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,220 @@
1
+ Metadata-Version: 2.4
2
+ Name: speech-prep
3
+ Version: 0.1.3
4
+ Summary: Audio preprocessing toolkit for speech-to-text applications using ffmpeg
5
+ Project-URL: Homepage, https://github.com/dimdasci/speech-prep
6
+ Project-URL: Repository, https://github.com/dimdasci/speech-prep
7
+ Project-URL: Issues, https://github.com/dimdasci/speech-prep/issues
8
+ Author-email: Dim Kharitonov <dimds@fastmail.com>
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Keywords: audio,ffmpeg,preprocessing,silence-detection,speech-to-text
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: Multimedia :: Sound/Audio
21
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
22
+ Requires-Python: >=3.9
23
+ Provides-Extra: dev
24
+ Description-Content-Type: text/markdown
25
+
26
+ # Speech Prep
27
+
28
+ Audio preprocessing toolkit for speech-to-text applications using FFmpeg.
29
+
30
+ ## Overview
31
+
32
+ Speech Prep is a Python package designed to prepare audio files for speech-to-text processing. It provides tools for silence detection and removal, speed adjustment, and format conversion - all essential steps for optimizing audio before transcription.
33
+
34
+ ## Features
35
+
36
+ - **Silence Detection**: Automatically detect silence periods in audio files
37
+ - **Silence Removal**: Remove leading/trailing silence to clean up recordings
38
+ - **Speed Adjustment**: Change playback speed while maintaining audio quality
39
+ - **Format Conversion**: Convert between different audio formats (MP3, WAV, FLAC, etc.)
40
+ - **Clean API**: Simple, intuitive interface with method chaining support
41
+ - **FFmpeg Integration**: Leverages the power and reliability of FFmpeg
42
+
43
+ ## Requirements
44
+
45
+ - Python 3.9+
46
+ - FFmpeg (must be installed and accessible via PATH)
47
+
48
+ ## Installation
49
+
50
+ ```bash
51
+ # Install from PyPI
52
+ pip install speech-prep
53
+
54
+ # Or install from source
55
+ git clone https://github.com/dimdasci/speech-prep.git
56
+ cd speech-prep
57
+ uv sync # or pip install -e .
58
+ ```
59
+
60
+ ## Quick Start
61
+
62
+ ```python
63
+ from speech_prep import SoundFile
64
+ from pathlib import Path
65
+
66
+ # Load an audio file
67
+ audio = SoundFile(Path("recording.wav"))
68
+
69
+ if audio:
70
+ print(f"Duration: {audio.duration:.2f} seconds")
71
+ print(f"Format: {audio.format}")
72
+ print(f"Silence periods detected: {len(audio.silence_periods)}")
73
+
74
+ # Clean up the audio for speech-to-text
75
+ cleaned = audio.strip(output_path=Path("recording_stripped.wav"))
76
+ faster = cleaned.speed(output_path=Path("recording_stripped_fast.wav"), speed_factor=1.2)
77
+ final = faster.convert(output_path=Path("clean.mp3"))
78
+
79
+ print(f"Processed file saved: {final.path}")
80
+ ```
81
+
82
+ ## Usage Examples
83
+
84
+ ### Basic Operations
85
+
86
+ ```python
87
+ from speech_prep import SoundFile
88
+ from pathlib import Path
89
+
90
+ # Load audio file
91
+ audio = SoundFile(Path("interview.wav"))
92
+
93
+ # View audio information
94
+ print(audio) # Shows duration, format, file size, and silence periods
95
+
96
+ # Remove silence from beginning and end
97
+ cleaned = audio.strip(output_path=Path("interview_stripped.wav"))
98
+
99
+ # Remove only leading silence
100
+ cleaned = audio.strip(output_path=Path("interview_leading.wav"), trailing=False)
101
+
102
+ # Speed up audio by 50%
103
+ faster = audio.speed(output_path=Path("interview_fast.wav"), speed_factor=1.5)
104
+
105
+ # Convert format
106
+ mp3_file = audio.convert(output_path=Path("output.mp3"))
107
+ ```
108
+
109
+ ### Processing Pipeline
110
+
111
+ ```python
112
+ from speech_prep import SoundFile
113
+ from pathlib import Path
114
+
115
+ def prepare_for_transcription(input_file: Path, output_file: Path):
116
+ """Prepare audio file for speech-to-text processing."""
117
+ # Load the original file
118
+ audio = SoundFile(input_file)
119
+ if not audio:
120
+ return None
121
+ # Processing pipeline
122
+ stripped = audio.strip(output_path=input_file.with_stem(input_file.stem + "_stripped"))
123
+ faster = stripped.speed(output_path=input_file.with_stem(input_file.stem + "_stripped_fast"), speed_factor=1.1)
124
+ processed = faster.convert(output_path=output_file)
125
+ if processed:
126
+ print(f"Original duration: {audio.duration:.2f}s")
127
+ print(f"Processed duration: {processed.duration:.2f}s")
128
+ print(f"Time saved: {audio.duration - processed.duration:.2f}s")
129
+ return processed
130
+
131
+ # Use the pipeline
132
+ result = prepare_for_transcription(
133
+ Path("long_meeting.wav"),
134
+ Path("ready_for_stt.mp3")
135
+ )
136
+ ```
137
+
138
+ ### Error Handling
139
+
140
+ ```python
141
+ from speech_prep import SoundFile, SpeechPrepError, FFmpegError
142
+ from pathlib import Path
143
+
144
+ try:
145
+ audio = SoundFile(Path("audio.wav"))
146
+ if audio:
147
+ result = audio.strip(output_path=Path("audio_stripped.wav")).speed(output_path=Path("audio_fast.wav"), speed_factor=2.0)
148
+ print(f"Success: {result.path}")
149
+ else:
150
+ print("Failed to load audio file")
151
+
152
+ except FFmpegError as e:
153
+ print(f"FFmpeg error: {e}")
154
+ if e.stderr:
155
+ print(f"Details: {e.stderr}")
156
+
157
+ except SpeechPrepError as e:
158
+ print(f"Processing error: {e}")
159
+ ```
160
+
161
+ ### Custom Parameters
162
+
163
+ ```python
164
+ from speech_prep import SoundFile
165
+ from pathlib import Path
166
+
167
+ # Custom silence detection settings
168
+ audio = SoundFile(
169
+ Path("audio.wav"),
170
+ noise_threshold_db=-40, # More sensitive silence detection
171
+ min_silence_duration=0.3 # Shorter minimum silence periods
172
+ )
173
+
174
+ # Custom output paths
175
+ cleaned = audio.strip(output_path=Path("custom_output.wav"))
176
+
177
+ # Custom conversion settings
178
+ mp3 = audio.convert(
179
+ output_path=Path("output.mp3"),
180
+ audio_bitrate="192k" # Custom bitrate
181
+ )
182
+ ```
183
+
184
+ ## API Reference
185
+
186
+ ### SoundFile Class
187
+
188
+ #### Constructor
189
+ ```python
190
+ SoundFile(file_path, noise_threshold_db=-30, min_silence_duration=0.5)
191
+ ```
192
+
193
+ #### Methods
194
+ - **`strip(output_path, leading=True, trailing=True)`**: Remove silence
195
+ - **`speed(output_path, speed_factor)`**: Adjust playback speed
196
+ - **`convert(output_path, audio_bitrate=None)`**: Convert format
197
+
198
+ #### Properties
199
+ - **`path`**: Path to the audio file
200
+ - **`duration`**: Duration in seconds
201
+ - **`format`**: Audio format
202
+ - **`file_size`**: File size in bytes
203
+ - **`silence_periods`**: List of detected silence periods
204
+ - **`median_silence`**: Median silence duration
205
+
206
+ ## Contributing
207
+
208
+ 1. Fork the repository
209
+ 2. Create your feature branch (`git checkout -b feature/amazing-feature`)
210
+ 3. Commit your changes (`git commit -m 'Add amazing feature'`)
211
+ 4. Push to the branch (`git push origin feature/amazing-feature`)
212
+ 5. Open a Pull Request
213
+
214
+ ## License
215
+
216
+ This project is licensed under the MIT License - see the LICENSE file for details.
217
+
218
+ ## Acknowledgments
219
+
220
+ - Built on top of the powerful [FFmpeg](https://ffmpeg.org/) multimedia framework
@@ -0,0 +1,195 @@
1
+ # Speech Prep
2
+
3
+ Audio preprocessing toolkit for speech-to-text applications using FFmpeg.
4
+
5
+ ## Overview
6
+
7
+ Speech Prep is a Python package designed to prepare audio files for speech-to-text processing. It provides tools for silence detection and removal, speed adjustment, and format conversion - all essential steps for optimizing audio before transcription.
8
+
9
+ ## Features
10
+
11
+ - **Silence Detection**: Automatically detect silence periods in audio files
12
+ - **Silence Removal**: Remove leading/trailing silence to clean up recordings
13
+ - **Speed Adjustment**: Change playback speed while maintaining audio quality
14
+ - **Format Conversion**: Convert between different audio formats (MP3, WAV, FLAC, etc.)
15
+ - **Clean API**: Simple, intuitive interface with method chaining support
16
+ - **FFmpeg Integration**: Leverages the power and reliability of FFmpeg
17
+
18
+ ## Requirements
19
+
20
+ - Python 3.9+
21
+ - FFmpeg (must be installed and accessible via PATH)
22
+
23
+ ## Installation
24
+
25
+ ```bash
26
+ # Install from PyPI
27
+ pip install speech-prep
28
+
29
+ # Or install from source
30
+ git clone https://github.com/dimdasci/speech-prep.git
31
+ cd speech-prep
32
+ uv sync # or pip install -e .
33
+ ```
34
+
35
+ ## Quick Start
36
+
37
+ ```python
38
+ from speech_prep import SoundFile
39
+ from pathlib import Path
40
+
41
+ # Load an audio file
42
+ audio = SoundFile(Path("recording.wav"))
43
+
44
+ if audio:
45
+ print(f"Duration: {audio.duration:.2f} seconds")
46
+ print(f"Format: {audio.format}")
47
+ print(f"Silence periods detected: {len(audio.silence_periods)}")
48
+
49
+ # Clean up the audio for speech-to-text
50
+ cleaned = audio.strip(output_path=Path("recording_stripped.wav"))
51
+ faster = cleaned.speed(output_path=Path("recording_stripped_fast.wav"), speed_factor=1.2)
52
+ final = faster.convert(output_path=Path("clean.mp3"))
53
+
54
+ print(f"Processed file saved: {final.path}")
55
+ ```
56
+
57
+ ## Usage Examples
58
+
59
+ ### Basic Operations
60
+
61
+ ```python
62
+ from speech_prep import SoundFile
63
+ from pathlib import Path
64
+
65
+ # Load audio file
66
+ audio = SoundFile(Path("interview.wav"))
67
+
68
+ # View audio information
69
+ print(audio) # Shows duration, format, file size, and silence periods
70
+
71
+ # Remove silence from beginning and end
72
+ cleaned = audio.strip(output_path=Path("interview_stripped.wav"))
73
+
74
+ # Remove only leading silence
75
+ cleaned = audio.strip(output_path=Path("interview_leading.wav"), trailing=False)
76
+
77
+ # Speed up audio by 50%
78
+ faster = audio.speed(output_path=Path("interview_fast.wav"), speed_factor=1.5)
79
+
80
+ # Convert format
81
+ mp3_file = audio.convert(output_path=Path("output.mp3"))
82
+ ```
83
+
84
+ ### Processing Pipeline
85
+
86
+ ```python
87
+ from speech_prep import SoundFile
88
+ from pathlib import Path
89
+
90
+ def prepare_for_transcription(input_file: Path, output_file: Path):
91
+ """Prepare audio file for speech-to-text processing."""
92
+ # Load the original file
93
+ audio = SoundFile(input_file)
94
+ if not audio:
95
+ return None
96
+ # Processing pipeline
97
+ stripped = audio.strip(output_path=input_file.with_stem(input_file.stem + "_stripped"))
98
+ faster = stripped.speed(output_path=input_file.with_stem(input_file.stem + "_stripped_fast"), speed_factor=1.1)
99
+ processed = faster.convert(output_path=output_file)
100
+ if processed:
101
+ print(f"Original duration: {audio.duration:.2f}s")
102
+ print(f"Processed duration: {processed.duration:.2f}s")
103
+ print(f"Time saved: {audio.duration - processed.duration:.2f}s")
104
+ return processed
105
+
106
+ # Use the pipeline
107
+ result = prepare_for_transcription(
108
+ Path("long_meeting.wav"),
109
+ Path("ready_for_stt.mp3")
110
+ )
111
+ ```
112
+
113
+ ### Error Handling
114
+
115
+ ```python
116
+ from speech_prep import SoundFile, SpeechPrepError, FFmpegError
117
+ from pathlib import Path
118
+
119
+ try:
120
+ audio = SoundFile(Path("audio.wav"))
121
+ if audio:
122
+ result = audio.strip(output_path=Path("audio_stripped.wav")).speed(output_path=Path("audio_fast.wav"), speed_factor=2.0)
123
+ print(f"Success: {result.path}")
124
+ else:
125
+ print("Failed to load audio file")
126
+
127
+ except FFmpegError as e:
128
+ print(f"FFmpeg error: {e}")
129
+ if e.stderr:
130
+ print(f"Details: {e.stderr}")
131
+
132
+ except SpeechPrepError as e:
133
+ print(f"Processing error: {e}")
134
+ ```
135
+
136
+ ### Custom Parameters
137
+
138
+ ```python
139
+ from speech_prep import SoundFile
140
+ from pathlib import Path
141
+
142
+ # Custom silence detection settings
143
+ audio = SoundFile(
144
+ Path("audio.wav"),
145
+ noise_threshold_db=-40, # More sensitive silence detection
146
+ min_silence_duration=0.3 # Shorter minimum silence periods
147
+ )
148
+
149
+ # Custom output paths
150
+ cleaned = audio.strip(output_path=Path("custom_output.wav"))
151
+
152
+ # Custom conversion settings
153
+ mp3 = audio.convert(
154
+ output_path=Path("output.mp3"),
155
+ audio_bitrate="192k" # Custom bitrate
156
+ )
157
+ ```
158
+
159
+ ## API Reference
160
+
161
+ ### SoundFile Class
162
+
163
+ #### Constructor
164
+ ```python
165
+ SoundFile(file_path, noise_threshold_db=-30, min_silence_duration=0.5)
166
+ ```
167
+
168
+ #### Methods
169
+ - **`strip(output_path, leading=True, trailing=True)`**: Remove silence
170
+ - **`speed(output_path, speed_factor)`**: Adjust playback speed
171
+ - **`convert(output_path, audio_bitrate=None)`**: Convert format
172
+
173
+ #### Properties
174
+ - **`path`**: Path to the audio file
175
+ - **`duration`**: Duration in seconds
176
+ - **`format`**: Audio format
177
+ - **`file_size`**: File size in bytes
178
+ - **`silence_periods`**: List of detected silence periods
179
+ - **`median_silence`**: Median silence duration
180
+
181
+ ## Contributing
182
+
183
+ 1. Fork the repository
184
+ 2. Create your feature branch (`git checkout -b feature/amazing-feature`)
185
+ 3. Commit your changes (`git commit -m 'Add amazing feature'`)
186
+ 4. Push to the branch (`git push origin feature/amazing-feature`)
187
+ 5. Open a Pull Request
188
+
189
+ ## License
190
+
191
+ This project is licensed under the MIT License - see the LICENSE file for details.
192
+
193
+ ## Acknowledgments
194
+
195
+ - Built on top of the powerful [FFmpeg](https://ffmpeg.org/) multimedia framework
@@ -0,0 +1,114 @@
1
+ [project]
2
+ name = "speech-prep"
3
+ dynamic = ["version"] # Tell build tools that version is determined dynamically
4
+ description = "Audio preprocessing toolkit for speech-to-text applications using ffmpeg"
5
+ readme = "README.md"
6
+ requires-python = ">=3.9"
7
+ authors = [
8
+ { name = "Dim Kharitonov", email = "dimds@fastmail.com" }
9
+ ]
10
+ license = { text = "MIT" }
11
+ keywords = ["speech-to-text", "audio", "ffmpeg", "preprocessing", "silence-detection"]
12
+ classifiers = [
13
+ "Development Status :: 3 - Alpha",
14
+ "Intended Audience :: Developers",
15
+ "License :: OSI Approved :: MIT License",
16
+ "Programming Language :: Python :: 3",
17
+ "Programming Language :: Python :: 3.9",
18
+ "Programming Language :: Python :: 3.10",
19
+ "Programming Language :: Python :: 3.11",
20
+ "Programming Language :: Python :: 3.12",
21
+ "Topic :: Multimedia :: Sound/Audio",
22
+ "Topic :: Software Development :: Libraries :: Python Modules",
23
+ ]
24
+ dependencies = []
25
+
26
+ [project.optional-dependencies]
27
+ dev = [
28
+ ]
29
+
30
+ [project.urls]
31
+ Homepage = "https://github.com/dimdasci/speech-prep"
32
+ Repository = "https://github.com/dimdasci/speech-prep"
33
+ Issues = "https://github.com/dimdasci/speech-prep/issues"
34
+
35
+ [build-system]
36
+ requires = ["hatchling", "hatch-vcs"]
37
+ build-backend = "hatchling.build"
38
+
39
+ [tool.hatch.version]
40
+ source = "vcs"
41
+
42
+ [tool.ruff]
43
+ target-version = "py39"
44
+ line-length = 88
45
+
46
+ [tool.ruff.lint]
47
+ select = [
48
+ "E", # pycodestyle errors
49
+ "F", # pyflakes
50
+ "I", # isort
51
+ "B", # flake8-bugbear
52
+ "C4", # flake8-comprehensions
53
+ "UP", # pyupgrade
54
+ "D", # pydocstyle
55
+ "N", # pep8-naming
56
+ ]
57
+ ignore = [
58
+ "D203", # 1 blank line required before class docstring
59
+ "D212", # Multi-line docstring summary should start at the first line
60
+ ]
61
+ exclude = [
62
+ ".git",
63
+ ".venv",
64
+ "venv",
65
+ "__pycache__",
66
+ "build",
67
+ "dist",
68
+ ]
69
+
70
+ [tool.ruff.lint.isort]
71
+ known-first-party = ["speech_prep"]
72
+ force-single-line = false
73
+ force-sort-within-sections = true
74
+
75
+ [tool.ruff.lint.pydocstyle]
76
+ convention = "google"
77
+
78
+ [tool.mypy]
79
+ python_version = "3.9"
80
+ disallow_untyped_defs = true
81
+ disallow_incomplete_defs = true
82
+ check_untyped_defs = true
83
+ disallow_untyped_decorators = true
84
+ no_implicit_optional = true
85
+ strict_optional = true
86
+ warn_redundant_casts = true
87
+ warn_unused_ignores = true
88
+ warn_return_any = true
89
+ warn_unreachable = true
90
+ disallow_any_generics = false
91
+ disallow_subclassing_any = false
92
+ exclude = [
93
+ "venv/",
94
+ ".venv/",
95
+ "build/",
96
+ "dist/",
97
+ ]
98
+
99
+ [[tool.mypy.overrides]]
100
+ module = "tests.*"
101
+ disallow_untyped_defs = false
102
+ disallow_incomplete_defs = false
103
+ disallow_untyped_decorators = false
104
+
105
+ [dependency-groups]
106
+ dev = [
107
+ "ruff>=0.3.0",
108
+ "mypy>=1.8.0",
109
+ "pytest>=7.4.0",
110
+ "pytest-mock>=3.11.1",
111
+ "pydub>=0.25.1",
112
+ "pre-commit>=4.2.0",
113
+ "hatch-vcs>=0.5.0",
114
+ ]