speech-prep 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- speech_prep-0.1.3/.github/workflows/ci.yml +45 -0
- speech_prep-0.1.3/.gitignore +18 -0
- speech_prep-0.1.3/.pre-commit-config.yaml +31 -0
- speech_prep-0.1.3/.python-version +1 -0
- speech_prep-0.1.3/LICENSE +21 -0
- speech_prep-0.1.3/PKG-INFO +220 -0
- speech_prep-0.1.3/README.md +195 -0
- speech_prep-0.1.3/pyproject.toml +114 -0
- speech_prep-0.1.3/scripts/dev.sh +68 -0
- speech_prep-0.1.3/src/speech_prep/__init__.py +34 -0
- speech_prep-0.1.3/src/speech_prep/core.py +203 -0
- speech_prep-0.1.3/src/speech_prep/detection.py +116 -0
- speech_prep-0.1.3/src/speech_prep/exceptions.py +49 -0
- speech_prep-0.1.3/src/speech_prep/processing.py +185 -0
- speech_prep-0.1.3/src/speech_prep/utils.py +130 -0
- speech_prep-0.1.3/tests/__init__.py +1 -0
- speech_prep-0.1.3/tests/conftest.py +82 -0
- speech_prep-0.1.3/tests/fixtures/__init__.py +1 -0
- speech_prep-0.1.3/tests/fixtures/audio.py +72 -0
- speech_prep-0.1.3/tests/integration/__init__.py +1 -0
- speech_prep-0.1.3/tests/integration/test_sound_file_integration.py +127 -0
- speech_prep-0.1.3/tests/test_core.py +650 -0
- speech_prep-0.1.3/uv.lock +485 -0
@@ -0,0 +1,45 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on:
|
4
|
+
push:
|
5
|
+
branches: [ main ]
|
6
|
+
pull_request:
|
7
|
+
branches: [ main ]
|
8
|
+
|
9
|
+
jobs:
|
10
|
+
test:
|
11
|
+
runs-on: ubuntu-latest
|
12
|
+
strategy:
|
13
|
+
matrix:
|
14
|
+
python-version: ["3.9", "3.10", "3.11", "3.12"]
|
15
|
+
|
16
|
+
steps:
|
17
|
+
- uses: actions/checkout@v4
|
18
|
+
|
19
|
+
- name: Install uv
|
20
|
+
uses: astral-sh/setup-uv@v4
|
21
|
+
with:
|
22
|
+
version: "latest"
|
23
|
+
|
24
|
+
- name: Set up Python ${{ matrix.python-version }}
|
25
|
+
run: uv python install ${{ matrix.python-version }}
|
26
|
+
|
27
|
+
- name: Install FFmpeg
|
28
|
+
run: |
|
29
|
+
sudo apt-get update
|
30
|
+
sudo apt-get install -y ffmpeg
|
31
|
+
|
32
|
+
- name: Install dependencies
|
33
|
+
run: uv sync --group dev
|
34
|
+
|
35
|
+
- name: Run linting
|
36
|
+
run: uv run ruff check src/
|
37
|
+
|
38
|
+
- name: Run formatting check
|
39
|
+
run: uv run ruff format src/ --check
|
40
|
+
|
41
|
+
- name: Run type checking
|
42
|
+
run: uv run mypy src/
|
43
|
+
|
44
|
+
- name: Run tests
|
45
|
+
run: uv run pytest tests/
|
@@ -0,0 +1,31 @@
|
|
1
|
+
repos:
|
2
|
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
3
|
+
rev: v5.0.0
|
4
|
+
hooks:
|
5
|
+
- id: trailing-whitespace
|
6
|
+
- id: end-of-file-fixer
|
7
|
+
- id: check-yaml
|
8
|
+
- id: check-toml
|
9
|
+
- id: check-merge-conflict
|
10
|
+
|
11
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
12
|
+
rev: v0.3.0
|
13
|
+
hooks:
|
14
|
+
- id: ruff
|
15
|
+
args: [--fix]
|
16
|
+
- id: ruff-format
|
17
|
+
|
18
|
+
- repo: https://github.com/pre-commit/mirrors-mypy
|
19
|
+
rev: v1.8.0
|
20
|
+
hooks:
|
21
|
+
- id: mypy
|
22
|
+
additional_dependencies: []
|
23
|
+
|
24
|
+
- repo: local
|
25
|
+
hooks:
|
26
|
+
- id: pytest
|
27
|
+
name: pytest
|
28
|
+
entry: uv run pytest tests/
|
29
|
+
language: system
|
30
|
+
types: [python]
|
31
|
+
pass_filenames: false
|
@@ -0,0 +1 @@
|
|
1
|
+
3.9
|
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2025 Dim Kharitonov
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
@@ -0,0 +1,220 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: speech-prep
|
3
|
+
Version: 0.1.3
|
4
|
+
Summary: Audio preprocessing toolkit for speech-to-text applications using ffmpeg
|
5
|
+
Project-URL: Homepage, https://github.com/dimdasci/speech-prep
|
6
|
+
Project-URL: Repository, https://github.com/dimdasci/speech-prep
|
7
|
+
Project-URL: Issues, https://github.com/dimdasci/speech-prep/issues
|
8
|
+
Author-email: Dim Kharitonov <dimds@fastmail.com>
|
9
|
+
License: MIT
|
10
|
+
License-File: LICENSE
|
11
|
+
Keywords: audio,ffmpeg,preprocessing,silence-detection,speech-to-text
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
13
|
+
Classifier: Intended Audience :: Developers
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
20
|
+
Classifier: Topic :: Multimedia :: Sound/Audio
|
21
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
22
|
+
Requires-Python: >=3.9
|
23
|
+
Provides-Extra: dev
|
24
|
+
Description-Content-Type: text/markdown
|
25
|
+
|
26
|
+
# Speech Prep
|
27
|
+
|
28
|
+
Audio preprocessing toolkit for speech-to-text applications using FFmpeg.
|
29
|
+
|
30
|
+
## Overview
|
31
|
+
|
32
|
+
Speech Prep is a Python package designed to prepare audio files for speech-to-text processing. It provides tools for silence detection and removal, speed adjustment, and format conversion - all essential steps for optimizing audio before transcription.
|
33
|
+
|
34
|
+
## Features
|
35
|
+
|
36
|
+
- **Silence Detection**: Automatically detect silence periods in audio files
|
37
|
+
- **Silence Removal**: Remove leading/trailing silence to clean up recordings
|
38
|
+
- **Speed Adjustment**: Change playback speed while maintaining audio quality
|
39
|
+
- **Format Conversion**: Convert between different audio formats (MP3, WAV, FLAC, etc.)
|
40
|
+
- **Clean API**: Simple, intuitive interface with method chaining support
|
41
|
+
- **FFmpeg Integration**: Leverages the power and reliability of FFmpeg
|
42
|
+
|
43
|
+
## Requirements
|
44
|
+
|
45
|
+
- Python 3.9+
|
46
|
+
- FFmpeg (must be installed and accessible via PATH)
|
47
|
+
|
48
|
+
## Installation
|
49
|
+
|
50
|
+
```bash
|
51
|
+
# Install from PyPI
|
52
|
+
pip install speech-prep
|
53
|
+
|
54
|
+
# Or install from source
|
55
|
+
git clone https://github.com/dimdasci/speech-prep.git
|
56
|
+
cd speech-prep
|
57
|
+
uv sync # or pip install -e .
|
58
|
+
```
|
59
|
+
|
60
|
+
## Quick Start
|
61
|
+
|
62
|
+
```python
|
63
|
+
from speech_prep import SoundFile
|
64
|
+
from pathlib import Path
|
65
|
+
|
66
|
+
# Load an audio file
|
67
|
+
audio = SoundFile(Path("recording.wav"))
|
68
|
+
|
69
|
+
if audio:
|
70
|
+
print(f"Duration: {audio.duration:.2f} seconds")
|
71
|
+
print(f"Format: {audio.format}")
|
72
|
+
print(f"Silence periods detected: {len(audio.silence_periods)}")
|
73
|
+
|
74
|
+
# Clean up the audio for speech-to-text
|
75
|
+
cleaned = audio.strip(output_path=Path("recording_stripped.wav"))
|
76
|
+
faster = cleaned.speed(output_path=Path("recording_stripped_fast.wav"), speed_factor=1.2)
|
77
|
+
final = faster.convert(output_path=Path("clean.mp3"))
|
78
|
+
|
79
|
+
print(f"Processed file saved: {final.path}")
|
80
|
+
```
|
81
|
+
|
82
|
+
## Usage Examples
|
83
|
+
|
84
|
+
### Basic Operations
|
85
|
+
|
86
|
+
```python
|
87
|
+
from speech_prep import SoundFile
|
88
|
+
from pathlib import Path
|
89
|
+
|
90
|
+
# Load audio file
|
91
|
+
audio = SoundFile(Path("interview.wav"))
|
92
|
+
|
93
|
+
# View audio information
|
94
|
+
print(audio) # Shows duration, format, file size, and silence periods
|
95
|
+
|
96
|
+
# Remove silence from beginning and end
|
97
|
+
cleaned = audio.strip(output_path=Path("interview_stripped.wav"))
|
98
|
+
|
99
|
+
# Remove only leading silence
|
100
|
+
cleaned = audio.strip(output_path=Path("interview_leading.wav"), trailing=False)
|
101
|
+
|
102
|
+
# Speed up audio by 50%
|
103
|
+
faster = audio.speed(output_path=Path("interview_fast.wav"), speed_factor=1.5)
|
104
|
+
|
105
|
+
# Convert format
|
106
|
+
mp3_file = audio.convert(output_path=Path("output.mp3"))
|
107
|
+
```
|
108
|
+
|
109
|
+
### Processing Pipeline
|
110
|
+
|
111
|
+
```python
|
112
|
+
from speech_prep import SoundFile
|
113
|
+
from pathlib import Path
|
114
|
+
|
115
|
+
def prepare_for_transcription(input_file: Path, output_file: Path):
|
116
|
+
"""Prepare audio file for speech-to-text processing."""
|
117
|
+
# Load the original file
|
118
|
+
audio = SoundFile(input_file)
|
119
|
+
if not audio:
|
120
|
+
return None
|
121
|
+
# Processing pipeline
|
122
|
+
stripped = audio.strip(output_path=input_file.with_stem(input_file.stem + "_stripped"))
|
123
|
+
faster = stripped.speed(output_path=input_file.with_stem(input_file.stem + "_stripped_fast"), speed_factor=1.1)
|
124
|
+
processed = faster.convert(output_path=output_file)
|
125
|
+
if processed:
|
126
|
+
print(f"Original duration: {audio.duration:.2f}s")
|
127
|
+
print(f"Processed duration: {processed.duration:.2f}s")
|
128
|
+
print(f"Time saved: {audio.duration - processed.duration:.2f}s")
|
129
|
+
return processed
|
130
|
+
|
131
|
+
# Use the pipeline
|
132
|
+
result = prepare_for_transcription(
|
133
|
+
Path("long_meeting.wav"),
|
134
|
+
Path("ready_for_stt.mp3")
|
135
|
+
)
|
136
|
+
```
|
137
|
+
|
138
|
+
### Error Handling
|
139
|
+
|
140
|
+
```python
|
141
|
+
from speech_prep import SoundFile, SpeechPrepError, FFmpegError
|
142
|
+
from pathlib import Path
|
143
|
+
|
144
|
+
try:
|
145
|
+
audio = SoundFile(Path("audio.wav"))
|
146
|
+
if audio:
|
147
|
+
result = audio.strip(output_path=Path("audio_stripped.wav")).speed(output_path=Path("audio_stripped_fast.wav"), speed_factor=2.0)
|
148
|
+
print(f"Success: {result.path}")
|
149
|
+
else:
|
150
|
+
print("Failed to load audio file")
|
151
|
+
|
152
|
+
except FFmpegError as e:
|
153
|
+
print(f"FFmpeg error: {e}")
|
154
|
+
if e.stderr:
|
155
|
+
print(f"Details: {e.stderr}")
|
156
|
+
|
157
|
+
except SpeechPrepError as e:
|
158
|
+
print(f"Processing error: {e}")
|
159
|
+
```
|
160
|
+
|
161
|
+
### Custom Parameters
|
162
|
+
|
163
|
+
```python
|
164
|
+
from speech_prep import SoundFile
|
165
|
+
from pathlib import Path
|
166
|
+
|
167
|
+
# Custom silence detection settings
|
168
|
+
audio = SoundFile(
|
169
|
+
Path("audio.wav"),
|
170
|
+
noise_threshold_db=-40, # More sensitive silence detection
|
171
|
+
min_silence_duration=0.3 # Shorter minimum silence periods
|
172
|
+
)
|
173
|
+
|
174
|
+
# Custom output paths
|
175
|
+
cleaned = audio.strip(output_path=Path("custom_output.wav"))
|
176
|
+
|
177
|
+
# Custom conversion settings
|
178
|
+
mp3 = audio.convert(
|
179
|
+
output_path=Path("output.mp3"),
|
180
|
+
audio_bitrate="192k" # Custom bitrate
|
181
|
+
)
|
182
|
+
```
|
183
|
+
|
184
|
+
## API Reference
|
185
|
+
|
186
|
+
### SoundFile Class
|
187
|
+
|
188
|
+
#### Constructor
|
189
|
+
```python
|
190
|
+
SoundFile(file_path, noise_threshold_db=-30, min_silence_duration=0.5)
|
191
|
+
```
|
192
|
+
|
193
|
+
#### Methods
|
194
|
+
- **`strip(output_path, leading=True, trailing=True)`**: Remove silence
|
195
|
+
- **`speed(output_path, speed_factor)`**: Adjust playback speed
|
196
|
+
- **`convert(output_path, audio_bitrate=None)`**: Convert format
|
197
|
+
|
198
|
+
#### Properties
|
199
|
+
- **`path`**: Path to the audio file
|
200
|
+
- **`duration`**: Duration in seconds
|
201
|
+
- **`format`**: Audio format
|
202
|
+
- **`file_size`**: File size in bytes
|
203
|
+
- **`silence_periods`**: List of detected silence periods
|
204
|
+
- **`median_silence`**: Median silence duration
|
205
|
+
|
206
|
+
## Contributing
|
207
|
+
|
208
|
+
1. Fork the repository
|
209
|
+
2. Create your feature branch (`git checkout -b feature/amazing-feature`)
|
210
|
+
3. Commit your changes (`git commit -m 'Add amazing feature'`)
|
211
|
+
4. Push to the branch (`git push origin feature/amazing-feature`)
|
212
|
+
5. Open a Pull Request
|
213
|
+
|
214
|
+
## License
|
215
|
+
|
216
|
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
217
|
+
|
218
|
+
## Acknowledgments
|
219
|
+
|
220
|
+
- Built on top of the powerful [FFmpeg](https://ffmpeg.org/) multimedia framework
|
@@ -0,0 +1,195 @@
|
|
1
|
+
# Speech Prep
|
2
|
+
|
3
|
+
Audio preprocessing toolkit for speech-to-text applications using FFmpeg.
|
4
|
+
|
5
|
+
## Overview
|
6
|
+
|
7
|
+
Speech Prep is a Python package designed to prepare audio files for speech-to-text processing. It provides tools for silence detection and removal, speed adjustment, and format conversion - all essential steps for optimizing audio before transcription.
|
8
|
+
|
9
|
+
## Features
|
10
|
+
|
11
|
+
- **Silence Detection**: Automatically detect silence periods in audio files
|
12
|
+
- **Silence Removal**: Remove leading/trailing silence to clean up recordings
|
13
|
+
- **Speed Adjustment**: Change playback speed while maintaining audio quality
|
14
|
+
- **Format Conversion**: Convert between different audio formats (MP3, WAV, FLAC, etc.)
|
15
|
+
- **Clean API**: Simple, intuitive interface with method chaining support
|
16
|
+
- **FFmpeg Integration**: Leverages the power and reliability of FFmpeg
|
17
|
+
|
18
|
+
## Requirements
|
19
|
+
|
20
|
+
- Python 3.9+
|
21
|
+
- FFmpeg (must be installed and accessible via PATH)
|
22
|
+
|
23
|
+
## Installation
|
24
|
+
|
25
|
+
```bash
|
26
|
+
# Install from PyPI
|
27
|
+
pip install speech-prep
|
28
|
+
|
29
|
+
# Or install from source
|
30
|
+
git clone https://github.com/dimdasci/speech-prep.git
|
31
|
+
cd speech-prep
|
32
|
+
uv sync # or pip install -e .
|
33
|
+
```
|
34
|
+
|
35
|
+
## Quick Start
|
36
|
+
|
37
|
+
```python
|
38
|
+
from speech_prep import SoundFile
|
39
|
+
from pathlib import Path
|
40
|
+
|
41
|
+
# Load an audio file
|
42
|
+
audio = SoundFile(Path("recording.wav"))
|
43
|
+
|
44
|
+
if audio:
|
45
|
+
print(f"Duration: {audio.duration:.2f} seconds")
|
46
|
+
print(f"Format: {audio.format}")
|
47
|
+
print(f"Silence periods detected: {len(audio.silence_periods)}")
|
48
|
+
|
49
|
+
# Clean up the audio for speech-to-text
|
50
|
+
cleaned = audio.strip(output_path=Path("recording_stripped.wav"))
|
51
|
+
faster = cleaned.speed(output_path=Path("recording_stripped_fast.wav"), speed_factor=1.2)
|
52
|
+
final = faster.convert(output_path=Path("clean.mp3"))
|
53
|
+
|
54
|
+
print(f"Processed file saved: {final.path}")
|
55
|
+
```
|
56
|
+
|
57
|
+
## Usage Examples
|
58
|
+
|
59
|
+
### Basic Operations
|
60
|
+
|
61
|
+
```python
|
62
|
+
from speech_prep import SoundFile
|
63
|
+
from pathlib import Path
|
64
|
+
|
65
|
+
# Load audio file
|
66
|
+
audio = SoundFile(Path("interview.wav"))
|
67
|
+
|
68
|
+
# View audio information
|
69
|
+
print(audio) # Shows duration, format, file size, and silence periods
|
70
|
+
|
71
|
+
# Remove silence from beginning and end
|
72
|
+
cleaned = audio.strip(output_path=Path("interview_stripped.wav"))
|
73
|
+
|
74
|
+
# Remove only leading silence
|
75
|
+
cleaned = audio.strip(output_path=Path("interview_leading.wav"), trailing=False)
|
76
|
+
|
77
|
+
# Speed up audio by 50%
|
78
|
+
faster = audio.speed(output_path=Path("interview_fast.wav"), speed_factor=1.5)
|
79
|
+
|
80
|
+
# Convert format
|
81
|
+
mp3_file = audio.convert(output_path=Path("output.mp3"))
|
82
|
+
```
|
83
|
+
|
84
|
+
### Processing Pipeline
|
85
|
+
|
86
|
+
```python
|
87
|
+
from speech_prep import SoundFile
|
88
|
+
from pathlib import Path
|
89
|
+
|
90
|
+
def prepare_for_transcription(input_file: Path, output_file: Path):
|
91
|
+
"""Prepare audio file for speech-to-text processing."""
|
92
|
+
# Load the original file
|
93
|
+
audio = SoundFile(input_file)
|
94
|
+
if not audio:
|
95
|
+
return None
|
96
|
+
# Processing pipeline
|
97
|
+
stripped = audio.strip(output_path=input_file.with_stem(input_file.stem + "_stripped"))
|
98
|
+
faster = stripped.speed(output_path=input_file.with_stem(input_file.stem + "_stripped_fast"), speed_factor=1.1)
|
99
|
+
processed = faster.convert(output_path=output_file)
|
100
|
+
if processed:
|
101
|
+
print(f"Original duration: {audio.duration:.2f}s")
|
102
|
+
print(f"Processed duration: {processed.duration:.2f}s")
|
103
|
+
print(f"Time saved: {audio.duration - processed.duration:.2f}s")
|
104
|
+
return processed
|
105
|
+
|
106
|
+
# Use the pipeline
|
107
|
+
result = prepare_for_transcription(
|
108
|
+
Path("long_meeting.wav"),
|
109
|
+
Path("ready_for_stt.mp3")
|
110
|
+
)
|
111
|
+
```
|
112
|
+
|
113
|
+
### Error Handling
|
114
|
+
|
115
|
+
```python
|
116
|
+
from speech_prep import SoundFile, SpeechPrepError, FFmpegError
|
117
|
+
from pathlib import Path
|
118
|
+
|
119
|
+
try:
|
120
|
+
audio = SoundFile(Path("audio.wav"))
|
121
|
+
if audio:
|
122
|
+
result = audio.strip(output_path=Path("audio_stripped.wav")).speed(output_path=Path("audio_stripped_fast.wav"), speed_factor=2.0)
|
123
|
+
print(f"Success: {result.path}")
|
124
|
+
else:
|
125
|
+
print("Failed to load audio file")
|
126
|
+
|
127
|
+
except FFmpegError as e:
|
128
|
+
print(f"FFmpeg error: {e}")
|
129
|
+
if e.stderr:
|
130
|
+
print(f"Details: {e.stderr}")
|
131
|
+
|
132
|
+
except SpeechPrepError as e:
|
133
|
+
print(f"Processing error: {e}")
|
134
|
+
```
|
135
|
+
|
136
|
+
### Custom Parameters
|
137
|
+
|
138
|
+
```python
|
139
|
+
from speech_prep import SoundFile
|
140
|
+
from pathlib import Path
|
141
|
+
|
142
|
+
# Custom silence detection settings
|
143
|
+
audio = SoundFile(
|
144
|
+
Path("audio.wav"),
|
145
|
+
noise_threshold_db=-40, # More sensitive silence detection
|
146
|
+
min_silence_duration=0.3 # Shorter minimum silence periods
|
147
|
+
)
|
148
|
+
|
149
|
+
# Custom output paths
|
150
|
+
cleaned = audio.strip(output_path=Path("custom_output.wav"))
|
151
|
+
|
152
|
+
# Custom conversion settings
|
153
|
+
mp3 = audio.convert(
|
154
|
+
output_path=Path("output.mp3"),
|
155
|
+
audio_bitrate="192k" # Custom bitrate
|
156
|
+
)
|
157
|
+
```
|
158
|
+
|
159
|
+
## API Reference
|
160
|
+
|
161
|
+
### SoundFile Class
|
162
|
+
|
163
|
+
#### Constructor
|
164
|
+
```python
|
165
|
+
SoundFile(file_path, noise_threshold_db=-30, min_silence_duration=0.5)
|
166
|
+
```
|
167
|
+
|
168
|
+
#### Methods
|
169
|
+
- **`strip(output_path, leading=True, trailing=True)`**: Remove silence
|
170
|
+
- **`speed(output_path, speed_factor)`**: Adjust playback speed
|
171
|
+
- **`convert(output_path, audio_bitrate=None)`**: Convert format
|
172
|
+
|
173
|
+
#### Properties
|
174
|
+
- **`path`**: Path to the audio file
|
175
|
+
- **`duration`**: Duration in seconds
|
176
|
+
- **`format`**: Audio format
|
177
|
+
- **`file_size`**: File size in bytes
|
178
|
+
- **`silence_periods`**: List of detected silence periods
|
179
|
+
- **`median_silence`**: Median silence duration
|
180
|
+
|
181
|
+
## Contributing
|
182
|
+
|
183
|
+
1. Fork the repository
|
184
|
+
2. Create your feature branch (`git checkout -b feature/amazing-feature`)
|
185
|
+
3. Commit your changes (`git commit -m 'Add amazing feature'`)
|
186
|
+
4. Push to the branch (`git push origin feature/amazing-feature`)
|
187
|
+
5. Open a Pull Request
|
188
|
+
|
189
|
+
## License
|
190
|
+
|
191
|
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
192
|
+
|
193
|
+
## Acknowledgments
|
194
|
+
|
195
|
+
- Built on top of the powerful [FFmpeg](https://ffmpeg.org/) multimedia framework
|
@@ -0,0 +1,114 @@
|
|
1
|
+
[project]
|
2
|
+
name = "speech-prep"
|
3
|
+
dynamic = ["version"] # Tell build tools that version is determined dynamically
|
4
|
+
description = "Audio preprocessing toolkit for speech-to-text applications using ffmpeg"
|
5
|
+
readme = "README.md"
|
6
|
+
requires-python = ">=3.9"
|
7
|
+
authors = [
|
8
|
+
{ name = "Dim Kharitonov", email = "dimds@fastmail.com" }
|
9
|
+
]
|
10
|
+
license = { text = "MIT" }
|
11
|
+
keywords = ["speech-to-text", "audio", "ffmpeg", "preprocessing", "silence-detection"]
|
12
|
+
classifiers = [
|
13
|
+
"Development Status :: 3 - Alpha",
|
14
|
+
"Intended Audience :: Developers",
|
15
|
+
"License :: OSI Approved :: MIT License",
|
16
|
+
"Programming Language :: Python :: 3",
|
17
|
+
"Programming Language :: Python :: 3.9",
|
18
|
+
"Programming Language :: Python :: 3.10",
|
19
|
+
"Programming Language :: Python :: 3.11",
|
20
|
+
"Programming Language :: Python :: 3.12",
|
21
|
+
"Topic :: Multimedia :: Sound/Audio",
|
22
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
23
|
+
]
|
24
|
+
dependencies = []
|
25
|
+
|
26
|
+
[project.optional-dependencies]
|
27
|
+
dev = [
|
28
|
+
]
|
29
|
+
|
30
|
+
[project.urls]
|
31
|
+
Homepage = "https://github.com/dimdasci/speech-prep"
|
32
|
+
Repository = "https://github.com/dimdasci/speech-prep"
|
33
|
+
Issues = "https://github.com/dimdasci/speech-prep/issues"
|
34
|
+
|
35
|
+
[build-system]
|
36
|
+
requires = ["hatchling", "hatch-vcs"]
|
37
|
+
build-backend = "hatchling.build"
|
38
|
+
|
39
|
+
[tool.hatch.version]
|
40
|
+
source = "vcs"
|
41
|
+
|
42
|
+
[tool.ruff]
|
43
|
+
target-version = "py39"
|
44
|
+
line-length = 88
|
45
|
+
|
46
|
+
[tool.ruff.lint]
|
47
|
+
select = [
|
48
|
+
"E", # pycodestyle errors
|
49
|
+
"F", # pyflakes
|
50
|
+
"I", # isort
|
51
|
+
"B", # flake8-bugbear
|
52
|
+
"C4", # flake8-comprehensions
|
53
|
+
"UP", # pyupgrade
|
54
|
+
"D", # pydocstyle
|
55
|
+
"N", # pep8-naming
|
56
|
+
]
|
57
|
+
ignore = [
|
58
|
+
"D203", # 1 blank line required before class docstring
|
59
|
+
"D212", # Multi-line docstring summary should start at the first line
|
60
|
+
]
|
61
|
+
exclude = [
|
62
|
+
".git",
|
63
|
+
".venv",
|
64
|
+
"venv",
|
65
|
+
"__pycache__",
|
66
|
+
"build",
|
67
|
+
"dist",
|
68
|
+
]
|
69
|
+
|
70
|
+
[tool.ruff.lint.isort]
|
71
|
+
known-first-party = ["speech_prep"]
|
72
|
+
force-single-line = false
|
73
|
+
force-sort-within-sections = true
|
74
|
+
|
75
|
+
[tool.ruff.lint.pydocstyle]
|
76
|
+
convention = "google"
|
77
|
+
|
78
|
+
[tool.mypy]
|
79
|
+
python_version = "3.9"
|
80
|
+
disallow_untyped_defs = true
|
81
|
+
disallow_incomplete_defs = true
|
82
|
+
check_untyped_defs = true
|
83
|
+
disallow_untyped_decorators = true
|
84
|
+
no_implicit_optional = true
|
85
|
+
strict_optional = true
|
86
|
+
warn_redundant_casts = true
|
87
|
+
warn_unused_ignores = true
|
88
|
+
warn_return_any = true
|
89
|
+
warn_unreachable = true
|
90
|
+
disallow_any_generics = false
|
91
|
+
disallow_subclassing_any = false
|
92
|
+
exclude = [
|
93
|
+
"venv/",
|
94
|
+
".venv/",
|
95
|
+
"build/",
|
96
|
+
"dist/",
|
97
|
+
]
|
98
|
+
|
99
|
+
[[tool.mypy.overrides]]
|
100
|
+
module = "tests.*"
|
101
|
+
disallow_untyped_defs = false
|
102
|
+
disallow_incomplete_defs = false
|
103
|
+
disallow_untyped_decorators = false
|
104
|
+
|
105
|
+
[dependency-groups]
|
106
|
+
dev = [
|
107
|
+
"ruff>=0.3.0",
|
108
|
+
"mypy>=1.8.0",
|
109
|
+
"pytest>=7.4.0",
|
110
|
+
"pytest-mock>=3.11.1",
|
111
|
+
"pydub>=0.25.1",
|
112
|
+
"pre-commit>=4.2.0",
|
113
|
+
"hatch-vcs>=0.5.0",
|
114
|
+
]
|