speech-prep 0.1.3__tar.gz → 0.1.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- speech_prep-0.1.4/.github/workflows/cd.yml +53 -0
- {speech_prep-0.1.3 → speech_prep-0.1.4}/PKG-INFO +30 -12
- {speech_prep-0.1.3 → speech_prep-0.1.4}/README.md +29 -11
- {speech_prep-0.1.3 → speech_prep-0.1.4}/pyproject.toml +2 -0
- {speech_prep-0.1.3 → speech_prep-0.1.4}/src/speech_prep/__init__.py +2 -0
- {speech_prep-0.1.3 → speech_prep-0.1.4}/src/speech_prep/core.py +7 -2
- speech_prep-0.1.4/src/speech_prep/formats.py +15 -0
- {speech_prep-0.1.3 → speech_prep-0.1.4}/src/speech_prep/processing.py +30 -7
- {speech_prep-0.1.3 → speech_prep-0.1.4}/src/speech_prep/utils.py +12 -3
- {speech_prep-0.1.3 → speech_prep-0.1.4}/tests/integration/test_sound_file_integration.py +8 -3
- {speech_prep-0.1.3 → speech_prep-0.1.4}/tests/test_core.py +15 -4
- speech_prep-0.1.4/uv.lock +944 -0
- speech_prep-0.1.3/uv.lock +0 -485
- {speech_prep-0.1.3 → speech_prep-0.1.4}/.github/workflows/ci.yml +0 -0
- {speech_prep-0.1.3 → speech_prep-0.1.4}/.gitignore +0 -0
- {speech_prep-0.1.3 → speech_prep-0.1.4}/.pre-commit-config.yaml +0 -0
- {speech_prep-0.1.3 → speech_prep-0.1.4}/.python-version +0 -0
- {speech_prep-0.1.3 → speech_prep-0.1.4}/LICENSE +0 -0
- {speech_prep-0.1.3 → speech_prep-0.1.4}/scripts/dev.sh +0 -0
- {speech_prep-0.1.3 → speech_prep-0.1.4}/src/speech_prep/detection.py +0 -0
- {speech_prep-0.1.3 → speech_prep-0.1.4}/src/speech_prep/exceptions.py +0 -0
- {speech_prep-0.1.3 → speech_prep-0.1.4}/tests/__init__.py +0 -0
- {speech_prep-0.1.3 → speech_prep-0.1.4}/tests/conftest.py +0 -0
- {speech_prep-0.1.3 → speech_prep-0.1.4}/tests/fixtures/__init__.py +0 -0
- {speech_prep-0.1.3 → speech_prep-0.1.4}/tests/fixtures/audio.py +0 -0
- {speech_prep-0.1.3 → speech_prep-0.1.4}/tests/integration/__init__.py +0 -0
@@ -0,0 +1,53 @@
|
|
1
|
+
name: CD
|
2
|
+
|
3
|
+
on:
|
4
|
+
# Deploy to TestPyPI after CI passes on main branch
|
5
|
+
workflow_run:
|
6
|
+
workflows: ["CI"]
|
7
|
+
types: [completed]
|
8
|
+
branches: [main]
|
9
|
+
|
10
|
+
# Deploy to PyPI on version tags
|
11
|
+
push:
|
12
|
+
tags:
|
13
|
+
- 'v*'
|
14
|
+
|
15
|
+
jobs:
|
16
|
+
publish:
|
17
|
+
runs-on: ubuntu-latest
|
18
|
+
# Only run if CI workflow succeeded (for main branch) or on tags
|
19
|
+
if: |
|
20
|
+
(github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.head_branch == 'main') ||
|
21
|
+
startsWith(github.ref, 'refs/tags/v')
|
22
|
+
steps:
|
23
|
+
- uses: actions/checkout@v4
|
24
|
+
with:
|
25
|
+
fetch-depth: 0 # Needed for hatch-vcs to work properly
|
26
|
+
|
27
|
+
- name: Install uv
|
28
|
+
uses: astral-sh/setup-uv@v4
|
29
|
+
with:
|
30
|
+
version: "latest"
|
31
|
+
|
32
|
+
- name: Set up Python
|
33
|
+
run: uv python install 3.9
|
34
|
+
|
35
|
+
- name: Install dependencies
|
36
|
+
run: uv sync --group dev
|
37
|
+
|
38
|
+
- name: Build package
|
39
|
+
run: uv build
|
40
|
+
|
41
|
+
- name: Publish to TestPyPI (on main branch)
|
42
|
+
if: github.ref == 'refs/heads/main'
|
43
|
+
env:
|
44
|
+
TWINE_USERNAME: __token__
|
45
|
+
TWINE_PASSWORD: ${{ secrets.TEST_PYPI_API_TOKEN }}
|
46
|
+
run: uv run twine upload --repository testpypi --skip-existing --verbose dist/*
|
47
|
+
|
48
|
+
- name: Publish to PyPI (on tags)
|
49
|
+
if: startsWith(github.ref, 'refs/tags/v')
|
50
|
+
env:
|
51
|
+
TWINE_USERNAME: __token__
|
52
|
+
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
|
53
|
+
run: uv run twine upload dist/*
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: speech-prep
|
3
|
-
Version: 0.1.3
|
3
|
+
Version: 0.1.4
|
4
4
|
Summary: Audio preprocessing toolkit for speech-to-text applications using ffmpeg
|
5
5
|
Project-URL: Homepage, https://github.com/dimdasci/speech-prep
|
6
6
|
Project-URL: Repository, https://github.com/dimdasci/speech-prep
|
@@ -60,21 +60,19 @@ uv sync # or pip install -e .
|
|
60
60
|
## Quick Start
|
61
61
|
|
62
62
|
```python
|
63
|
-
from speech_prep import SoundFile
|
63
|
+
from speech_prep import SoundFile, AudioFormat
|
64
64
|
from pathlib import Path
|
65
65
|
|
66
66
|
# Load an audio file
|
67
67
|
audio = SoundFile(Path("recording.wav"))
|
68
68
|
|
69
69
|
if audio:
|
70
|
-
print(
|
71
|
-
print(f"Format: {audio.format}")
|
72
|
-
print(f"Silence periods detected: {len(audio.silence_periods)}")
|
70
|
+
print(audio) # Shows duration, format, file size, and silence periods
|
73
71
|
|
74
72
|
# Clean up the audio for speech-to-text
|
75
73
|
cleaned = audio.strip(output_path=Path("recording_stripped.wav"))
|
76
74
|
faster = cleaned.speed(output_path=Path("recording_stripped_fast.wav"), speed_factor=1.2)
|
77
|
-
final = faster.convert(output_path=Path("clean.mp3"))
|
75
|
+
final = faster.convert(output_path=Path("clean.mp3"), target_format=AudioFormat.MP3)
|
78
76
|
|
79
77
|
print(f"Processed file saved: {final.path}")
|
80
78
|
```
|
@@ -84,7 +82,7 @@ if audio:
|
|
84
82
|
### Basic Operations
|
85
83
|
|
86
84
|
```python
|
87
|
-
from speech_prep import SoundFile
|
85
|
+
from speech_prep import SoundFile, AudioFormat
|
88
86
|
from pathlib import Path
|
89
87
|
|
90
88
|
# Load audio file
|
@@ -103,17 +101,18 @@ cleaned = audio.strip(output_path=Path("interview_leading.wav"), trailing=False)
|
|
103
101
|
faster = audio.speed(output_path=Path("interview_fast.wav"), speed_factor=1.5)
|
104
102
|
|
105
103
|
# Convert format
|
106
|
-
mp3_file = audio.convert(output_path=Path("output.mp3"))
|
104
|
+
mp3_file = audio.convert(output_path=Path("output.mp3"), target_format=AudioFormat.MP3)
|
107
105
|
```
|
108
106
|
|
109
107
|
### Processing Pipeline
|
110
108
|
|
111
109
|
```python
|
112
|
-
from speech_prep import SoundFile
|
110
|
+
from speech_prep import AudioFormat, SoundFile
|
113
111
|
from pathlib import Path
|
114
112
|
|
115
113
|
def prepare_for_transcription(input_file: Path, output_file: Path):
|
116
114
|
"""Prepare audio file for speech-to-text processing."""
|
115
|
+
|
117
116
|
# Load the original file
|
118
117
|
audio = SoundFile(input_file)
|
119
118
|
if not audio:
|
@@ -121,7 +120,7 @@ def prepare_for_transcription(input_file: Path, output_file: Path):
|
|
121
120
|
# Processing pipeline
|
122
121
|
stripped = audio.strip(output_path=input_file.with_stem(input_file.stem + "_stripped"))
|
123
122
|
faster = stripped.speed(output_path=input_file.with_stem(input_file.stem + "_stripped_fast"), speed_factor=1.1)
|
124
|
-
processed = faster.convert(output_path=output_file)
|
123
|
+
processed = faster.convert(output_path=output_file, target_format=AudioFormat.MP3)
|
125
124
|
if processed:
|
126
125
|
print(f"Original duration: {audio.duration:.2f}s")
|
127
126
|
print(f"Processed duration: {processed.duration:.2f}s")
|
@@ -175,8 +174,10 @@ audio = SoundFile(
|
|
175
174
|
cleaned = audio.strip(output_path=Path("custom_output.wav"))
|
176
175
|
|
177
176
|
# Custom conversion settings
|
177
|
+
from speech_prep import AudioFormat
|
178
178
|
mp3 = audio.convert(
|
179
179
|
output_path=Path("output.mp3"),
|
180
|
+
target_format=AudioFormat.MP3,
|
180
181
|
audio_bitrate="192k" # Custom bitrate
|
181
182
|
)
|
182
183
|
```
|
@@ -193,16 +194,33 @@ SoundFile(file_path, noise_threshold_db=-30, min_silence_duration=0.5)
|
|
193
194
|
#### Methods
|
194
195
|
- **`strip(output_path, leading=True, trailing=True)`**: Remove silence
|
195
196
|
- **`speed(output_path, speed_factor)`**: Adjust playback speed
|
196
|
-
- **`convert(output_path, audio_bitrate=None)`**: Convert format
|
197
|
+
- **`convert(output_path, target_format, audio_bitrate=None)`**: Convert format
|
197
198
|
|
198
199
|
#### Properties
|
199
200
|
- **`path`**: Path to the audio file
|
200
201
|
- **`duration`**: Duration in seconds
|
201
|
-
- **`format`**: Audio format
|
202
|
+
- **`format`**: Audio format (AudioFormat enum)
|
202
203
|
- **`file_size`**: File size in bytes
|
203
204
|
- **`silence_periods`**: List of detected silence periods
|
204
205
|
- **`median_silence`**: Median silence duration
|
205
206
|
|
207
|
+
### AudioFormat Enum
|
208
|
+
|
209
|
+
The `AudioFormat` enum represents supported audio formats:
|
210
|
+
|
211
|
+
```python
|
212
|
+
from speech_prep import AudioFormat
|
213
|
+
|
214
|
+
# Available formats
|
215
|
+
AudioFormat.MP3 # MP3 format
|
216
|
+
AudioFormat.WAV # WAV format
|
217
|
+
AudioFormat.FLAC # FLAC format
|
218
|
+
AudioFormat.AAC # AAC format
|
219
|
+
AudioFormat.OGG # OGG format
|
220
|
+
AudioFormat.M4A # M4A format
|
221
|
+
AudioFormat.UNKNOWN # Unknown/unsupported format
|
222
|
+
```
|
223
|
+
|
206
224
|
## Contributing
|
207
225
|
|
208
226
|
1. Fork the repository
|
@@ -35,21 +35,19 @@ uv sync # or pip install -e .
|
|
35
35
|
## Quick Start
|
36
36
|
|
37
37
|
```python
|
38
|
-
from speech_prep import SoundFile
|
38
|
+
from speech_prep import SoundFile, AudioFormat
|
39
39
|
from pathlib import Path
|
40
40
|
|
41
41
|
# Load an audio file
|
42
42
|
audio = SoundFile(Path("recording.wav"))
|
43
43
|
|
44
44
|
if audio:
|
45
|
-
print(
|
46
|
-
print(f"Format: {audio.format}")
|
47
|
-
print(f"Silence periods detected: {len(audio.silence_periods)}")
|
45
|
+
print(audio) # Shows duration, format, file size, and silence periods
|
48
46
|
|
49
47
|
# Clean up the audio for speech-to-text
|
50
48
|
cleaned = audio.strip(output_path=Path("recording_stripped.wav"))
|
51
49
|
faster = cleaned.speed(output_path=Path("recording_stripped_fast.wav"), speed_factor=1.2)
|
52
|
-
final = faster.convert(output_path=Path("clean.mp3"))
|
50
|
+
final = faster.convert(output_path=Path("clean.mp3"), target_format=AudioFormat.MP3)
|
53
51
|
|
54
52
|
print(f"Processed file saved: {final.path}")
|
55
53
|
```
|
@@ -59,7 +57,7 @@ if audio:
|
|
59
57
|
### Basic Operations
|
60
58
|
|
61
59
|
```python
|
62
|
-
from speech_prep import SoundFile
|
60
|
+
from speech_prep import SoundFile, AudioFormat
|
63
61
|
from pathlib import Path
|
64
62
|
|
65
63
|
# Load audio file
|
@@ -78,17 +76,18 @@ cleaned = audio.strip(output_path=Path("interview_leading.wav"), trailing=False)
|
|
78
76
|
faster = audio.speed(output_path=Path("interview_fast.wav"), speed_factor=1.5)
|
79
77
|
|
80
78
|
# Convert format
|
81
|
-
mp3_file = audio.convert(output_path=Path("output.mp3"))
|
79
|
+
mp3_file = audio.convert(output_path=Path("output.mp3"), target_format=AudioFormat.MP3)
|
82
80
|
```
|
83
81
|
|
84
82
|
### Processing Pipeline
|
85
83
|
|
86
84
|
```python
|
87
|
-
from speech_prep import SoundFile
|
85
|
+
from speech_prep import AudioFormat, SoundFile
|
88
86
|
from pathlib import Path
|
89
87
|
|
90
88
|
def prepare_for_transcription(input_file: Path, output_file: Path):
|
91
89
|
"""Prepare audio file for speech-to-text processing."""
|
90
|
+
|
92
91
|
# Load the original file
|
93
92
|
audio = SoundFile(input_file)
|
94
93
|
if not audio:
|
@@ -96,7 +95,7 @@ def prepare_for_transcription(input_file: Path, output_file: Path):
|
|
96
95
|
# Processing pipeline
|
97
96
|
stripped = audio.strip(output_path=input_file.with_stem(input_file.stem + "_stripped"))
|
98
97
|
faster = stripped.speed(output_path=input_file.with_stem(input_file.stem + "_stripped_fast"), speed_factor=1.1)
|
99
|
-
processed = faster.convert(output_path=output_file)
|
98
|
+
processed = faster.convert(output_path=output_file, target_format=AudioFormat.MP3)
|
100
99
|
if processed:
|
101
100
|
print(f"Original duration: {audio.duration:.2f}s")
|
102
101
|
print(f"Processed duration: {processed.duration:.2f}s")
|
@@ -150,8 +149,10 @@ audio = SoundFile(
|
|
150
149
|
cleaned = audio.strip(output_path=Path("custom_output.wav"))
|
151
150
|
|
152
151
|
# Custom conversion settings
|
152
|
+
from speech_prep import AudioFormat
|
153
153
|
mp3 = audio.convert(
|
154
154
|
output_path=Path("output.mp3"),
|
155
|
+
target_format=AudioFormat.MP3,
|
155
156
|
audio_bitrate="192k" # Custom bitrate
|
156
157
|
)
|
157
158
|
```
|
@@ -168,16 +169,33 @@ SoundFile(file_path, noise_threshold_db=-30, min_silence_duration=0.5)
|
|
168
169
|
#### Methods
|
169
170
|
- **`strip(output_path, leading=True, trailing=True)`**: Remove silence
|
170
171
|
- **`speed(output_path, speed_factor)`**: Adjust playback speed
|
171
|
-
- **`convert(output_path, audio_bitrate=None)`**: Convert format
|
172
|
+
- **`convert(output_path, target_format, audio_bitrate=None)`**: Convert format
|
172
173
|
|
173
174
|
#### Properties
|
174
175
|
- **`path`**: Path to the audio file
|
175
176
|
- **`duration`**: Duration in seconds
|
176
|
-
- **`format`**: Audio format
|
177
|
+
- **`format`**: Audio format (AudioFormat enum)
|
177
178
|
- **`file_size`**: File size in bytes
|
178
179
|
- **`silence_periods`**: List of detected silence periods
|
179
180
|
- **`median_silence`**: Median silence duration
|
180
181
|
|
182
|
+
### AudioFormat Enum
|
183
|
+
|
184
|
+
The `AudioFormat` enum represents supported audio formats:
|
185
|
+
|
186
|
+
```python
|
187
|
+
from speech_prep import AudioFormat
|
188
|
+
|
189
|
+
# Available formats
|
190
|
+
AudioFormat.MP3 # MP3 format
|
191
|
+
AudioFormat.WAV # WAV format
|
192
|
+
AudioFormat.FLAC # FLAC format
|
193
|
+
AudioFormat.AAC # AAC format
|
194
|
+
AudioFormat.OGG # OGG format
|
195
|
+
AudioFormat.M4A # M4A format
|
196
|
+
AudioFormat.UNKNOWN # Unknown/unsupported format
|
197
|
+
```
|
198
|
+
|
181
199
|
## Contributing
|
182
200
|
|
183
201
|
1. Fork the repository
|
@@ -38,6 +38,7 @@ build-backend = "hatchling.build"
|
|
38
38
|
|
39
39
|
[tool.hatch.version]
|
40
40
|
source = "vcs"
|
41
|
+
raw-options = { local_scheme = "no-local-version" }
|
41
42
|
|
42
43
|
[tool.ruff]
|
43
44
|
target-version = "py39"
|
@@ -111,4 +112,5 @@ dev = [
|
|
111
112
|
"pydub>=0.25.1",
|
112
113
|
"pre-commit>=4.2.0",
|
113
114
|
"hatch-vcs>=0.5.0",
|
115
|
+
"twine>=6.1.0",
|
114
116
|
]
|
@@ -13,6 +13,7 @@ from .exceptions import (
|
|
13
13
|
SilenceDetectionError,
|
14
14
|
SpeechPrepError,
|
15
15
|
)
|
16
|
+
from .formats import AudioFormat
|
16
17
|
|
17
18
|
# Import version from hatch-vcs
|
18
19
|
try:
|
@@ -25,6 +26,7 @@ except ImportError:
|
|
25
26
|
|
26
27
|
__all__ = [
|
27
28
|
"SoundFile",
|
29
|
+
"AudioFormat",
|
28
30
|
"SpeechPrepError",
|
29
31
|
"FFmpegError",
|
30
32
|
"FileValidationError",
|
@@ -6,6 +6,7 @@ from typing import Optional
|
|
6
6
|
|
7
7
|
from .detection import calculate_median_silence, detect_silence
|
8
8
|
from .exceptions import SpeechPrepError
|
9
|
+
from .formats import AudioFormat
|
9
10
|
from .processing import adjust_speed, convert_format, strip_silence
|
10
11
|
from .utils import format_time, get_audio_properties
|
11
12
|
|
@@ -158,20 +159,24 @@ class SoundFile:
|
|
158
159
|
return None
|
159
160
|
|
160
161
|
def convert(
|
161
|
-
self,
|
162
|
+
self,
|
163
|
+
output_path: Path,
|
164
|
+
target_format: AudioFormat,
|
165
|
+
audio_bitrate: Optional[str] = None,
|
162
166
|
) -> Optional["SoundFile"]:
|
163
167
|
"""
|
164
168
|
Convert the audio file to a different format.
|
165
169
|
|
166
170
|
Args:
|
167
171
|
output_path: Path to save the converted file
|
172
|
+
target_format: Target audio format
|
168
173
|
audio_bitrate: Optional bitrate for the output file (e.g., '192k', '320k')
|
169
174
|
|
170
175
|
Returns:
|
171
176
|
A new SoundFile instance for the converted file, or None if operation failed
|
172
177
|
"""
|
173
178
|
try:
|
174
|
-
convert_format(self.path, output_path, audio_bitrate)
|
179
|
+
convert_format(self.path, output_path, target_format, audio_bitrate)
|
175
180
|
return SoundFile(
|
176
181
|
output_path, self.noise_threshold_db, self.min_silence_duration
|
177
182
|
)
|
@@ -0,0 +1,15 @@
|
|
1
|
+
"""Enums for audio file formats."""
|
2
|
+
|
3
|
+
from enum import Enum
|
4
|
+
|
5
|
+
|
6
|
+
class AudioFormat(Enum):
|
7
|
+
"""Enum representing supported audio formats."""
|
8
|
+
|
9
|
+
MP3 = "mp3"
|
10
|
+
WAV = "wav"
|
11
|
+
FLAC = "flac"
|
12
|
+
AAC = "aac"
|
13
|
+
OGG = "ogg"
|
14
|
+
M4A = "m4a"
|
15
|
+
UNKNOWN = "unknown"
|
@@ -5,6 +5,7 @@ import subprocess
|
|
5
5
|
from typing import Optional
|
6
6
|
|
7
7
|
from .exceptions import FFmpegError
|
8
|
+
from .formats import AudioFormat
|
8
9
|
|
9
10
|
|
10
11
|
def strip_silence(
|
@@ -67,7 +68,10 @@ def strip_silence(
|
|
67
68
|
|
68
69
|
|
69
70
|
def convert_format(
|
70
|
-
input_path: Path,
|
71
|
+
input_path: Path,
|
72
|
+
output_path: Path,
|
73
|
+
target_format: AudioFormat,
|
74
|
+
audio_bitrate: Optional[str] = None,
|
71
75
|
) -> None:
|
72
76
|
"""
|
73
77
|
Convert the audio file to a different format.
|
@@ -75,6 +79,7 @@ def convert_format(
|
|
75
79
|
Args:
|
76
80
|
input_path: Path to the input audio file
|
77
81
|
output_path: Path to save the converted file
|
82
|
+
target_format: Target audio format
|
78
83
|
audio_bitrate: Optional bitrate for the output file (e.g., '192k', '320k')
|
79
84
|
|
80
85
|
Raises:
|
@@ -90,9 +95,21 @@ def convert_format(
|
|
90
95
|
# Add output file
|
91
96
|
cmd.append(str(output_path))
|
92
97
|
|
93
|
-
|
94
|
-
|
95
|
-
|
98
|
+
# Determine the input format from the file extension
|
99
|
+
input_format = AudioFormat.UNKNOWN
|
100
|
+
try:
|
101
|
+
ext = input_path.suffix.lower().lstrip(".")
|
102
|
+
input_format = AudioFormat(ext)
|
103
|
+
except ValueError:
|
104
|
+
pass # Keep as UNKNOWN if not found
|
105
|
+
|
106
|
+
# Use the provided target_format
|
107
|
+
output_format = target_format
|
108
|
+
|
109
|
+
print(
|
110
|
+
f"Converting {input_path.name} from "
|
111
|
+
f"{input_format.value} to {output_format.value}"
|
112
|
+
)
|
96
113
|
|
97
114
|
_run_ffmpeg_command(cmd, "converting format")
|
98
115
|
|
@@ -136,10 +153,16 @@ def adjust_speed(input_path: Path, output_path: Path, speed_factor: float) -> No
|
|
136
153
|
filter_str = ",".join(atempo_filters) if atempo_filters else "atempo=1.0"
|
137
154
|
|
138
155
|
# Determine appropriate codec based on output format
|
139
|
-
output_format =
|
140
|
-
|
156
|
+
output_format = AudioFormat.UNKNOWN
|
157
|
+
try:
|
158
|
+
ext = output_path.suffix.lower().lstrip(".")
|
159
|
+
output_format = AudioFormat(ext)
|
160
|
+
except ValueError:
|
161
|
+
pass # Keep as UNKNOWN
|
162
|
+
|
163
|
+
if output_format == AudioFormat.MP3:
|
141
164
|
codec = "libmp3lame"
|
142
|
-
elif output_format ==
|
165
|
+
elif output_format == AudioFormat.WAV:
|
143
166
|
codec = "pcm_s16le"
|
144
167
|
else:
|
145
168
|
codec = "libmp3lame" # Default to mp3 codec
|
@@ -5,6 +5,7 @@ from pathlib import Path
|
|
5
5
|
import subprocess
|
6
6
|
|
7
7
|
from .exceptions import AudioPropertiesError, FileValidationError
|
8
|
+
from .formats import AudioFormat
|
8
9
|
|
9
10
|
|
10
11
|
def validate_file(file_path: Path) -> bool:
|
@@ -29,7 +30,7 @@ def validate_file(file_path: Path) -> bool:
|
|
29
30
|
return True
|
30
31
|
|
31
32
|
|
32
|
-
def get_audio_properties(file_path: Path) -> tuple[float, int, str]:
|
33
|
+
def get_audio_properties(file_path: Path) -> tuple[float, int, AudioFormat]:
|
33
34
|
"""
|
34
35
|
Extract audio properties (duration, file size, format) using ffprobe.
|
35
36
|
|
@@ -37,7 +38,8 @@ def get_audio_properties(file_path: Path) -> tuple[float, int, str]:
|
|
37
38
|
file_path: Path to the audio file
|
38
39
|
|
39
40
|
Returns:
|
40
|
-
Tuple of (duration, file_size, audio_format)
|
41
|
+
Tuple of (duration, file_size, audio_format) where audio_format
|
42
|
+
is an AudioFormat enum representing the detected audio format
|
41
43
|
|
42
44
|
Raises:
|
43
45
|
AudioPropertiesError: If properties cannot be extracted
|
@@ -71,10 +73,17 @@ def get_audio_properties(file_path: Path) -> tuple[float, int, str]:
|
|
71
73
|
probe_data = json.loads(probe_result.stdout)["format"]
|
72
74
|
duration = float(probe_data["duration"])
|
73
75
|
file_size = int(probe_data["size"])
|
74
|
-
|
76
|
+
format_str = probe_data["format_name"].split(",")[
|
75
77
|
0
|
76
78
|
] # Get the first format name
|
77
79
|
|
80
|
+
# Convert format string to enum
|
81
|
+
try:
|
82
|
+
audio_format = AudioFormat(format_str.lower())
|
83
|
+
except ValueError:
|
84
|
+
# If not a direct match, use UNKNOWN
|
85
|
+
audio_format = AudioFormat.UNKNOWN
|
86
|
+
|
78
87
|
if duration <= 0 or file_size <= 0:
|
79
88
|
raise AudioPropertiesError(
|
80
89
|
f"Invalid duration or file size for {file_path}. "
|
@@ -41,7 +41,10 @@ class TestSoundFileIntegrationWithRealFiles:
|
|
41
41
|
# Verify basic properties
|
42
42
|
assert sound_file.path == file_path
|
43
43
|
assert sound_file.duration > 0
|
44
|
-
assert
|
44
|
+
assert sound_file.format is not None
|
45
|
+
from speech_prep.formats import AudioFormat
|
46
|
+
|
47
|
+
assert isinstance(sound_file.format, AudioFormat)
|
45
48
|
assert sound_file.file_size > 0
|
46
49
|
|
47
50
|
# Verify silence detection
|
@@ -102,10 +105,12 @@ class TestSoundFileIntegrationWithRealFiles:
|
|
102
105
|
print(f"Sped file: {sped}")
|
103
106
|
|
104
107
|
# 3. Convert format
|
105
|
-
|
108
|
+
from speech_prep.formats import AudioFormat
|
109
|
+
|
110
|
+
converted = sped.convert(converted_path, AudioFormat.MP3, audio_bitrate="192k")
|
106
111
|
assert converted is not None, "Convert operation failed"
|
107
112
|
assert converted.path.exists(), "Converted file doesn't exist"
|
108
|
-
assert converted.format
|
113
|
+
assert converted.format == AudioFormat.MP3, "Format conversion failed"
|
109
114
|
print(f"Converted file: {converted}")
|
110
115
|
|
111
116
|
# Verify final file properties
|
@@ -430,11 +430,16 @@ class TestSoundFileConvert:
|
|
430
430
|
# Create the original SoundFile
|
431
431
|
sound_file = SoundFile(input_path)
|
432
432
|
|
433
|
+
# Import AudioFormat
|
434
|
+
from speech_prep.formats import AudioFormat
|
435
|
+
|
433
436
|
# Call convert method
|
434
|
-
result = sound_file.convert(output_path, audio_bitrate="192k")
|
437
|
+
result = sound_file.convert(output_path, AudioFormat.MP3, audio_bitrate="192k")
|
435
438
|
|
436
439
|
# Verify convert_format was called with correct arguments
|
437
|
-
mock_convert.assert_called_once_with(
|
440
|
+
mock_convert.assert_called_once_with(
|
441
|
+
input_path, output_path, AudioFormat.MP3, "192k"
|
442
|
+
)
|
438
443
|
|
439
444
|
# Verify a new SoundFile instance was returned
|
440
445
|
assert result is not None
|
@@ -473,8 +478,11 @@ class TestSoundFileConvert:
|
|
473
478
|
# Create the SoundFile
|
474
479
|
sound_file = SoundFile(input_path)
|
475
480
|
|
481
|
+
# Import AudioFormat
|
482
|
+
from speech_prep.formats import AudioFormat
|
483
|
+
|
476
484
|
# Call convert method
|
477
|
-
result = sound_file.convert(output_path)
|
485
|
+
result = sound_file.convert(output_path, AudioFormat.MP3)
|
478
486
|
|
479
487
|
# Verify error is logged and None is returned
|
480
488
|
mock_logger.error.assert_called_once()
|
@@ -642,7 +650,10 @@ class TestSoundFileIntegration:
|
|
642
650
|
sped = stripped.speed(sped_path, 1.5)
|
643
651
|
assert sped is not None, "Speed operation failed"
|
644
652
|
|
645
|
-
|
653
|
+
# Import AudioFormat
|
654
|
+
from speech_prep.formats import AudioFormat
|
655
|
+
|
656
|
+
converted = sped.convert(converted_path, AudioFormat.MP3, audio_bitrate="192k")
|
646
657
|
assert converted is not None, "Convert operation failed"
|
647
658
|
|
648
659
|
# Verify the final file exists and has expected properties
|