speech-prep 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
speech_prep/__init__.py CHANGED
@@ -13,6 +13,7 @@ from .exceptions import (
13
13
  SilenceDetectionError,
14
14
  SpeechPrepError,
15
15
  )
16
+ from .formats import AudioFormat
16
17
 
17
18
  # Import version from hatch-vcs
18
19
  try:
@@ -25,6 +26,7 @@ except ImportError:
25
26
 
26
27
  __all__ = [
27
28
  "SoundFile",
29
+ "AudioFormat",
28
30
  "SpeechPrepError",
29
31
  "FFmpegError",
30
32
  "FileValidationError",
speech_prep/core.py CHANGED
@@ -6,6 +6,7 @@ from typing import Optional
6
6
 
7
7
  from .detection import calculate_median_silence, detect_silence
8
8
  from .exceptions import SpeechPrepError
9
+ from .formats import AudioFormat
9
10
  from .processing import adjust_speed, convert_format, strip_silence
10
11
  from .utils import format_time, get_audio_properties
11
12
 
@@ -158,20 +159,24 @@ class SoundFile:
158
159
  return None
159
160
 
160
161
  def convert(
161
- self, output_path: Path, audio_bitrate: Optional[str] = None
162
+ self,
163
+ output_path: Path,
164
+ target_format: AudioFormat,
165
+ audio_bitrate: Optional[str] = None,
162
166
  ) -> Optional["SoundFile"]:
163
167
  """
164
168
  Convert the audio file to a different format.
165
169
 
166
170
  Args:
167
171
  output_path: Path to save the converted file
172
+ target_format: Target audio format
168
173
  audio_bitrate: Optional bitrate for the output file (e.g., '192k', '320k')
169
174
 
170
175
  Returns:
171
176
  A new SoundFile instance for the converted file, or None if operation failed
172
177
  """
173
178
  try:
174
- convert_format(self.path, output_path, audio_bitrate)
179
+ convert_format(self.path, output_path, target_format, audio_bitrate)
175
180
  return SoundFile(
176
181
  output_path, self.noise_threshold_db, self.min_silence_duration
177
182
  )
speech_prep/formats.py ADDED
@@ -0,0 +1,15 @@
1
+ """Enums for audio file formats."""
2
+
3
+ from enum import Enum
4
+
5
+
6
+ class AudioFormat(Enum):
7
+ """Enum representing supported audio formats."""
8
+
9
+ MP3 = "mp3"
10
+ WAV = "wav"
11
+ FLAC = "flac"
12
+ AAC = "aac"
13
+ OGG = "ogg"
14
+ M4A = "m4a"
15
+ UNKNOWN = "unknown"
speech_prep/processing.py CHANGED
@@ -5,6 +5,7 @@ import subprocess
5
5
  from typing import Optional
6
6
 
7
7
  from .exceptions import FFmpegError
8
+ from .formats import AudioFormat
8
9
 
9
10
 
10
11
  def strip_silence(
@@ -67,7 +68,10 @@ def strip_silence(
67
68
 
68
69
 
69
70
  def convert_format(
70
- input_path: Path, output_path: Path, audio_bitrate: Optional[str] = None
71
+ input_path: Path,
72
+ output_path: Path,
73
+ target_format: AudioFormat,
74
+ audio_bitrate: Optional[str] = None,
71
75
  ) -> None:
72
76
  """
73
77
  Convert the audio file to a different format.
@@ -75,6 +79,7 @@ def convert_format(
75
79
  Args:
76
80
  input_path: Path to the input audio file
77
81
  output_path: Path to save the converted file
82
+ target_format: Target audio format
78
83
  audio_bitrate: Optional bitrate for the output file (e.g., '192k', '320k')
79
84
 
80
85
  Raises:
@@ -90,9 +95,21 @@ def convert_format(
90
95
  # Add output file
91
96
  cmd.append(str(output_path))
92
97
 
93
- input_format = input_path.suffix.lower().lstrip(".")
94
- output_format = output_path.suffix.lower().lstrip(".")
95
- print(f"Converting {input_path.name} from {input_format} to {output_format}")
98
+ # Determine the input format from the file extension
99
+ input_format = AudioFormat.UNKNOWN
100
+ try:
101
+ ext = input_path.suffix.lower().lstrip(".")
102
+ input_format = AudioFormat(ext)
103
+ except ValueError:
104
+ pass # Keep as UNKNOWN if not found
105
+
106
+ # Use the provided target_format
107
+ output_format = target_format
108
+
109
+ print(
110
+ f"Converting {input_path.name} from "
111
+ f"{input_format.value} to {output_format.value}"
112
+ )
96
113
 
97
114
  _run_ffmpeg_command(cmd, "converting format")
98
115
 
@@ -136,10 +153,16 @@ def adjust_speed(input_path: Path, output_path: Path, speed_factor: float) -> No
136
153
  filter_str = ",".join(atempo_filters) if atempo_filters else "atempo=1.0"
137
154
 
138
155
  # Determine appropriate codec based on output format
139
- output_format = output_path.suffix.lower()
140
- if output_format == ".mp3":
156
+ output_format = AudioFormat.UNKNOWN
157
+ try:
158
+ ext = output_path.suffix.lower().lstrip(".")
159
+ output_format = AudioFormat(ext)
160
+ except ValueError:
161
+ pass # Keep as UNKNOWN
162
+
163
+ if output_format == AudioFormat.MP3:
141
164
  codec = "libmp3lame"
142
- elif output_format == ".wav":
165
+ elif output_format == AudioFormat.WAV:
143
166
  codec = "pcm_s16le"
144
167
  else:
145
168
  codec = "libmp3lame" # Default to mp3 codec
speech_prep/utils.py CHANGED
@@ -5,6 +5,7 @@ from pathlib import Path
5
5
  import subprocess
6
6
 
7
7
  from .exceptions import AudioPropertiesError, FileValidationError
8
+ from .formats import AudioFormat
8
9
 
9
10
 
10
11
  def validate_file(file_path: Path) -> bool:
@@ -29,7 +30,7 @@ def validate_file(file_path: Path) -> bool:
29
30
  return True
30
31
 
31
32
 
32
- def get_audio_properties(file_path: Path) -> tuple[float, int, str]:
33
+ def get_audio_properties(file_path: Path) -> tuple[float, int, AudioFormat]:
33
34
  """
34
35
  Extract audio properties (duration, file size, format) using ffprobe.
35
36
 
@@ -37,7 +38,8 @@ def get_audio_properties(file_path: Path) -> tuple[float, int, str]:
37
38
  file_path: Path to the audio file
38
39
 
39
40
  Returns:
40
- Tuple of (duration, file_size, audio_format)
41
+ Tuple of (duration, file_size, audio_format) where audio_format
42
+ is an AudioFormat enum representing the detected audio format
41
43
 
42
44
  Raises:
43
45
  AudioPropertiesError: If properties cannot be extracted
@@ -71,10 +73,17 @@ def get_audio_properties(file_path: Path) -> tuple[float, int, str]:
71
73
  probe_data = json.loads(probe_result.stdout)["format"]
72
74
  duration = float(probe_data["duration"])
73
75
  file_size = int(probe_data["size"])
74
- audio_format = probe_data["format_name"].split(",")[
76
+ format_str = probe_data["format_name"].split(",")[
75
77
  0
76
78
  ] # Get the first format name
77
79
 
80
+ # Convert format string to enum
81
+ try:
82
+ audio_format = AudioFormat(format_str.lower())
83
+ except ValueError:
84
+ # If not a direct match, use UNKNOWN
85
+ audio_format = AudioFormat.UNKNOWN
86
+
78
87
  if duration <= 0 or file_size <= 0:
79
88
  raise AudioPropertiesError(
80
89
  f"Invalid duration or file size for {file_path}. "
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: speech-prep
3
- Version: 0.1.3
3
+ Version: 0.1.4
4
4
  Summary: Audio preprocessing toolkit for speech-to-text applications using ffmpeg
5
5
  Project-URL: Homepage, https://github.com/dimdasci/speech-prep
6
6
  Project-URL: Repository, https://github.com/dimdasci/speech-prep
@@ -60,21 +60,19 @@ uv sync # or pip install -e .
60
60
  ## Quick Start
61
61
 
62
62
  ```python
63
- from speech_prep import SoundFile
63
+ from speech_prep import SoundFile, AudioFormat
64
64
  from pathlib import Path
65
65
 
66
66
  # Load an audio file
67
67
  audio = SoundFile(Path("recording.wav"))
68
68
 
69
69
  if audio:
70
- print(f"Duration: {audio.duration:.2f} seconds")
71
- print(f"Format: {audio.format}")
72
- print(f"Silence periods detected: {len(audio.silence_periods)}")
70
+ print(audio) # Shows duration, format, file size, and silence periods
73
71
 
74
72
  # Clean up the audio for speech-to-text
75
73
  cleaned = audio.strip(output_path=Path("recording_stripped.wav"))
76
74
  faster = cleaned.speed(output_path=Path("recording_stripped_fast.wav"), speed_factor=1.2)
77
- final = faster.convert(output_path=Path("clean.mp3"))
75
+ final = faster.convert(output_path=Path("clean.mp3"), target_format=AudioFormat.MP3)
78
76
 
79
77
  print(f"Processed file saved: {final.path}")
80
78
  ```
@@ -84,7 +82,7 @@ if audio:
84
82
  ### Basic Operations
85
83
 
86
84
  ```python
87
- from speech_prep import SoundFile
85
+ from speech_prep import SoundFile, AudioFormat
88
86
  from pathlib import Path
89
87
 
90
88
  # Load audio file
@@ -103,17 +101,18 @@ cleaned = audio.strip(output_path=Path("interview_leading.wav"), trailing=False)
103
101
  faster = audio.speed(output_path=Path("interview_fast.wav"), speed_factor=1.5)
104
102
 
105
103
  # Convert format
106
- mp3_file = audio.convert(output_path=Path("output.mp3"))
104
+ mp3_file = audio.convert(output_path=Path("output.mp3"), target_format=AudioFormat.MP3)
107
105
  ```
108
106
 
109
107
  ### Processing Pipeline
110
108
 
111
109
  ```python
112
- from speech_prep import SoundFile
110
+ from speech_prep import AudioFormat, SoundFile
113
111
  from pathlib import Path
114
112
 
115
113
  def prepare_for_transcription(input_file: Path, output_file: Path):
116
114
  """Prepare audio file for speech-to-text processing."""
115
+
117
116
  # Load the original file
118
117
  audio = SoundFile(input_file)
119
118
  if not audio:
@@ -121,7 +120,7 @@ def prepare_for_transcription(input_file: Path, output_file: Path):
121
120
  # Processing pipeline
122
121
  stripped = audio.strip(output_path=input_file.with_stem(input_file.stem + "_stripped"))
123
122
  faster = stripped.speed(output_path=input_file.with_stem(input_file.stem + "_stripped_fast"), speed_factor=1.1)
124
- processed = faster.convert(output_path=output_file)
123
+ processed = faster.convert(output_path=output_file, target_format=AudioFormat.MP3)
125
124
  if processed:
126
125
  print(f"Original duration: {audio.duration:.2f}s")
127
126
  print(f"Processed duration: {processed.duration:.2f}s")
@@ -175,8 +174,10 @@ audio = SoundFile(
175
174
  cleaned = audio.strip(output_path=Path("custom_output.wav"))
176
175
 
177
176
  # Custom conversion settings
177
+ from speech_prep import AudioFormat
178
178
  mp3 = audio.convert(
179
179
  output_path=Path("output.mp3"),
180
+ target_format=AudioFormat.MP3,
180
181
  audio_bitrate="192k" # Custom bitrate
181
182
  )
182
183
  ```
@@ -193,16 +194,33 @@ SoundFile(file_path, noise_threshold_db=-30, min_silence_duration=0.5)
193
194
  #### Methods
194
195
  - **`strip(output_path, leading=True, trailing=True)`**: Remove silence
195
196
  - **`speed(output_path, speed_factor)`**: Adjust playback speed
196
- - **`convert(output_path, audio_bitrate=None)`**: Convert format
197
+ - **`convert(output_path, target_format, audio_bitrate=None)`**: Convert format
197
198
 
198
199
  #### Properties
199
200
  - **`path`**: Path to the audio file
200
201
  - **`duration`**: Duration in seconds
201
- - **`format`**: Audio format
202
+ - **`format`**: Audio format (AudioFormat enum)
202
203
  - **`file_size`**: File size in bytes
203
204
  - **`silence_periods`**: List of detected silence periods
204
205
  - **`median_silence`**: Median silence duration
205
206
 
207
+ ### AudioFormat Enum
208
+
209
+ The `AudioFormat` enum represents supported audio formats:
210
+
211
+ ```python
212
+ from speech_prep import AudioFormat
213
+
214
+ # Available formats
215
+ AudioFormat.MP3 # MP3 format
216
+ AudioFormat.WAV # WAV format
217
+ AudioFormat.FLAC # FLAC format
218
+ AudioFormat.AAC # AAC format
219
+ AudioFormat.OGG # OGG format
220
+ AudioFormat.M4A # M4A format
221
+ AudioFormat.UNKNOWN # Unknown/unsupported format
222
+ ```
223
+
206
224
  ## Contributing
207
225
 
208
226
  1. Fork the repository
@@ -0,0 +1,11 @@
1
+ speech_prep/__init__.py,sha256=BWVsOFBywQYAiykMB3XJX6JQww155M6R8NLxNCn3Z10,891
2
+ speech_prep/core.py,sha256=GCxmKlf_ovEiRRzM8vr3ucPjb1pWHrL-MzkfWAKtzgg,7715
3
+ speech_prep/detection.py,sha256=D5_WkTYoFDUIYA2u6cfWK6E_Rd5R6g1Lng0Hh1UGgBs,3495
4
+ speech_prep/exceptions.py,sha256=qZcIzM-IPltgJNtfmj5K4D8OJsL1zButmLnshas9m4M,1091
5
+ speech_prep/formats.py,sha256=fYeOMpMOrl3LX62L32xoAo2qYgxl43UYbywX_4j2nbw,262
6
+ speech_prep/processing.py,sha256=wFZEVt2nB4PSiRQu3thVBQnODe8DSdXVogo9b09L9q4,6231
7
+ speech_prep/utils.py,sha256=vz5OWIHvICTa2sz3__rDFxLeDXi4j8B5hvT5vdFblMM,3949
8
+ speech_prep-0.1.4.dist-info/METADATA,sha256=f1UUmZgGnH1TyFreBxi5XMGlSSMwQve9hzz4rAi13mY,7161
9
+ speech_prep-0.1.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
10
+ speech_prep-0.1.4.dist-info/licenses/LICENSE,sha256=-M8NcLlGaRvQqThXHq5g0D9CUR05KMhdswCB9s_0Sds,1066
11
+ speech_prep-0.1.4.dist-info/RECORD,,
@@ -1,10 +0,0 @@
1
- speech_prep/__init__.py,sha256=0Eu8vjSjvG3sOQbN9dsjtQkKcVBPcLthK4Eit0UrtAQ,839
2
- speech_prep/core.py,sha256=pe4djUP1wQF4TJiaw1lg7xIvBzVHOMWP7dHgar3unt4,7567
3
- speech_prep/detection.py,sha256=D5_WkTYoFDUIYA2u6cfWK6E_Rd5R6g1Lng0Hh1UGgBs,3495
4
- speech_prep/exceptions.py,sha256=qZcIzM-IPltgJNtfmj5K4D8OJsL1zButmLnshas9m4M,1091
5
- speech_prep/processing.py,sha256=421IqfAcRUqMtXBsiTypSp_4H0X3uh5UjQ8Af-nPaX0,5684
6
- speech_prep/utils.py,sha256=_yjn1hoVVHfLc3nGAhD2n6bsevgweqNOt1rsDyahQnY,3585
7
- speech_prep-0.1.3.dist-info/METADATA,sha256=8wP2R43DbY7JH9S8r1_DJlWKPsYMgi9CIIl8HpZMLsI,6616
8
- speech_prep-0.1.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
9
- speech_prep-0.1.3.dist-info/licenses/LICENSE,sha256=-M8NcLlGaRvQqThXHq5g0D9CUR05KMhdswCB9s_0Sds,1066
10
- speech_prep-0.1.3.dist-info/RECORD,,