PyPI - speech-prep - Versions diffs - 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl - Mend

speech-prep 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

speech_prep/__init__.py +2 -0
speech_prep/core.py +7 -2
speech_prep/formats.py +15 -0
speech_prep/processing.py +30 -7
speech_prep/utils.py +12 -3
{speech_prep-0.1.3.dist-info → speech_prep-0.1.4.dist-info}/METADATA +30 -12
speech_prep-0.1.4.dist-info/RECORD +11 -0
speech_prep-0.1.3.dist-info/RECORD +0 -10
{speech_prep-0.1.3.dist-info → speech_prep-0.1.4.dist-info}/WHEEL +0 -0
{speech_prep-0.1.3.dist-info → speech_prep-0.1.4.dist-info}/licenses/LICENSE +0 -0

speech_prep/__init__.py CHANGED Viewed

@@ -13,6 +13,7 @@ from .exceptions import (
     SilenceDetectionError,
     SpeechPrepError,
 )
+from .formats import AudioFormat
 # Import version from hatch-vcs
 try:
@@ -25,6 +26,7 @@ except ImportError:
 __all__ = [
     "SoundFile",
+    "AudioFormat",
     "SpeechPrepError",
     "FFmpegError",
     "FileValidationError",

speech_prep/core.py CHANGED Viewed

@@ -6,6 +6,7 @@ from typing import Optional
 from .detection import calculate_median_silence, detect_silence
 from .exceptions import SpeechPrepError
+from .formats import AudioFormat
 from .processing import adjust_speed, convert_format, strip_silence
 from .utils import format_time, get_audio_properties
@@ -158,20 +159,24 @@ class SoundFile:
             return None
     def convert(
-        self, output_path: Path, audio_bitrate: Optional[str] = None
+        self,
+        output_path: Path,
+        target_format: AudioFormat,
+        audio_bitrate: Optional[str] = None,
     ) -> Optional["SoundFile"]:
         """
         Convert the audio file to a different format.
         Args:
             output_path: Path to save the converted file
+            target_format: Target audio format
             audio_bitrate: Optional bitrate for the output file (e.g., '192k', '320k')
         Returns:
             A new SoundFile instance for the converted file, or None if operation failed
         """
         try:
-            convert_format(self.path, output_path, audio_bitrate)
+            convert_format(self.path, output_path, target_format, audio_bitrate)
             return SoundFile(
                 output_path, self.noise_threshold_db, self.min_silence_duration
             )

speech_prep/formats.py ADDED Viewed

@@ -0,0 +1,15 @@
+"""Enums for audio file formats."""
+from enum import Enum
+class AudioFormat(Enum):
+    """Enum representing supported audio formats."""
+    MP3 = "mp3"
+    WAV = "wav"
+    FLAC = "flac"
+    AAC = "aac"
+    OGG = "ogg"
+    M4A = "m4a"
+    UNKNOWN = "unknown"

speech_prep/processing.py CHANGED Viewed

@@ -5,6 +5,7 @@ import subprocess
 from typing import Optional
 from .exceptions import FFmpegError
+from .formats import AudioFormat
 def strip_silence(
@@ -67,7 +68,10 @@ def strip_silence(
 def convert_format(
-    input_path: Path, output_path: Path, audio_bitrate: Optional[str] = None
+    input_path: Path,
+    output_path: Path,
+    target_format: AudioFormat,
+    audio_bitrate: Optional[str] = None,
 ) -> None:
     """
     Convert the audio file to a different format.
@@ -75,6 +79,7 @@ def convert_format(
     Args:
         input_path: Path to the input audio file
         output_path: Path to save the converted file
+        target_format: Target audio format
         audio_bitrate: Optional bitrate for the output file (e.g., '192k', '320k')
     Raises:
@@ -90,9 +95,21 @@ def convert_format(
     # Add output file
     cmd.append(str(output_path))
-    input_format = input_path.suffix.lower().lstrip(".")
-    output_format = output_path.suffix.lower().lstrip(".")
-    print(f"Converting {input_path.name} from {input_format} to {output_format}")
+    # Determine the input format from the file extension
+    input_format = AudioFormat.UNKNOWN
+    try:
+        ext = input_path.suffix.lower().lstrip(".")
+        input_format = AudioFormat(ext)
+    except ValueError:
+        pass  # Keep as UNKNOWN if not found
+    # Use the provided target_format
+    output_format = target_format
+    print(
+        f"Converting {input_path.name} from "
+        f"{input_format.value} to {output_format.value}"
+    )
     _run_ffmpeg_command(cmd, "converting format")
@@ -136,10 +153,16 @@ def adjust_speed(input_path: Path, output_path: Path, speed_factor: float) -> No
     filter_str = ",".join(atempo_filters) if atempo_filters else "atempo=1.0"
     # Determine appropriate codec based on output format
-    output_format = output_path.suffix.lower()
-    if output_format == ".mp3":
+    output_format = AudioFormat.UNKNOWN
+    try:
+        ext = output_path.suffix.lower().lstrip(".")
+        output_format = AudioFormat(ext)
+    except ValueError:
+        pass  # Keep as UNKNOWN
+    if output_format == AudioFormat.MP3:
         codec = "libmp3lame"
-    elif output_format == ".wav":
+    elif output_format == AudioFormat.WAV:
         codec = "pcm_s16le"
     else:
         codec = "libmp3lame"  # Default to mp3 codec

speech_prep/utils.py CHANGED Viewed

@@ -5,6 +5,7 @@ from pathlib import Path
 import subprocess
 from .exceptions import AudioPropertiesError, FileValidationError
+from .formats import AudioFormat
 def validate_file(file_path: Path) -> bool:
@@ -29,7 +30,7 @@ def validate_file(file_path: Path) -> bool:
     return True
-def get_audio_properties(file_path: Path) -> tuple[float, int, str]:
+def get_audio_properties(file_path: Path) -> tuple[float, int, AudioFormat]:
     """
     Extract audio properties (duration, file size, format) using ffprobe.
@@ -37,7 +38,8 @@ def get_audio_properties(file_path: Path) -> tuple[float, int, str]:
         file_path: Path to the audio file
     Returns:
-        Tuple of (duration, file_size, audio_format)
+        Tuple of (duration, file_size, audio_format) where audio_format
+        is an AudioFormat enum representing the detected audio format
     Raises:
         AudioPropertiesError: If properties cannot be extracted
@@ -71,10 +73,17 @@ def get_audio_properties(file_path: Path) -> tuple[float, int, str]:
         probe_data = json.loads(probe_result.stdout)["format"]
         duration = float(probe_data["duration"])
         file_size = int(probe_data["size"])
-        audio_format = probe_data["format_name"].split(",")[
+        format_str = probe_data["format_name"].split(",")[
             0
         ]  # Get the first format name
+        # Convert format string to enum
+        try:
+            audio_format = AudioFormat(format_str.lower())
+        except ValueError:
+            # If not a direct match, use UNKNOWN
+            audio_format = AudioFormat.UNKNOWN
         if duration <= 0 or file_size <= 0:
             raise AudioPropertiesError(
                 f"Invalid duration or file size for {file_path}. "

{speech_prep-0.1.3.dist-info → speech_prep-0.1.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: speech-prep
-Version: 0.1.3
+Version: 0.1.4
 Summary: Audio preprocessing toolkit for speech-to-text applications using ffmpeg
 Project-URL: Homepage, https://github.com/dimdasci/speech-prep
 Project-URL: Repository, https://github.com/dimdasci/speech-prep
@@ -60,21 +60,19 @@ uv sync  # or pip install -e .
 ## Quick Start
 ```python
-from speech_prep import SoundFile
+from speech_prep import SoundFile, AudioFormat
 from pathlib import Path
 # Load an audio file
 audio = SoundFile(Path("recording.wav"))
 if audio:
-    print(f"Duration: {audio.duration:.2f} seconds")
-    print(f"Format: {audio.format}")
-    print(f"Silence periods detected: {len(audio.silence_periods)}")
+    print(audio)  # Shows duration, format, file size, and silence periods
     # Clean up the audio for speech-to-text
     cleaned = audio.strip(output_path=Path("recording_stripped.wav"))
     faster = cleaned.speed(output_path=Path("recording_stripped_fast.wav"), speed_factor=1.2)
-    final = faster.convert(output_path=Path("clean.mp3"))
+    final = faster.convert(output_path=Path("clean.mp3"), target_format=AudioFormat.MP3)
     print(f"Processed file saved: {final.path}")
 ```
@@ -84,7 +82,7 @@ if audio:
 ### Basic Operations
 ```python
-from speech_prep import SoundFile
+from speech_prep import SoundFile, AudioFormat
 from pathlib import Path
 # Load audio file
@@ -103,17 +101,18 @@ cleaned = audio.strip(output_path=Path("interview_leading.wav"), trailing=False)
 faster = audio.speed(output_path=Path("interview_fast.wav"), speed_factor=1.5)
 # Convert format
-mp3_file = audio.convert(output_path=Path("output.mp3"))
+mp3_file = audio.convert(output_path=Path("output.mp3"), target_format=AudioFormat.MP3)
 ```
 ### Processing Pipeline
 ```python
-from speech_prep import SoundFile
+from speech_prep import AudioFormat, SoundFile
 from pathlib import Path
 def prepare_for_transcription(input_file: Path, output_file: Path):
     """Prepare audio file for speech-to-text processing."""
     # Load the original file
     audio = SoundFile(input_file)
     if not audio:
@@ -121,7 +120,7 @@ def prepare_for_transcription(input_file: Path, output_file: Path):
     # Processing pipeline
     stripped = audio.strip(output_path=input_file.with_stem(input_file.stem + "_stripped"))
     faster = stripped.speed(output_path=input_file.with_stem(input_file.stem + "_stripped_fast"), speed_factor=1.1)
-    processed = faster.convert(output_path=output_file)
+    processed = faster.convert(output_path=output_file, target_format=AudioFormat.MP3)
     if processed:
         print(f"Original duration: {audio.duration:.2f}s")
         print(f"Processed duration: {processed.duration:.2f}s")
@@ -175,8 +174,10 @@ audio = SoundFile(
 cleaned = audio.strip(output_path=Path("custom_output.wav"))
 # Custom conversion settings
+from speech_prep import AudioFormat
 mp3 = audio.convert(
     output_path=Path("output.mp3"),
+    target_format=AudioFormat.MP3,
     audio_bitrate="192k"  # Custom bitrate
 )
 ```
@@ -193,16 +194,33 @@ SoundFile(file_path, noise_threshold_db=-30, min_silence_duration=0.5)
 #### Methods
 - **`strip(output_path, leading=True, trailing=True)`**: Remove silence
 - **`speed(output_path, speed_factor)`**: Adjust playback speed
-- **`convert(output_path, audio_bitrate=None)`**: Convert format
+- **`convert(output_path, target_format, audio_bitrate=None)`**: Convert format
 #### Properties
 - **`path`**: Path to the audio file
 - **`duration`**: Duration in seconds
-- **`format`**: Audio format
+- **`format`**: Audio format (AudioFormat enum)
 - **`file_size`**: File size in bytes
 - **`silence_periods`**: List of detected silence periods
 - **`median_silence`**: Median silence duration
+### AudioFormat Enum
+The `AudioFormat` enum represents supported audio formats:
+```python
+from speech_prep import AudioFormat
+# Available formats
+AudioFormat.MP3   # MP3 format
+AudioFormat.WAV   # WAV format
+AudioFormat.FLAC  # FLAC format
+AudioFormat.AAC   # AAC format
+AudioFormat.OGG   # OGG format
+AudioFormat.M4A   # M4A format
+AudioFormat.UNKNOWN  # Unknown/unsupported format
+```
 ## Contributing
 1. Fork the repository

speech_prep-0.1.4.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,11 @@
+speech_prep/__init__.py,sha256=BWVsOFBywQYAiykMB3XJX6JQww155M6R8NLxNCn3Z10,891
+speech_prep/core.py,sha256=GCxmKlf_ovEiRRzM8vr3ucPjb1pWHrL-MzkfWAKtzgg,7715
+speech_prep/detection.py,sha256=D5_WkTYoFDUIYA2u6cfWK6E_Rd5R6g1Lng0Hh1UGgBs,3495
+speech_prep/exceptions.py,sha256=qZcIzM-IPltgJNtfmj5K4D8OJsL1zButmLnshas9m4M,1091
+speech_prep/formats.py,sha256=fYeOMpMOrl3LX62L32xoAo2qYgxl43UYbywX_4j2nbw,262
+speech_prep/processing.py,sha256=wFZEVt2nB4PSiRQu3thVBQnODe8DSdXVogo9b09L9q4,6231
+speech_prep/utils.py,sha256=vz5OWIHvICTa2sz3__rDFxLeDXi4j8B5hvT5vdFblMM,3949
+speech_prep-0.1.4.dist-info/METADATA,sha256=f1UUmZgGnH1TyFreBxi5XMGlSSMwQve9hzz4rAi13mY,7161
+speech_prep-0.1.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+speech_prep-0.1.4.dist-info/licenses/LICENSE,sha256=-M8NcLlGaRvQqThXHq5g0D9CUR05KMhdswCB9s_0Sds,1066
+speech_prep-0.1.4.dist-info/RECORD,,

speech_prep-0.1.3.dist-info/RECORD DELETED Viewed

@@ -1,10 +0,0 @@
-speech_prep/__init__.py,sha256=0Eu8vjSjvG3sOQbN9dsjtQkKcVBPcLthK4Eit0UrtAQ,839
-speech_prep/core.py,sha256=pe4djUP1wQF4TJiaw1lg7xIvBzVHOMWP7dHgar3unt4,7567
-speech_prep/detection.py,sha256=D5_WkTYoFDUIYA2u6cfWK6E_Rd5R6g1Lng0Hh1UGgBs,3495
-speech_prep/exceptions.py,sha256=qZcIzM-IPltgJNtfmj5K4D8OJsL1zButmLnshas9m4M,1091
-speech_prep/processing.py,sha256=421IqfAcRUqMtXBsiTypSp_4H0X3uh5UjQ8Af-nPaX0,5684
-speech_prep/utils.py,sha256=_yjn1hoVVHfLc3nGAhD2n6bsevgweqNOt1rsDyahQnY,3585
-speech_prep-0.1.3.dist-info/METADATA,sha256=8wP2R43DbY7JH9S8r1_DJlWKPsYMgi9CIIl8HpZMLsI,6616
-speech_prep-0.1.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-speech_prep-0.1.3.dist-info/licenses/LICENSE,sha256=-M8NcLlGaRvQqThXHq5g0D9CUR05KMhdswCB9s_0Sds,1066
-speech_prep-0.1.3.dist-info/RECORD,,

{speech_prep-0.1.3.dist-info → speech_prep-0.1.4.dist-info}/WHEEL RENAMED Viewed

File without changes

{speech_prep-0.1.3.dist-info → speech_prep-0.1.4.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

speech-prep 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

speech-prep 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl