PyPI - speech-prep - Versions diffs - 0.1.3__tar.gz → 0.1.4__tar.gz - Mend

speech-prep 0.1.3__tar.gz → 0.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

speech_prep-0.1.4/.github/workflows/cd.yml ADDED Viewed

@@ -0,0 +1,53 @@
+name: CD
+on:
+  # Deploy to TestPyPI after CI passes on main branch
+  workflow_run:
+    workflows: ["CI"]
+    types: [completed]
+    branches: [main]
+  # Deploy to PyPI on version tags
+  push:
+    tags:
+      - 'v*'
+jobs:
+  publish:
+    runs-on: ubuntu-latest
+    # Only run if CI workflow succeeded (for main branch) or on tags
+    if: |
+      (github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.head_branch == 'main') ||
+      startsWith(github.ref, 'refs/tags/v')
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0  # Needed for hatch-vcs to work properly
+    - name: Install uv
+      uses: astral-sh/setup-uv@v4
+      with:
+        version: "latest"
+    - name: Set up Python
+      run: uv python install 3.9
+    - name: Install dependencies
+      run: uv sync --group dev
+    - name: Build package
+      run: uv build
+    - name: Publish to TestPyPI (on main branch)
+      if: github.ref == 'refs/heads/main'
+      env:
+        TWINE_USERNAME: __token__
+        TWINE_PASSWORD: ${{ secrets.TEST_PYPI_API_TOKEN }}
+      run: uv run twine upload --repository testpypi --skip-existing --verbose dist/*
+    - name: Publish to PyPI (on tags)
+      if: startsWith(github.ref, 'refs/tags/v')
+      env:
+        TWINE_USERNAME: __token__
+        TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
+      run: uv run twine upload dist/*

{speech_prep-0.1.3 → speech_prep-0.1.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: speech-prep
-Version: 0.1.3
+Version: 0.1.4
 Summary: Audio preprocessing toolkit for speech-to-text applications using ffmpeg
 Project-URL: Homepage, https://github.com/dimdasci/speech-prep
 Project-URL: Repository, https://github.com/dimdasci/speech-prep
@@ -60,21 +60,19 @@ uv sync  # or pip install -e .
 ## Quick Start
 ```python
-from speech_prep import SoundFile
+from speech_prep import SoundFile, AudioFormat
 from pathlib import Path
 # Load an audio file
 audio = SoundFile(Path("recording.wav"))
 if audio:
-    print(f"Duration: {audio.duration:.2f} seconds")
-    print(f"Format: {audio.format}")
-    print(f"Silence periods detected: {len(audio.silence_periods)}")
+    print(audio)  # Shows duration, format, file size, and silence periods
     # Clean up the audio for speech-to-text
     cleaned = audio.strip(output_path=Path("recording_stripped.wav"))
     faster = cleaned.speed(output_path=Path("recording_stripped_fast.wav"), speed_factor=1.2)
-    final = faster.convert(output_path=Path("clean.mp3"))
+    final = faster.convert(output_path=Path("clean.mp3"), target_format=AudioFormat.MP3)
     print(f"Processed file saved: {final.path}")
 ```
@@ -84,7 +82,7 @@ if audio:
 ### Basic Operations
 ```python
-from speech_prep import SoundFile
+from speech_prep import SoundFile, AudioFormat
 from pathlib import Path
 # Load audio file
@@ -103,17 +101,18 @@ cleaned = audio.strip(output_path=Path("interview_leading.wav"), trailing=False)
 faster = audio.speed(output_path=Path("interview_fast.wav"), speed_factor=1.5)
 # Convert format
-mp3_file = audio.convert(output_path=Path("output.mp3"))
+mp3_file = audio.convert(output_path=Path("output.mp3"), target_format=AudioFormat.MP3)
 ```
 ### Processing Pipeline
 ```python
-from speech_prep import SoundFile
+from speech_prep import AudioFormat, SoundFile
 from pathlib import Path
 def prepare_for_transcription(input_file: Path, output_file: Path):
     """Prepare audio file for speech-to-text processing."""
     # Load the original file
     audio = SoundFile(input_file)
     if not audio:
@@ -121,7 +120,7 @@ def prepare_for_transcription(input_file: Path, output_file: Path):
     # Processing pipeline
     stripped = audio.strip(output_path=input_file.with_stem(input_file.stem + "_stripped"))
     faster = stripped.speed(output_path=input_file.with_stem(input_file.stem + "_stripped_fast"), speed_factor=1.1)
-    processed = faster.convert(output_path=output_file)
+    processed = faster.convert(output_path=output_file, target_format=AudioFormat.MP3)
     if processed:
         print(f"Original duration: {audio.duration:.2f}s")
         print(f"Processed duration: {processed.duration:.2f}s")
@@ -175,8 +174,10 @@ audio = SoundFile(
 cleaned = audio.strip(output_path=Path("custom_output.wav"))
 # Custom conversion settings
+from speech_prep import AudioFormat
 mp3 = audio.convert(
     output_path=Path("output.mp3"),
+    target_format=AudioFormat.MP3,
     audio_bitrate="192k"  # Custom bitrate
 )
 ```
@@ -193,16 +194,33 @@ SoundFile(file_path, noise_threshold_db=-30, min_silence_duration=0.5)
 #### Methods
 - **`strip(output_path, leading=True, trailing=True)`**: Remove silence
 - **`speed(output_path, speed_factor)`**: Adjust playback speed
-- **`convert(output_path, audio_bitrate=None)`**: Convert format
+- **`convert(output_path, target_format, audio_bitrate=None)`**: Convert format
 #### Properties
 - **`path`**: Path to the audio file
 - **`duration`**: Duration in seconds
-- **`format`**: Audio format
+- **`format`**: Audio format (AudioFormat enum)
 - **`file_size`**: File size in bytes
 - **`silence_periods`**: List of detected silence periods
 - **`median_silence`**: Median silence duration
+### AudioFormat Enum
+The `AudioFormat` enum represents supported audio formats:
+```python
+from speech_prep import AudioFormat
+# Available formats
+AudioFormat.MP3   # MP3 format
+AudioFormat.WAV   # WAV format
+AudioFormat.FLAC  # FLAC format
+AudioFormat.AAC   # AAC format
+AudioFormat.OGG   # OGG format
+AudioFormat.M4A   # M4A format
+AudioFormat.UNKNOWN  # Unknown/unsupported format
+```
 ## Contributing
 1. Fork the repository

{speech_prep-0.1.3 → speech_prep-0.1.4}/README.md RENAMED Viewed

@@ -35,21 +35,19 @@ uv sync  # or pip install -e .
 ## Quick Start
 ```python
-from speech_prep import SoundFile
+from speech_prep import SoundFile, AudioFormat
 from pathlib import Path
 # Load an audio file
 audio = SoundFile(Path("recording.wav"))
 if audio:
-    print(f"Duration: {audio.duration:.2f} seconds")
-    print(f"Format: {audio.format}")
-    print(f"Silence periods detected: {len(audio.silence_periods)}")
+    print(audio)  # Shows duration, format, file size, and silence periods
     # Clean up the audio for speech-to-text
     cleaned = audio.strip(output_path=Path("recording_stripped.wav"))
     faster = cleaned.speed(output_path=Path("recording_stripped_fast.wav"), speed_factor=1.2)
-    final = faster.convert(output_path=Path("clean.mp3"))
+    final = faster.convert(output_path=Path("clean.mp3"), target_format=AudioFormat.MP3)
     print(f"Processed file saved: {final.path}")
 ```
@@ -59,7 +57,7 @@ if audio:
 ### Basic Operations
 ```python
-from speech_prep import SoundFile
+from speech_prep import SoundFile, AudioFormat
 from pathlib import Path
 # Load audio file
@@ -78,17 +76,18 @@ cleaned = audio.strip(output_path=Path("interview_leading.wav"), trailing=False)
 faster = audio.speed(output_path=Path("interview_fast.wav"), speed_factor=1.5)
 # Convert format
-mp3_file = audio.convert(output_path=Path("output.mp3"))
+mp3_file = audio.convert(output_path=Path("output.mp3"), target_format=AudioFormat.MP3)
 ```
 ### Processing Pipeline
 ```python
-from speech_prep import SoundFile
+from speech_prep import AudioFormat, SoundFile
 from pathlib import Path
 def prepare_for_transcription(input_file: Path, output_file: Path):
     """Prepare audio file for speech-to-text processing."""
     # Load the original file
     audio = SoundFile(input_file)
     if not audio:
@@ -96,7 +95,7 @@ def prepare_for_transcription(input_file: Path, output_file: Path):
     # Processing pipeline
     stripped = audio.strip(output_path=input_file.with_stem(input_file.stem + "_stripped"))
     faster = stripped.speed(output_path=input_file.with_stem(input_file.stem + "_stripped_fast"), speed_factor=1.1)
-    processed = faster.convert(output_path=output_file)
+    processed = faster.convert(output_path=output_file, target_format=AudioFormat.MP3)
     if processed:
         print(f"Original duration: {audio.duration:.2f}s")
         print(f"Processed duration: {processed.duration:.2f}s")
@@ -150,8 +149,10 @@ audio = SoundFile(
 cleaned = audio.strip(output_path=Path("custom_output.wav"))
 # Custom conversion settings
+from speech_prep import AudioFormat
 mp3 = audio.convert(
     output_path=Path("output.mp3"),
+    target_format=AudioFormat.MP3,
     audio_bitrate="192k"  # Custom bitrate
 )
 ```
@@ -168,16 +169,33 @@ SoundFile(file_path, noise_threshold_db=-30, min_silence_duration=0.5)
 #### Methods
 - **`strip(output_path, leading=True, trailing=True)`**: Remove silence
 - **`speed(output_path, speed_factor)`**: Adjust playback speed
-- **`convert(output_path, audio_bitrate=None)`**: Convert format
+- **`convert(output_path, target_format, audio_bitrate=None)`**: Convert format
 #### Properties
 - **`path`**: Path to the audio file
 - **`duration`**: Duration in seconds
-- **`format`**: Audio format
+- **`format`**: Audio format (AudioFormat enum)
 - **`file_size`**: File size in bytes
 - **`silence_periods`**: List of detected silence periods
 - **`median_silence`**: Median silence duration
+### AudioFormat Enum
+The `AudioFormat` enum represents supported audio formats:
+```python
+from speech_prep import AudioFormat
+# Available formats
+AudioFormat.MP3   # MP3 format
+AudioFormat.WAV   # WAV format
+AudioFormat.FLAC  # FLAC format
+AudioFormat.AAC   # AAC format
+AudioFormat.OGG   # OGG format
+AudioFormat.M4A   # M4A format
+AudioFormat.UNKNOWN  # Unknown/unsupported format
+```
 ## Contributing
 1. Fork the repository

{speech_prep-0.1.3 → speech_prep-0.1.4}/pyproject.toml RENAMED Viewed

@@ -38,6 +38,7 @@ build-backend = "hatchling.build"
 [tool.hatch.version]
 source = "vcs"
+raw-options = { local_scheme = "no-local-version" }
 [tool.ruff]
 target-version = "py39"
@@ -111,4 +112,5 @@ dev = [
     "pydub>=0.25.1",
     "pre-commit>=4.2.0",
     "hatch-vcs>=0.5.0",
+    "twine>=6.1.0",
 ]

{speech_prep-0.1.3 → speech_prep-0.1.4}/src/speech_prep/__init__.py RENAMED Viewed

@@ -13,6 +13,7 @@ from .exceptions import (
     SilenceDetectionError,
     SpeechPrepError,
 )
+from .formats import AudioFormat
 # Import version from hatch-vcs
 try:
@@ -25,6 +26,7 @@ except ImportError:
 __all__ = [
     "SoundFile",
+    "AudioFormat",
     "SpeechPrepError",
     "FFmpegError",
     "FileValidationError",

{speech_prep-0.1.3 → speech_prep-0.1.4}/src/speech_prep/core.py RENAMED Viewed

@@ -6,6 +6,7 @@ from typing import Optional
 from .detection import calculate_median_silence, detect_silence
 from .exceptions import SpeechPrepError
+from .formats import AudioFormat
 from .processing import adjust_speed, convert_format, strip_silence
 from .utils import format_time, get_audio_properties
@@ -158,20 +159,24 @@ class SoundFile:
             return None
     def convert(
-        self, output_path: Path, audio_bitrate: Optional[str] = None
+        self,
+        output_path: Path,
+        target_format: AudioFormat,
+        audio_bitrate: Optional[str] = None,
     ) -> Optional["SoundFile"]:
         """
         Convert the audio file to a different format.
         Args:
             output_path: Path to save the converted file
+            target_format: Target audio format
             audio_bitrate: Optional bitrate for the output file (e.g., '192k', '320k')
         Returns:
             A new SoundFile instance for the converted file, or None if operation failed
         """
         try:
-            convert_format(self.path, output_path, audio_bitrate)
+            convert_format(self.path, output_path, target_format, audio_bitrate)
             return SoundFile(
                 output_path, self.noise_threshold_db, self.min_silence_duration
             )

speech_prep-0.1.4/src/speech_prep/formats.py ADDED Viewed

@@ -0,0 +1,15 @@
+"""Enums for audio file formats."""
+from enum import Enum
+class AudioFormat(Enum):
+    """Enum representing supported audio formats."""
+    MP3 = "mp3"
+    WAV = "wav"
+    FLAC = "flac"
+    AAC = "aac"
+    OGG = "ogg"
+    M4A = "m4a"
+    UNKNOWN = "unknown"

{speech_prep-0.1.3 → speech_prep-0.1.4}/src/speech_prep/processing.py RENAMED Viewed

@@ -5,6 +5,7 @@ import subprocess
 from typing import Optional
 from .exceptions import FFmpegError
+from .formats import AudioFormat
 def strip_silence(
@@ -67,7 +68,10 @@ def strip_silence(
 def convert_format(
-    input_path: Path, output_path: Path, audio_bitrate: Optional[str] = None
+    input_path: Path,
+    output_path: Path,
+    target_format: AudioFormat,
+    audio_bitrate: Optional[str] = None,
 ) -> None:
     """
     Convert the audio file to a different format.
@@ -75,6 +79,7 @@ def convert_format(
     Args:
         input_path: Path to the input audio file
         output_path: Path to save the converted file
+        target_format: Target audio format
         audio_bitrate: Optional bitrate for the output file (e.g., '192k', '320k')
     Raises:
@@ -90,9 +95,21 @@ def convert_format(
     # Add output file
     cmd.append(str(output_path))
-    input_format = input_path.suffix.lower().lstrip(".")
-    output_format = output_path.suffix.lower().lstrip(".")
-    print(f"Converting {input_path.name} from {input_format} to {output_format}")
+    # Determine the input format from the file extension
+    input_format = AudioFormat.UNKNOWN
+    try:
+        ext = input_path.suffix.lower().lstrip(".")
+        input_format = AudioFormat(ext)
+    except ValueError:
+        pass  # Keep as UNKNOWN if not found
+    # Use the provided target_format
+    output_format = target_format
+    print(
+        f"Converting {input_path.name} from "
+        f"{input_format.value} to {output_format.value}"
+    )
     _run_ffmpeg_command(cmd, "converting format")
@@ -136,10 +153,16 @@ def adjust_speed(input_path: Path, output_path: Path, speed_factor: float) -> No
     filter_str = ",".join(atempo_filters) if atempo_filters else "atempo=1.0"
     # Determine appropriate codec based on output format
-    output_format = output_path.suffix.lower()
-    if output_format == ".mp3":
+    output_format = AudioFormat.UNKNOWN
+    try:
+        ext = output_path.suffix.lower().lstrip(".")
+        output_format = AudioFormat(ext)
+    except ValueError:
+        pass  # Keep as UNKNOWN
+    if output_format == AudioFormat.MP3:
         codec = "libmp3lame"
-    elif output_format == ".wav":
+    elif output_format == AudioFormat.WAV:
         codec = "pcm_s16le"
     else:
         codec = "libmp3lame"  # Default to mp3 codec

{speech_prep-0.1.3 → speech_prep-0.1.4}/src/speech_prep/utils.py RENAMED Viewed

@@ -5,6 +5,7 @@ from pathlib import Path
 import subprocess
 from .exceptions import AudioPropertiesError, FileValidationError
+from .formats import AudioFormat
 def validate_file(file_path: Path) -> bool:
@@ -29,7 +30,7 @@ def validate_file(file_path: Path) -> bool:
     return True
-def get_audio_properties(file_path: Path) -> tuple[float, int, str]:
+def get_audio_properties(file_path: Path) -> tuple[float, int, AudioFormat]:
     """
     Extract audio properties (duration, file size, format) using ffprobe.
@@ -37,7 +38,8 @@ def get_audio_properties(file_path: Path) -> tuple[float, int, str]:
         file_path: Path to the audio file
     Returns:
-        Tuple of (duration, file_size, audio_format)
+        Tuple of (duration, file_size, audio_format) where audio_format
+        is an AudioFormat enum representing the detected audio format
     Raises:
         AudioPropertiesError: If properties cannot be extracted
@@ -71,10 +73,17 @@ def get_audio_properties(file_path: Path) -> tuple[float, int, str]:
         probe_data = json.loads(probe_result.stdout)["format"]
         duration = float(probe_data["duration"])
         file_size = int(probe_data["size"])
-        audio_format = probe_data["format_name"].split(",")[
+        format_str = probe_data["format_name"].split(",")[
             0
         ]  # Get the first format name
+        # Convert format string to enum
+        try:
+            audio_format = AudioFormat(format_str.lower())
+        except ValueError:
+            # If not a direct match, use UNKNOWN
+            audio_format = AudioFormat.UNKNOWN
         if duration <= 0 or file_size <= 0:
             raise AudioPropertiesError(
                 f"Invalid duration or file size for {file_path}. "

{speech_prep-0.1.3 → speech_prep-0.1.4}/tests/integration/test_sound_file_integration.py RENAMED Viewed

@@ -41,7 +41,10 @@ class TestSoundFileIntegrationWithRealFiles:
         # Verify basic properties
         assert sound_file.path == file_path
         assert sound_file.duration > 0
-        assert isinstance(sound_file.format, str)
+        assert sound_file.format is not None
+        from speech_prep.formats import AudioFormat
+        assert isinstance(sound_file.format, AudioFormat)
         assert sound_file.file_size > 0
         # Verify silence detection
@@ -102,10 +105,12 @@ class TestSoundFileIntegrationWithRealFiles:
         print(f"Sped file: {sped}")
         # 3. Convert format
-        converted = sped.convert(converted_path, audio_bitrate="192k")
+        from speech_prep.formats import AudioFormat
+        converted = sped.convert(converted_path, AudioFormat.MP3, audio_bitrate="192k")
         assert converted is not None, "Convert operation failed"
         assert converted.path.exists(), "Converted file doesn't exist"
-        assert converted.format.lower() == "mp3", "Format conversion failed"
+        assert converted.format == AudioFormat.MP3, "Format conversion failed"
         print(f"Converted file: {converted}")
         # Verify final file properties

{speech_prep-0.1.3 → speech_prep-0.1.4}/tests/test_core.py RENAMED Viewed

@@ -430,11 +430,16 @@ class TestSoundFileConvert:
         # Create the original SoundFile
         sound_file = SoundFile(input_path)
+        # Import AudioFormat
+        from speech_prep.formats import AudioFormat
         # Call convert method
-        result = sound_file.convert(output_path, audio_bitrate="192k")
+        result = sound_file.convert(output_path, AudioFormat.MP3, audio_bitrate="192k")
         # Verify convert_format was called with correct arguments
-        mock_convert.assert_called_once_with(input_path, output_path, "192k")
+        mock_convert.assert_called_once_with(
+            input_path, output_path, AudioFormat.MP3, "192k"
+        )
         # Verify a new SoundFile instance was returned
         assert result is not None
@@ -473,8 +478,11 @@ class TestSoundFileConvert:
         # Create the SoundFile
         sound_file = SoundFile(input_path)
+        # Import AudioFormat
+        from speech_prep.formats import AudioFormat
         # Call convert method
-        result = sound_file.convert(output_path)
+        result = sound_file.convert(output_path, AudioFormat.MP3)
         # Verify error is logged and None is returned
         mock_logger.error.assert_called_once()
@@ -642,7 +650,10 @@ class TestSoundFileIntegration:
         sped = stripped.speed(sped_path, 1.5)
         assert sped is not None, "Speed operation failed"
-        converted = sped.convert(converted_path, audio_bitrate="192k")
+        # Import AudioFormat
+        from speech_prep.formats import AudioFormat
+        converted = sped.convert(converted_path, AudioFormat.MP3, audio_bitrate="192k")
         assert converted is not None, "Convert operation failed"
         # Verify the final file exists and has expected properties

speech-prep 0.1.3__tar.gz → 0.1.4__tar.gz

speech-prep 0.1.3__tar.gz → 0.1.4__tar.gz