voicepipe 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 DanLab
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,207 @@
1
+ Metadata-Version: 2.4
2
+ Name: voicepipe
3
+ Version: 0.1.0
4
+ Summary: One-command STT + TTS for any app
5
+ Author-email: DanLab <dan@danlab.dev>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/danlab-ai/voicepipe
8
+ Project-URL: Documentation, https://voicepipe.readthedocs.io
9
+ Project-URL: Repository, https://github.com/danlab-ai/voicepipe
10
+ Project-URL: Issues, https://github.com/danlab-ai/voicepipe/issues
11
+ Keywords: stt,tts,speech,voice,whisper,kittentts,ai
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.8
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Requires-Dist: numpy>=1.20.0
24
+ Provides-Extra: kittentts
25
+ Requires-Dist: kittentts; extra == "kittentts"
26
+ Provides-Extra: gtts
27
+ Requires-Dist: gtts; extra == "gtts"
28
+ Provides-Extra: dev
29
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
30
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
31
+ Requires-Dist: black>=23.0.0; extra == "dev"
32
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
33
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
34
+ Dynamic: license-file
35
+
36
+ # VoicePipe
37
+
38
+ <p align="center">
39
+ <strong>One-command voice integration for any app</strong>
40
+ </p>
41
+
42
+ <p align="center">
43
+ <a href="https://pypi.org/project/voicepipe/">
44
+ <img src="https://img.shields.io/pypi/v/voicepipe.svg" alt="PyPI version">
45
+ </a>
46
+ <a href="https://pypi.org/project/voicepipe/">
47
+ <img src="https://img.shields.io/pypi/pyversions/voicepipe.svg" alt="Python versions">
48
+ </a>
49
+ <a href="https://github.com/danlab-ai/voicepipe/blob/main/LICENSE">
50
+ <img src="https://img.shields.io/github/license/danlab-ai/voicepipe.svg" alt="License">
51
+ </a>
52
+ </p>
53
+
54
+ ---
55
+
56
+ ## Overview
57
+
58
+ VoicePipe provides **one-command** STT (Speech-to-Text) + TTS (Text-to-Speech) for any application.
59
+
60
+ - **STT**: whisper.cpp - fastest local speech recognition
61
+ - **TTS**: KittenTTS - smallest neural TTS (15-80MB)
62
+
63
+ ## Installation
64
+
65
+ ```bash
66
+ pip install voicepipe
67
+ ```
68
+
69
+ ## Quick Start
70
+
71
+ ```python
72
+ from voicepipe import VoicePipeline
73
+
74
+ # Initialize (auto-downloads models)
75
+ voice = VoicePipeline()
76
+
77
+ # Speech to Text
78
+ text = voice.speech_to_text("audio.wav")
79
+ print(f"You said: {text}")
80
+
81
+ # Text to Speech
82
+ audio = voice.text_to_speech("Hello, world!")
83
+ ```
84
+
85
+ ## Requirements
86
+
87
+ - Python 3.8+
88
+ - FFmpeg (for audio processing)
89
+
90
+ ## Install FFmpeg
91
+
92
+ **macOS:**
93
+ ```bash
94
+ brew install ffmpeg
95
+ ```
96
+
97
+ **Linux (Debian/Ubuntu):**
98
+ ```bash
99
+ sudo apt install ffmpeg
100
+ ```
101
+
102
+ **Windows:**
103
+ ```powershell
104
+ choco install ffmpeg
105
+ ```
106
+
107
+ ## Configuration
108
+
109
+ ```python
110
+ voice = VoicePipeline(
111
+ stt_model="tiny", # tiny, base, small
112
+ tts_model="nano", # nano, micro, mini
113
+ tts_voice="Bella", # 8 voices available
114
+ tts_speed=1.0, # 0.5 - 2.0
115
+ language="en", # or "auto"
116
+ cache_dir="~/.voicepipe" # model cache
117
+ )
118
+ ```
119
+
120
+ ## Available Voices
121
+
122
+ - Bella, Jasper, Luna, Bruno, Rosie, Hugo, Kiki, Leo
123
+
124
+ ## Models
125
+
126
+ ### STT (whisper.cpp)
127
+ | Model | Size | RAM | Speed |
128
+ |-------|------|-----|-------|
129
+ | tiny | 75MB | ~500MB | 10x realtime |
130
+ | base | 142MB | ~1GB | 5x realtime |
131
+ | small | 466MB | ~2GB | 2x realtime |
132
+
133
+ ### TTS (KittenTTS)
134
+ | Model | Size | Quality |
135
+ |-------|------|---------|
136
+ | nano | 15MB | Good |
137
+ | micro | 40MB | Better |
138
+ | mini | 80MB | Best |
139
+
140
+ ## Use Cases
141
+
142
+ ### Chatbot with Voice
143
+ ```python
144
+ @app.post("/voice/chat")
145
+ async def voice_chat(audio: bytes):
146
+ # Convert speech to text
147
+ text = voice.speech_to_text_bytes(audio)
148
+
149
+ # Get chatbot response
150
+ response = await chatbot.chat(text)
151
+
152
+ # Convert response to speech
153
+ audio_response = voice.text_to_speech(response)
154
+
155
+ return {"audio": audio_response}
156
+ ```
157
+
158
+ ### Voice Assistant
159
+ ```python
160
+ async def run_assistant():
161
+ while True:
162
+ # Continuously listen and respond
163
+ text = await voice.speech_to_text_async(microphone_stream)
164
+ response = await assistant.respond(text)
165
+ voice.text_to_speech(response, play=True)
166
+ ```
167
+
168
+ ## API Reference
169
+
170
+ ### VoicePipeline
171
+
172
+ | Method | Description |
173
+ |--------|-------------|
174
+ | `speech_to_text(audio_path)` | Convert audio file to text |
175
+ | `speech_to_text_bytes(audio_data)` | Convert raw audio to text |
176
+ | `text_to_speech(text)` | Convert text to audio bytes |
177
+ | `text_to_speech_file(text, path)` | Convert text to audio file |
178
+ | `list_voices()` | Get available TTS voices |
179
+ | `get_status()` | Get pipeline status |
180
+
181
+ ## Development
182
+
183
+ ```bash
184
+ # Clone repository
185
+ git clone https://github.com/danlab-ai/voicepipe.git
186
+ cd voicepipe
187
+
188
+ # Install in development mode
189
+ pip install -e ".[dev]"
190
+
191
+ # Run tests
192
+ pytest
193
+
194
+ # Format code
195
+ black src/voicepipe
196
+ ruff check src/voicepipe
197
+ ```
198
+
199
+ ## License
200
+
201
+ MIT License - see [LICENSE](LICENSE)
202
+
203
+ ---
204
+
205
+ <p align="center">
206
+ Built by <a href="https://danlab.dev">DanLab</a>
207
+ </p>
@@ -0,0 +1,172 @@
1
+ # VoicePipe
2
+
3
+ <p align="center">
4
+ <strong>One-command voice integration for any app</strong>
5
+ </p>
6
+
7
+ <p align="center">
8
+ <a href="https://pypi.org/project/voicepipe/">
9
+ <img src="https://img.shields.io/pypi/v/voicepipe.svg" alt="PyPI version">
10
+ </a>
11
+ <a href="https://pypi.org/project/voicepipe/">
12
+ <img src="https://img.shields.io/pypi/pyversions/voicepipe.svg" alt="Python versions">
13
+ </a>
14
+ <a href="https://github.com/danlab-ai/voicepipe/blob/main/LICENSE">
15
+ <img src="https://img.shields.io/github/license/danlab-ai/voicepipe.svg" alt="License">
16
+ </a>
17
+ </p>
18
+
19
+ ---
20
+
21
+ ## Overview
22
+
23
+ VoicePipe provides **one-command** STT (Speech-to-Text) + TTS (Text-to-Speech) for any application.
24
+
25
+ - **STT**: whisper.cpp - fastest local speech recognition
26
+ - **TTS**: KittenTTS - smallest neural TTS (15-80MB)
27
+
28
+ ## Installation
29
+
30
+ ```bash
31
+ pip install voicepipe
32
+ ```
33
+
34
+ ## Quick Start
35
+
36
+ ```python
37
+ from voicepipe import VoicePipeline
38
+
39
+ # Initialize (auto-downloads models)
40
+ voice = VoicePipeline()
41
+
42
+ # Speech to Text
43
+ text = voice.speech_to_text("audio.wav")
44
+ print(f"You said: {text}")
45
+
46
+ # Text to Speech
47
+ audio = voice.text_to_speech("Hello, world!")
48
+ ```
49
+
50
+ ## Requirements
51
+
52
+ - Python 3.8+
53
+ - FFmpeg (for audio processing)
54
+
55
+ ## Install FFmpeg
56
+
57
+ **macOS:**
58
+ ```bash
59
+ brew install ffmpeg
60
+ ```
61
+
62
+ **Linux (Debian/Ubuntu):**
63
+ ```bash
64
+ sudo apt install ffmpeg
65
+ ```
66
+
67
+ **Windows:**
68
+ ```powershell
69
+ choco install ffmpeg
70
+ ```
71
+
72
+ ## Configuration
73
+
74
+ ```python
75
+ voice = VoicePipeline(
76
+ stt_model="tiny", # tiny, base, small
77
+ tts_model="nano", # nano, micro, mini
78
+ tts_voice="Bella", # 8 voices available
79
+ tts_speed=1.0, # 0.5 - 2.0
80
+ language="en", # or "auto"
81
+ cache_dir="~/.voicepipe" # model cache
82
+ )
83
+ ```
84
+
85
+ ## Available Voices
86
+
87
+ - Bella, Jasper, Luna, Bruno, Rosie, Hugo, Kiki, Leo
88
+
89
+ ## Models
90
+
91
+ ### STT (whisper.cpp)
92
+ | Model | Size | RAM | Speed |
93
+ |-------|------|-----|-------|
94
+ | tiny | 75MB | ~500MB | 10x realtime |
95
+ | base | 142MB | ~1GB | 5x realtime |
96
+ | small | 466MB | ~2GB | 2x realtime |
97
+
98
+ ### TTS (KittenTTS)
99
+ | Model | Size | Quality |
100
+ |-------|------|---------|
101
+ | nano | 15MB | Good |
102
+ | micro | 40MB | Better |
103
+ | mini | 80MB | Best |
104
+
105
+ ## Use Cases
106
+
107
+ ### Chatbot with Voice
108
+ ```python
109
+ @app.post("/voice/chat")
110
+ async def voice_chat(audio: bytes):
111
+ # Convert speech to text
112
+ text = voice.speech_to_text_bytes(audio)
113
+
114
+ # Get chatbot response
115
+ response = await chatbot.chat(text)
116
+
117
+ # Convert response to speech
118
+ audio_response = voice.text_to_speech(response)
119
+
120
+ return {"audio": audio_response}
121
+ ```
122
+
123
+ ### Voice Assistant
124
+ ```python
125
+ async def run_assistant():
126
+ while True:
127
+ # Continuously listen and respond
128
+ text = await voice.speech_to_text_async(microphone_stream)
129
+ response = await assistant.respond(text)
130
+ voice.text_to_speech(response, play=True)
131
+ ```
132
+
133
+ ## API Reference
134
+
135
+ ### VoicePipeline
136
+
137
+ | Method | Description |
138
+ |--------|-------------|
139
+ | `speech_to_text(audio_path)` | Convert audio file to text |
140
+ | `speech_to_text_bytes(audio_data)` | Convert raw audio to text |
141
+ | `text_to_speech(text)` | Convert text to audio bytes |
142
+ | `text_to_speech_file(text, path)` | Convert text to audio file |
143
+ | `list_voices()` | Get available TTS voices |
144
+ | `get_status()` | Get pipeline status |
145
+
146
+ ## Development
147
+
148
+ ```bash
149
+ # Clone repository
150
+ git clone https://github.com/danlab-ai/voicepipe.git
151
+ cd voicepipe
152
+
153
+ # Install in development mode
154
+ pip install -e ".[dev]"
155
+
156
+ # Run tests
157
+ pytest
158
+
159
+ # Format code
160
+ black src/voicepipe
161
+ ruff check src/voicepipe
162
+ ```
163
+
164
+ ## License
165
+
166
+ MIT License - see [LICENSE](LICENSE)
167
+
168
+ ---
169
+
170
+ <p align="center">
171
+ Built by <a href="https://danlab.dev">DanLab</a>
172
+ </p>
@@ -0,0 +1,69 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "voicepipe"
7
+ version = "0.1.0"
8
+ description = "One-command STT + TTS for any app"
9
+ readme = "README.md"
10
+ license = {text = "MIT"}
11
+ authors = [
12
+ {name = "DanLab", email = "dan@danlab.dev"},
13
+ ]
14
+ keywords = ["stt", "tts", "speech", "voice", "whisper", "kittentts", "ai"]
15
+ classifiers = [
16
+ "Development Status :: 3 - Alpha",
17
+ "Intended Audience :: Developers",
18
+ "License :: OSI Approved :: MIT License",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3.8",
21
+ "Programming Language :: Python :: 3.9",
22
+ "Programming Language :: Python :: 3.10",
23
+ "Programming Language :: Python :: 3.11",
24
+ "Topic :: Software Development :: Libraries :: Python Modules",
25
+ ]
26
+
27
+ dependencies = [
28
+ "numpy>=1.20.0",
29
+ ]
30
+
31
+ [project.optional-dependencies]
32
+ kittentts = ["kittentts"]
33
+ gtts = ["gtts"]
34
+ dev = [
35
+ "pytest>=7.0.0",
36
+ "pytest-asyncio>=0.21.0",
37
+ "black>=23.0.0",
38
+ "mypy>=1.0.0",
39
+ "ruff>=0.1.0",
40
+ ]
41
+
42
+ [project.urls]
43
+ Homepage = "https://github.com/danlab-ai/voicepipe"
44
+ Documentation = "https://voicepipe.readthedocs.io"
45
+ Repository = "https://github.com/danlab-ai/voicepipe"
46
+ Issues = "https://github.com/danlab-ai/voicepipe/issues"
47
+
48
+ [project.scripts]
49
+ voicepipe = "voicepipe.cli:main"
50
+
51
+ [tool.setuptools.packages.find]
52
+ where = ["src"]
53
+
54
+ [tool.setuptools.package-data]
55
+ voicepipe = ["py.typed"]
56
+
57
+ [tool.black]
58
+ line-length = 100
59
+ target-version = ["py38"]
60
+
61
+ [tool.ruff]
62
+ line-length = 100
63
+ target-version = "py38"
64
+
65
+ [tool.mypy]
66
+ python_version = "3.8"
67
+ warn_return_any = true
68
+ warn_unused_configs = true
69
+ disallow_untyped_defs = false
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,18 @@
1
+ """
2
+ VoicePipe - Universal Voice Pipeline
3
+ One-command STT + TTS for any app
4
+
5
+ Install: pip install voicepipe
6
+ Usage:
7
+ from voicepipe import VoicePipeline
8
+ voice = VoicePipeline()
9
+ text = voice.speech_to_text("audio.wav")
10
+ audio = voice.text_to_speech("Hello!")
11
+ """
12
+
13
+ __version__ = "0.1.0"
14
+ __author__ = "DanLab"
15
+
16
+ from voicepipe.voice_pipeline import VoicePipeline
17
+
18
+ __all__ = ["VoicePipeline", "__version__"]
@@ -0,0 +1,156 @@
1
+ """
2
+ Audio utilities for VoicePipe
3
+ """
4
+ import subprocess
5
+ import tempfile
6
+ import os
7
+ from pathlib import Path
8
+ from typing import Optional
9
+
10
+
11
class AudioUtils:
    """Audio processing utilities built on the ``ffmpeg`` / ``ffprobe`` CLIs.

    All public methods are static. Each one builds an argument list, runs the
    external tool with ``subprocess.run`` (no shell), and raises
    ``RuntimeError`` carrying the tool's stderr when the exit code is non-zero.
    """

    @staticmethod
    def _run(cmd: list, failure_label: str) -> "subprocess.CompletedProcess":
        """Run *cmd*, raising ``RuntimeError(f"{failure_label}: <stderr>")`` on failure."""
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            raise RuntimeError(f"{failure_label}: {result.stderr}")
        return result

    @staticmethod
    def convert_audio(
        input_path: str,
        output_path: str,
        sample_rate: int = 16000,
        mono: bool = True,
        format: str = "wav",
    ) -> str:
        """
        Convert audio file to different format.

        Args:
            input_path: Input audio file
            output_path: Output audio file
            sample_rate: Target sample rate, passed to ffmpeg as ``-ar``
            mono: When true, downmix to a single channel (``-ac 1``)
            format: Output format. ``"wav"`` selects the ``pcm_s16le`` codec;
                any other value is passed through unchanged as the ``-c:a``
                codec name.

        Returns:
            Path to converted file

        Raises:
            RuntimeError: If ffmpeg exits with a non-zero status.
        """
        codec = "pcm_s16le" if format == "wav" else format

        cmd = ["ffmpeg", "-y", "-i", input_path, "-ar", str(sample_rate)]
        if mono:
            # Option and value must be separate argv entries; a single
            # "-ac 1" element (or an empty string when mono is off) is not a
            # valid ffmpeg argument.
            cmd += ["-ac", "1"]
        cmd += ["-c:a", codec, output_path]

        AudioUtils._run(cmd, "Audio conversion failed")
        return output_path

    @staticmethod
    def get_audio_duration(audio_path: str) -> float:
        """Get duration of audio file in seconds.

        Args:
            audio_path: Audio file to inspect with ffprobe.

        Returns:
            The ``format=duration`` value printed by ffprobe, as a float.

        Raises:
            RuntimeError: If ffprobe exits with a non-zero status.
            ValueError: If ffprobe's output cannot be parsed as a float.
        """
        cmd = [
            "ffprobe",
            "-v", "error",
            "-show_entries", "format=duration",
            "-of", "default=noprint_wrappers=1:nokey=1",
            audio_path,
        ]
        result = AudioUtils._run(cmd, "Failed to get duration")
        return float(result.stdout.strip())

    @staticmethod
    def normalize_audio(input_path: str, output_path: str) -> str:
        """Normalize audio volume.

        Applies ffmpeg's ``loudnorm`` filter with ``I=-16:TP=-1.5:LRA=11``.

        Args:
            input_path: Input audio file
            output_path: Output audio file

        Returns:
            Path to the normalized file

        Raises:
            RuntimeError: If ffmpeg exits with a non-zero status.
        """
        cmd = [
            "ffmpeg", "-y",
            "-i", input_path,
            "-af", "loudnorm=I=-16:TP=-1.5:LRA=11",
            output_path,
        ]
        AudioUtils._run(cmd, "Audio normalization failed")
        return output_path

    @staticmethod
    def trim_silence(
        input_path: str,
        output_path: str,
        threshold: float = -40,
        min_duration: float = 0.5,
    ) -> str:
        """Trim silence from audio.

        Only ``start_*`` options of the ``silenceremove`` filter are set, so
        this presumably affects leading silence only -- confirm against the
        ffmpeg filter documentation.

        Args:
            input_path: Input audio file
            output_path: Output audio file
            threshold: Silence threshold; ``dB`` is appended before it is
                passed as ``start_threshold``
            min_duration: Value passed as ``start_duration``

        Returns:
            Path to the trimmed file

        Raises:
            RuntimeError: If ffmpeg exits with a non-zero status.
        """
        # NOTE(review): "detection=speech" does not look like a documented
        # value for silenceremove's `detection` option (documented values
        # include "rms" and "peak"); verify against the targeted ffmpeg
        # version. Left unchanged here.
        silence_filter = (
            "silenceremove=start_periods=1"
            f":start_duration={min_duration}"
            f":start_threshold={threshold}dB"
            ":detection=speech"
        )
        cmd = [
            "ffmpeg", "-y",
            "-i", input_path,
            "-af", silence_filter,
            output_path,
        ]
        AudioUtils._run(cmd, "Silence trim failed")
        return output_path

    @staticmethod
    def split_stereo(input_path: str) -> tuple:
        """Split stereo audio to two mono files.

        Output names are derived from the input's final extension:
        ``clip.wav`` -> ``clip_left.wav`` / ``clip_right.wav``. An input with
        no extension gets ``.wav`` outputs.

        Args:
            input_path: Stereo input audio file

        Returns:
            ``(left_path, right_path)``

        Raises:
            RuntimeError: If either ffmpeg invocation exits non-zero.
        """
        # splitext touches only the trailing extension. A plain
        # str.replace(".wav", ...) also rewrites ".wav" inside directory
        # names and, for inputs without ".wav", yields the input path itself,
        # which "ffmpeg -y" would then overwrite.
        stem, ext = os.path.splitext(input_path)
        ext = ext or ".wav"
        left_path = f"{stem}_left{ext}"
        right_path = f"{stem}_right{ext}"

        # c0 is the first (left) input channel, c1 the second (right).
        for source_channel, target_path in (("c0", left_path), ("c1", right_path)):
            AudioUtils._run(
                [
                    "ffmpeg", "-y", "-i", input_path,
                    "-af", f"pan=mono|c0={source_channel}",
                    target_path,
                ],
                "Stereo split failed",
            )

        return left_path, right_path

    @staticmethod
    def concatenate_audio(audio_files: list, output_path: str) -> str:
        """Concatenate multiple audio files.

        Writes a temporary list file for ffmpeg's concat demuxer, runs ffmpeg
        with stream copy (``-c copy``), and always removes the list file.

        Args:
            audio_files: Paths of the files to join, in order
            output_path: Output audio file

        Returns:
            Path to the concatenated file

        Raises:
            RuntimeError: If ffmpeg exits with a non-zero status.
        """
        list_file = tempfile.NamedTemporaryFile(
            mode="w", suffix=".txt", delete=False, encoding="utf-8"
        )
        list_path = list_file.name
        try:
            with list_file:
                for audio_file in audio_files:
                    # The concat demuxer resolves relative entries against the
                    # list file's directory (a temp dir here), so anchor each
                    # entry to the caller's working directory instead.
                    entry = os.path.abspath(audio_file)
                    # Inside a single-quoted concat entry a literal quote must
                    # be written as '\''.
                    entry = entry.replace("'", "'\\''")
                    list_file.write(f"file '{entry}'\n")

            cmd = [
                "ffmpeg", "-y",
                "-f", "concat",
                "-safe", "0",
                "-i", list_path,
                "-c", "copy",
                output_path,
            ]
            AudioUtils._run(cmd, "Audio concatenation failed")
        finally:
            # Clean up even when ffmpeg is missing or the run fails.
            os.unlink(list_path)

        return output_path