vocal-sdk 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,74 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ *.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+ MANIFEST
23
+
24
+ # Virtual Environment
25
+ .venv/
26
+ venv/
27
+ ENV/
28
+ env/
29
+
30
+ # UV
31
+ uv.lock
32
+
33
+ # IDE
34
+ .vscode/
35
+ .idea/
36
+ *.swp
37
+ *.swo
38
+ *~
39
+ .DS_Store
40
+
41
+ # Testing
42
+ .pytest_cache/
43
+ .coverage
44
+ htmlcov/
45
+ .tox/
46
+
47
+ # Jupyter
48
+ .ipynb_checkpoints
49
+
50
+ # Model cache
51
+ .cache/
52
+ models/
53
+ *.ckpt
54
+ *.pth
55
+ *.pt
56
+ *.safetensors
57
+
58
+ # Audio files (test data)
59
+ *.mp3
60
+ *.wav
61
+ *.m4a
62
+ *.ogg
63
+ *.flac
64
+
65
+ # Logs
66
+ *.log
67
+ logs/
68
+
69
+ # Environment variables
70
+ .env
71
+ .env.local
72
+
73
+ # OS
74
+ Thumbs.db
@@ -0,0 +1,4 @@
1
+ include README.md
2
+ include LICENSE
3
+ recursive-include vocal_sdk *.py
4
+ include openapi.json
@@ -0,0 +1,27 @@
1
+ Metadata-Version: 2.4
2
+ Name: vocal-sdk
3
+ Version: 0.3.0
4
+ Summary: Python SDK for Vocal API - Auto-generated OpenAI-compatible client
5
+ Project-URL: Homepage, https://github.com/niradler/vocal
6
+ Project-URL: Documentation, https://github.com/niradler/vocal#readme
7
+ Project-URL: Repository, https://github.com/niradler/vocal
8
+ Project-URL: Issues, https://github.com/niradler/vocal/issues
9
+ Author: Vocal Contributors
10
+ License: SSPL-1.0
11
+ Keywords: api-client,client,openai-compatible,sdk,speech-to-text,tts
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: Other/Proprietary License
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
19
+ Requires-Python: >=3.11
20
+ Requires-Dist: pydantic>=2.5.0
21
+ Requires-Dist: requests>=2.31.0
22
+ Provides-Extra: dev
23
+ Requires-Dist: datamodel-code-generator>=0.25.0; extra == 'dev'
24
+ Provides-Extra: test
25
+ Requires-Dist: numpy>=1.24.0; extra == 'test'
26
+ Requires-Dist: pytest-asyncio>=0.23.0; extra == 'test'
27
+ Requires-Dist: pytest>=8.0.0; extra == 'test'
@@ -0,0 +1,64 @@
1
+ # Vocal SDK
2
+
3
+ Auto-generated Python SDK for the Vocal API.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ cd packages/sdk
9
+ uv pip install -e .
10
+ ```
11
+
12
+ ## Usage
13
+
14
+ ```python
15
+ from vocal_sdk import VocalSDK
16
+
17
+ # Initialize client
18
+ client = VocalSDK(base_url="http://localhost:8000")
19
+
20
+ # Check health
21
+ health = client.health()
22
+ print(health)
23
+
24
+ # List models
25
+ models = client.models.list()
26
+ for model in models['models']:
27
+ print(f"{model['id']}: {model['status']}")
28
+
29
+ # Download a model if needed
30
+ model_id = "Systran/faster-whisper-tiny"
31
+ model_info = client.models.get(model_id)
32
+ if model_info['status'] != 'available':
33
+ client.models.download(model_id)
34
+
35
+ # Transcribe audio
36
+ result = client.audio.transcribe(
37
+ file="path/to/audio.mp3",
38
+ model=model_id,
39
+ language="en" # optional
40
+ )
41
+ print(f"Transcription: {result['text']}")
42
+ print(f"Language: {result['language']}")
43
+ print(f"Duration: {result['duration']}s")
44
+ ```
45
+
46
+ ## Regenerating the SDK
47
+
48
+ When the API changes, regenerate the SDK models:
49
+
50
+ ```bash
51
+ # 1. Make sure API is running
52
+ uv run uvicorn vocal_api.main:app --port 8000
53
+
54
+ # 2. Download latest OpenAPI spec
55
+ curl http://localhost:8000/openapi.json -o packages/sdk/openapi.json
56
+
57
+ # 3. Generate models (optional - for type hints)
58
+ cd packages/sdk
59
+ uv run python scripts/generate.py
60
+ ```
61
+
62
+ ## API Documentation
63
+
64
+ Interactive API docs available at: http://localhost:8000/docs
@@ -0,0 +1 @@
1
+ {"openapi":"3.1.0","info":{"title":"Vocal API","description":"Generic Speech AI Platform (STT + TTS)","version":"0.1.0"},"paths":{"/v1/audio/transcriptions":{"post":{"tags":["transcription"],"summary":"Transcribe audio","description":"Transcribe audio file to text using specified model","operationId":"create_transcription_v1_audio_transcriptions_post","requestBody":{"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_create_transcription_v1_audio_transcriptions_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/TranscriptionResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v1/audio/translations":{"post":{"tags":["transcription"],"summary":"Translate audio to English","description":"Translate audio to English text","operationId":"create_translation_v1_audio_translations_post","requestBody":{"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_create_translation_v1_audio_translations_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/TranscriptionResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v1/models":{"get":{"tags":["models"],"summary":"List models","description":"List all available models (Ollama-style)","operationId":"list_models_v1_models_get","parameters":[{"name":"status","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Status"}},{"name":"task","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Task"}}],"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ModelListResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v1/models/{model_id}":{"get":{"tags":["models"],"summary":"Get model info","description":"Get detailed information about a specific model","operationId":"get_model_v1_models__model_id__get","parameters":[{"name":"model_id","in":"path","required":true,"schema":{"type":"string","title":"Model Id"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ModelInfo"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["models"],"summary":"Delete model","description":"Remove a downloaded model","operationId":"delete_model_v1_models__model_id__delete","parameters":[{"name":"model_id","in":"path","required":true,"schema":{"type":"string","title":"Model Id"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v1/models/{model_id}/download":{"post":{"tags":["models"],"summary":"Download model","description":"Download a model for local use (Ollama-style pull)","operationId":"download_model_v1_models__model_id__download_post","parameters":[{"name":"model_id","in":"path","required":true,"schema":{"type":"string","title":"Model Id"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ModelDownloadProgress"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v1/models/{model_id}/download/status":{"get":{"tags":["models"],"summary":"Get download status","description":"Check model download progress","operationId":"get_download_status_v1_models__model_id__download_status_get","parameters":[{"name":"model_id","in":"path","required":true,"schema":{"type":"string","title":"Model Id"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ModelDownloadProgress"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/":{"get":{"tags":["health"],"summary":"Root","description":"API health check","operationId":"root__get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/health":{"get":{"tags":["health"],"summary":"Health","description":"Detailed health check","operationId":"health_health_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"Body_create_transcription_v1_audio_transcriptions_post":{"properties":{"file":{"type":"string","format":"binary","title":"File","description":"Audio file to transcribe"},"model":{"type":"string","title":"Model","description":"Model ID","default":"Systran/faster-whisper-tiny"},"language":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Language","description":"Language code"},"prompt":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Prompt","description":"Style prompt"},"response_format":{"$ref":"#/components/schemas/TranscriptionFormat","description":"Output 
format","default":"json"},"temperature":{"type":"number","maximum":1.0,"minimum":0.0,"title":"Temperature","default":0.0}},"type":"object","required":["file"],"title":"Body_create_transcription_v1_audio_transcriptions_post"},"Body_create_translation_v1_audio_translations_post":{"properties":{"file":{"type":"string","format":"binary","title":"File"},"model":{"type":"string","title":"Model","default":"Systran/faster-whisper-tiny"}},"type":"object","required":["file"],"title":"Body_create_translation_v1_audio_translations_post"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ModelBackend":{"type":"string","enum":["faster_whisper","transformers","ctranslate2","nemo","onnx","custom"],"title":"ModelBackend","description":"Model inference backend"},"ModelDownloadProgress":{"properties":{"model_id":{"type":"string","title":"Model Id"},"status":{"type":"string","title":"Status"},"progress":{"type":"number","maximum":1.0,"minimum":0.0,"title":"Progress","default":0.0},"downloaded_bytes":{"type":"integer","title":"Downloaded Bytes","default":0},"total_bytes":{"type":"integer","title":"Total Bytes","default":0},"message":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Message"}},"type":"object","required":["model_id","status"],"title":"ModelDownloadProgress","description":"Model download progress"},"ModelInfo":{"properties":{"id":{"type":"string","title":"Id","description":"Unique model identifier"},"name":{"type":"string","title":"Name","description":"Human-readable model name"},"provider":{"$ref":"#/components/schemas/ModelProvider","description":"Model provider"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"size":{"type":"integer","title":"Size","description":"Model size in bytes","default":0},"size_readable":{"type":"string","title":"Size Readable","description":"Human-readable 
size","default":"Unknown"},"parameters":{"type":"string","title":"Parameters","description":"Number of parameters","default":"Unknown"},"languages":{"items":{"type":"string"},"type":"array","title":"Languages","description":"Supported languages"},"backend":{"$ref":"#/components/schemas/ModelBackend","description":"Inference backend"},"status":{"$ref":"#/components/schemas/ModelStatus","description":"Current model status"},"source_url":{"anyOf":[{"type":"string","maxLength":2083,"minLength":1,"format":"uri"},{"type":"null"}],"title":"Source Url"},"license":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"License"},"recommended_vram":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Recommended Vram"},"task":{"$ref":"#/components/schemas/ModelTask","description":"Task type"},"local_path":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Local Path"},"created_at":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Created At"},"updated_at":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Updated At"}},"type":"object","required":["id","name","provider","backend","status","task"],"title":"ModelInfo","description":"Model information schema"},"ModelListResponse":{"properties":{"models":{"items":{"$ref":"#/components/schemas/ModelInfo"},"type":"array","title":"Models"},"total":{"type":"integer","title":"Total"}},"type":"object","required":["models","total"],"title":"ModelListResponse","description":"List of available models"},"ModelProvider":{"type":"string","enum":["huggingface","local","custom"],"title":"ModelProvider","description":"Model provider/source"},"ModelStatus":{"type":"string","enum":["available","downloading","not_downloaded","error"],"title":"ModelStatus","description":"Model download/availability status"},"ModelTask":{"type":"string","enum":["stt","tts"],"title":"ModelTask","description":"Model task 
type"},"TranscriptionFormat":{"type":"string","enum":["json","text","srt","vtt","verbose_json"],"title":"TranscriptionFormat","description":"Output format for transcription"},"TranscriptionResponse":{"properties":{"text":{"type":"string","title":"Text","description":"Full transcribed text"},"language":{"type":"string","title":"Language","description":"Detected or specified language"},"duration":{"type":"number","title":"Duration","description":"Audio duration in seconds"},"segments":{"anyOf":[{"items":{"$ref":"#/components/schemas/TranscriptionSegment"},"type":"array"},{"type":"null"}],"title":"Segments"},"words":{"anyOf":[{"items":{"$ref":"#/components/schemas/TranscriptionWord"},"type":"array"},{"type":"null"}],"title":"Words"}},"type":"object","required":["text","language","duration"],"title":"TranscriptionResponse","description":"Response schema for transcription","example":{"duration":2.5,"language":"en","segments":[{"end":2.5,"id":0,"start":0.0,"text":"Hello, how are you today?"}],"text":"Hello, how are you today?"}},"TranscriptionSegment":{"properties":{"id":{"type":"integer","title":"Id"},"start":{"type":"number","title":"Start","description":"Start time in seconds"},"end":{"type":"number","title":"End","description":"End time in seconds"},"text":{"type":"string","title":"Text","description":"Transcribed text"},"tokens":{"anyOf":[{"items":{"type":"integer"},"type":"array"},{"type":"null"}],"title":"Tokens"},"temperature":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Temperature"},"avg_logprob":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Logprob"},"compression_ratio":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Compression Ratio"},"no_speech_prob":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"No Speech Prob"}},"type":"object","required":["id","start","end","text"],"title":"TranscriptionSegment","description":"A segment of transcribed text with 
timing"},"TranscriptionWord":{"properties":{"word":{"type":"string","title":"Word"},"start":{"type":"number","title":"Start"},"end":{"type":"number","title":"End"},"probability":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Probability"}},"type":"object","required":["word","start","end"],"title":"TranscriptionWord","description":"Word-level timestamp"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"},"input":{"title":"Input"},"ctx":{"type":"object","title":"Context"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"}}}}
@@ -0,0 +1,43 @@
1
+ [project]
2
+ name = "vocal-sdk"
3
+ version = "0.3.0"
4
+ description = "Python SDK for Vocal API - Auto-generated OpenAI-compatible client"
5
+ requires-python = ">=3.11"
6
+ license = { text = "SSPL-1.0" }
7
+ authors = [
8
+ { name = "Vocal Contributors" }
9
+ ]
10
+ keywords = ["sdk", "client", "speech-to-text", "tts", "openai-compatible", "api-client"]
11
+ classifiers = [
12
+ "Development Status :: 3 - Alpha",
13
+ "Intended Audience :: Developers",
14
+ "License :: Other/Proprietary License",
15
+ "Programming Language :: Python :: 3.11",
16
+ "Programming Language :: Python :: 3.12",
17
+ "Programming Language :: Python :: 3.13",
18
+ "Topic :: Software Development :: Libraries :: Python Modules",
19
+ ]
20
+ dependencies = [
21
+ "requests>=2.31.0",
22
+ "pydantic>=2.5.0",
23
+ ]
24
+
25
+ [project.urls]
26
+ Homepage = "https://github.com/niradler/vocal"
27
+ Documentation = "https://github.com/niradler/vocal#readme"
28
+ Repository = "https://github.com/niradler/vocal"
29
+ Issues = "https://github.com/niradler/vocal/issues"
30
+
31
+ [project.optional-dependencies]
32
+ dev = [
33
+ "datamodel-code-generator>=0.25.0",
34
+ ]
35
+ test = [
36
+ "pytest>=8.0.0",
37
+ "pytest-asyncio>=0.23.0",
38
+ "numpy>=1.24.0",
39
+ ]
40
+
41
+ [build-system]
42
+ requires = ["hatchling"]
43
+ build-backend = "hatchling.build"
@@ -0,0 +1,46 @@
1
+ """
2
+ Generate Python SDK from OpenAPI spec
3
+ """
4
+
5
+ import subprocess
6
+ import sys
7
+ from pathlib import Path
8
+
9
+
10
+ def generate_sdk():
11
+ """Generate SDK from OpenAPI spec"""
12
+ sdk_dir = Path(__file__).parent.parent
13
+ openapi_file = sdk_dir / "openapi.json"
14
+ output_dir = sdk_dir / "vocal_sdk"
15
+
16
+ if not openapi_file.exists():
17
+ print(f"Error: {openapi_file} not found")
18
+ print("Download it first:")
19
+ print(" curl http://localhost:8000/openapi.json -o packages/sdk/openapi.json")
20
+ sys.exit(1)
21
+
22
+ print("Generating SDK from OpenAPI spec...")
23
+
24
+ # Generate Pydantic models
25
+ subprocess.run(
26
+ [
27
+ "datamodel-codegen",
28
+ "--input",
29
+ str(openapi_file),
30
+ "--input-file-type",
31
+ "openapi",
32
+ "--output",
33
+ str(output_dir / "models.py"),
34
+ "--field-constraints",
35
+ "--use-standard-collections",
36
+ ],
37
+ check=True,
38
+ )
39
+
40
+ print(f"✓ Generated models at {output_dir / 'models.py'}")
41
+ print("\nSDK generated successfully!")
42
+ print(f"Location: {output_dir}")
43
+
44
+
45
+ if __name__ == "__main__":
46
+ generate_sdk()
@@ -0,0 +1,10 @@
1
+ """
2
+ Vocal SDK - Python client for Vocal API
3
+
4
+ Auto-generated from OpenAPI specification.
5
+ """
6
+
7
+ from .client import AudioAPI, ModelsAPI, VocalSDK
8
+
9
+ __version__ = "0.3.0"
10
+ __all__ = ["VocalSDK", "ModelsAPI", "AudioAPI"]
@@ -0,0 +1,327 @@
1
+ """
2
+ Vocal SDK - Auto-generated client for Vocal API
3
+
4
+ This SDK provides a clean Python interface to the Vocal API.
5
+ Models are auto-generated from the OpenAPI spec.
6
+ """
7
+
8
+ from pathlib import Path
9
+ from typing import Any, BinaryIO
10
+ from urllib.parse import urljoin
11
+
12
+ import requests
13
+
14
+
15
class VocalSDK:
    """
    Vocal SDK Client

    Example:
        >>> from vocal_sdk import VocalSDK
        >>> client = VocalSDK(base_url="http://localhost:8000")
        >>>
        >>> # List models
        >>> models = client.models.list()
        >>> print(f"Found {len(models['models'])} models")
        >>>
        >>> # Transcribe audio
        >>> result = client.audio.transcribe("recording.mp3")
        >>> print(result['text'])
        >>>
        >>> # Text to speech
        >>> audio = client.audio.text_to_speech("Hello, world!")
        >>> with open("output.wav", "wb") as f:
        >>>     f.write(audio)

    The client may also be used as a context manager so the underlying
    HTTP session is closed deterministically:

        >>> with VocalSDK() as client:
        ...     client.health()
    """

    def __init__(self, base_url: str = "http://localhost:8000", timeout: int = 300):
        """
        Initialize Vocal SDK

        Args:
            base_url: Base URL of Vocal API (a trailing slash is stripped)
            timeout: Default request timeout in seconds, used whenever the
                caller does not pass an explicit ``timeout``
        """
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout
        # One Session for the client's lifetime: connection pooling across calls.
        self.session = requests.Session()

        # Namespaced APIs
        self.models = ModelsAPI(self)
        self.audio = AudioAPI(self)

    def close(self) -> None:
        """Close the underlying HTTP session and release pooled connections."""
        self.session.close()

    def __enter__(self) -> "VocalSDK":
        """Enter a ``with`` block; returns this client unchanged."""
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        """Close the HTTP session when the ``with`` block exits."""
        self.close()

    def _request(self, method: str, endpoint: str, **kwargs) -> dict[str, Any]:
        """Make an HTTP request to the API and JSON-decode the response.

        Raises:
            requests.HTTPError: for any non-2xx response.
        """
        url = urljoin(self.base_url + "/", endpoint.lstrip("/"))

        # Honor a caller-supplied timeout; otherwise apply the client default.
        if "timeout" not in kwargs:
            kwargs["timeout"] = self.timeout

        response = self.session.request(method, url, **kwargs)
        response.raise_for_status()

        return response.json()

    def _request_raw(self, method: str, endpoint: str, **kwargs) -> bytes:
        """Make an HTTP request and return the raw response body as bytes."""
        url = urljoin(self.base_url + "/", endpoint.lstrip("/"))

        if "timeout" not in kwargs:
            kwargs["timeout"] = self.timeout

        response = self.session.request(method, url, **kwargs)
        response.raise_for_status()

        return response.content

    def health(self) -> dict[str, Any]:
        """Check API health via GET /health."""
        return self._request("GET", "/health")
80
+
81
+
82
class ModelsAPI:
    """Namespace for the /v1/models endpoints."""

    def __init__(self, client: VocalSDK):
        # Parent SDK instance; all HTTP traffic goes through its _request helper.
        self.client = client

    def list(self, status: str | None = None, task: str | None = None) -> dict[str, Any]:
        """
        List all available models

        Args:
            status: Filter by status (available, downloading, not_downloaded)
            task: Filter by task (stt, tts)

        Returns:
            Dictionary with 'models' list and 'total' count
        """
        # Forward only the filters that were actually provided.
        query = {name: value for name, value in (("status", status), ("task", task)) if value}
        return self.client._request("GET", "/v1/models", params=query)

    def get(self, model_id: str) -> dict[str, Any]:
        """
        Get model information

        Args:
            model_id: Model identifier (e.g., "Systran/faster-whisper-tiny")

        Returns:
            Model information dictionary
        """
        # NOTE(review): model IDs contain "/" and are interpolated into the URL
        # path unescaped — confirm the server routes slash-containing IDs.
        return self.client._request("GET", f"/v1/models/{model_id}")

    def download(self, model_id: str, quantization: str | None = None) -> dict[str, Any]:
        """
        Download a model (Ollama-style pull)

        Args:
            model_id: Model identifier
            quantization: Optional quantization format

        Returns:
            Download progress information
        """
        # Send a JSON body only when a quantization was requested.
        body = {"quantization": quantization} if quantization else None
        return self.client._request("POST", f"/v1/models/{model_id}/download", json=body)

    def download_status(self, model_id: str) -> dict[str, Any]:
        """
        Check download status for a model

        Args:
            model_id: Model identifier

        Returns:
            Download status information
        """
        return self.client._request("GET", f"/v1/models/{model_id}/download/status")

    def delete(self, model_id: str) -> dict[str, Any]:
        """
        Delete a downloaded model

        Args:
            model_id: Model identifier

        Returns:
            Deletion confirmation
        """
        return self.client._request("DELETE", f"/v1/models/{model_id}")
159
+
160
+
161
class AudioAPI:
    """Namespace for the /v1/audio endpoints (transcription, translation, TTS)."""

    def __init__(self, client: VocalSDK):
        # Parent SDK instance; all HTTP traffic goes through its _request helpers.
        self.client = client

    def transcribe(
        self,
        file: str | Path | BinaryIO,
        model: str = "Systran/faster-whisper-tiny",
        language: str | None = None,
        response_format: str = "json",
        temperature: float = 0.0,
        **kwargs,
    ) -> dict[str, Any]:
        """
        Transcribe audio to text

        Args:
            file: Path to audio file or file-like object
            model: Model to use for transcription
            language: Language code (e.g., "en", "es") or None for auto-detect
            response_format: Output format (json, text, srt, vtt)
            temperature: Sampling temperature (0.0-1.0)
            **kwargs: Additional form fields forwarded verbatim

        Returns:
            Transcription result with text, language, duration, segments

        Example:
            >>> result = client.audio.transcribe("audio.mp3")
            >>> print(result['text'])
            >>> print(f"Language: {result['language']}")
        """
        # NOTE(review): _request always JSON-decodes the response, so the
        # non-JSON formats listed above (text, srt, vtt) would fail on decode —
        # confirm how the server responds for those formats.
        if isinstance(file, (str, Path)):
            with open(file, "rb") as f:
                return self._transcribe_file(f, model, language, response_format, temperature, **kwargs)
        return self._transcribe_file(file, model, language, response_format, temperature, **kwargs)

    def _transcribe_file(
        self,
        file_obj: BinaryIO,
        model: str,
        language: str | None,
        response_format: str,
        temperature: float,
        **kwargs,
    ) -> dict[str, Any]:
        """POST an open file object to /v1/audio/transcriptions."""
        files = {"file": file_obj}
        data = {
            "model": model,
            "response_format": response_format,
            "temperature": temperature,
            **kwargs,
        }

        # Omit the field entirely for auto-detect instead of sending an empty value.
        if language:
            data["language"] = language

        return self.client._request("POST", "/v1/audio/transcriptions", files=files, data=data)

    def translate(
        self,
        file: str | Path | BinaryIO,
        model: str = "Systran/faster-whisper-tiny",
        **kwargs,
    ) -> dict[str, Any]:
        """
        Translate audio to English

        Args:
            file: Path to audio file or file-like object
            model: Model to use for translation
            **kwargs: Additional form fields forwarded verbatim

        Returns:
            Translation result
        """
        # Mirror transcribe(): open paths ourselves, pass file objects through.
        if isinstance(file, (str, Path)):
            with open(file, "rb") as f:
                return self._translate_file(f, model, **kwargs)
        return self._translate_file(file, model, **kwargs)

    def _translate_file(self, file_obj: BinaryIO, model: str, **kwargs) -> dict[str, Any]:
        """POST an open file object to /v1/audio/translations."""
        files = {"file": file_obj}
        data = {"model": model, **kwargs}
        return self.client._request("POST", "/v1/audio/translations", files=files, data=data)

    def text_to_speech(
        self,
        text: str,
        model: str = "pyttsx3",
        voice: str | None = None,
        speed: float = 1.0,
        response_format: str = "wav",
        output_file: str | Path | None = None,
    ) -> bytes:
        """
        Convert text to speech (TTS)

        Args:
            text: Text to convert to speech
            model: TTS model to use (default: 'pyttsx3' for system TTS)
            voice: Voice ID to use (None for default)
            speed: Speech speed multiplier (0.25 to 4.0)
            response_format: Audio format (currently only 'wav' supported)
            output_file: Optional path to save audio file

        Returns:
            Audio data as bytes

        Example:
            >>> audio = client.audio.text_to_speech("Hello, world!")
            >>> with open("output.wav", "wb") as f:
            >>>     f.write(audio)

            >>> # Or save directly
            >>> client.audio.text_to_speech("Hello!", output_file="hello.wav")
        """
        # NOTE(review): /v1/audio/speech is not present in the bundled
        # openapi.json — confirm the target server actually exposes it.
        data = {
            "model": model,
            "input": text,
            "speed": speed,
            "response_format": response_format,
        }

        if voice:
            data["voice"] = voice

        audio_data = self.client._request_raw("POST", "/v1/audio/speech", json=data)

        if output_file:
            output_path = Path(output_file)
            output_path.write_bytes(audio_data)

        return audio_data

    def list_voices(self, model: str | None = None) -> dict[str, Any]:
        """
        List available TTS voices

        Args:
            model: Optional model ID to list voices for a specific model

        Returns:
            Dictionary with 'voices' list and 'total' count

        Example:
            >>> voices = client.audio.list_voices()
            >>> for voice in voices['voices']:
            >>>     print(f"{voice['id']}: {voice['name']} ({voice['language']})")
        """
        # NOTE(review): /v1/audio/voices is not present in the bundled
        # openapi.json — confirm the target server actually exposes it.
        params = {}
        if model:
            params["model"] = model
        return self.client._request("GET", "/v1/audio/voices", params=params)
325
+
326
+
327
+ __all__ = ["VocalSDK", "ModelsAPI", "AudioAPI"]