vocal-sdk 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vocal_sdk-0.3.0/.gitignore +74 -0
- vocal_sdk-0.3.0/MANIFEST.in +4 -0
- vocal_sdk-0.3.0/PKG-INFO +27 -0
- vocal_sdk-0.3.0/README.md +64 -0
- vocal_sdk-0.3.0/openapi.json +1 -0
- vocal_sdk-0.3.0/pyproject.toml +43 -0
- vocal_sdk-0.3.0/scripts/generate.py +46 -0
- vocal_sdk-0.3.0/vocal_sdk/__init__.py +10 -0
- vocal_sdk-0.3.0/vocal_sdk/client.py +327 -0
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
.Python
|
|
7
|
+
build/
|
|
8
|
+
develop-eggs/
|
|
9
|
+
dist/
|
|
10
|
+
downloads/
|
|
11
|
+
eggs/
|
|
12
|
+
.eggs/
|
|
13
|
+
lib/
|
|
14
|
+
lib64/
|
|
15
|
+
parts/
|
|
16
|
+
sdist/
|
|
17
|
+
var/
|
|
18
|
+
wheels/
|
|
19
|
+
*.egg-info/
|
|
20
|
+
.installed.cfg
|
|
21
|
+
*.egg
|
|
22
|
+
MANIFEST
|
|
23
|
+
|
|
24
|
+
# Virtual Environment
|
|
25
|
+
.venv/
|
|
26
|
+
venv/
|
|
27
|
+
ENV/
|
|
28
|
+
env/
|
|
29
|
+
|
|
30
|
+
# UV
|
|
31
|
+
uv.lock
|
|
32
|
+
|
|
33
|
+
# IDE
|
|
34
|
+
.vscode/
|
|
35
|
+
.idea/
|
|
36
|
+
*.swp
|
|
37
|
+
*.swo
|
|
38
|
+
*~
|
|
39
|
+
.DS_Store
|
|
40
|
+
|
|
41
|
+
# Testing
|
|
42
|
+
.pytest_cache/
|
|
43
|
+
.coverage
|
|
44
|
+
htmlcov/
|
|
45
|
+
.tox/
|
|
46
|
+
|
|
47
|
+
# Jupyter
|
|
48
|
+
.ipynb_checkpoints
|
|
49
|
+
|
|
50
|
+
# Model cache
|
|
51
|
+
.cache/
|
|
52
|
+
models/
|
|
53
|
+
*.ckpt
|
|
54
|
+
*.pth
|
|
55
|
+
*.pt
|
|
56
|
+
*.safetensors
|
|
57
|
+
|
|
58
|
+
# Audio files (test data)
|
|
59
|
+
*.mp3
|
|
60
|
+
*.wav
|
|
61
|
+
*.m4a
|
|
62
|
+
*.ogg
|
|
63
|
+
*.flac
|
|
64
|
+
|
|
65
|
+
# Logs
|
|
66
|
+
*.log
|
|
67
|
+
logs/
|
|
68
|
+
|
|
69
|
+
# Environment variables
|
|
70
|
+
.env
|
|
71
|
+
.env.local
|
|
72
|
+
|
|
73
|
+
# OS
|
|
74
|
+
Thumbs.db
|
vocal_sdk-0.3.0/PKG-INFO
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: vocal-sdk
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Python SDK for Vocal API - Auto-generated OpenAI-compatible client
|
|
5
|
+
Project-URL: Homepage, https://github.com/niradler/vocal
|
|
6
|
+
Project-URL: Documentation, https://github.com/niradler/vocal#readme
|
|
7
|
+
Project-URL: Repository, https://github.com/niradler/vocal
|
|
8
|
+
Project-URL: Issues, https://github.com/niradler/vocal/issues
|
|
9
|
+
Author: Vocal Contributors
|
|
10
|
+
License: SSPL-1.0
|
|
11
|
+
Keywords: api-client,client,openai-compatible,sdk,speech-to-text,tts
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: Other/Proprietary License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
|
+
Requires-Python: >=3.11
|
|
20
|
+
Requires-Dist: pydantic>=2.5.0
|
|
21
|
+
Requires-Dist: requests>=2.31.0
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: datamodel-code-generator>=0.25.0; extra == 'dev'
|
|
24
|
+
Provides-Extra: test
|
|
25
|
+
Requires-Dist: numpy>=1.24.0; extra == 'test'
|
|
26
|
+
Requires-Dist: pytest-asyncio>=0.23.0; extra == 'test'
|
|
27
|
+
Requires-Dist: pytest>=8.0.0; extra == 'test'
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# Vocal SDK
|
|
2
|
+
|
|
3
|
+
Auto-generated Python SDK for the Vocal API.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
cd packages/sdk
|
|
9
|
+
uv pip install -e .
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
## Usage
|
|
13
|
+
|
|
14
|
+
```python
|
|
15
|
+
from vocal_sdk import VocalSDK
|
|
16
|
+
|
|
17
|
+
# Initialize client
|
|
18
|
+
client = VocalSDK(base_url="http://localhost:8000")
|
|
19
|
+
|
|
20
|
+
# Check health
|
|
21
|
+
health = client.health()
|
|
22
|
+
print(health)
|
|
23
|
+
|
|
24
|
+
# List models
|
|
25
|
+
models = client.models.list()
|
|
26
|
+
for model in models['models']:
|
|
27
|
+
print(f"{model['id']}: {model['status']}")
|
|
28
|
+
|
|
29
|
+
# Download a model if needed
|
|
30
|
+
model_id = "Systran/faster-whisper-tiny"
|
|
31
|
+
model_info = client.models.get(model_id)
|
|
32
|
+
if model_info['status'] != 'available':
|
|
33
|
+
client.models.download(model_id)
|
|
34
|
+
|
|
35
|
+
# Transcribe audio
|
|
36
|
+
result = client.audio.transcribe(
|
|
37
|
+
file="path/to/audio.mp3",
|
|
38
|
+
model=model_id,
|
|
39
|
+
language="en" # optional
|
|
40
|
+
)
|
|
41
|
+
print(f"Transcription: {result['text']}")
|
|
42
|
+
print(f"Language: {result['language']}")
|
|
43
|
+
print(f"Duration: {result['duration']}s")
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Regenerating the SDK
|
|
47
|
+
|
|
48
|
+
When the API changes, regenerate the SDK models:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
# 1. Make sure API is running
|
|
52
|
+
uv run uvicorn vocal_api.main:app --port 8000
|
|
53
|
+
|
|
54
|
+
# 2. Download latest OpenAPI spec
|
|
55
|
+
curl http://localhost:8000/openapi.json -o packages/sdk/openapi.json
|
|
56
|
+
|
|
57
|
+
# 3. Generate models (optional - for type hints)
|
|
58
|
+
cd packages/sdk
|
|
59
|
+
uv run python scripts/generate.py
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## API Documentation
|
|
63
|
+
|
|
64
|
+
Interactive API docs available at: http://localhost:8000/docs
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"openapi":"3.1.0","info":{"title":"Vocal API","description":"Generic Speech AI Platform (STT + TTS)","version":"0.1.0"},"paths":{"/v1/audio/transcriptions":{"post":{"tags":["transcription"],"summary":"Transcribe audio","description":"Transcribe audio file to text using specified model","operationId":"create_transcription_v1_audio_transcriptions_post","requestBody":{"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_create_transcription_v1_audio_transcriptions_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/TranscriptionResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v1/audio/translations":{"post":{"tags":["transcription"],"summary":"Translate audio to English","description":"Translate audio to English text","operationId":"create_translation_v1_audio_translations_post","requestBody":{"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_create_translation_v1_audio_translations_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/TranscriptionResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v1/models":{"get":{"tags":["models"],"summary":"List models","description":"List all available models (Ollama-style)","operationId":"list_models_v1_models_get","parameters":[{"name":"status","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Status"}},{"name":"task","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Task"}}],"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ModelListResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v1/models/{model_id}":{"get":{"tags":["models"],"summary":"Get model info","description":"Get detailed information about a specific model","operationId":"get_model_v1_models__model_id__get","parameters":[{"name":"model_id","in":"path","required":true,"schema":{"type":"string","title":"Model Id"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ModelInfo"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["models"],"summary":"Delete model","description":"Remove a downloaded model","operationId":"delete_model_v1_models__model_id__delete","parameters":[{"name":"model_id","in":"path","required":true,"schema":{"type":"string","title":"Model Id"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v1/models/{model_id}/download":{"post":{"tags":["models"],"summary":"Download model","description":"Download a model for local use (Ollama-style pull)","operationId":"download_model_v1_models__model_id__download_post","parameters":[{"name":"model_id","in":"path","required":true,"schema":{"type":"string","title":"Model Id"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ModelDownloadProgress"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v1/models/{model_id}/download/status":{"get":{"tags":["models"],"summary":"Get download status","description":"Check model download progress","operationId":"get_download_status_v1_models__model_id__download_status_get","parameters":[{"name":"model_id","in":"path","required":true,"schema":{"type":"string","title":"Model Id"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ModelDownloadProgress"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/":{"get":{"tags":["health"],"summary":"Root","description":"API health check","operationId":"root__get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/health":{"get":{"tags":["health"],"summary":"Health","description":"Detailed health check","operationId":"health_health_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"Body_create_transcription_v1_audio_transcriptions_post":{"properties":{"file":{"type":"string","format":"binary","title":"File","description":"Audio file to transcribe"},"model":{"type":"string","title":"Model","description":"Model ID","default":"Systran/faster-whisper-tiny"},"language":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Language","description":"Language code"},"prompt":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Prompt","description":"Style prompt"},"response_format":{"$ref":"#/components/schemas/TranscriptionFormat","description":"Output 
format","default":"json"},"temperature":{"type":"number","maximum":1.0,"minimum":0.0,"title":"Temperature","default":0.0}},"type":"object","required":["file"],"title":"Body_create_transcription_v1_audio_transcriptions_post"},"Body_create_translation_v1_audio_translations_post":{"properties":{"file":{"type":"string","format":"binary","title":"File"},"model":{"type":"string","title":"Model","default":"Systran/faster-whisper-tiny"}},"type":"object","required":["file"],"title":"Body_create_translation_v1_audio_translations_post"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ModelBackend":{"type":"string","enum":["faster_whisper","transformers","ctranslate2","nemo","onnx","custom"],"title":"ModelBackend","description":"Model inference backend"},"ModelDownloadProgress":{"properties":{"model_id":{"type":"string","title":"Model Id"},"status":{"type":"string","title":"Status"},"progress":{"type":"number","maximum":1.0,"minimum":0.0,"title":"Progress","default":0.0},"downloaded_bytes":{"type":"integer","title":"Downloaded Bytes","default":0},"total_bytes":{"type":"integer","title":"Total Bytes","default":0},"message":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Message"}},"type":"object","required":["model_id","status"],"title":"ModelDownloadProgress","description":"Model download progress"},"ModelInfo":{"properties":{"id":{"type":"string","title":"Id","description":"Unique model identifier"},"name":{"type":"string","title":"Name","description":"Human-readable model name"},"provider":{"$ref":"#/components/schemas/ModelProvider","description":"Model provider"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"size":{"type":"integer","title":"Size","description":"Model size in bytes","default":0},"size_readable":{"type":"string","title":"Size Readable","description":"Human-readable 
size","default":"Unknown"},"parameters":{"type":"string","title":"Parameters","description":"Number of parameters","default":"Unknown"},"languages":{"items":{"type":"string"},"type":"array","title":"Languages","description":"Supported languages"},"backend":{"$ref":"#/components/schemas/ModelBackend","description":"Inference backend"},"status":{"$ref":"#/components/schemas/ModelStatus","description":"Current model status"},"source_url":{"anyOf":[{"type":"string","maxLength":2083,"minLength":1,"format":"uri"},{"type":"null"}],"title":"Source Url"},"license":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"License"},"recommended_vram":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Recommended Vram"},"task":{"$ref":"#/components/schemas/ModelTask","description":"Task type"},"local_path":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Local Path"},"created_at":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Created At"},"updated_at":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Updated At"}},"type":"object","required":["id","name","provider","backend","status","task"],"title":"ModelInfo","description":"Model information schema"},"ModelListResponse":{"properties":{"models":{"items":{"$ref":"#/components/schemas/ModelInfo"},"type":"array","title":"Models"},"total":{"type":"integer","title":"Total"}},"type":"object","required":["models","total"],"title":"ModelListResponse","description":"List of available models"},"ModelProvider":{"type":"string","enum":["huggingface","local","custom"],"title":"ModelProvider","description":"Model provider/source"},"ModelStatus":{"type":"string","enum":["available","downloading","not_downloaded","error"],"title":"ModelStatus","description":"Model download/availability status"},"ModelTask":{"type":"string","enum":["stt","tts"],"title":"ModelTask","description":"Model task 
type"},"TranscriptionFormat":{"type":"string","enum":["json","text","srt","vtt","verbose_json"],"title":"TranscriptionFormat","description":"Output format for transcription"},"TranscriptionResponse":{"properties":{"text":{"type":"string","title":"Text","description":"Full transcribed text"},"language":{"type":"string","title":"Language","description":"Detected or specified language"},"duration":{"type":"number","title":"Duration","description":"Audio duration in seconds"},"segments":{"anyOf":[{"items":{"$ref":"#/components/schemas/TranscriptionSegment"},"type":"array"},{"type":"null"}],"title":"Segments"},"words":{"anyOf":[{"items":{"$ref":"#/components/schemas/TranscriptionWord"},"type":"array"},{"type":"null"}],"title":"Words"}},"type":"object","required":["text","language","duration"],"title":"TranscriptionResponse","description":"Response schema for transcription","example":{"duration":2.5,"language":"en","segments":[{"end":2.5,"id":0,"start":0.0,"text":"Hello, how are you today?"}],"text":"Hello, how are you today?"}},"TranscriptionSegment":{"properties":{"id":{"type":"integer","title":"Id"},"start":{"type":"number","title":"Start","description":"Start time in seconds"},"end":{"type":"number","title":"End","description":"End time in seconds"},"text":{"type":"string","title":"Text","description":"Transcribed text"},"tokens":{"anyOf":[{"items":{"type":"integer"},"type":"array"},{"type":"null"}],"title":"Tokens"},"temperature":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Temperature"},"avg_logprob":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Avg Logprob"},"compression_ratio":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Compression Ratio"},"no_speech_prob":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"No Speech Prob"}},"type":"object","required":["id","start","end","text"],"title":"TranscriptionSegment","description":"A segment of transcribed text with 
timing"},"TranscriptionWord":{"properties":{"word":{"type":"string","title":"Word"},"start":{"type":"number","title":"Start"},"end":{"type":"number","title":"End"},"probability":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Probability"}},"type":"object","required":["word","start","end"],"title":"TranscriptionWord","description":"Word-level timestamp"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"},"input":{"title":"Input"},"ctx":{"type":"object","title":"Context"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"}}}}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "vocal-sdk"
|
|
3
|
+
version = "0.3.0"
|
|
4
|
+
description = "Python SDK for Vocal API - Auto-generated OpenAI-compatible client"
|
|
5
|
+
requires-python = ">=3.11"
|
|
6
|
+
license = { text = "SSPL-1.0" }
|
|
7
|
+
authors = [
|
|
8
|
+
{ name = "Vocal Contributors" }
|
|
9
|
+
]
|
|
10
|
+
keywords = ["sdk", "client", "speech-to-text", "tts", "openai-compatible", "api-client"]
|
|
11
|
+
classifiers = [
|
|
12
|
+
"Development Status :: 3 - Alpha",
|
|
13
|
+
"Intended Audience :: Developers",
|
|
14
|
+
"License :: Other/Proprietary License",
|
|
15
|
+
"Programming Language :: Python :: 3.11",
|
|
16
|
+
"Programming Language :: Python :: 3.12",
|
|
17
|
+
"Programming Language :: Python :: 3.13",
|
|
18
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
19
|
+
]
|
|
20
|
+
dependencies = [
|
|
21
|
+
"requests>=2.31.0",
|
|
22
|
+
"pydantic>=2.5.0",
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
[project.urls]
|
|
26
|
+
Homepage = "https://github.com/niradler/vocal"
|
|
27
|
+
Documentation = "https://github.com/niradler/vocal#readme"
|
|
28
|
+
Repository = "https://github.com/niradler/vocal"
|
|
29
|
+
Issues = "https://github.com/niradler/vocal/issues"
|
|
30
|
+
|
|
31
|
+
[project.optional-dependencies]
|
|
32
|
+
dev = [
|
|
33
|
+
"datamodel-code-generator>=0.25.0",
|
|
34
|
+
]
|
|
35
|
+
test = [
|
|
36
|
+
"pytest>=8.0.0",
|
|
37
|
+
"pytest-asyncio>=0.23.0",
|
|
38
|
+
"numpy>=1.24.0",
|
|
39
|
+
]
|
|
40
|
+
|
|
41
|
+
[build-system]
|
|
42
|
+
requires = ["hatchling"]
|
|
43
|
+
build-backend = "hatchling.build"
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Generate Python SDK from OpenAPI spec
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import subprocess
|
|
6
|
+
import sys
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def generate_sdk():
    """Generate the SDK's Pydantic models from the checked-in OpenAPI spec.

    Reads ``openapi.json`` from the package root (one directory above this
    script) and writes ``vocal_sdk/models.py`` using datamodel-codegen.
    Exits with status 1 when the spec file is missing or the generator
    tool is not installed.
    """
    sdk_dir = Path(__file__).parent.parent
    openapi_file = sdk_dir / "openapi.json"
    output_dir = sdk_dir / "vocal_sdk"

    if not openapi_file.exists():
        print(f"Error: {openapi_file} not found")
        print("Download it first:")
        print("  curl http://localhost:8000/openapi.json -o packages/sdk/openapi.json")
        sys.exit(1)

    print("Generating SDK from OpenAPI spec...")

    # Generate Pydantic models
    try:
        subprocess.run(
            [
                "datamodel-codegen",
                "--input",
                str(openapi_file),
                "--input-file-type",
                "openapi",
                "--output",
                str(output_dir / "models.py"),
                "--field-constraints",
                "--use-standard-collections",
            ],
            check=True,
        )
    except FileNotFoundError:
        # datamodel-codegen ships in the 'dev' extra; without this guard a
        # missing executable surfaces as a bare traceback.
        print("Error: datamodel-codegen not found on PATH")
        print("Install it first:  uv pip install 'vocal-sdk[dev]'")
        sys.exit(1)

    print(f"✓ Generated models at {output_dir / 'models.py'}")
    print("\nSDK generated successfully!")
    print(f"Location: {output_dir}")


if __name__ == "__main__":
    generate_sdk()
|
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Vocal SDK - Auto-generated client for Vocal API
|
|
3
|
+
|
|
4
|
+
This SDK provides a clean Python interface to the Vocal API.
|
|
5
|
+
Models are auto-generated from the OpenAPI spec.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any, BinaryIO
|
|
10
|
+
from urllib.parse import urljoin
|
|
11
|
+
|
|
12
|
+
import requests
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class VocalSDK:
    """
    Vocal SDK Client

    Thin HTTP wrapper around the Vocal API. JSON endpoints are decoded into
    plain dictionaries; binary endpoints return raw bytes.

    Example:
        >>> from vocal_sdk import VocalSDK
        >>> client = VocalSDK(base_url="http://localhost:8000")
        >>>
        >>> # List models
        >>> models = client.models.list()
        >>> print(f"Found {len(models['models'])} models")
        >>>
        >>> # Transcribe audio
        >>> result = client.audio.transcribe("recording.mp3")
        >>> print(result['text'])
        >>>
        >>> # Text to speech
        >>> audio = client.audio.text_to_speech("Hello, world!")
        >>> with open("output.wav", "wb") as f:
        >>>     f.write(audio)
    """

    def __init__(self, base_url: str = "http://localhost:8000", timeout: int = 300):
        """
        Initialize Vocal SDK

        Args:
            base_url: Base URL of Vocal API (trailing slashes are stripped)
            timeout: Default request timeout in seconds, applied whenever a
                call does not pass its own ``timeout``
        """
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout
        # One session shared across calls for connection pooling.
        self.session = requests.Session()

        # Namespaced APIs
        self.models = ModelsAPI(self)
        self.audio = AudioAPI(self)

    def _send(self, method: str, endpoint: str, **kwargs) -> "requests.Response":
        """Shared transport: resolve the URL, apply the default timeout,
        send the request, and raise on HTTP error status.

        Factored out of _request/_request_raw, which previously duplicated
        this logic line for line.
        """
        url = urljoin(self.base_url + "/", endpoint.lstrip("/"))
        kwargs.setdefault("timeout", self.timeout)
        response = self.session.request(method, url, **kwargs)
        response.raise_for_status()
        return response

    def _request(self, method: str, endpoint: str, **kwargs) -> dict[str, Any]:
        """Make HTTP request to API and decode the JSON response body.

        Raises:
            requests.HTTPError: on a non-2xx response.
        """
        # NOTE(review): assumes the endpoint returns JSON; non-JSON output
        # (e.g. transcription with response_format="text") would need
        # _request_raw instead — confirm against the server.
        return self._send(method, endpoint, **kwargs).json()

    def _request_raw(self, method: str, endpoint: str, **kwargs) -> bytes:
        """Make HTTP request and return the raw response body as bytes."""
        return self._send(method, endpoint, **kwargs).content

    def health(self) -> dict[str, Any]:
        """Check API health via GET /health."""
        return self._request("GET", "/health")
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class ModelsAPI:
    """Namespace for the /v1/models endpoints."""

    def __init__(self, client: VocalSDK):
        self.client = client

    def list(self, status: str | None = None, task: str | None = None) -> dict[str, Any]:
        """
        List all available models

        Args:
            status: Filter by status (available, downloading, not_downloaded)
            task: Filter by task (stt, tts)

        Returns:
            Dictionary with 'models' list and 'total' count
        """
        # Only forward the filters that were actually supplied.
        filters = {"status": status, "task": task}
        query = {key: value for key, value in filters.items() if value}
        return self.client._request("GET", "/v1/models", params=query)

    def get(self, model_id: str) -> dict[str, Any]:
        """
        Get model information

        Args:
            model_id: Model identifier (e.g., "Systran/faster-whisper-tiny")

        Returns:
            Model information dictionary
        """
        return self.client._request("GET", f"/v1/models/{model_id}")

    def download(self, model_id: str, quantization: str | None = None) -> dict[str, Any]:
        """
        Download a model (Ollama-style pull)

        Args:
            model_id: Model identifier
            quantization: Optional quantization format

        Returns:
            Download progress information
        """
        # Omit the JSON body entirely when no quantization was requested.
        payload = {"quantization": quantization} if quantization else None
        return self.client._request("POST", f"/v1/models/{model_id}/download", json=payload)

    def download_status(self, model_id: str) -> dict[str, Any]:
        """
        Check download status for a model

        Args:
            model_id: Model identifier

        Returns:
            Download status information
        """
        return self.client._request("GET", f"/v1/models/{model_id}/download/status")

    def delete(self, model_id: str) -> dict[str, Any]:
        """
        Delete a downloaded model

        Args:
            model_id: Model identifier

        Returns:
            Deletion confirmation
        """
        return self.client._request("DELETE", f"/v1/models/{model_id}")
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
class AudioAPI:
    """Audio API namespace: transcription, translation, and TTS."""

    def __init__(self, client: VocalSDK):
        self.client = client

    def _upload(self, endpoint: str, file: str | Path | BinaryIO, data: dict[str, Any]) -> dict[str, Any]:
        """POST *file* to *endpoint* as multipart form data.

        Opens the file first when given a path; the path-vs-file-object
        branching previously duplicated across transcribe/translate lives
        here in one place.
        """
        if isinstance(file, (str, Path)):
            with open(file, "rb") as f:
                return self.client._request("POST", endpoint, files={"file": f}, data=data)
        return self.client._request("POST", endpoint, files={"file": file}, data=data)

    def transcribe(
        self,
        file: str | Path | BinaryIO,
        model: str = "Systran/faster-whisper-tiny",
        language: str | None = None,
        response_format: str = "json",
        temperature: float = 0.0,
        **kwargs,
    ) -> dict[str, Any]:
        """
        Transcribe audio to text

        Args:
            file: Path to audio file or file-like object
            model: Model to use for transcription
            language: Language code (e.g., "en", "es") or None for auto-detect
            response_format: Output format (json, text, srt, vtt)
            temperature: Sampling temperature (0.0-1.0)
            **kwargs: Additional form fields forwarded to the endpoint

        Returns:
            Transcription result with text, language, duration, segments

        Example:
            >>> result = client.audio.transcribe("audio.mp3")
            >>> print(result['text'])
            >>> print(f"Language: {result['language']}")
        """
        data = {
            "model": model,
            "response_format": response_format,
            "temperature": temperature,
            **kwargs,
        }
        # Language is omitted entirely when not given so the server auto-detects.
        if language:
            data["language"] = language
        return self._upload("/v1/audio/transcriptions", file, data)

    def _transcribe_file(
        self,
        file_obj: BinaryIO,
        model: str,
        language: str | None,
        response_format: str,
        temperature: float,
        **kwargs,
    ) -> dict[str, Any]:
        """Internal: transcribe an already-open file object.

        Kept for backward compatibility with earlier internal callers;
        delegates to _upload.
        """
        data = {
            "model": model,
            "response_format": response_format,
            "temperature": temperature,
            **kwargs,
        }
        if language:
            data["language"] = language
        return self._upload("/v1/audio/transcriptions", file_obj, data)

    def translate(
        self,
        file: str | Path | BinaryIO,
        model: str = "Systran/faster-whisper-tiny",
        **kwargs,
    ) -> dict[str, Any]:
        """
        Translate audio to English

        Args:
            file: Path to audio file or file-like object
            model: Model to use for translation
            **kwargs: Additional form fields forwarded to the endpoint

        Returns:
            Translation result
        """
        return self._upload("/v1/audio/translations", file, {"model": model, **kwargs})

    def text_to_speech(
        self,
        text: str,
        model: str = "pyttsx3",
        voice: str | None = None,
        speed: float = 1.0,
        response_format: str = "wav",
        output_file: str | Path | None = None,
    ) -> bytes:
        """
        Convert text to speech (TTS)

        Args:
            text: Text to convert to speech
            model: TTS model to use (default: 'pyttsx3' for system TTS)
            voice: Voice ID to use (None for default)
            speed: Speech speed multiplier (0.25 to 4.0)
            response_format: Audio format (currently only 'wav' supported)
            output_file: Optional path to save audio file

        Returns:
            Audio data as bytes

        Example:
            >>> audio = client.audio.text_to_speech("Hello, world!")
            >>> with open("output.wav", "wb") as f:
            >>>     f.write(audio)

            >>> # Or save directly
            >>> client.audio.text_to_speech("Hello!", output_file="hello.wav")

            >>> # Use specific TTS model
            >>> audio = client.audio.text_to_speech(
            ...     "Hello!",
            ...     model="hexgrad/Kokoro-82M"
            ... )
        """
        data = {
            "model": model,
            "input": text,
            "speed": speed,
            "response_format": response_format,
        }
        if voice:
            data["voice"] = voice

        audio_data = self.client._request_raw("POST", "/v1/audio/speech", json=data)

        if output_file:
            Path(output_file).write_bytes(audio_data)

        return audio_data

    def list_voices(self, model: str | None = None) -> dict[str, Any]:
        """
        List available TTS voices

        Args:
            model: Optional model ID to list voices for a specific model

        Returns:
            Dictionary with 'voices' list and 'total' count

        Example:
            >>> voices = client.audio.list_voices()
            >>> for voice in voices['voices']:
            >>>     print(f"{voice['id']}: {voice['name']} ({voice['language']})")
        """
        params = {"model": model} if model else {}
        return self.client._request("GET", "/v1/audio/voices", params=params)
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
# Explicit public API surface for ``from vocal_sdk.client import *``.
__all__ = ["VocalSDK", "ModelsAPI", "AudioAPI"]
|