vocal-api 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vocal_api-0.3.0/.gitignore +74 -0
- vocal_api-0.3.0/MANIFEST.in +3 -0
- vocal_api-0.3.0/PKG-INFO +26 -0
- vocal_api-0.3.0/pyproject.toml +38 -0
- vocal_api-0.3.0/vocal_api/__init__.py +9 -0
- vocal_api-0.3.0/vocal_api/config.py +21 -0
- vocal_api-0.3.0/vocal_api/dependencies.py +43 -0
- vocal_api-0.3.0/vocal_api/main.py +75 -0
- vocal_api-0.3.0/vocal_api/routes/__init__.py +11 -0
- vocal_api-0.3.0/vocal_api/routes/models.py +122 -0
- vocal_api-0.3.0/vocal_api/routes/system.py +16 -0
- vocal_api-0.3.0/vocal_api/routes/transcription.py +81 -0
- vocal_api-0.3.0/vocal_api/routes/tts.py +98 -0
- vocal_api-0.3.0/vocal_api/services/__init__.py +9 -0
- vocal_api-0.3.0/vocal_api/services/model_service.py +96 -0
- vocal_api-0.3.0/vocal_api/services/transcription_service.py +196 -0
- vocal_api-0.3.0/vocal_api/services/tts_service.py +139 -0
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
.Python
|
|
7
|
+
build/
|
|
8
|
+
develop-eggs/
|
|
9
|
+
dist/
|
|
10
|
+
downloads/
|
|
11
|
+
eggs/
|
|
12
|
+
.eggs/
|
|
13
|
+
lib/
|
|
14
|
+
lib64/
|
|
15
|
+
parts/
|
|
16
|
+
sdist/
|
|
17
|
+
var/
|
|
18
|
+
wheels/
|
|
19
|
+
*.egg-info/
|
|
20
|
+
.installed.cfg
|
|
21
|
+
*.egg
|
|
22
|
+
MANIFEST
|
|
23
|
+
|
|
24
|
+
# Virtual Environment
|
|
25
|
+
.venv/
|
|
26
|
+
venv/
|
|
27
|
+
ENV/
|
|
28
|
+
env/
|
|
29
|
+
|
|
30
|
+
# UV
|
|
31
|
+
uv.lock
|
|
32
|
+
|
|
33
|
+
# IDE
|
|
34
|
+
.vscode/
|
|
35
|
+
.idea/
|
|
36
|
+
*.swp
|
|
37
|
+
*.swo
|
|
38
|
+
*~
|
|
39
|
+
.DS_Store
|
|
40
|
+
|
|
41
|
+
# Testing
|
|
42
|
+
.pytest_cache/
|
|
43
|
+
.coverage
|
|
44
|
+
htmlcov/
|
|
45
|
+
.tox/
|
|
46
|
+
|
|
47
|
+
# Jupyter
|
|
48
|
+
.ipynb_checkpoints
|
|
49
|
+
|
|
50
|
+
# Model cache
|
|
51
|
+
.cache/
|
|
52
|
+
models/
|
|
53
|
+
*.ckpt
|
|
54
|
+
*.pth
|
|
55
|
+
*.pt
|
|
56
|
+
*.safetensors
|
|
57
|
+
|
|
58
|
+
# Audio files (test data)
|
|
59
|
+
*.mp3
|
|
60
|
+
*.wav
|
|
61
|
+
*.m4a
|
|
62
|
+
*.ogg
|
|
63
|
+
*.flac
|
|
64
|
+
|
|
65
|
+
# Logs
|
|
66
|
+
*.log
|
|
67
|
+
logs/
|
|
68
|
+
|
|
69
|
+
# Environment variables
|
|
70
|
+
.env
|
|
71
|
+
.env.local
|
|
72
|
+
|
|
73
|
+
# OS
|
|
74
|
+
Thumbs.db
|
vocal_api-0.3.0/PKG-INFO
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: vocal-api
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: FastAPI server for Vocal - Ollama-style Voice Model Management
|
|
5
|
+
Project-URL: Homepage, https://github.com/niradler/vocal
|
|
6
|
+
Project-URL: Documentation, https://github.com/niradler/vocal#readme
|
|
7
|
+
Project-URL: Repository, https://github.com/niradler/vocal
|
|
8
|
+
Project-URL: Issues, https://github.com/niradler/vocal/issues
|
|
9
|
+
Author: Vocal Contributors
|
|
10
|
+
License: SSPL-1.0
|
|
11
|
+
Keywords: api,fastapi,openai-compatible,speech-to-text,stt,tts
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Framework :: FastAPI
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: Other/Proprietary License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Internet :: WWW/HTTP :: HTTP Servers
|
|
20
|
+
Requires-Python: >=3.11
|
|
21
|
+
Requires-Dist: aiofiles>=25.1.0
|
|
22
|
+
Requires-Dist: fastapi>=0.109.0
|
|
23
|
+
Requires-Dist: pydantic>=2.5.0
|
|
24
|
+
Requires-Dist: python-multipart>=0.0.6
|
|
25
|
+
Requires-Dist: uvicorn[standard]>=0.27.0
|
|
26
|
+
Requires-Dist: vocal-core>=0.3.0
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "vocal-api"
|
|
3
|
+
version = "0.3.0"
|
|
4
|
+
description = "FastAPI server for Vocal - Ollama-style Voice Model Management"
|
|
5
|
+
requires-python = ">=3.11"
|
|
6
|
+
license = { text = "SSPL-1.0" }
|
|
7
|
+
authors = [
|
|
8
|
+
{ name = "Vocal Contributors" }
|
|
9
|
+
]
|
|
10
|
+
keywords = ["fastapi", "api", "speech-to-text", "tts", "stt", "openai-compatible"]
|
|
11
|
+
classifiers = [
|
|
12
|
+
"Development Status :: 3 - Alpha",
|
|
13
|
+
"Intended Audience :: Developers",
|
|
14
|
+
"License :: Other/Proprietary License",
|
|
15
|
+
"Programming Language :: Python :: 3.11",
|
|
16
|
+
"Programming Language :: Python :: 3.12",
|
|
17
|
+
"Programming Language :: Python :: 3.13",
|
|
18
|
+
"Framework :: FastAPI",
|
|
19
|
+
"Topic :: Internet :: WWW/HTTP :: HTTP Servers",
|
|
20
|
+
]
|
|
21
|
+
dependencies = [
|
|
22
|
+
"fastapi>=0.109.0",
|
|
23
|
+
"uvicorn[standard]>=0.27.0",
|
|
24
|
+
"pydantic>=2.5.0",
|
|
25
|
+
"python-multipart>=0.0.6",
|
|
26
|
+
"vocal-core>=0.3.0",
|
|
27
|
+
"aiofiles>=25.1.0",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
[project.urls]
|
|
31
|
+
Homepage = "https://github.com/niradler/vocal"
|
|
32
|
+
Documentation = "https://github.com/niradler/vocal#readme"
|
|
33
|
+
Repository = "https://github.com/niradler/vocal"
|
|
34
|
+
Issues = "https://github.com/niradler/vocal/issues"
|
|
35
|
+
|
|
36
|
+
[build-system]
|
|
37
|
+
requires = ["hatchling"]
|
|
38
|
+
build-backend = "hatchling.build"
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from pydantic_settings import BaseSettings
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class Settings(BaseSettings):
|
|
5
|
+
"""Application settings"""
|
|
6
|
+
|
|
7
|
+
APP_NAME: str = "Vocal API"
|
|
8
|
+
VERSION: str = "0.1.0"
|
|
9
|
+
DEBUG: bool = True
|
|
10
|
+
|
|
11
|
+
CORS_ORIGINS: list[str] = ["*"]
|
|
12
|
+
|
|
13
|
+
MAX_UPLOAD_SIZE: int = 25 * 1024 * 1024
|
|
14
|
+
|
|
15
|
+
model_config = {
|
|
16
|
+
"env_file": ".env",
|
|
17
|
+
"case_sensitive": True,
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
settings = Settings()
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from vocal_core import ModelRegistry
|
|
2
|
+
|
|
3
|
+
from .services import ModelService, TranscriptionService, TTSService
|
|
4
|
+
|
|
5
|
+
_registry: ModelRegistry | None = None
|
|
6
|
+
_transcription_service: TranscriptionService | None = None
|
|
7
|
+
_model_service: ModelService | None = None
|
|
8
|
+
_tts_service: TTSService | None = None
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def get_registry() -> ModelRegistry:
|
|
12
|
+
"""Get or create ModelRegistry singleton"""
|
|
13
|
+
global _registry
|
|
14
|
+
if _registry is None:
|
|
15
|
+
_registry = ModelRegistry()
|
|
16
|
+
return _registry
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def get_transcription_service() -> TranscriptionService:
|
|
20
|
+
"""Get or create TranscriptionService singleton"""
|
|
21
|
+
global _transcription_service
|
|
22
|
+
if _transcription_service is None:
|
|
23
|
+
registry = get_registry()
|
|
24
|
+
_transcription_service = TranscriptionService(registry, keep_alive_seconds=300)
|
|
25
|
+
return _transcription_service
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def get_model_service() -> ModelService:
|
|
29
|
+
"""Get or create ModelService singleton"""
|
|
30
|
+
global _model_service
|
|
31
|
+
if _model_service is None:
|
|
32
|
+
registry = get_registry()
|
|
33
|
+
_model_service = ModelService(registry)
|
|
34
|
+
return _model_service
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def get_tts_service() -> TTSService:
|
|
38
|
+
"""Get or create TTSService singleton"""
|
|
39
|
+
global _tts_service
|
|
40
|
+
if _tts_service is None:
|
|
41
|
+
registry = get_registry()
|
|
42
|
+
_tts_service = TTSService(registry, keep_alive_seconds=300)
|
|
43
|
+
return _tts_service
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
from fastapi import FastAPI
|
|
2
|
+
from fastapi.middleware.cors import CORSMiddleware
|
|
3
|
+
from fastapi.openapi.utils import get_openapi
|
|
4
|
+
|
|
5
|
+
from .config import settings
|
|
6
|
+
from .dependencies import get_transcription_service, get_tts_service
|
|
7
|
+
from .routes import models_router, system_router, transcription_router, tts_router
|
|
8
|
+
|
|
9
|
+
app = FastAPI(
|
|
10
|
+
title="Vocal API",
|
|
11
|
+
description="Generic Speech AI Platform (STT + TTS)",
|
|
12
|
+
version=settings.VERSION,
|
|
13
|
+
docs_url="/docs",
|
|
14
|
+
redoc_url="/redoc",
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
app.add_middleware(
|
|
18
|
+
CORSMiddleware,
|
|
19
|
+
allow_origins=settings.CORS_ORIGINS,
|
|
20
|
+
allow_credentials=True,
|
|
21
|
+
allow_methods=["*"],
|
|
22
|
+
allow_headers=["*"],
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
app.include_router(transcription_router)
|
|
26
|
+
app.include_router(models_router)
|
|
27
|
+
app.include_router(tts_router)
|
|
28
|
+
app.include_router(system_router)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@app.on_event("startup")
|
|
32
|
+
async def startup_event():
|
|
33
|
+
"""Initialize services and start background tasks"""
|
|
34
|
+
transcription_service = get_transcription_service()
|
|
35
|
+
await transcription_service.start_cleanup_task()
|
|
36
|
+
|
|
37
|
+
tts_service = get_tts_service()
|
|
38
|
+
await tts_service.start_cleanup_task()
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@app.get("/", tags=["health"])
|
|
42
|
+
async def root():
|
|
43
|
+
"""API health check"""
|
|
44
|
+
return {
|
|
45
|
+
"status": "ok",
|
|
46
|
+
"message": "Vocal API - Ollama-style voice model management",
|
|
47
|
+
"version": settings.VERSION,
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@app.get("/health", tags=["health"])
|
|
52
|
+
async def health():
|
|
53
|
+
"""Detailed health check"""
|
|
54
|
+
return {
|
|
55
|
+
"status": "healthy",
|
|
56
|
+
"api_version": settings.VERSION,
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def custom_openapi():
|
|
61
|
+
if app.openapi_schema:
|
|
62
|
+
return app.openapi_schema
|
|
63
|
+
|
|
64
|
+
openapi_schema = get_openapi(
|
|
65
|
+
title="Vocal API",
|
|
66
|
+
version=settings.VERSION,
|
|
67
|
+
description="Generic Speech AI Platform (STT + TTS)",
|
|
68
|
+
routes=app.routes,
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
app.openapi_schema = openapi_schema
|
|
72
|
+
return app.openapi_schema
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
app.openapi = custom_openapi
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from .models import router as models_router
|
|
2
|
+
from .system import router as system_router
|
|
3
|
+
from .transcription import router as transcription_router
|
|
4
|
+
from .tts import tts_router
|
|
5
|
+
|
|
6
|
+
__all__ = [
|
|
7
|
+
"transcription_router",
|
|
8
|
+
"models_router",
|
|
9
|
+
"tts_router",
|
|
10
|
+
"system_router",
|
|
11
|
+
]
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
|
|
2
|
+
|
|
3
|
+
from ..dependencies import get_model_service
|
|
4
|
+
from ..models.model import (
|
|
5
|
+
ModelDownloadProgress,
|
|
6
|
+
ModelInfo,
|
|
7
|
+
ModelListResponse,
|
|
8
|
+
)
|
|
9
|
+
from ..services import ModelService
|
|
10
|
+
|
|
11
|
+
router = APIRouter(prefix="/v1/models", tags=["models"])
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@router.get(
|
|
15
|
+
"",
|
|
16
|
+
response_model=ModelListResponse,
|
|
17
|
+
summary="List models",
|
|
18
|
+
description="List all available models (Ollama-style)",
|
|
19
|
+
)
|
|
20
|
+
async def list_models(
|
|
21
|
+
status: str | None = None,
|
|
22
|
+
task: str | None = None,
|
|
23
|
+
service: ModelService = Depends(get_model_service),
|
|
24
|
+
) -> ModelListResponse:
|
|
25
|
+
"""
|
|
26
|
+
List all available models
|
|
27
|
+
|
|
28
|
+
Query params:
|
|
29
|
+
- status: Filter by status (available, downloading, not_downloaded)
|
|
30
|
+
- task: Filter by task (stt, tts)
|
|
31
|
+
"""
|
|
32
|
+
models = await service.list_models(status_filter=status, task=task)
|
|
33
|
+
return ModelListResponse(models=models, total=len(models))
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@router.get(
|
|
37
|
+
"/{model_id:path}",
|
|
38
|
+
response_model=ModelInfo,
|
|
39
|
+
summary="Get model info",
|
|
40
|
+
description="Get detailed information about a specific model",
|
|
41
|
+
)
|
|
42
|
+
async def get_model(model_id: str, service: ModelService = Depends(get_model_service)) -> ModelInfo:
|
|
43
|
+
"""Get detailed model information"""
|
|
44
|
+
model = await service.get_model(model_id)
|
|
45
|
+
if not model:
|
|
46
|
+
raise HTTPException(404, f"Model {model_id} not found")
|
|
47
|
+
return model
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@router.post(
|
|
51
|
+
"/{model_id:path}/download",
|
|
52
|
+
response_model=ModelDownloadProgress,
|
|
53
|
+
summary="Download model",
|
|
54
|
+
description="Download a model for local use (Ollama-style pull)",
|
|
55
|
+
)
|
|
56
|
+
async def download_model(
|
|
57
|
+
model_id: str,
|
|
58
|
+
background_tasks: BackgroundTasks,
|
|
59
|
+
service: ModelService = Depends(get_model_service),
|
|
60
|
+
) -> ModelDownloadProgress:
|
|
61
|
+
"""
|
|
62
|
+
Start downloading a model
|
|
63
|
+
|
|
64
|
+
Returns immediately with initial status.
|
|
65
|
+
Check progress with GET /models/{model_id}/download/status
|
|
66
|
+
"""
|
|
67
|
+
model = await service.get_model(model_id)
|
|
68
|
+
if not model:
|
|
69
|
+
raise HTTPException(404, f"Model {model_id} not found")
|
|
70
|
+
|
|
71
|
+
async def download_task():
|
|
72
|
+
async for progress in service.download_model(model_id):
|
|
73
|
+
pass
|
|
74
|
+
|
|
75
|
+
background_tasks.add_task(download_task)
|
|
76
|
+
|
|
77
|
+
return ModelDownloadProgress(
|
|
78
|
+
model_id=model_id,
|
|
79
|
+
status="downloading",
|
|
80
|
+
progress=0.0,
|
|
81
|
+
downloaded_bytes=0,
|
|
82
|
+
total_bytes=0,
|
|
83
|
+
message="Starting download...",
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
@router.get(
|
|
88
|
+
"/{model_id:path}/download/status",
|
|
89
|
+
response_model=ModelDownloadProgress,
|
|
90
|
+
summary="Get download status",
|
|
91
|
+
)
|
|
92
|
+
async def get_download_status(model_id: str, service: ModelService = Depends(get_model_service)) -> ModelDownloadProgress:
|
|
93
|
+
"""Check model download progress"""
|
|
94
|
+
status = await service.get_download_status(model_id)
|
|
95
|
+
if not status:
|
|
96
|
+
model = await service.get_model(model_id)
|
|
97
|
+
if not model:
|
|
98
|
+
raise HTTPException(404, f"Model {model_id} not found")
|
|
99
|
+
|
|
100
|
+
if model.status == "available":
|
|
101
|
+
return ModelDownloadProgress(
|
|
102
|
+
model_id=model_id,
|
|
103
|
+
status="available",
|
|
104
|
+
progress=1.0,
|
|
105
|
+
downloaded_bytes=model.size,
|
|
106
|
+
total_bytes=model.size,
|
|
107
|
+
message="Model already downloaded",
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
raise HTTPException(404, "No active download for this model")
|
|
111
|
+
|
|
112
|
+
return status
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
@router.delete("/{model_id:path}", summary="Delete model", description="Remove a downloaded model")
|
|
116
|
+
async def delete_model(model_id: str, service: ModelService = Depends(get_model_service)):
|
|
117
|
+
"""Delete a downloaded model"""
|
|
118
|
+
success = await service.delete_model(model_id)
|
|
119
|
+
if not success:
|
|
120
|
+
raise HTTPException(404, f"Model {model_id} not found or not downloaded")
|
|
121
|
+
|
|
122
|
+
return {"status": "deleted", "model_id": model_id}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from fastapi import APIRouter
|
|
2
|
+
|
|
3
|
+
from vocal_core.utils import get_device_info
|
|
4
|
+
|
|
5
|
+
router = APIRouter(prefix="/v1/system", tags=["system"])
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@router.get("/device")
|
|
9
|
+
async def get_device():
|
|
10
|
+
"""
|
|
11
|
+
Get device and hardware information
|
|
12
|
+
|
|
13
|
+
Returns information about available compute devices (CPU, GPU)
|
|
14
|
+
and optimization settings being used.
|
|
15
|
+
"""
|
|
16
|
+
return get_device_info()
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
from typing import Annotated
|
|
2
|
+
|
|
3
|
+
from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile
|
|
4
|
+
|
|
5
|
+
from ..config import settings
|
|
6
|
+
from ..dependencies import get_transcription_service
|
|
7
|
+
from ..models.transcription import (
|
|
8
|
+
TranscriptionFormat,
|
|
9
|
+
TranscriptionRequest,
|
|
10
|
+
TranscriptionResponse,
|
|
11
|
+
)
|
|
12
|
+
from ..services import TranscriptionService
|
|
13
|
+
|
|
14
|
+
router = APIRouter(prefix="/v1/audio", tags=["transcription"])
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@router.post(
|
|
18
|
+
"/transcriptions",
|
|
19
|
+
response_model=TranscriptionResponse,
|
|
20
|
+
summary="Transcribe audio",
|
|
21
|
+
description="Transcribe audio file to text using specified model",
|
|
22
|
+
)
|
|
23
|
+
async def create_transcription(
|
|
24
|
+
file: Annotated[UploadFile, File(description="Audio file to transcribe")],
|
|
25
|
+
model: Annotated[str, Form(description="Model ID")] = "Systran/faster-whisper-tiny",
|
|
26
|
+
language: Annotated[str | None, Form(description="Language code")] = None,
|
|
27
|
+
prompt: Annotated[str | None, Form(description="Style prompt")] = None,
|
|
28
|
+
response_format: Annotated[TranscriptionFormat, Form(description="Output format")] = TranscriptionFormat.JSON,
|
|
29
|
+
temperature: Annotated[float, Form(ge=0.0, le=1.0)] = 0.0,
|
|
30
|
+
service: TranscriptionService = Depends(get_transcription_service),
|
|
31
|
+
) -> TranscriptionResponse:
|
|
32
|
+
"""
|
|
33
|
+
Transcribe an audio file.
|
|
34
|
+
|
|
35
|
+
**Supported formats:** mp3, mp4, wav, m4a, flac, ogg, webm
|
|
36
|
+
**Max file size:** 25MB
|
|
37
|
+
|
|
38
|
+
Returns transcription with optional word/segment timestamps.
|
|
39
|
+
"""
|
|
40
|
+
file.file.seek(0, 2)
|
|
41
|
+
size = file.file.tell()
|
|
42
|
+
file.file.seek(0)
|
|
43
|
+
|
|
44
|
+
if size > settings.MAX_UPLOAD_SIZE:
|
|
45
|
+
raise HTTPException(400, f"File too large. Max {settings.MAX_UPLOAD_SIZE // (1024 * 1024)}MB.")
|
|
46
|
+
|
|
47
|
+
request = TranscriptionRequest(
|
|
48
|
+
model=model,
|
|
49
|
+
language=language,
|
|
50
|
+
prompt=prompt,
|
|
51
|
+
response_format=response_format,
|
|
52
|
+
temperature=temperature,
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
try:
|
|
56
|
+
result = await service.transcribe(file, request)
|
|
57
|
+
return result
|
|
58
|
+
except ValueError as e:
|
|
59
|
+
raise HTTPException(400, str(e))
|
|
60
|
+
except Exception as e:
|
|
61
|
+
raise HTTPException(500, f"Transcription failed: {str(e)}")
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@router.post(
|
|
65
|
+
"/translations",
|
|
66
|
+
response_model=TranscriptionResponse,
|
|
67
|
+
summary="Translate audio to English",
|
|
68
|
+
description="Translate audio to English text",
|
|
69
|
+
)
|
|
70
|
+
async def create_translation(
|
|
71
|
+
file: Annotated[UploadFile, File()],
|
|
72
|
+
model: Annotated[str, Form()] = "Systran/faster-whisper-tiny",
|
|
73
|
+
service: TranscriptionService = Depends(get_transcription_service),
|
|
74
|
+
) -> TranscriptionResponse:
|
|
75
|
+
"""Translate audio to English."""
|
|
76
|
+
try:
|
|
77
|
+
return await service.translate(file, model)
|
|
78
|
+
except ValueError as e:
|
|
79
|
+
raise HTTPException(400, str(e))
|
|
80
|
+
except Exception as e:
|
|
81
|
+
raise HTTPException(500, f"Translation failed: {str(e)}")
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
from fastapi import APIRouter, Depends, HTTPException, Response
|
|
2
|
+
from pydantic import BaseModel, Field
|
|
3
|
+
|
|
4
|
+
from ..dependencies import get_tts_service
|
|
5
|
+
from ..services.tts_service import TTSService
|
|
6
|
+
|
|
7
|
+
router = APIRouter(prefix="/v1/audio", tags=["audio"])
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TTSRequest(BaseModel):
|
|
11
|
+
"""Text-to-Speech request (OpenAI-compatible)"""
|
|
12
|
+
|
|
13
|
+
model: str = Field(..., description="TTS model to use (e.g., 'hexgrad/Kokoro-82M')")
|
|
14
|
+
input: str = Field(..., description="The text to synthesize")
|
|
15
|
+
voice: str | None = Field(None, description="Voice ID to use")
|
|
16
|
+
speed: float = Field(1.0, ge=0.25, le=4.0, description="Speech speed multiplier")
|
|
17
|
+
response_format: str = Field("wav", description="Audio format (currently only 'wav' supported)")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class VoiceInfo(BaseModel):
|
|
21
|
+
"""Voice information"""
|
|
22
|
+
|
|
23
|
+
id: str
|
|
24
|
+
name: str
|
|
25
|
+
language: str
|
|
26
|
+
gender: str | None = None
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class VoicesResponse(BaseModel):
|
|
30
|
+
"""Response containing list of voices"""
|
|
31
|
+
|
|
32
|
+
voices: list[VoiceInfo]
|
|
33
|
+
total: int
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@router.post(
|
|
37
|
+
"/speech",
|
|
38
|
+
response_class=Response,
|
|
39
|
+
responses={200: {"content": {"audio/wav": {}}, "description": "Audio file in WAV format"}},
|
|
40
|
+
)
|
|
41
|
+
async def text_to_speech(request: TTSRequest, service: TTSService = Depends(get_tts_service)):
|
|
42
|
+
"""
|
|
43
|
+
Generate speech from text (OpenAI-compatible endpoint)
|
|
44
|
+
|
|
45
|
+
This endpoint synthesizes audio from text using the specified TTS model.
|
|
46
|
+
|
|
47
|
+
- **model**: TTS model to use (e.g., 'hexgrad/Kokoro-82M', 'coqui/XTTS-v2')
|
|
48
|
+
- **input**: The text to convert to speech
|
|
49
|
+
- **voice**: Optional voice ID (use /v1/audio/voices to list available voices)
|
|
50
|
+
- **speed**: Speech speed multiplier (0.25 to 4.0, default: 1.0)
|
|
51
|
+
- **response_format**: Audio format (currently only 'wav' supported)
|
|
52
|
+
"""
|
|
53
|
+
try:
|
|
54
|
+
result = await service.synthesize(
|
|
55
|
+
model_id=request.model,
|
|
56
|
+
text=request.input,
|
|
57
|
+
voice=request.voice,
|
|
58
|
+
speed=request.speed,
|
|
59
|
+
output_format=request.response_format,
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
return Response(
|
|
63
|
+
content=result.audio_data,
|
|
64
|
+
media_type=f"audio/{result.format}",
|
|
65
|
+
headers={
|
|
66
|
+
"Content-Disposition": f"attachment; filename=speech.{result.format}",
|
|
67
|
+
"X-Duration": str(result.duration),
|
|
68
|
+
"X-Sample-Rate": str(result.sample_rate),
|
|
69
|
+
},
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
except ValueError as e:
|
|
73
|
+
raise HTTPException(status_code=400, detail=str(e))
|
|
74
|
+
except Exception as e:
|
|
75
|
+
raise HTTPException(status_code=500, detail=f"TTS error: {str(e)}")
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@router.get("/voices", response_model=VoicesResponse)
|
|
79
|
+
async def list_voices(model: str | None = None, service: TTSService = Depends(get_tts_service)):
|
|
80
|
+
"""
|
|
81
|
+
List available TTS voices
|
|
82
|
+
|
|
83
|
+
Returns a list of all available voices that can be used for speech synthesis.
|
|
84
|
+
|
|
85
|
+
- **model**: Optional model ID to list voices for a specific model
|
|
86
|
+
"""
|
|
87
|
+
try:
|
|
88
|
+
voices = await service.get_voices(model_id=model)
|
|
89
|
+
|
|
90
|
+
voice_infos = [VoiceInfo(id=v.id, name=v.name, language=v.language, gender=v.gender) for v in voices]
|
|
91
|
+
|
|
92
|
+
return VoicesResponse(voices=voice_infos, total=len(voice_infos))
|
|
93
|
+
|
|
94
|
+
except Exception as e:
|
|
95
|
+
raise HTTPException(status_code=500, detail=f"Failed to list voices: {str(e)}")
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
tts_router = router
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
from collections.abc import AsyncIterator
|
|
2
|
+
|
|
3
|
+
from vocal_core import ModelRegistry
|
|
4
|
+
|
|
5
|
+
from ..models.model import (
|
|
6
|
+
ModelDownloadProgress,
|
|
7
|
+
ModelInfo,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ModelService:
|
|
12
|
+
"""Service for managing models"""
|
|
13
|
+
|
|
14
|
+
def __init__(self, registry: ModelRegistry):
|
|
15
|
+
self.registry = registry
|
|
16
|
+
self._download_status: dict[str, ModelDownloadProgress] = {}
|
|
17
|
+
|
|
18
|
+
async def list_models(self, status_filter: str | None = None, task: str | None = None) -> list[ModelInfo]:
|
|
19
|
+
"""List all available models"""
|
|
20
|
+
models = await self.registry.list_models(task=task, status_filter=status_filter)
|
|
21
|
+
|
|
22
|
+
return [self._convert_model_info(m) for m in models]
|
|
23
|
+
|
|
24
|
+
async def get_model(self, model_id: str) -> ModelInfo | None:
|
|
25
|
+
"""Get model information"""
|
|
26
|
+
model = await self.registry.get_model(model_id)
|
|
27
|
+
if not model:
|
|
28
|
+
return None
|
|
29
|
+
return self._convert_model_info(model)
|
|
30
|
+
|
|
31
|
+
async def download_model(self, model_id: str, quantization: str | None = None) -> AsyncIterator[ModelDownloadProgress]:
|
|
32
|
+
"""Download a model"""
|
|
33
|
+
self._download_status[model_id] = ModelDownloadProgress(
|
|
34
|
+
model_id=model_id,
|
|
35
|
+
status="downloading",
|
|
36
|
+
progress=0.0,
|
|
37
|
+
downloaded_bytes=0,
|
|
38
|
+
total_bytes=0,
|
|
39
|
+
message="Starting download...",
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
try:
|
|
43
|
+
async for downloaded, total, status in self.registry.download_model(model_id, quantization=quantization):
|
|
44
|
+
progress = (downloaded / total) if total > 0 else 0.0
|
|
45
|
+
|
|
46
|
+
self._download_status[model_id] = ModelDownloadProgress(
|
|
47
|
+
model_id=model_id,
|
|
48
|
+
status=status.value,
|
|
49
|
+
progress=progress,
|
|
50
|
+
downloaded_bytes=downloaded,
|
|
51
|
+
total_bytes=total,
|
|
52
|
+
message=f"Downloaded {downloaded}/{total} bytes",
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
yield self._download_status[model_id]
|
|
56
|
+
|
|
57
|
+
except Exception as e:
|
|
58
|
+
self._download_status[model_id] = ModelDownloadProgress(
|
|
59
|
+
model_id=model_id,
|
|
60
|
+
status="error",
|
|
61
|
+
progress=0.0,
|
|
62
|
+
downloaded_bytes=0,
|
|
63
|
+
total_bytes=0,
|
|
64
|
+
message=f"Download failed: {str(e)}",
|
|
65
|
+
)
|
|
66
|
+
yield self._download_status[model_id]
|
|
67
|
+
|
|
68
|
+
async def get_download_status(self, model_id: str) -> ModelDownloadProgress | None:
|
|
69
|
+
"""Get download status for a model"""
|
|
70
|
+
return self._download_status.get(model_id)
|
|
71
|
+
|
|
72
|
+
async def delete_model(self, model_id: str) -> bool:
|
|
73
|
+
"""Delete a downloaded model"""
|
|
74
|
+
return await self.registry.delete_model(model_id)
|
|
75
|
+
|
|
76
|
+
def _convert_model_info(self, model) -> ModelInfo:
|
|
77
|
+
"""Convert core ModelInfo to API ModelInfo"""
|
|
78
|
+
return ModelInfo(
|
|
79
|
+
id=model.id,
|
|
80
|
+
name=model.name,
|
|
81
|
+
provider=model.provider.value,
|
|
82
|
+
description=model.description,
|
|
83
|
+
size=model.size,
|
|
84
|
+
size_readable=model.size_readable,
|
|
85
|
+
parameters=model.parameters,
|
|
86
|
+
languages=model.languages,
|
|
87
|
+
backend=model.backend.value,
|
|
88
|
+
status=model.status.value,
|
|
89
|
+
source_url=model.source_url,
|
|
90
|
+
license=model.license,
|
|
91
|
+
recommended_vram=model.recommended_vram,
|
|
92
|
+
task=model.task.value,
|
|
93
|
+
local_path=model.local_path,
|
|
94
|
+
created_at=model.created_at,
|
|
95
|
+
updated_at=model.updated_at,
|
|
96
|
+
)
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import tempfile
|
|
3
|
+
import time
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import aiofiles
|
|
7
|
+
from fastapi import UploadFile
|
|
8
|
+
|
|
9
|
+
from vocal_core import FasterWhisperAdapter, ModelRegistry, TranscriptionResult
|
|
10
|
+
|
|
11
|
+
from ..models.transcription import (
|
|
12
|
+
TranscriptionRequest,
|
|
13
|
+
TranscriptionResponse,
|
|
14
|
+
TranscriptionSegment,
|
|
15
|
+
TranscriptionWord,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class TranscriptionService:
|
|
20
|
+
"""Service for handling transcription requests with Ollama-style keep-alive"""
|
|
21
|
+
|
|
22
|
+
def __init__(self, registry: ModelRegistry, keep_alive_seconds: int = 300):
|
|
23
|
+
self.registry = registry
|
|
24
|
+
self.adapters: dict[str, FasterWhisperAdapter] = {}
|
|
25
|
+
self.last_used: dict[str, float] = {}
|
|
26
|
+
self.keep_alive_seconds = keep_alive_seconds
|
|
27
|
+
self._cleanup_task = None
|
|
28
|
+
|
|
29
|
+
async def start_cleanup_task(self):
|
|
30
|
+
"""Start background task to cleanup unused models"""
|
|
31
|
+
if self._cleanup_task is None:
|
|
32
|
+
self._cleanup_task = asyncio.create_task(self._cleanup_loop())
|
|
33
|
+
|
|
34
|
+
async def _cleanup_loop(self):
|
|
35
|
+
"""Background task to unload models after keep_alive expires"""
|
|
36
|
+
while True:
|
|
37
|
+
try:
|
|
38
|
+
await asyncio.sleep(60)
|
|
39
|
+
current_time = time.time()
|
|
40
|
+
models_to_unload = []
|
|
41
|
+
|
|
42
|
+
for model_id, last_used_time in self.last_used.items():
|
|
43
|
+
if current_time - last_used_time > self.keep_alive_seconds:
|
|
44
|
+
models_to_unload.append(model_id)
|
|
45
|
+
|
|
46
|
+
for model_id in models_to_unload:
|
|
47
|
+
if model_id in self.adapters:
|
|
48
|
+
adapter = self.adapters[model_id]
|
|
49
|
+
await adapter.unload_model()
|
|
50
|
+
del self.adapters[model_id]
|
|
51
|
+
del self.last_used[model_id]
|
|
52
|
+
|
|
53
|
+
except Exception:
|
|
54
|
+
pass
|
|
55
|
+
|
|
56
|
+
async def transcribe(self, file: UploadFile, request: TranscriptionRequest) -> TranscriptionResponse:
|
|
57
|
+
"""
|
|
58
|
+
Transcribe audio file
|
|
59
|
+
|
|
60
|
+
Args:
|
|
61
|
+
file: Uploaded audio file
|
|
62
|
+
request: Transcription parameters
|
|
63
|
+
|
|
64
|
+
Returns:
|
|
65
|
+
TranscriptionResponse with text and metadata
|
|
66
|
+
"""
|
|
67
|
+
model_id = request.model
|
|
68
|
+
|
|
69
|
+
model_info = await self.registry.get_model(model_id)
|
|
70
|
+
if not model_info:
|
|
71
|
+
raise ValueError(f"Model {model_id} not found in registry")
|
|
72
|
+
|
|
73
|
+
model_path = self.registry.get_model_path(model_id)
|
|
74
|
+
|
|
75
|
+
if not model_path:
|
|
76
|
+
raise ValueError(f"Model {model_id} not downloaded. Download it first: POST /v1/models/{model_id}/download")
|
|
77
|
+
|
|
78
|
+
adapter = await self._get_or_create_adapter(model_id, model_path)
|
|
79
|
+
self.last_used[model_id] = time.time()
|
|
80
|
+
|
|
81
|
+
temp_file = None
|
|
82
|
+
temp_path = None
|
|
83
|
+
try:
|
|
84
|
+
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".audio")
|
|
85
|
+
temp_path = temp_file.name
|
|
86
|
+
temp_file.close()
|
|
87
|
+
|
|
88
|
+
content = await file.read()
|
|
89
|
+
|
|
90
|
+
async with aiofiles.open(temp_path, "wb") as f:
|
|
91
|
+
await f.write(content)
|
|
92
|
+
|
|
93
|
+
word_timestamps = "word" in request.timestamp_granularities
|
|
94
|
+
|
|
95
|
+
result = await adapter.transcribe(
|
|
96
|
+
audio=temp_path,
|
|
97
|
+
language=request.language,
|
|
98
|
+
task="transcribe",
|
|
99
|
+
temperature=request.temperature,
|
|
100
|
+
word_timestamps=word_timestamps,
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
return self._convert_result(result)
|
|
104
|
+
|
|
105
|
+
finally:
|
|
106
|
+
if temp_path and Path(temp_path).exists():
|
|
107
|
+
Path(temp_path).unlink()
|
|
108
|
+
|
|
109
|
+
async def translate(self, file: UploadFile, model_id: str) -> TranscriptionResponse:
    """
    Translate audio to English.

    Args:
        file: Uploaded audio file
        model_id: Model to use

    Returns:
        TranscriptionResponse with translated text

    Raises:
        ValueError: If the model has not been downloaded yet.
    """
    model_path = self.registry.get_model_path(model_id)
    if not model_path:
        raise ValueError(f"Model {model_id} not downloaded")

    adapter = await self._get_or_create_adapter(model_id, model_path)
    # Touch the keep-alive timestamp so the model is not evicted mid-use.
    self.last_used[model_id] = time.time()

    tmp_path = None
    try:
        # Reserve a temp file name up front; the async writer below re-opens
        # it, so the synchronous handle is closed immediately.
        handle = tempfile.NamedTemporaryFile(delete=False, suffix=".audio")
        tmp_path = handle.name
        handle.close()

        payload = await file.read()

        async with aiofiles.open(tmp_path, "wb") as out:
            await out.write(payload)

        translated = await adapter.transcribe(
            audio=tmp_path,
            task="translate",
        )
        return self._convert_result(translated)

    finally:
        # Best-effort cleanup of the temporary audio file.
        if tmp_path and Path(tmp_path).exists():
            Path(tmp_path).unlink()
|
|
149
|
+
|
|
150
|
+
async def _get_or_create_adapter(self, model_id: str, model_path: Path) -> FasterWhisperAdapter:
    """Return the adapter cached for *model_id*, loading it on first use."""
    cached = self.adapters.get(model_id)
    if cached is not None:
        return cached

    fresh = FasterWhisperAdapter()
    await fresh.load_model(model_path)
    self.adapters[model_id] = fresh
    return fresh
|
|
158
|
+
|
|
159
|
+
def _convert_result(self, result: TranscriptionResult) -> TranscriptionResponse:
    """Map a core TranscriptionResult onto the public API response model."""

    def to_segment(seg) -> TranscriptionSegment:
        # One-to-one field copy from the core segment to the API schema.
        return TranscriptionSegment(
            id=seg.id,
            start=seg.start,
            end=seg.end,
            text=seg.text,
            tokens=seg.tokens,
            temperature=seg.temperature,
            avg_logprob=seg.avg_logprob,
            compression_ratio=seg.compression_ratio,
            no_speech_prob=seg.no_speech_prob,
        )

    def to_word(w) -> TranscriptionWord:
        return TranscriptionWord(
            word=w.word,
            start=w.start,
            end=w.end,
            probability=w.probability,
        )

    # Empty/None segment and word lists are reported as None, matching the
    # optional fields of the response model.
    segments = [to_segment(s) for s in result.segments] if result.segments else None
    words = [to_word(w) for w in result.words] if result.words else None

    return TranscriptionResponse(
        text=result.text,
        language=result.language,
        duration=result.duration,
        segments=segments,
        words=words,
    )
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
import asyncio
import logging
import time
from pathlib import Path

from vocal_core import ModelRegistry
from vocal_core.adapters.tts import SimpleTTSAdapter, TTSAdapter, TTSResult, Voice
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class TTSService:
    """Service for handling TTS operations with Ollama-style keep-alive.

    Loaded adapters are cached per model id; a background task evicts any
    adapter that has been idle for longer than ``keep_alive_seconds``.
    """

    # Seconds between idle-model sweeps performed by the cleanup loop.
    _CLEANUP_INTERVAL_SECONDS = 60

    def __init__(self, registry: ModelRegistry, keep_alive_seconds: int = 300):
        """
        Args:
            registry: Model registry used to resolve model metadata and paths.
            keep_alive_seconds: Idle time after which a loaded model is unloaded.
        """
        self.registry = registry
        # model_id -> loaded adapter instance
        self.adapters: dict[str, TTSAdapter] = {}
        # model_id -> unix timestamp of last use; drives keep-alive eviction
        self.last_used: dict[str, float] = {}
        self.keep_alive_seconds = keep_alive_seconds
        self._cleanup_task: asyncio.Task | None = None

    async def start_cleanup_task(self):
        """Start background task to cleanup unused models (idempotent)."""
        if self._cleanup_task is None:
            self._cleanup_task = asyncio.create_task(self._cleanup_loop())

    async def _cleanup_loop(self):
        """Unload models idle longer than ``keep_alive_seconds``.

        Runs until cancelled (asyncio.CancelledError propagates so the task
        can be stopped cleanly).  A failure while unloading one model is
        logged and does not prevent eviction of the remaining idle models.
        """
        log = logging.getLogger(__name__)
        while True:
            await asyncio.sleep(self._CLEANUP_INTERVAL_SECONDS)
            now = time.time()
            expired = [
                model_id
                for model_id, used_at in self.last_used.items()
                if now - used_at > self.keep_alive_seconds
            ]

            for model_id in expired:
                try:
                    adapter = self.adapters.get(model_id)
                    if adapter is not None:
                        # Not every adapter implements explicit unloading.
                        if hasattr(adapter, "unload_model"):
                            await adapter.unload_model()
                        del self.adapters[model_id]
                    self.last_used.pop(model_id, None)
                except Exception:
                    # Keep the loop alive; this model will be retried on the
                    # next sweep.  (A bare `except Exception: pass` here used
                    # to swallow errors and abort the whole sweep.)
                    log.exception("Failed to unload idle TTS model %s", model_id)

    async def synthesize(
        self,
        model_id: str,
        text: str,
        voice: str | None = None,
        speed: float = 1.0,
        output_format: str = "wav",
    ) -> TTSResult:
        """
        Synthesize text to speech using specified model

        Args:
            model_id: TTS model identifier (use "pyttsx3" for system TTS)
            text: Text to convert to speech
            voice: Voice ID to use
            speed: Speech speed multiplier
            output_format: Output audio format

        Returns:
            TTSResult with audio data

        Raises:
            ValueError: If the model is unknown, not a TTS model, or not
                downloaded yet.
        """
        # "pyttsx3" bypasses the registry entirely and uses the system TTS.
        if model_id == "pyttsx3":
            adapter = await self._get_or_create_simple_adapter()
            self.last_used[model_id] = time.time()
            return await adapter.synthesize(text=text, voice=voice, speed=speed, output_format=output_format)

        model_info = await self.registry.get_model(model_id)
        if not model_info:
            raise ValueError(f"Model {model_id} not found in registry")

        if model_info.task.value != "tts":
            raise ValueError(f"Model {model_id} is not a TTS model (task: {model_info.task})")

        model_path = self.registry.get_model_path(model_id)

        if not model_path:
            raise ValueError(f"Model {model_id} not downloaded. Download it first: POST /v1/models/{model_id}/download")

        adapter = await self._get_or_create_adapter(model_id, model_path, model_info.backend.value)
        # Refresh the keep-alive timestamp so the cleanup loop does not evict
        # a model that is actively serving requests.
        self.last_used[model_id] = time.time()

        return await adapter.synthesize(text=text, voice=voice, speed=speed, output_format=output_format)

    async def get_voices(self, model_id: str | None = None) -> list[Voice]:
        """
        Get list of available voices

        Args:
            model_id: Optional model ID to get voices for specific model

        Returns:
            List of Voice objects

        Raises:
            ValueError: If the model is unknown or not downloaded.
        """
        # No model (or the "pyttsx3" sentinel) means the system TTS engine.
        if not model_id or model_id == "pyttsx3":
            adapter = await self._get_or_create_simple_adapter()
            self.last_used["pyttsx3"] = time.time()
            return await adapter.get_voices()

        model_path = self.registry.get_model_path(model_id)
        if not model_path:
            raise ValueError(f"Model {model_id} not downloaded")

        model_info = await self.registry.get_model(model_id)
        if not model_info:
            raise ValueError(f"Model {model_id} not found")

        adapter = await self._get_or_create_adapter(model_id, model_path, model_info.backend.value)
        self.last_used[model_id] = time.time()
        return await adapter.get_voices()

    async def _get_or_create_simple_adapter(self) -> SimpleTTSAdapter:
        """Get or create the cached system (pyttsx3) TTS adapter."""
        if "pyttsx3" not in self.adapters:
            adapter = SimpleTTSAdapter()
            # The system TTS has no model files; Path(".") is a placeholder
            # to satisfy the load_model interface.
            await adapter.load_model(Path("."))
            self.adapters["pyttsx3"] = adapter
        return self.adapters["pyttsx3"]

    async def _get_or_create_adapter(self, model_id: str, model_path: Path, backend: str) -> TTSAdapter:
        """Get or create (and cache) the TTS adapter for a downloaded model."""
        if model_id not in self.adapters:
            adapter = self._create_adapter(backend)
            await adapter.load_model(model_path)
            self.adapters[model_id] = adapter

        return self.adapters[model_id]

    def _create_adapter(self, backend: str) -> TTSAdapter:
        """Create the TTS adapter for *backend*.

        All backends currently map to SimpleTTSAdapter; the parameter is kept
        so backend-specific adapters can be added without changing callers.
        (The previous if/else had two byte-identical branches.)
        """
        return SimpleTTSAdapter()
|