yta-fastapi-docker-pydantic-models 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- yta_fastapi_docker_pydantic_models-0.0.1/LICENSE +19 -0
- yta_fastapi_docker_pydantic_models-0.0.1/PKG-INFO +23 -0
- yta_fastapi_docker_pydantic_models-0.0.1/README.md +5 -0
- yta_fastapi_docker_pydantic_models-0.0.1/pyproject.toml +28 -0
- yta_fastapi_docker_pydantic_models-0.0.1/src/yta_fastapi_docker_pydantic_models/__init__.py +3 -0
- yta_fastapi_docker_pydantic_models-0.0.1/src/yta_fastapi_docker_pydantic_models/vibevoice/__init__.py +197 -0
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
Copyright (c) 2018 The Python Packaging Authority
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
4
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
5
|
+
in the Software without restriction, including without limitation the rights
|
|
6
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
7
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
8
|
+
furnished to do so, subject to the following conditions:
|
|
9
|
+
|
|
10
|
+
The above copyright notice and this permission notice shall be included in all
|
|
11
|
+
copies or substantial portions of the Software.
|
|
12
|
+
|
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
19
|
+
SOFTWARE.
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: yta-fastapi-docker-pydantic-models
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Youtube Autonomous FastAPI Docker Pydantic Models Module
|
|
5
|
+
License-File: LICENSE
|
|
6
|
+
Author: danialcala94
|
|
7
|
+
Author-email: danielalcalavalera@gmail.com
|
|
8
|
+
Requires-Python: >=3.8,<3.14
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Requires-Dist: pydantic (>=0.0.1,<999.0.0)
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
|
|
19
|
+
# Youtube Autonomous FastAPI Docker Pydantic Models Module
|
|
20
|
+
|
|
21
|
+
This project does not include any FastAPI or Docker container configuration.
|
|
22
|
+
|
|
23
|
+
This module contains the `pydantic` models that we will use across our different `FastAPI-Docker` projects, so that they can be reused between containers.
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
# Youtube Autonomous FastAPI Docker Pydantic Models Module
|
|
2
|
+
|
|
3
|
+
This project does not include any FastAPI or Docker container configuration.
|
|
4
|
+
|
|
5
|
+
This module contains the `pydantic` models that we will use across our different `FastAPI-Docker` projects, so that they can be reused between containers.
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "yta-fastapi-docker-pydantic-models"
|
|
3
|
+
version = "0.0.1"
|
|
4
|
+
description = "Youtube Autonomous FastAPI Docker Pydantic Models Module"
|
|
5
|
+
authors = [
|
|
6
|
+
{name = "danialcala94", email = "danielalcalavalera@gmail.com"}
|
|
7
|
+
]
|
|
8
|
+
readme = "README.md"
|
|
9
|
+
requires-python = ">=3.8,<3.14"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"pydantic (>=0.0.1,<999.0.0)"
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
[tool.poetry]
|
|
15
|
+
packages = [{include = "yta_fastapi_docker_pydantic_models", from = "src"}]
|
|
16
|
+
|
|
17
|
+
[tool.poetry.group.dev.dependencies]
|
|
18
|
+
pytest = "^8.3.5"
|
|
19
|
+
|
|
20
|
+
[tool.pytest.ini_options]
|
|
21
|
+
markers = [
|
|
22
|
+
"mandatory: mandatory tests for release",
|
|
23
|
+
"additional: exhaustive and demanding tests"
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
[build-system]
|
|
27
|
+
requires = ["poetry-core>=2.0.0,<3.0.0"]
|
|
28
|
+
build-backend = "poetry.core.masonry.api"
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pydantic models for API request/response schemas.
|
|
3
|
+
|
|
4
|
+
Extracted from 'yta-fastapi-docker-vibevoice/api/models.py'
|
|
5
|
+
"""
|
|
6
|
+
from pydantic import BaseModel, Field, validator
|
|
7
|
+
from typing import Optional, Literal, List
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# ============================================================
|
|
11
|
+
# OpenAI-Compatible TTS Models
|
|
12
|
+
# ============================================================
|
|
13
|
+
|
|
14
|
+
class OpenAITTSRequest(BaseModel):
    """OpenAI-compatible TTS request schema."""

    # Fields, in declaration order:
    #   model           - model name, defaults to "tts-1"
    #   input           - required text to synthesize, capped at 4096 characters
    #   voice           - required voice name (OpenAI name or VibeVoice preset)
    #   response_format - one of the listed audio formats, defaults to "mp3"
    #   speed           - playback speed within [0.25, 4.0], defaults to 1.0

    model: str = Field(
        description="Model to use: tts-1 or tts-1-hd (both map to VibeVoice)",
        default="tts-1",
    )
    input: str = Field(
        ...,
        max_length=4096,
        description="The text to generate audio for",
    )
    voice: str = Field(
        ...,
        description="Voice to use: OpenAI voice names (alloy, echo, fable, onyx, nova, shimmer) or any VibeVoice preset name",
    )
    response_format: Optional[Literal["mp3", "opus", "aac", "flac", "wav", "pcm", "m4a"]] = Field(
        description="Audio format for the response",
        default="mp3",
    )
    speed: Optional[float] = Field(
        description="Speed of generated audio (0.25 to 4.0)",
        default=1.0,
        ge=0.25,
        le=4.0,
    )

    # No validator is declared for `voice` on purpose: any VibeVoice preset
    # name must be accepted here. Per the original author's note, the endpoint
    # performs that validation so it can return proper error messages.
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# ============================================================
|
|
46
|
+
# VibeVoice-Specific Models
|
|
47
|
+
# ============================================================
|
|
48
|
+
|
|
49
|
+
class SpeakerConfig(BaseModel):
    """Configuration for a single speaker."""

    # Fields, in declaration order:
    #   speaker_id          - required integer in the range 0-3
    #   voice_preset        - optional name of a voice preset
    #   voice_sample_base64 - optional base64-encoded audio sample
    # At least one of the two voice fields must be supplied (enforced below).

    speaker_id: int = Field(
        ...,
        ge=0,
        le=3,
        description="Speaker ID (0-3)"
    )
    voice_preset: Optional[str] = Field(
        default=None,
        description="Name of voice preset to use (from voices directory)"
    )
    voice_sample_base64: Optional[str] = Field(
        default=None,
        description="Base64-encoded audio sample for voice cloning"
    )

    # The check is attached only to the last-declared voice field so that
    # `values` already contains `voice_preset` when it runs. `always=True` is
    # required: without it pydantic skips validators for fields that were not
    # supplied, so a config with neither voice source was silently accepted.
    @validator("voice_sample_base64", always=True)
    def validate_voice_source(cls, v, values):
        """Ensure at least one voice source is provided.

        Args:
            v: The value of ``voice_sample_base64`` (``None`` when omitted).
            values: Previously validated fields of this model.

        Returns:
            ``v`` unchanged.

        Raises:
            ValueError: If both ``voice_preset`` and ``voice_sample_base64``
                are missing or empty.
        """
        if "voice_preset" not in values:
            # `voice_preset` did not pass its own validation and that error is
            # reported separately; do not stack a second, misleading one.
            return v
        if not values["voice_preset"] and not v:
            raise ValueError("Either voice_preset or voice_sample_base64 must be provided")
        return v
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class VibeVoiceGenerateRequest(BaseModel):
    """VibeVoice-specific generation request with multi-speaker support."""

    # Fields, in declaration order:
    #   script          - required script text, capped at 100000 characters
    #   speakers        - required list of 1 to 4 SpeakerConfig entries
    #   cfg_scale       - guidance scale within [1.0, 2.0], defaults to 1.3
    #   inference_steps - step count within [5, 50], defaults to 10
    #   response_format - one of the listed audio formats, defaults to "mp3"
    #   stream          - streaming flag, defaults to False
    #   seed            - optional integer seed, defaults to None

    script: str = Field(
        ...,
        max_length=100000,
        description="Multi-speaker script with format 'Speaker 0: text\\nSpeaker 1: text'",
    )
    speakers: List[SpeakerConfig] = Field(
        ...,
        description="Speaker configurations (1-4 speakers)",
        min_items=1,
        max_items=4,
    )
    cfg_scale: Optional[float] = Field(
        description="Classifier-free guidance scale (1.0-2.0)",
        default=1.3,
        ge=1.0,
        le=2.0,
    )
    inference_steps: Optional[int] = Field(
        description="Number of diffusion inference steps",
        default=10,
        ge=5,
        le=50,
    )
    response_format: Optional[Literal["mp3", "opus", "aac", "flac", "wav", "pcm", "m4a"]] = Field(
        description="Audio format for the response",
        default="mp3",
    )
    stream: Optional[bool] = Field(
        description="Enable real-time streaming via Server-Sent Events",
        default=False,
    )
    seed: Optional[int] = Field(
        description="Random seed for reproducibility",
        default=None,
    )

    @validator("speakers")
    def validate_speaker_ids(cls, v):
        """Check that the speaker IDs are exactly 0..len(v)-1, in any order.

        Returns ``v`` unchanged; raises ``ValueError`` listing the sorted IDs
        when they contain a gap or a duplicate.
        """
        found_ids = [speaker.speaker_id for speaker in v]
        found_ids.sort()
        # Sorted IDs must match 0, 1, ..., n-1 for n speakers.
        if found_ids != list(range(len(v))):
            raise ValueError(f"Speaker IDs must be sequential starting from 0. Got: {found_ids}")
        return v
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class VibeVoiceGenerateResponse(BaseModel):
    """Response for VibeVoice generation."""

    # Fields, in declaration order:
    #   audio_url   - optional download URL, defaults to None
    #   duration    - optional duration in seconds, defaults to None
    #   format      - required audio format name
    #   sample_rate - integer sample rate, defaults to 24000

    audio_url: Optional[str] = Field(
        description="URL to download the generated audio (for non-streaming)",
        default=None,
    )
    duration: Optional[float] = Field(
        description="Duration of generated audio in seconds",
        default=None,
    )
    format: str = Field(
        ...,
        description="Audio format of the response",
    )
    sample_rate: int = Field(
        description="Sample rate of the audio",
        default=24000,
    )
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
# ============================================================
|
|
146
|
+
# Common Models
|
|
147
|
+
# ============================================================
|
|
148
|
+
|
|
149
|
+
class ErrorResponse(BaseModel):
    """Error response schema."""

    # Single required field: `error`, a free-form dict of error details.
    error: dict = Field(
        ...,
        description="Error details",
    )

    @staticmethod
    def from_exception(exc: Exception, status_code: int = 500) -> "ErrorResponse":
        """Build an ErrorResponse describing ``exc``.

        The ``error`` dict carries three keys: ``message`` (``str(exc)``),
        ``type`` (the exception's class name) and ``code`` (``status_code``,
        500 unless overridden).
        """
        details = {
            "message": str(exc),
            "type": type(exc).__name__,
            "code": status_code,
        }
        return ErrorResponse(error=details)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
class HealthResponse(BaseModel):
    """Health check response."""

    # Fields, in declaration order:
    #   status       - status string, defaults to "healthy"
    #   model_loaded - required flag: is the model loaded and ready
    #   device       - required name of the inference device
    #   model_path   - required path of the loaded model

    status: str = Field(
        description="Service health status",
        default="healthy",
    )
    model_loaded: bool = Field(
        ...,
        description="Whether the model is loaded and ready",
    )
    device: str = Field(
        ...,
        description="Device being used for inference",
    )
    model_path: str = Field(
        ...,
        description="Path to the loaded model",
    )
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
class VoiceListResponse(BaseModel):
    """Response listing available voices."""

    # Single required field: `voices`, a list of dicts (one per voice preset;
    # the dict schema is not defined in this module).
    voices: List[dict] = Field(
        ...,
        description="List of available voice presets",
    )
|
|
197
|
+
|