ytml-toolkit 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ytml_toolkit-0.1.0/PKG-INFO +25 -0
- ytml_toolkit-0.1.0/pyproject.toml +0 -0
- ytml_toolkit-0.1.0/setup.cfg +4 -0
- ytml_toolkit-0.1.0/setup.py +34 -0
- ytml_toolkit-0.1.0/ytml/__init__.py +0 -0
- ytml_toolkit-0.1.0/ytml/cli.py +75 -0
- ytml_toolkit-0.1.0/ytml/interpretron/__init__.py +0 -0
- ytml_toolkit-0.1.0/ytml/interpretron/parser.py +192 -0
- ytml_toolkit-0.1.0/ytml/interpretron/test_parser.py +43 -0
- ytml_toolkit-0.1.0/ytml/vocalforge/__init__.py +0 -0
- ytml_toolkit-0.1.0/ytml/vocalforge/base_vocal_forge.py +17 -0
- ytml_toolkit-0.1.0/ytml/vocalforge/gtts_vocal_forge.py +33 -0
- ytml_toolkit-0.1.0/ytml/vocalforge/test_voice.py +0 -0
- ytml_toolkit-0.1.0/ytml/vocalforge/xi_labs_vocal_forge.py +94 -0
- ytml_toolkit-0.1.0/ytml_toolkit.egg-info/PKG-INFO +25 -0
- ytml_toolkit-0.1.0/ytml_toolkit.egg-info/SOURCES.txt +18 -0
- ytml_toolkit-0.1.0/ytml_toolkit.egg-info/dependency_links.txt +1 -0
- ytml_toolkit-0.1.0/ytml_toolkit.egg-info/entry_points.txt +2 -0
- ytml_toolkit-0.1.0/ytml_toolkit.egg-info/requires.txt +19 -0
- ytml_toolkit-0.1.0/ytml_toolkit.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: ytml-toolkit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Requires-Python: >=3.7
|
|
5
|
+
Requires-Dist: fastapi
|
|
6
|
+
Requires-Dist: uvicorn
|
|
7
|
+
Requires-Dist: websockets
|
|
8
|
+
Requires-Dist: boto3
|
|
9
|
+
Requires-Dist: gtts
|
|
10
|
+
Requires-Dist: pydub
|
|
11
|
+
Requires-Dist: moviepy
|
|
12
|
+
Requires-Dist: imageio
|
|
13
|
+
Requires-Dist: imageio-ffmpeg
|
|
14
|
+
Requires-Dist: playwright
|
|
15
|
+
Requires-Dist: numpy
|
|
16
|
+
Requires-Dist: requests
|
|
17
|
+
Requires-Dist: python-dotenv
|
|
18
|
+
Requires-Dist: beautifulsoup4
|
|
19
|
+
Requires-Dist: lxml
|
|
20
|
+
Requires-Dist: tqdm
|
|
21
|
+
Requires-Dist: pyttsx3
|
|
22
|
+
Requires-Dist: starlette
|
|
23
|
+
Requires-Dist: colorama
|
|
24
|
+
Dynamic: requires-dist
|
|
25
|
+
Dynamic: requires-python
|
|
File without changes
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from setuptools import setup, find_packages
|
|
2
|
+
|
|
3
|
+
# Runtime dependencies installed alongside the package. The trailing notes
# are the original author's own (partly tentative) rationale for each entry.
INSTALL_REQUIRES = [
    "fastapi",
    "uvicorn",
    "websockets",
    "boto3",
    "gtts",
    "pydub",  # Used for audio processing
    "moviepy",  # Used for video processing
    "imageio",  # Required for image/video handling
    "imageio-ffmpeg",  # Supports video encoding/decoding
    "playwright",  # Needed for rendering animations
    "numpy",  # If used in image/video processing
    "requests",  # Required for API requests (e.g., ElevenLabs)
    "python-dotenv",  # If you're using `.env` files for config
    "beautifulsoup4",  # If used for HTML parsing
    "lxml",  # If parsing XML or HTML
    "tqdm",  # If you're showing progress bars
    "pyttsx3",  # If using local TTS
    "starlette",  # Dependency of FastAPI
    "colorama",
]

# Console entry points: exposes `ytml` as a shell command bound to ytml.cli:main.
ENTRY_POINTS = {
    "console_scripts": [
        "ytml=ytml.cli:main",
    ],
}

setup(
    name="ytml-toolkit",
    version="0.1.0",
    packages=find_packages(),
    entry_points=ENTRY_POINTS,
    install_requires=INSTALL_REQUIRES,
    python_requires=">=3.7",
)
|
|
File without changes
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import os
|
|
3
|
+
import sys
|
|
4
|
+
from ytml.vocalforge.xi_labs_vocal_forge import ElevenLabsVocalForge
|
|
5
|
+
from ytml.vocalforge.gtts_vocal_forge import gTTSVocalForge
|
|
6
|
+
from ytml.conductor.conductor import Conductor
|
|
7
|
+
from ytml.utils.config import get_config_from_file
|
|
8
|
+
from tqdm import tqdm
|
|
9
|
+
from colorama import Fore, Style
|
|
10
|
+
|
|
11
|
+
# Version string printed by the `--version` flag in main().
VERSION = "0.1.0"
|
|
12
|
+
|
|
13
|
+
def check_elevenlabs_key():
    """Report whether the ELEVEN_LABS_API_KEY environment variable is set.

    Returns:
        True when the variable holds a non-empty value; otherwise prints a
        yellow warning to stdout and returns False.
    """
    if os.getenv("ELEVEN_LABS_API_KEY"):
        return True

    warning = (
        "[WARNING] ELEVEN_LABS_API_KEY is not set. "
        "Use --use-gtts or define the API key for Eleven Labs."
    )
    print(Fore.YELLOW + warning + Style.RESET_ALL)
    return False
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def main():
    """Entry point of the ``ytml`` command-line interface.

    Parses the command-line arguments and then, in order:

    * ``--version``: prints the CLI version and exits with status 0.
    * Unless ``--use-gtts`` is given, requires ELEVEN_LABS_API_KEY to be set
      (a warning is printed and the function returns if it is not).
    * ``--preview``: builds a Conductor without a vocal forge and calls
      ``previewHTML`` on the input file.
    * ``--resume UUID``: looks for ``tmp/<UUID>``, skips every stage the
      conductor's job status reports as present, and re-runs the workflow
      from ``tmp/<UUID>/parsed.json``.
    * Otherwise: picks a vocal forge (gTTS or Eleven Labs) and runs the full
      workflow on the input file.

    Exits with an argparse usage error when neither ``--input`` nor
    ``--resume`` is supplied.
    """
    parser = argparse.ArgumentParser(description="YTML CLI - Video Generation Compiler")
    parser.add_argument("-i", "--input", help="Path to the YTML input file.")
    parser.add_argument("-o", "--output", default="output_video.mp4", help="Output video file.")
    parser.add_argument("--use-gtts", action="store_true", help="Use gTTS VocalForge instead of Eleven Labs.")
    parser.add_argument("--skip", nargs="*", choices=["parse", "voiceover", "render", "sync", "compose"], help="Steps to skip.")
    parser.add_argument("--resume", help="Resume a job using the provided UUID.")
    parser.add_argument("--job", help="Job ID of voiceovers to mix. Requires --skip voiceover.")
    parser.add_argument("--preview", action="store_true", help="Preview HTML only.")
    parser.add_argument("--version", action="store_true", help="Show CLI version.")

    args = parser.parse_args()

    # Handle version
    if args.version:
        print(Fore.CYAN + f"YTML CLI Version: {VERSION}" + Style.RESET_ALL)
        sys.exit(0)

    # --input is optional for argparse, but every mode except --resume hands
    # it to os.path.exists()/the conductor, which fail on None with an
    # unhelpful TypeError. Reject the missing argument up front instead.
    if not args.input and not args.resume:
        parser.error("-i/--input is required unless --resume is given.")

    # Check if Eleven Labs API Key is missing
    if not args.use_gtts:
        if not check_elevenlabs_key():
            return

    config = get_config_from_file(args.input)

    if args.preview:
        conductor = Conductor(None, args.output, config)
        conductor.previewHTML(args.input)
        return

    if args.resume:
        job_dir = f"tmp/{args.resume}"
        if not os.path.exists(job_dir):
            print(Fore.RED + f"[ERROR] No job found with UUID {args.resume}." + Style.RESET_ALL)
            return

        print(Fore.BLUE + f"[INFO] Resuming job with UUID {args.resume}..." + Style.RESET_ALL)
        conductor = Conductor(None, args.output, job_id=args.resume)
        status = conductor.get_job_status()

        # A stage is skipped when the job status has a truthy "<stage>.json" entry.
        skip_steps = [stage for stage in ["parse", "voiceover", "render", "sync"] if status.get(f"{stage}.json")]
        conductor.run_workflow(f"{job_dir}/parsed.json", skip_steps)
        return

    if not os.path.exists(args.input):
        print(Fore.RED + f"[ERROR] Input file '{args.input}' not found." + Style.RESET_ALL)
        return

    # gTTS is used when requested explicitly or when the config disables AI voice.
    vocal_forge = gTTSVocalForge() if args.use_gtts or config.ENABLE_AI_VOICE == False else ElevenLabsVocalForge(config.AI_VOICE_ID)
    conductor = Conductor(vocal_forge, args.output, config=config)
    conductor.run_workflow(args.input, skip_steps=args.skip or [], job=args.job)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# Allow running the module directly as a script in addition to the
# `ytml` console entry point.
if __name__ == "__main__":
    main()
|
|
File without changes
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import unicodedata
|
|
3
|
+
import xml.etree.ElementTree as ET
|
|
4
|
+
import json
|
|
5
|
+
from ytml.utils.utils import parse_boolean, parse_duration
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class YTMLParser:
    """Parse a YTML (XML-based) document into a plain dict structure.

    The result of :meth:`parse` has the shape
    ``{"segments": [...], "global_music": [...]}`` where every segment is
    built from one ``<composite>`` element.

    Durations and booleans are converted by the project helpers
    ``parse_duration`` / ``parse_boolean`` imported from ``ytml.utils.utils``;
    their exact return types are not visible here (the code wraps
    ``parse_duration`` results in ``float()``).
    """

    # Matches an opening <frame ...> tag (but not a self-closing <frame .../>
    # and not tags that merely start with "frame"), its content, and the
    # closing tag. DOTALL lets the content span lines.
    _FRAME_PATTERN = re.compile(
        r"(<frame(?=[\s>])[^>]*(?<!/)>)(.*?)(</frame>)", re.DOTALL)

    def __init__(self, ytml_file):
        """Remember the path of the YTML file; nothing is read until parse()."""
        self.ytml_file = ytml_file
        self.templates = {}
        self.global_styles = ""

    def clean_text(self, text):
        """
        Clean and normalize text by:
        - Applying Unicode NFKC normalization (compatibility composition;
          this does not fold characters to ASCII)
        - Stripping leading and trailing whitespace
        - Collapsing every run of whitespace (including newlines) to one space

        ``None`` or empty input (e.g. an empty ``<voice/>`` element) yields "".
        """
        if not text:
            # ElementTree reports the text of an empty element as None.
            return ""
        normalized_text = unicodedata.normalize('NFKC', text)
        return re.sub(r'\s+', ' ', normalized_text.strip())

    def _preprocess_file(self, file_path):
        """
        Read the YTML file and wrap the content of every <frame> element in
        <![CDATA[ ... ]]> so that the markup inside a frame reaches the XML
        parser as raw text instead of child elements.
        """
        # Explicit encoding: the platform default is not UTF-8 everywhere.
        with open(file_path, "r", encoding="utf-8") as file:
            content = file.read()

        def wrap(match):
            # A literal "]]>" would terminate the CDATA section early, so it
            # is split across two adjacent sections.
            body = match.group(2).replace("]]>", "]]]]><![CDATA[>")
            return f"{match.group(1)}<![CDATA[{body}]]>{match.group(3)}"

        return self._FRAME_PATTERN.sub(wrap, content)

    def parse(self):
        """
        Parse the YTML file and return structured, JSON-serialisable data.

        Returns:
            dict with "segments" (one entry per <composite>) and
            "global_music" (zero or one entry built from <global-music>).

        Raises:
            ValueError: if the file is not well-formed XML, the root element
                is not <ytml>, or a template is invalid/unknown.
        """
        try:
            # Preprocess the file so <frame> content is treated as raw text
            preprocessed_content = self._preprocess_file(self.ytml_file)
            root = ET.fromstring(preprocessed_content)
        except ET.ParseError as e:
            raise ValueError(f"Invalid YTML format: {e}") from e

        if root.tag != "ytml":
            raise ValueError("Invalid root element. Expected <ytml>.")

        # Start from a clean slate so calling parse() twice on the same
        # instance does not report every template as a duplicate.
        self.templates = {}
        self._extract_templates(root)

        # Global styles: the serialized first <style> element, or None.
        style_tag = root.find("style")
        self.global_styles = (
            ET.tostring(style_tag, encoding="unicode").strip()
            if style_tag is not None else None
        )

        # Parse composites
        composites = [
            self._parse_composite(composite)
            for composite in root.findall("composite")
        ]

        # Extract global-music tag
        global_music_tag = root.find("global-music")
        global_music = []
        if global_music_tag is not None:
            global_music.append(
                {
                    "src": global_music_tag.get('src'),
                    "start": parse_duration(global_music_tag.get('start')),
                    "end": parse_duration(global_music_tag.get('end')),
                    "loop": global_music_tag.get("loop") == "true",
                }
            )
        return {"segments": composites, "global_music": global_music}

    def _extract_templates(self, root):
        """
        Store every top-level <template> element in self.templates by its id.

        Raises:
            ValueError: if a template has no 'id' or an id occurs twice.
        """
        for template in root.findall("template"):
            template_id = template.get("id")
            if not template_id:
                raise ValueError("Template missing required 'id' attribute.")
            if template_id in self.templates:
                raise ValueError(f"Duplicate template ID found: {template_id}")
            self.templates[template_id] = template

    def _parse_composite(self, composite):
        """
        Parse a single <composite> element into a segment dict.

        Frame content is taken as raw text (see _preprocess_file). The
        "duration", "frame_rate" and "static" entries are overwritten by each
        <frame>, so the last frame of the composite wins.

        Raises:
            ValueError: if a <use> element references an unknown template.
        """
        parsed_composite = {
            "frames": [],
            "styles": self.global_styles,
            "voiceovers": [],
            "music": [],
            "transitions": [],
            "duration": '',
            "static": False
        }
        # Latest end time seen so far; start point for relative ("+Ns") timings.
        current_time = 0.0

        # Parse frames
        for frame in composite.findall("frame"):
            frame_data = frame.text.strip() if frame.text else ""
            parsed_composite["frames"].append(frame_data)
            parsed_composite['duration'] = parse_duration(
                frame.get('duration') or '2s')
            parsed_composite['frame_rate'] = frame.get('frame_rate')
            parsed_composite["static"] = parse_boolean(frame.get("static"))

        # Expand <use> tags with the serialized template they reference
        for use in composite.findall("use"):
            template_id = use.get("template")
            if not template_id or template_id not in self.templates:
                raise ValueError(
                    f"Referenced template '{template_id}' not found.")
            template_content = ET.tostring(
                self.templates[template_id], encoding="unicode").strip()
            parsed_composite["frames"].append(template_content)

        # Parse voiceovers
        for voice in composite.findall("voice"):
            start = self._resolve_timing(voice.get("start"), current_time)
            end = self._resolve_timing(voice.get("end"), start)
            current_time = max(current_time, end)
            parsed_composite["voiceovers"].append({
                "text": self.clean_text(voice.text),
                "start": start,
                "end": end
            })

        # Parse music
        for music in composite.findall("music"):
            start = self._resolve_timing(music.get("start"), current_time)
            end = self._resolve_timing(music.get("end"), start)
            current_time = max(current_time, end)
            parsed_composite["music"].append({
                "src": music.get("src"),
                "start": start,
                "end": end,
                "loop": music.get("loop") == "true",
            })

        # Parse transitions
        for transition in composite.findall("transition"):
            # The fallback must be numeric: it is formatted with an "s"
            # suffix below, so a "1s" string default would yield "1ss".
            duration = self._resolve_timing(transition.get("duration"), 1.0)
            parsed_composite["transitions"].append({
                "type": transition.get("type"),
                "duration": f"{duration}s",
            })

        return parsed_composite

    def _resolve_timing(self, timing, current_time):
        """
        Resolve a timing attribute to a number:
        - None (attribute absent) returns current_time unchanged.
        - Relative values (e.g., "+2s") are added to current_time.
        - Anything else (e.g., "5s") is converted as an absolute value.
        """
        if timing is None:
            return current_time
        if timing.startswith("+"):
            return current_time + float(parse_duration(timing[1:]))
        return float(parse_duration(timing))
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
# CLI for testing: parse the given YTML file and print the result as JSON.
if __name__ == "__main__":
    import sys

    if len(sys.argv) < 2:
        print("Usage: python parser.py <ytml_file>")
        sys.exit(1)

    parser = YTMLParser(sys.argv[1])
    try:
        result = parser.parse()
    except (ValueError, OSError) as e:
        # ValueError: invalid YTML; OSError: file missing/unreadable.
        # Report on stderr and exit non-zero so shell callers can detect it.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
    print(json.dumps(result, indent=2))
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import unittest
|
|
2
|
+
from parser import YTMLParser
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class TestYTMLParser(unittest.TestCase):
    """Parser tests driven by sample .ytml files.

    Sample paths are relative to the current working directory.
    """

    SAMPLES_DIR = "backend/interpretron/samples"

    def _load(self, sample_name):
        """Build a parser for the named sample and return its parse() result."""
        return YTMLParser(f"{self.SAMPLES_DIR}/{sample_name}").parse()

    def test_basic_parsing(self):
        parsed = self._load("basic.ytml")
        self.assertIn("segments", parsed)

    def test_voice_parsing(self):
        first_voice = self._load("voice.ytml")["segments"][0]["voiceovers"][0]
        self.assertEqual(first_voice["text"], "Hello!")

    def test_music_parsing(self):
        first_track = self._load("music.ytml")["segments"][0]["music"][0]
        self.assertEqual(first_track["src"], "background.mp3")

    def test_dynamic_timing(self):
        first_voice = self._load("dynamic_timing.ytml")["segments"][0]["voiceovers"][0]
        self.assertEqual(first_voice["start"], 1.0)

    def test_template_expansion(self):
        parsed = self._load("template.ytml")
        print(parsed)
        first_frame = parsed["segments"][0]["frames"][0]
        self.assertIn("<div class='logo'>My Brand</div>", first_frame)

    def test_error_handling(self):
        # Only parse() is expected to raise; construction happens outside
        # the assertRaises context.
        parser = YTMLParser(f"{self.SAMPLES_DIR}/invalid_template.ytml")
        with self.assertRaises(ValueError):
            parser.parse()
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Run the test suite when this file is executed directly.
if __name__ == "__main__":
    unittest.main()
|
|
File without changes
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class VocalForgeBase(ABC):
    """Abstract interface that every voiceover backend implements."""

    @abstractmethod
    def generate_voiceover(self, text: str, output_file: str) -> str:
        """Synthesize speech for ``text`` and write it to ``output_file``."""
        ...

    @abstractmethod
    def process_voiceovers(self, parsed_json: dict, output_dir: str = "voiceovers") -> list:
        """Generate audio files for all voiceovers found in ``parsed_json``."""
        ...
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from gtts import gTTS
|
|
3
|
+
|
|
4
|
+
from ytml.vocalforge.base_vocal_forge import VocalForgeBase
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class gTTSVocalForge(VocalForgeBase):
    """Voiceover backend that synthesizes speech with the gTTS library."""

    def generate_voiceover(self, text, output_file):
        """Synthesize ``text`` with gTTS, save it to ``output_file`` and return that path."""
        gTTS(text).save(output_file)
        return output_file

    def process_voiceovers(self, parsed_json: dict, output_dir: str = "tmp/gtts_voiceovers") -> list:
        """
        Create one audio file per voiceover entry in ``parsed_json["segments"]``.

        Files are named ``segment<N>_voice<M>.mp3`` (both 1-based) inside
        ``output_dir``, which is created if missing. Returns a list of dicts
        with the file path and the voiceover's "start" and "end" values.
        """
        os.makedirs(output_dir, exist_ok=True)
        results = []

        segments = parsed_json.get("segments", [])
        for seg_no, segment in enumerate(segments, start=1):
            voices = segment.get("voiceovers", [])
            for voice_no, voice in enumerate(voices, start=1):
                spoken_text = voice["text"]
                target = os.path.join(
                    output_dir, f"segment{seg_no}_voice{voice_no}.mp3")
                self.generate_voiceover(spoken_text, target)
                results.append({
                    "file": target,
                    "start": voice["start"],
                    "end": voice["end"],
                })

        return results
|
|
File without changes
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import requests
|
|
3
|
+
from dotenv import load_dotenv
|
|
4
|
+
from ytml.vocalforge.base_vocal_forge import VocalForgeBase
|
|
5
|
+
|
|
6
|
+
load_dotenv()  # Reads .env file and loads environment variables

# Placeholder used when the environment variable is missing. It is not a
# usable key: ElevenLabsVocalForge.__init__ rejects it.
DEFAULT_ELEVEN_LABS_API_KEY = "key"

# Resolved once, at import time, from the environment (or the placeholder).
ELEVEN_LABS_API_KEY = os.getenv(
    "ELEVEN_LABS_API_KEY", DEFAULT_ELEVEN_LABS_API_KEY)
# Base endpoint; the voice id is appended as a path segment per request.
ELEVEN_LABS_URL = "https://api.elevenlabs.io/v1/text-to-speech"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ElevenLabsVocalForge(VocalForgeBase):
    """Voiceover backend that calls the Eleven Labs text-to-speech HTTP API."""

    # Seconds to wait for the API before giving up. Without a timeout,
    # requests waits indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 60

    def __init__(self, voice_id, api_key=None):
        """
        Args:
            voice_id: Eleven Labs voice id, appended to the API URL.
            api_key: explicit API key. When omitted, the key is read from the
                ELEVEN_LABS_API_KEY environment variable, falling back to the
                value captured at import time.

        Raises:
            ValueError: if no usable API key is available.
        """
        # Re-read the environment here so a key exported after this module
        # was imported is still honoured.
        self.api_key = (
            api_key
            or os.getenv("ELEVEN_LABS_API_KEY")
            or ELEVEN_LABS_API_KEY
        )

        # Reject both an empty key and the placeholder default.
        if not self.api_key or self.api_key == DEFAULT_ELEVEN_LABS_API_KEY:
            raise ValueError(
                "Invalid Eleven Labs API key. Please set the 'ELEVEN_LABS_API_KEY' environment variable to use Eleven Labs, "
                "or use the '--use-gtts' flag to fall back to Google Text-to-Speech.")

        self.voice_id = voice_id

    def generate_voiceover(self, text, output_file):
        """
        Request speech for ``text`` and write the response body to ``output_file``.

        Returns:
            ``output_file`` on success (HTTP 200).

        Raises:
            RuntimeError: if the API answers with any status other than 200.
            requests.exceptions.RequestException: on network failure or timeout.
        """
        headers = {
            "xi-api-key": self.api_key,
            "Content-Type": "application/json",
        }
        payload = {
            "text": text,
        }
        response = requests.post(
            f"{ELEVEN_LABS_URL}/{self.voice_id}",
            json=payload,
            headers=headers,
            timeout=self.REQUEST_TIMEOUT,
        )

        if response.status_code != 200:
            raise RuntimeError(f"Error generating voice: {response.text}")

        with open(output_file, "wb") as f:
            f.write(response.content)
        return output_file

    def process_voiceovers(self, parsed_json, output_dir="tmp/xi_voiceovers/1"):
        """
        Create one audio file per voiceover entry in ``parsed_json["segments"]``.

        Files are named ``segment<N>_voice<M>.mp3`` (both 1-based) inside
        ``output_dir``, which is created if missing. Returns a list of dicts
        with the file path and the voiceover's "start" and "end" values.
        """
        os.makedirs(output_dir, exist_ok=True)
        audio_metadata = []

        for segment_idx, segment in enumerate(parsed_json.get("segments", [])):
            for voice_idx, voice in enumerate(segment.get("voiceovers", [])):
                text = voice["text"]
                start = voice["start"]
                end = voice["end"]
                output_file = os.path.join(
                    output_dir, f"segment{segment_idx+1}_voice{voice_idx+1}.mp3")

                self.generate_voiceover(text, output_file)

                audio_metadata.append({
                    "file": output_file,
                    "start": start,
                    "end": end
                })

        return audio_metadata
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# Example usage: python xi_labs_vocal_forge.py <voice_id>
if __name__ == "__main__":
    import json
    import sys

    # ElevenLabsVocalForge requires a voice id; take it from the command line.
    if len(sys.argv) < 2:
        print("Usage: python xi_labs_vocal_forge.py <voice_id>")
        sys.exit(1)

    parsed_json = {
        "segments": [
            {
                "voiceovers": [
                    {"text": "Hello and welcome!", "start": "0.5s", "end": "4.0s"}
                ]
            }
        ]
    }

    # If you set ELEVEN_LABS_API_KEY in .env, it will be read automatically
    forge = ElevenLabsVocalForge(sys.argv[1])
    metadata = forge.process_voiceovers(parsed_json)
    print(json.dumps(metadata, indent=2))
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: ytml-toolkit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Requires-Python: >=3.7
|
|
5
|
+
Requires-Dist: fastapi
|
|
6
|
+
Requires-Dist: uvicorn
|
|
7
|
+
Requires-Dist: websockets
|
|
8
|
+
Requires-Dist: boto3
|
|
9
|
+
Requires-Dist: gtts
|
|
10
|
+
Requires-Dist: pydub
|
|
11
|
+
Requires-Dist: moviepy
|
|
12
|
+
Requires-Dist: imageio
|
|
13
|
+
Requires-Dist: imageio-ffmpeg
|
|
14
|
+
Requires-Dist: playwright
|
|
15
|
+
Requires-Dist: numpy
|
|
16
|
+
Requires-Dist: requests
|
|
17
|
+
Requires-Dist: python-dotenv
|
|
18
|
+
Requires-Dist: beautifulsoup4
|
|
19
|
+
Requires-Dist: lxml
|
|
20
|
+
Requires-Dist: tqdm
|
|
21
|
+
Requires-Dist: pyttsx3
|
|
22
|
+
Requires-Dist: starlette
|
|
23
|
+
Requires-Dist: colorama
|
|
24
|
+
Dynamic: requires-dist
|
|
25
|
+
Dynamic: requires-python
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
pyproject.toml
|
|
2
|
+
setup.py
|
|
3
|
+
ytml/__init__.py
|
|
4
|
+
ytml/cli.py
|
|
5
|
+
ytml/interpretron/__init__.py
|
|
6
|
+
ytml/interpretron/parser.py
|
|
7
|
+
ytml/interpretron/test_parser.py
|
|
8
|
+
ytml/vocalforge/__init__.py
|
|
9
|
+
ytml/vocalforge/base_vocal_forge.py
|
|
10
|
+
ytml/vocalforge/gtts_vocal_forge.py
|
|
11
|
+
ytml/vocalforge/test_voice.py
|
|
12
|
+
ytml/vocalforge/xi_labs_vocal_forge.py
|
|
13
|
+
ytml_toolkit.egg-info/PKG-INFO
|
|
14
|
+
ytml_toolkit.egg-info/SOURCES.txt
|
|
15
|
+
ytml_toolkit.egg-info/dependency_links.txt
|
|
16
|
+
ytml_toolkit.egg-info/entry_points.txt
|
|
17
|
+
ytml_toolkit.egg-info/requires.txt
|
|
18
|
+
ytml_toolkit.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
ytml
|