ytml-toolkit 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,25 @@
1
+ Metadata-Version: 2.2
2
+ Name: ytml-toolkit
3
+ Version: 0.1.0
4
+ Requires-Python: >=3.7
5
+ Requires-Dist: fastapi
6
+ Requires-Dist: uvicorn
7
+ Requires-Dist: websockets
8
+ Requires-Dist: boto3
9
+ Requires-Dist: gtts
10
+ Requires-Dist: pydub
11
+ Requires-Dist: moviepy
12
+ Requires-Dist: imageio
13
+ Requires-Dist: imageio-ffmpeg
14
+ Requires-Dist: playwright
15
+ Requires-Dist: numpy
16
+ Requires-Dist: requests
17
+ Requires-Dist: python-dotenv
18
+ Requires-Dist: beautifulsoup4
19
+ Requires-Dist: lxml
20
+ Requires-Dist: tqdm
21
+ Requires-Dist: pyttsx3
22
+ Requires-Dist: starlette
23
+ Requires-Dist: colorama
24
+ Dynamic: requires-dist
25
+ Dynamic: requires-python
File without changes
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,34 @@
1
+ from setuptools import setup, find_packages
2
+
3
# Console commands installed with the package.
_CONSOLE_SCRIPTS = [
    "ytml=ytml.cli:main",  # This makes `ytml` a command
]

# Runtime dependencies (unpinned), in the order they are declared upstream.
_INSTALL_REQUIRES = [
    "fastapi",
    "uvicorn",
    "websockets",
    "boto3",
    "gtts",
    "pydub",  # audio processing
    "moviepy",  # video processing
    "imageio",  # image/video handling
    "imageio-ffmpeg",  # video encoding/decoding support
    "playwright",  # rendering animations
    "numpy",  # image/video processing, if used
    "requests",  # API requests (e.g., ElevenLabs)
    "python-dotenv",  # `.env` based configuration
    "beautifulsoup4",  # HTML parsing, if used
    "lxml",  # XML/HTML parsing, if used
    "tqdm",  # progress bars
    "pyttsx3",  # local TTS, if used
    "starlette",  # dependency of FastAPI
    "colorama",
]

setup(
    name="ytml-toolkit",
    version="0.1.0",
    packages=find_packages(),
    entry_points={"console_scripts": _CONSOLE_SCRIPTS},
    install_requires=_INSTALL_REQUIRES,
    python_requires=">=3.7",
)
File without changes
@@ -0,0 +1,75 @@
1
+ import argparse
2
+ import os
3
+ import sys
4
+ from ytml.vocalforge.xi_labs_vocal_forge import ElevenLabsVocalForge
5
+ from ytml.vocalforge.gtts_vocal_forge import gTTSVocalForge
6
+ from ytml.conductor.conductor import Conductor
7
+ from ytml.utils.config import get_config_from_file
8
+ from tqdm import tqdm
9
+ from colorama import Fore, Style
10
+
11
+ VERSION = "0.1.0"
12
+
13
def check_elevenlabs_key():
    """Return True when ELEVEN_LABS_API_KEY is set (non-empty); otherwise warn and return False."""
    if os.getenv("ELEVEN_LABS_API_KEY"):
        return True

    warning = (
        "[WARNING] ELEVEN_LABS_API_KEY is not set. "
        "Use --use-gtts or define the API key for Eleven Labs."
    )
    print(Fore.YELLOW + warning + Style.RESET_ALL)
    return False
20
+
21
+
22
def main():
    """Entry point for the ``ytml`` console script.

    Parses the command line and then takes exactly one of these paths:

    * ``--version``: print the CLI version and exit with status 0.
    * ``--preview``: build a ``Conductor`` without a voice backend and call
      ``previewHTML`` on the input file.
    * ``--resume UUID``: continue the job stored under ``tmp/<UUID>``,
      skipping every stage the job status reports as already present.
    * otherwise: run the whole workflow on ``--input``.

    Errors are reported on stdout (coloured via colorama) and the function
    simply returns; it does not raise for user mistakes.
    """
    parser = argparse.ArgumentParser(description="YTML CLI - Video Generation Compiler")
    parser.add_argument("-i", "--input", help="Path to the YTML input file.")
    parser.add_argument("-o", "--output", default="output_video.mp4", help="Output video file.")
    parser.add_argument("--use-gtts", action="store_true", help="Use gTTS VocalForge instead of Eleven Labs.")
    parser.add_argument("--skip", nargs="*", choices=["parse", "voiceover", "render", "sync", "compose"], help="Steps to skip.")
    parser.add_argument("--resume", help="Resume a job using the provided UUID.")
    parser.add_argument("--job", help="Job ID of voiceovers to mix. Requires --skip voiceover.")
    parser.add_argument("--preview", action="store_true", help="Preview HTML only.")
    parser.add_argument("--version", action="store_true", help="Show CLI version.")

    args = parser.parse_args()

    # Handle --version before any other validation (argparse handles --help itself).
    if args.version:
        print(Fore.CYAN + f"YTML CLI Version: {VERSION}" + Style.RESET_ALL)
        sys.exit(0)

    # Eleven Labs is the default backend; stop early when its API key is missing.
    if not args.use_gtts:
        if not check_elevenlabs_key():
            return

    config = get_config_from_file(args.input)

    if args.preview:
        conductor = Conductor(None, args.output, config)
        conductor.previewHTML(args.input)
        return

    if args.resume:
        job_dir = f"tmp/{args.resume}"
        if not os.path.exists(job_dir):
            print(Fore.RED + f"[ERROR] No job found with UUID {args.resume}." + Style.RESET_ALL)
            return

        print(Fore.BLUE + f"[INFO] Resuming job with UUID {args.resume}..." + Style.RESET_ALL)
        conductor = Conductor(None, args.output, job_id=args.resume)
        status = conductor.get_job_status()

        # A stage is skipped when the job status has a truthy "<stage>.json" entry.
        skip_steps = [stage for stage in ["parse", "voiceover", "render", "sync"] if status.get(f"{stage}.json")]
        conductor.run_workflow(f"{job_dir}/parsed.json", skip_steps)
        return

    # argparse does not mark --input as required, so it can still be None here;
    # os.path.exists(None) would raise TypeError instead of a readable error.
    if not args.input:
        print(Fore.RED + "[ERROR] No input file provided. Use -i/--input." + Style.RESET_ALL)
        return
    if not os.path.exists(args.input):
        print(Fore.RED + f"[ERROR] Input file '{args.input}' not found." + Style.RESET_ALL)
        return

    # The explicit comparison is deliberate: only ENABLE_AI_VOICE == False
    # selects gTTS; a None/unset value keeps Eleven Labs.
    use_gtts = args.use_gtts or config.ENABLE_AI_VOICE == False  # noqa: E712
    vocal_forge = gTTSVocalForge() if use_gtts else ElevenLabsVocalForge(config.AI_VOICE_ID)
    conductor = Conductor(vocal_forge, args.output, config=config)
    conductor.run_workflow(args.input, skip_steps=args.skip or [], job=args.job)


if __name__ == "__main__":
    main()
File without changes
@@ -0,0 +1,192 @@
1
+ import re
2
+ import unicodedata
3
+ import xml.etree.ElementTree as ET
4
+ import json
5
+ from ytml.utils.utils import parse_boolean, parse_duration
6
+
7
+
8
class YTMLParser:
    """Parse a YTML file into a JSON-serializable description of a video.

    ``parse()`` returns ``{"segments": [...], "global_music": [...]}``, where
    each segment is built from one ``<composite>`` child of the ``<ytml>`` root.
    """

    # Opening <frame ...> tag, raw body, closing tag. The tag name must be
    # followed by whitespace or ">" (so e.g. <frameset> is ignored) and the
    # tag must not be self-closing (so <frame/> cannot swallow later markup).
    _FRAME_PATTERN = re.compile(
        r"(<frame(?:\s[^>]*?)?(?<!/)>)(.*?)(</frame>)", re.DOTALL
    )

    def __init__(self, ytml_file):
        """Remember the path of the YTML file; nothing is read until ``parse()``."""
        self.ytml_file = ytml_file
        self.templates = {}
        self.global_styles = ""

    def clean_text(self, text):
        """
        Clean and normalize ``text``:
        - apply Unicode NFKC normalization (compatibility forms are folded,
          but non-ASCII characters are kept)
        - strip leading and trailing whitespace
        - collapse every run of whitespace (spaces, tabs, newlines) to one space

        ``None`` or an empty string (e.g. an empty ``<voice/>`` element)
        yields ``""``.
        """
        if not text:
            return ""
        normalized_text = unicodedata.normalize("NFKC", text)
        return re.sub(r"\s+", " ", normalized_text.strip())

    def _preprocess_file(self, file_path):
        """
        Read the YTML file and wrap the body of every <frame> element in
        <![CDATA[ ... ]]> so the markup inside a frame reaches the XML parser
        as raw text instead of child elements.
        """
        with open(file_path, "r", encoding="utf-8") as file:
            content = file.read()

        def _wrap(match):
            # A literal "]]>" inside the frame would end the CDATA section
            # early; split it across two adjacent sections instead.
            body = match.group(2).replace("]]>", "]]]]><![CDATA[>")
            return f"{match.group(1)}<![CDATA[{body}]]>{match.group(3)}"

        return self._FRAME_PATTERN.sub(_wrap, content)

    def parse(self):
        """
        Parse the YTML file and return structured JSON.

        Returns:
            dict with ``"segments"`` (one entry per <composite>) and
            ``"global_music"`` (zero or one entry for <global-music>).

        Raises:
            ValueError: malformed XML, a root element other than <ytml>,
                a template without ``id``, a duplicate template id, or a
                <use> that references an unknown template.
        """
        try:
            # Frame bodies are CDATA-wrapped first so they parse as text.
            preprocessed_content = self._preprocess_file(self.ytml_file)
            root = ET.fromstring(preprocessed_content)
        except ET.ParseError as e:
            raise ValueError(f"Invalid YTML format: {e}") from e

        if root.tag != "ytml":
            raise ValueError("Invalid root element. Expected <ytml>.")

        # Start from a clean slate so calling parse() twice on the same
        # instance does not trip the duplicate-template check.
        self.templates = {}
        self._extract_templates(root)

        # The first <style> element, serialized, is attached to every segment.
        style_tag = root.find("style")
        self.global_styles = (
            ET.tostring(style_tag, encoding="unicode").strip()
            if style_tag is not None
            else None
        )

        composites = [
            self._parse_composite(composite)
            for composite in root.findall("composite")
        ]

        # Optional document-wide music track.
        global_music = []
        global_music_tag = root.find("global-music")
        if global_music_tag is not None:
            global_music.append(
                {
                    "src": global_music_tag.get("src"),
                    "start": parse_duration(global_music_tag.get("start")),
                    "end": parse_duration(global_music_tag.get("end")),
                    "loop": global_music_tag.get("loop") == "true",
                }
            )
        return {"segments": composites, "global_music": global_music}

    def _extract_templates(self, root):
        """
        Store every <template> child of ``root`` in ``self.templates`` by id.

        Raises:
            ValueError: a template has no ``id`` or reuses an existing one.
        """
        for template in root.findall("template"):
            template_id = template.get("id")
            if not template_id:
                raise ValueError("Template missing required 'id' attribute.")
            if template_id in self.templates:
                raise ValueError(f"Duplicate template ID found: {template_id}")
            self.templates[template_id] = template

    def _parse_composite(self, composite):
        """
        Parse a single <composite> element into a segment dict.

        Frame bodies are taken as raw text (they were CDATA-wrapped during
        preprocessing). Voice and music timings are resolved in document
        order against a running ``current_time`` that starts at 0.0.
        """
        parsed_composite = {
            "frames": [],
            "styles": self.global_styles,
            "voiceovers": [],
            "music": [],
            "transitions": [],
            "duration": '',
            "static": False
        }
        current_time = 0.0

        # Frames: duration / frame_rate / static are overwritten on each
        # iteration, so the last <frame> in the composite wins.
        for frame in composite.findall("frame"):
            frame_data = frame.text.strip() if frame.text else ""
            parsed_composite["frames"].append(frame_data)
            parsed_composite['duration'] = parse_duration(
                frame.get('duration') or '2s')
            parsed_composite['frame_rate'] = frame.get('frame_rate')
            parsed_composite["static"] = parse_boolean(frame.get("static"))

        # <use template="..."> appends the serialized template as a frame.
        for use in composite.findall("use"):
            template_id = use.get("template")
            if not template_id or template_id not in self.templates:
                raise ValueError(
                    f"Referenced template '{template_id}' not found.")
            template_content = ET.tostring(
                self.templates[template_id], encoding="unicode").strip()
            parsed_composite["frames"].append(template_content)

        # Voiceovers: a missing start defaults to current_time, a missing
        # end defaults to the resolved start.
        for voice in composite.findall("voice"):
            start = self._resolve_timing(voice.get("start"), current_time)
            end = self._resolve_timing(voice.get("end"), start)
            current_time = max(current_time, end)
            parsed_composite["voiceovers"].append({
                "text": self.clean_text(voice.text),
                "start": start,
                "end": end
            })

        # Music: same timing rules as voiceovers.
        for music in composite.findall("music"):
            start = self._resolve_timing(music.get("start"), current_time)
            end = self._resolve_timing(music.get("end"), start)
            current_time = max(current_time, end)
            parsed_composite["music"].append({
                "src": music.get("src"),
                "start": start,
                "end": end,
                "loop": music.get("loop") == "true",
            })

        # Transitions: the default must be numeric (1.0 second). The "s"
        # suffix is appended below, so a string default such as "1s" would
        # come out as "1ss".
        for transition in composite.findall("transition"):
            transition_type = transition.get("type")
            duration = self._resolve_timing(transition.get("duration"), 1.0)
            parsed_composite["transitions"].append({
                "type": transition_type,
                "duration": f"{duration}s",
            })

        return parsed_composite

    def _resolve_timing(self, timing, current_time):
        """
        Resolve a timing attribute to a number:
        - ``None`` (attribute absent) returns ``current_time`` unchanged.
        - Relative values (e.g., "+2s") are added to ``current_time``.
        - Absolute values (e.g., "5s") are returned as a float.

        The string-to-number conversion is delegated to ``parse_duration``
        from ``ytml.utils.utils``.
        """
        if timing is None:
            return current_time
        if timing.startswith("+"):
            return current_time + float(parse_duration(timing[1:]))
        return float(parse_duration(timing))
178
+
179
+
180
# CLI for testing: parse one YTML file and print the result as JSON.
if __name__ == "__main__":
    import sys

    if len(sys.argv) < 2:
        print("Usage: python parser.py <ytml_file>")
        sys.exit(1)

    parser = YTMLParser(sys.argv[1])
    try:
        result = parser.parse()
        print(json.dumps(result, indent=2))
    except Exception as e:  # top-level boundary: report and fail the process
        # Errors go to stderr with a non-zero exit status, so shell callers
        # can tell a failed parse from a successful one.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
@@ -0,0 +1,43 @@
1
+ import unittest
2
+ from parser import YTMLParser
3
+
4
+
5
class TestYTMLParser(unittest.TestCase):
    """Checks YTMLParser against the sample files under backend/interpretron/samples."""

    def _first_segment(self, ytml_path):
        """Parse ``ytml_path`` and return the first entry of ``segments``."""
        return YTMLParser(ytml_path).parse()["segments"][0]

    def test_basic_parsing(self):
        parsed = YTMLParser("backend/interpretron/samples/basic.ytml").parse()
        self.assertIn("segments", parsed)

    def test_voice_parsing(self):
        segment = self._first_segment("backend/interpretron/samples/voice.ytml")
        self.assertEqual(segment["voiceovers"][0]["text"], "Hello!")

    def test_music_parsing(self):
        segment = self._first_segment("backend/interpretron/samples/music.ytml")
        self.assertEqual(segment["music"][0]["src"], "background.mp3")

    def test_dynamic_timing(self):
        segment = self._first_segment(
            "backend/interpretron/samples/dynamic_timing.ytml")
        self.assertEqual(segment["voiceovers"][0]["start"], 1.0)

    def test_template_expansion(self):
        parsed = YTMLParser("backend/interpretron/samples/template.ytml").parse()
        print(parsed)
        first_frame = parsed["segments"][0]["frames"][0]
        self.assertIn("<div class='logo'>My Brand</div>", first_frame)

    def test_error_handling(self):
        broken = YTMLParser("backend/interpretron/samples/invalid_template.ytml")
        with self.assertRaises(ValueError):
            broken.parse()


if __name__ == "__main__":
    unittest.main()
+ unittest.main()
File without changes
@@ -0,0 +1,17 @@
1
+ from abc import ABC, abstractmethod
2
+
3
+
4
class VocalForgeBase(ABC):
    """Abstract interface that every voiceover backend implements."""

    @abstractmethod
    def generate_voiceover(self, text: str, output_file: str) -> str:
        """Synthesize speech for ``text`` and store it in the audio file ``output_file``."""

    @abstractmethod
    def process_voiceovers(self, parsed_json: dict, output_dir: str = "voiceovers") -> list:
        """Generate audio files for all voiceovers found in ``parsed_json``."""
@@ -0,0 +1,33 @@
1
+ import os
2
+ from gtts import gTTS
3
+
4
+ from ytml.vocalforge.base_vocal_forge import VocalForgeBase
5
+
6
+
7
class gTTSVocalForge(VocalForgeBase):
    """Voiceover backend built on the ``gtts`` package."""

    def generate_voiceover(self, text, output_file):
        """Save the gTTS rendering of ``text`` to ``output_file`` and return that path."""
        gTTS(text).save(output_file)
        return output_file

    def process_voiceovers(self, parsed_json: dict, output_dir: str = "tmp/gtts_voiceovers") -> list:
        """
        Render every voiceover in ``parsed_json["segments"]`` with gTTS.

        Files are written to ``output_dir`` (created if missing) and named
        ``segment<i>_voice<j>.mp3`` with 1-based indices. Returns one dict per
        voiceover holding the file path and the voiceover's start/end values.
        """
        os.makedirs(output_dir, exist_ok=True)
        clips = []

        segments = parsed_json.get("segments", [])
        for seg_no, segment in enumerate(segments, start=1):
            voices = segment.get("voiceovers", [])
            for voice_no, voice in enumerate(voices, start=1):
                spoken = voice["text"]
                target = os.path.join(
                    output_dir, f"segment{seg_no}_voice{voice_no}.mp3")
                self.generate_voiceover(spoken, target)
                clips.append({
                    "file": target,
                    "start": voice["start"],
                    "end": voice["end"],
                })

        return clips
File without changes
@@ -0,0 +1,94 @@
1
+ import os
2
+ import requests
3
+ from dotenv import load_dotenv
4
+ from ytml.vocalforge.base_vocal_forge import VocalForgeBase
5
+
6
# Pull variables from a .env file into the process environment before reading them.
load_dotenv()

# Base endpoint; the voice id is appended per request.
ELEVEN_LABS_URL = "https://api.elevenlabs.io/v1/text-to-speech"

# Placeholder used when ELEVEN_LABS_API_KEY is absent from the environment.
DEFAULT_ELEVEN_LABS_API_KEY = "key"

ELEVEN_LABS_API_KEY = os.environ.get(
    "ELEVEN_LABS_API_KEY", DEFAULT_ELEVEN_LABS_API_KEY
)
14
+
15
+
16
class ElevenLabsVocalForge(VocalForgeBase):
    """Voiceover backend that calls the Eleven Labs text-to-speech HTTP API."""

    def __init__(self, voice_id, api_key=None, timeout=120):
        """
        Args:
            voice_id: Eleven Labs voice identifier, appended to the API URL.
            api_key: explicit API key; when falsy, the module-level
                ELEVEN_LABS_API_KEY (environment value or placeholder) is used.
            timeout: seconds to wait for the HTTP request before ``requests``
                raises a timeout error, so a stalled connection cannot hang
                the run forever.

        Raises:
            Exception: the resolved key is still the placeholder default,
                i.e. no real key was configured.
        """
        self.api_key = api_key if api_key else ELEVEN_LABS_API_KEY

        # Compare against the named placeholder instead of repeating its literal.
        if self.api_key == DEFAULT_ELEVEN_LABS_API_KEY:
            raise Exception(
                "Invalid Eleven Labs API key. Please set the 'ELEVEN_LABS_API_KEY' environment variable to use Eleven Labs, "
                "or use the '--use-gtts' flag to fall back to Google Text-to-Speech.")

        self.voice_id = voice_id
        self.timeout = timeout

    def generate_voiceover(self, text, output_file):
        """
        Request speech for ``text`` and write the response body to ``output_file``.

        Returns:
            ``output_file`` on success.

        Raises:
            Exception: the API answered with a status other than 200.
        """
        headers = {
            "xi-api-key": self.api_key,
            "Content-Type": "application/json",
        }
        payload = {
            "text": text,
        }
        response = requests.post(
            f"{ELEVEN_LABS_URL}/{self.voice_id}",
            json=payload,
            headers=headers,
            timeout=self.timeout,
        )

        if response.status_code != 200:
            raise Exception(f"Error generating voice: {response.text}")

        with open(output_file, "wb") as f:
            f.write(response.content)
        return output_file

    def process_voiceovers(self, parsed_json, output_dir="tmp/xi_voiceovers/1"):
        """
        Generate one audio file per voiceover in ``parsed_json["segments"]``.

        Files are written to ``output_dir`` (created if missing) and named
        ``segment<i>_voice<j>.mp3`` with 1-based indices. Returns a list of
        dicts with the file path and the voiceover's start/end values.
        """
        os.makedirs(output_dir, exist_ok=True)
        audio_metadata = []

        for segment_idx, segment in enumerate(parsed_json.get("segments", [])):
            for voice_idx, voice in enumerate(segment.get("voiceovers", [])):
                text = voice["text"]
                start = voice["start"]
                end = voice["end"]
                output_file = os.path.join(
                    output_dir, f"segment{segment_idx+1}_voice{voice_idx+1}.mp3")

                self.generate_voiceover(text, output_file)

                audio_metadata.append({
                    "file": output_file,
                    "start": start,
                    "end": end
                })

        return audio_metadata
75
+
76
+
77
# Example Usage: python xi_labs_vocal_forge.py <voice_id>
if __name__ == "__main__":
    import json
    import sys

    # ElevenLabsVocalForge requires a voice id; calling it with no arguments
    # raises TypeError, so take the id from the command line.
    if len(sys.argv) < 2:
        print("Usage: python xi_labs_vocal_forge.py <voice_id>")
        sys.exit(1)

    parsed_json = {
        "segments": [
            {
                "voiceovers": [
                    {"text": "Hello and welcome!", "start": "0.5s", "end": "4.0s"}
                ]
            }
        ]
    }

    # If you set ELEVEN_LABS_API_KEY in .env, it will be read automatically
    forge = ElevenLabsVocalForge(sys.argv[1])
    metadata = forge.process_voiceovers(parsed_json)
    print(json.dumps(metadata, indent=2))
@@ -0,0 +1,25 @@
1
+ Metadata-Version: 2.2
2
+ Name: ytml-toolkit
3
+ Version: 0.1.0
4
+ Requires-Python: >=3.7
5
+ Requires-Dist: fastapi
6
+ Requires-Dist: uvicorn
7
+ Requires-Dist: websockets
8
+ Requires-Dist: boto3
9
+ Requires-Dist: gtts
10
+ Requires-Dist: pydub
11
+ Requires-Dist: moviepy
12
+ Requires-Dist: imageio
13
+ Requires-Dist: imageio-ffmpeg
14
+ Requires-Dist: playwright
15
+ Requires-Dist: numpy
16
+ Requires-Dist: requests
17
+ Requires-Dist: python-dotenv
18
+ Requires-Dist: beautifulsoup4
19
+ Requires-Dist: lxml
20
+ Requires-Dist: tqdm
21
+ Requires-Dist: pyttsx3
22
+ Requires-Dist: starlette
23
+ Requires-Dist: colorama
24
+ Dynamic: requires-dist
25
+ Dynamic: requires-python
@@ -0,0 +1,18 @@
1
+ pyproject.toml
2
+ setup.py
3
+ ytml/__init__.py
4
+ ytml/cli.py
5
+ ytml/interpretron/__init__.py
6
+ ytml/interpretron/parser.py
7
+ ytml/interpretron/test_parser.py
8
+ ytml/vocalforge/__init__.py
9
+ ytml/vocalforge/base_vocal_forge.py
10
+ ytml/vocalforge/gtts_vocal_forge.py
11
+ ytml/vocalforge/test_voice.py
12
+ ytml/vocalforge/xi_labs_vocal_forge.py
13
+ ytml_toolkit.egg-info/PKG-INFO
14
+ ytml_toolkit.egg-info/SOURCES.txt
15
+ ytml_toolkit.egg-info/dependency_links.txt
16
+ ytml_toolkit.egg-info/entry_points.txt
17
+ ytml_toolkit.egg-info/requires.txt
18
+ ytml_toolkit.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ ytml = ytml.cli:main
@@ -0,0 +1,19 @@
1
+ fastapi
2
+ uvicorn
3
+ websockets
4
+ boto3
5
+ gtts
6
+ pydub
7
+ moviepy
8
+ imageio
9
+ imageio-ffmpeg
10
+ playwright
11
+ numpy
12
+ requests
13
+ python-dotenv
14
+ beautifulsoup4
15
+ lxml
16
+ tqdm
17
+ pyttsx3
18
+ starlette
19
+ colorama