youtube-to-docs 0.0.18 (tar.gz)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- youtube_to_docs-0.0.18/LICENSE +21 -0
- youtube_to_docs-0.0.18/PKG-INFO +43 -0
- youtube_to_docs-0.0.18/README.md +18 -0
- youtube_to_docs-0.0.18/pyproject.toml +75 -0
- youtube_to_docs-0.0.18/setup.cfg +4 -0
- youtube_to_docs-0.0.18/tests/test_infographic.py +220 -0
- youtube_to_docs-0.0.18/tests/test_llms.py +218 -0
- youtube_to_docs-0.0.18/tests/test_main.py +646 -0
- youtube_to_docs-0.0.18/tests/test_mcp_server.py +79 -0
- youtube_to_docs-0.0.18/tests/test_sharepoint.py +223 -0
- youtube_to_docs-0.0.18/tests/test_storage_upload.py +48 -0
- youtube_to_docs-0.0.18/tests/test_transcript.py +192 -0
- youtube_to_docs-0.0.18/tests/test_tts.py +185 -0
- youtube_to_docs-0.0.18/tests/test_tts_filtering.py +83 -0
- youtube_to_docs-0.0.18/tests/test_utils.py +153 -0
- youtube_to_docs-0.0.18/tests/test_video.py +106 -0
- youtube_to_docs-0.0.18/tests/test_workspace.py +95 -0
- youtube_to_docs-0.0.18/youtube_to_docs/__init__.py +0 -0
- youtube_to_docs-0.0.18/youtube_to_docs/infographic.py +231 -0
- youtube_to_docs-0.0.18/youtube_to_docs/llms.py +373 -0
- youtube_to_docs-0.0.18/youtube_to_docs/main.py +1287 -0
- youtube_to_docs-0.0.18/youtube_to_docs/mcp_server.py +79 -0
- youtube_to_docs-0.0.18/youtube_to_docs/models.py +14 -0
- youtube_to_docs-0.0.18/youtube_to_docs/prices.py +867 -0
- youtube_to_docs-0.0.18/youtube_to_docs/storage.py +1062 -0
- youtube_to_docs-0.0.18/youtube_to_docs/transcript.py +242 -0
- youtube_to_docs-0.0.18/youtube_to_docs/tts.py +287 -0
- youtube_to_docs-0.0.18/youtube_to_docs/utils.py +185 -0
- youtube_to_docs-0.0.18/youtube_to_docs/video.py +196 -0
- youtube_to_docs-0.0.18/youtube_to_docs.egg-info/PKG-INFO +43 -0
- youtube_to_docs-0.0.18/youtube_to_docs.egg-info/SOURCES.txt +33 -0
- youtube_to_docs-0.0.18/youtube_to_docs.egg-info/dependency_links.txt +1 -0
- youtube_to_docs-0.0.18/youtube_to_docs.egg-info/entry_points.txt +2 -0
- youtube_to_docs-0.0.18/youtube_to_docs.egg-info/requires.txt +16 -0
- youtube_to_docs-0.0.18/youtube_to_docs.egg-info/top_level.txt +1 -0
youtube_to_docs-0.0.18/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 DoIT - Artificial Intelligence
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
youtube_to_docs-0.0.18/PKG-INFO
@@ -0,0 +1,43 @@
+Metadata-Version: 2.4
+Name: youtube-to-docs
+Version: 0.0.18
+Summary: Convert YouTube videos into structured docs, summaries, audio, and visual assets for easier discovery
+Requires-Python: >=3.14
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: fastexcel>=0.12.0
+Requires-Dist: google-api-python-client>=2.187.0
+Requires-Dist: google-auth-oauthlib
+Requires-Dist: google-genai
+Requires-Dist: isodate>=0.7.2
+Requires-Dist: mcp>=1.25.0
+Requires-Dist: msal>=1.34.0
+Requires-Dist: openai>=1.56.0
+Requires-Dist: polars>=1.36.1
+Requires-Dist: pypandoc>=1.16.2
+Requires-Dist: pypandoc_binary>=1.16.2
+Requires-Dist: requests>=2.32.5
+Requires-Dist: static-ffmpeg>=2.13
+Requires-Dist: xlsxwriter>=3.2.9
+Requires-Dist: youtube-transcript-api>=1.2.3
+Requires-Dist: yt-dlp>=2025.12.8
+Dynamic: license-file
+
+# youtube-to-docs
+[](https://pypi.org/project/youtube-to-docs/)
+
+Convert YouTube videos into structured docs, summaries, audio, and visual assets for easier discovery.
+
+View all available CLI options:
+
+```bash
+uvx youtube-to-docs --help
+```
+
+Install as a Gemini CLI extension:
+
+```bash
+gemini extensions install https://github.com/DoIT-Artificial-Intelligence/youtube-to-docs.git
+```
+
+*Created with the help of AI. All artifacts have been checked and work as expected.*
youtube_to_docs-0.0.18/README.md
@@ -0,0 +1,18 @@
+# youtube-to-docs
+[](https://pypi.org/project/youtube-to-docs/)
+
+Convert YouTube videos into structured docs, summaries, audio, and visual assets for easier discovery.
+
+View all available CLI options:
+
+```bash
+uvx youtube-to-docs --help
+```
+
+Install as a Gemini CLI extension:
+
+```bash
+gemini extensions install https://github.com/DoIT-Artificial-Intelligence/youtube-to-docs.git
+```
+
+*Created with the help of AI. All artifacts have been checked and work as expected.*
youtube_to_docs-0.0.18/pyproject.toml
@@ -0,0 +1,75 @@
+[build-system]
+requires = ["setuptools"]
+build-backend = "setuptools.build_meta"
+
+[dependency-groups]
+dev = [
+    "mkdocs>=1.6.1",
+    "mkdocs-material>=9.7.1",
+]
+test = [
+    "pytest>=9.0.2",
+    "pytest-mock>=3.15.1",
+]
+
+[project]
+name = "youtube-to-docs"
+version = "0.0.18"
+description = "Convert YouTube videos into structured docs, summaries, audio, and visual assets for easier discovery"
+readme = "README.md"
+requires-python = ">=3.14"
+dependencies = [
+    "fastexcel>=0.12.0",
+    "google-api-python-client>=2.187.0",
+    "google-auth-oauthlib",
+    "google-genai",
+    "isodate>=0.7.2",
+    "mcp>=1.25.0",
+    "msal>=1.34.0",
+    "openai>=1.56.0",
+    "polars>=1.36.1",
+    "pypandoc>=1.16.2",
+    "pypandoc_binary>=1.16.2",
+    "requests>=2.32.5",
+    "static-ffmpeg>=2.13",
+    "xlsxwriter>=3.2.9",
+    "youtube-transcript-api>=1.2.3",
+    "yt-dlp>=2025.12.8",
+]
+
+[project.scripts]
+youtube-to-docs = "youtube_to_docs.main:main"
+
+[tool.setuptools.packages.find]
+include = ["youtube_to_docs*"]
+exclude = ["tests*", "docs*", "youtube-to-docs-artifacts*", "summary-files*", "transcript-files*", "audio-files*", "infographic-files*", "speaker-extraction-files*", "qa-files*", "video-files*"]
+
+[tool.ruff]
+# Match the project's Python version requirement
+target-version = "py314"
+line-length = 88
+
+[tool.ruff.lint]
+# Enable Pyflakes (`F`), pycodestyle (`E`, `W`), and isort (`I`)
+select = ["E", "F", "I", "W"]
+ignore = []
+
+# Allow fix for all enabled rules (when `--fix`) is provided.
+fixable = ["ALL"]
+unfixable = []
+
+[tool.ruff.format]
+# Use double quotes for strings.
+quote-style = "double"
+# Indent with spaces, rather than tabs.
+indent-style = "space"
+# Respect magic trailing commas.
+skip-magic-trailing-comma = false
+# Automatically detect the appropriate line ending.
+line-ending = "auto"
+
+[tool.ty.environment]
+# Target Python 3.14 to match project requirements
+python-version = "3.14"
+
+
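The `[project.scripts]` table above is what makes the `youtube-to-docs` command available after installation: setuptools generates a console-script wrapper that imports `youtube_to_docs.main` and calls `main()`. A minimal sketch of driving that same entry point from Python, assuming `main()` parses `sys.argv` itself the way console-script targets conventionally do (the actual signature is not shown in this diff):

```python
# Sketch only: call the function the "youtube-to-docs" console script points at.
# Assumption: youtube_to_docs.main.main() reads its options from sys.argv; the
# argument list below is hypothetical.
import sys

from youtube_to_docs.main import main

if __name__ == "__main__":
    sys.argv = ["youtube-to-docs", "--help"]  # hypothetical CLI arguments
    raise SystemExit(main())  # mirror what the generated wrapper script does
```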
youtube_to_docs-0.0.18/tests/test_infographic.py
@@ -0,0 +1,220 @@
+import os
+import unittest
+from unittest import mock
+from unittest.mock import MagicMock, patch
+
+from youtube_to_docs import infographic
+
+
+class TestInfographic(unittest.TestCase):
+    def setUp(self):
+        # Mock environment variables
+        self.env_patcher = patch.dict(
+            os.environ,
+            {
+                "GEMINI_API_KEY": "fake_gemini_key",
+                "AWS_BEARER_TOKEN_BEDROCK": "fake_bedrock_token",
+                "AZURE_FOUNDRY_ENDPOINT": "fake_endpoint",
+                "AZURE_FOUNDRY_API_KEY": "fake_foundry_key",
+            },
+        )
+        self.env_patcher.start()
+
+    def tearDown(self):
+        self.env_patcher.stop()
+
+    @patch("youtube_to_docs.infographic.genai.Client")
+    def test_generate_infographic_gemini(self, mock_client_cls):
+        mock_client = mock_client_cls.return_value
+
+        # Mocking the stream response for generate_content_stream
+        mock_chunk = MagicMock()
+        mock_part = MagicMock()
+        mock_part.inline_data.data = b"fake_gemini_bytes"
+        mock_chunk.candidates = [MagicMock(content=MagicMock(parts=[mock_part]))]
+        mock_chunk.usage_metadata.prompt_token_count = 10
+        mock_chunk.usage_metadata.candidates_token_count = 20
+
+        mock_client.models.generate_content_stream.return_value = [mock_chunk]
+
+        image_bytes, in_tok, out_tok = infographic.generate_infographic(
+            "gemini-2.5-flash-image", "Summary text", "Video Title"
+        )
+
+        self.assertEqual(image_bytes, b"fake_gemini_bytes")
+        self.assertEqual(in_tok, 10)
+        self.assertEqual(out_tok, 20)
+        mock_client.models.generate_content_stream.assert_called_once()
+
+    @patch("youtube_to_docs.infographic.genai.Client")
+    def test_generate_infographic_imagen(self, mock_client_cls):
+        mock_client = mock_client_cls.return_value
+        mock_resp = MagicMock()
+
+        # Mocking the response structure for generate_images
+        mock_image = MagicMock()
+        mock_image.image.image_bytes = b"fake_imagen_bytes"
+        mock_resp.generated_images = [mock_image]
+
+        mock_client.models.generate_images.return_value = mock_resp
+
+        image_bytes, in_tok, out_tok = infographic.generate_infographic(
+            "imagen-4.0-generate-001", "Summary text", "Video Title"
+        )
+
+        self.assertEqual(image_bytes, b"fake_imagen_bytes")
+        self.assertEqual(in_tok, 0)
+        self.assertEqual(out_tok, 1000)
+        mock_client.models.generate_images.assert_called_once()
+
+    def test_generate_infographic_none_model(self):
+        image_bytes, in_tok, out_tok = infographic.generate_infographic(
+            None, "Summary text", "Video Title"
+        )
+        self.assertIsNone(image_bytes)
+        self.assertEqual(in_tok, 0)
+        self.assertEqual(out_tok, 0)
+
+    def test_generate_infographic_unsupported_model(self):
+        image_bytes, in_tok, out_tok = infographic.generate_infographic(
+            "unsupported-model", "Summary text", "Video Title"
+        )
+        self.assertIsNone(image_bytes)
+        self.assertEqual(in_tok, 0)
+        self.assertEqual(out_tok, 0)
+
+    @patch("youtube_to_docs.infographic.genai.Client")
+    def test_generate_infographic_imagen_no_images(self, mock_client_cls):
+        mock_client = mock_client_cls.return_value
+        mock_resp = MagicMock()
+        mock_resp.generated_images = []
+        mock_client.models.generate_images.return_value = mock_resp
+
+        image_bytes, in_tok, out_tok = infographic.generate_infographic(
+            "imagen-4.0-generate-001", "Summary text", "Video Title"
+        )
+        self.assertIsNone(image_bytes)
+        self.assertEqual(in_tok, 0)
+        self.assertEqual(out_tok, 0)
+
+    @patch("youtube_to_docs.infographic.genai.Client")
+    def test_generate_infographic_gemini_no_data(self, mock_client_cls):
+        mock_client = mock_client_cls.return_value
+
+        # Mocking a stream with no inline data
+        mock_chunk = MagicMock()
+        mock_chunk.candidates = [
+            MagicMock(content=MagicMock(parts=[MagicMock(inline_data=None)]))
+        ]
+        mock_chunk.usage_metadata = None
+
+        mock_client.models.generate_content_stream.return_value = [mock_chunk]
+
+        image_bytes, in_tok, out_tok = infographic.generate_infographic(
+            "gemini-2.5-flash-image", "Summary text", "Video Title"
+        )
+        self.assertIsNone(image_bytes)
+        self.assertEqual(in_tok, 0)
+        self.assertEqual(out_tok, 0)
+
+    @patch("youtube_to_docs.infographic.requests.post")
+    def test_generate_infographic_bedrock(self, mock_post):
+        mock_resp = MagicMock()
+        mock_resp.status_code = 200
+        # "fake_bytes" in base64 is "ZmFrZV9ieXRlcw=="
+        mock_resp.json.return_value = {"images": ["ZmFrZV9ieXRlcw=="]}
+        mock_post.return_value = mock_resp
+
+        image_bytes, in_tok, out_tok = infographic.generate_infographic(
+            "bedrock-titan-image-generator-v2", "Summary text", "Video Title"
+        )
+
+        self.assertEqual(image_bytes, b"fake_bytes")
+        self.assertEqual(in_tok, 0)
+        self.assertEqual(out_tok, 1000)
+        mock_post.assert_called_once()
+        # Check if actual_model_id was mapped correctly
+        args, kwargs = mock_post.call_args
+        self.assertIn("amazon.titan-image-generator-v2:0", args[0])
+
+    @patch("youtube_to_docs.infographic.requests.post")
+    def test_generate_infographic_bedrock_nova(self, mock_post):
+        mock_resp = MagicMock()
+        mock_resp.status_code = 200
+        mock_resp.json.return_value = {"images": ["ZmFrZV9ieXRlcw=="]}
+        mock_post.return_value = mock_resp
+
+        image_bytes, in_tok, out_tok = infographic.generate_infographic(
+            "bedrock-nova-canvas-v1", "Summary text", "Video Title"
+        )
+
+        self.assertEqual(image_bytes, b"fake_bytes")
+        self.assertEqual(in_tok, 0)
+        self.assertEqual(out_tok, 4000)
+        mock_post.assert_called_once()
+        # Check if actual_model_id was mapped correctly
+        args, kwargs = mock_post.call_args
+        self.assertIn("amazon.nova-canvas-v1:0", args[0])
+
+    @patch("youtube_to_docs.infographic.requests.post")
+    def test_generate_infographic_bedrock_with_suffix(self, mock_post):
+        mock_resp = MagicMock()
+        mock_resp.status_code = 200
+        mock_resp.json.return_value = {"images": ["ZmFrZV9ieXRlcw=="]}
+        mock_post.return_value = mock_resp
+
+        image_bytes, in_tok, out_tok = infographic.generate_infographic(
+            "bedrock-nova-canvas-v1:0", "Summary text", "Video Title"
+        )
+
+        self.assertEqual(image_bytes, b"fake_bytes")
+        mock_post.assert_called_once()
+        # Check that it didn't double the :0
+        args, kwargs = mock_post.call_args
+        self.assertIn("amazon.nova-canvas-v1:0", args[0])
+        self.assertNotIn("amazon.nova-canvas-v1:0:0", args[0])
+
+    @patch("youtube_to_docs.infographic.requests.post")
+    def test_generate_infographic_bedrock_skip_long_prompt(self, mock_post):
+        mock_resp = MagicMock()
+        mock_resp.status_code = 200
+        mock_resp.json.return_value = {"images": ["ZmFrZV9ieXRlcw=="]}
+        mock_post.return_value = mock_resp
+
+        long_summary = "A" * 2000
+        image_bytes, in_tok, out_tok = infographic.generate_infographic(
+            "amazon.nova-canvas-v1:0", long_summary, "Video Title"
+        )
+
+        self.assertIsNone(image_bytes)
+        self.assertEqual(in_tok, 0)
+        self.assertEqual(out_tok, 0)
+        mock_post.assert_not_called()
+
+    @patch("youtube_to_docs.infographic.OpenAI")
+    def test_generate_infographic_foundry(self, mock_openai_cls):
+        mock_client = mock_openai_cls.return_value
+        mock_resp = MagicMock()
+        mock_image = MagicMock()
+        mock_image.b64_json = "ZmFrZV9ieXRlcw=="
+        mock_resp.data = [mock_image]
+        mock_client.images.generate.return_value = mock_resp
+
+        image_bytes, in_tok, out_tok = infographic.generate_infographic(
+            "foundry-gpt-image-1.5", "Summary text", "Video Title"
+        )
+
+        self.assertEqual(image_bytes, b"fake_bytes")
+        self.assertEqual(in_tok, 0)
+        self.assertEqual(out_tok, 3400)
+        mock_client.images.generate.assert_called_once_with(
+            model="gpt-image-1.5",
+            prompt=mock.ANY,
+            n=1,
+            size="1536x1024",
+            response_format="b64_json",
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()
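Taken together, these tests pin down the calling convention of `infographic.generate_infographic`: it takes a model id, the summary text, and the video title, and returns `(image_bytes, input_tokens, output_tokens)`, falling back to `(None, 0, 0)` when the model is `None`, unrecognized, or the provider returns nothing. A minimal usage sketch based on that contract (the model id and the `GEMINI_API_KEY` requirement come from the tests above; the output path and everything else are assumptions):

```python
# Sketch based on the tuple contract exercised by the tests above; not the
# package's documented API. Assumes GEMINI_API_KEY is set for Gemini image models.
from youtube_to_docs import infographic

image_bytes, in_tok, out_tok = infographic.generate_infographic(
    "gemini-2.5-flash-image",       # model id used in test_generate_infographic_gemini
    "Short summary of the video.",  # summary text
    "Video Title",
)

if image_bytes is None:
    print("No infographic generated (unsupported model or empty response).")
else:
    with open("infographic.png", "wb") as f:  # hypothetical output path
        f.write(image_bytes)
    print(f"Generated infographic; tokens in={in_tok}, out={out_tok}")
```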
youtube_to_docs-0.0.18/tests/test_llms.py
@@ -0,0 +1,218 @@
+import os
+import unittest
+from unittest.mock import MagicMock, patch
+
+from youtube_to_docs import llms
+
+
+class TestLLMs(unittest.TestCase):
+    def setUp(self):
+        # Mock environment variables
+        self.env_patcher = patch.dict(
+            os.environ,
+            {
+                "GEMINI_API_KEY": "fake_gemini_key",
+                "PROJECT_ID": "fake_project_id",
+                "AWS_BEARER_TOKEN_BEDROCK": "fake_bedrock_token",
+                "AZURE_FOUNDRY_ENDPOINT": "https://fake.openai.azure.com/",
+                "AZURE_FOUNDRY_API_KEY": "fake_foundry_key",
+            },
+        )
+        self.env_patcher.start()
+
+    def tearDown(self):
+        self.env_patcher.stop()
+
+    @patch("youtube_to_docs.llms.genai.Client")
+    def test_generate_summary_gemini(self, mock_client_cls):
+        mock_client = mock_client_cls.return_value
+        mock_resp = MagicMock()
+        mock_resp.text = "Gemini Summary"
+        mock_resp.usage_metadata.prompt_token_count = 100
+        mock_resp.usage_metadata.candidates_token_count = 50
+        mock_client.models.generate_content.return_value = mock_resp
+
+        summary, in_tokens, out_tokens = llms.generate_summary(
+            "gemini-pro", "transcript", "Title", "url"
+        )
+        self.assertEqual(summary, "Gemini Summary")
+        self.assertEqual(in_tokens, 100)
+        self.assertEqual(out_tokens, 50)
+
+    @patch("youtube_to_docs.llms.requests.post")
+    @patch("google.auth.default")
+    def test_generate_summary_vertex(self, mock_auth, mock_post):
+        mock_creds = MagicMock()
+        mock_creds.token = "fake_token"
+        mock_creds.expired = False
+        mock_auth.return_value = (mock_creds, "proj")
+
+        mock_resp = MagicMock()
+        mock_resp.status_code = 200
+        mock_resp.json.return_value = {
+            "content": [{"text": "Vertex Summary"}],
+            "usage": {"input_tokens": 100, "output_tokens": 50},
+        }
+        mock_post.return_value = mock_resp
+
+        summary, in_tokens, out_tokens = llms.generate_summary(
+            "vertex-claude-3-5", "transcript", "Title", "url"
+        )
+        self.assertEqual(summary, "Vertex Summary")
+        self.assertEqual(in_tokens, 100)
+        self.assertEqual(out_tokens, 50)
+
+    @patch("youtube_to_docs.llms.requests.post")
+    def test_generate_summary_bedrock(self, mock_post):
+        mock_resp = MagicMock()
+        mock_resp.status_code = 200
+        mock_resp.json.return_value = {
+            "output": {"message": {"content": [{"text": "Bedrock Summary"}]}},
+            "usage": {"inputTokens": 100, "outputTokens": 50},
+        }
+        mock_post.return_value = mock_resp
+
+        summary, in_tokens, out_tokens = llms.generate_summary(
+            "bedrock-claude-3-5", "transcript", "Title", "url"
+        )
+        self.assertEqual(summary, "Bedrock Summary")
+        self.assertEqual(in_tokens, 100)
+        self.assertEqual(out_tokens, 50)
+
+    @patch("youtube_to_docs.llms.OpenAI")
+    def test_generate_summary_foundry(self, mock_openai):
+        mock_client = mock_openai.return_value
+        mock_completion = MagicMock()
+        mock_completion.choices[0].message.content = "Foundry Summary"
+        mock_completion.usage.prompt_tokens = 100
+        mock_completion.usage.completion_tokens = 50
+        mock_client.chat.completions.create.return_value = mock_completion
+
+        summary, in_tokens, out_tokens = llms.generate_summary(
+            "foundry-gpt-4", "transcript", "Title", "url"
+        )
+        self.assertEqual(summary, "Foundry Summary")
+        self.assertEqual(in_tokens, 100)
+        self.assertEqual(out_tokens, 50)
+
+    @patch("youtube_to_docs.llms.genai.Client")
+    def test_extract_speakers_gemini(self, mock_client_cls):
+        mock_client = mock_client_cls.return_value
+        mock_resp = MagicMock()
+        mock_resp.text = "Speaker 1 (Expert)\nSpeaker 2 (UNKNOWN)"
+        mock_resp.usage_metadata.prompt_token_count = 120
+        mock_resp.usage_metadata.candidates_token_count = 30
+        mock_client.models.generate_content.return_value = mock_resp
+
+        speakers, in_tokens, out_tokens = llms.extract_speakers(
+            "gemini-pro", "transcript content"
+        )
+        self.assertEqual(speakers, "Speaker 1 (Expert)\nSpeaker 2 (UNKNOWN)")
+        self.assertEqual(in_tokens, 120)
+        self.assertEqual(out_tokens, 30)
+
+    @patch("youtube_to_docs.llms.genai.Client")
+    def test_generate_qa_gemini(self, mock_client_cls):
+        mock_client = mock_client_cls.return_value
+        mock_resp = MagicMock()
+        mock_resp.text = "| Q | A |\n|---|---|\n| Q1 | A1 |"
+        mock_resp.usage_metadata.prompt_token_count = 150
+        mock_resp.usage_metadata.candidates_token_count = 60
+        mock_client.models.generate_content.return_value = mock_resp
+
+        qa, in_tokens, out_tokens = llms.generate_qa(
+            "gemini-pro", "transcript content", "Speaker 1, Speaker 2"
+        )
+        # Expecting the added column
+        expected_qa = "| question number | Q | A |\n|---|---|---|\n| 1 | Q1 | A1 |"
+        self.assertEqual(qa, expected_qa)
+        self.assertEqual(in_tokens, 150)
+        self.assertEqual(out_tokens, 60)
+
+
+class TestPricing(unittest.TestCase):
+    @patch(
+        "youtube_to_docs.llms.PRICES",
+        {"prices": [{"id": "gpt-4", "input": 30.0, "output": 60.0}]},
+    )
+    def test_get_model_pricing_found(self):
+        inp, outp = llms.get_model_pricing("gpt-4")
+        self.assertEqual(inp, 30.0)
+        self.assertEqual(outp, 60.0)
+
+    @patch(
+        "youtube_to_docs.llms.PRICES",
+        {"prices": [{"id": "gpt-4", "input": 30.0, "output": 60.0}]},
+    )
+    def test_get_model_pricing_normalized(self):
+        inp, outp = llms.get_model_pricing("vertex-gpt-4")
+        self.assertEqual(inp, 30.0)
+        self.assertEqual(outp, 60.0)
+
+    @patch(
+        "youtube_to_docs.llms.PRICES",
+        {
+            "prices": [{"id": "claude-4.5-haiku", "input": 1.0, "output": 5.0}],
+            "aliases": {"claude-haiku-4-5": "claude-4.5-haiku"},
+        },
+    )
+    def test_get_model_pricing_aliased(self):
+        """Test that aliases (like claude-haiku-4-5 -> claude-4.5-haiku) work."""
+        # This model name normalizes to 'claude-haiku-4-5'
+        # which should alias to 'claude-4.5-haiku'
+        inp, outp = llms.get_model_pricing("bedrock-claude-haiku-4-5-20251001-v1")
+        self.assertEqual(inp, 1.0)
+        self.assertEqual(outp, 5.0)
+
+    @patch(
+        "youtube_to_docs.llms.PRICES",
+        {
+            "prices": [
+                {"id": "claude-4.5-haiku", "input": 1.0, "output": 5.0},
+                {"id": "gemini-3-flash-preview", "input": 0.5, "output": 3.0},
+                {"id": "gpt-5-mini", "input": 0.25, "output": 2.0},
+                {
+                    "id": "amazon-nova-2-lite",
+                    "input": 0.3,
+                    "output": 2.5,
+                },
+                {
+                    "id": "imagen-4",
+                    "input": 0.0,
+                    "output": 40.0,
+                },
+            ],
+            "aliases": {
+                "claude-haiku-4-5": "claude-4.5-haiku",
+                "nova-2-lite": "amazon-nova-2-lite",
+            },
+        },
+    )
+    def test_get_model_pricing_specific_models(self):
+        """Test getting pricing for specific models requested by user."""
+        models = [
+            "gemini-3-flash-preview",
+            "vertex-claude-haiku-4-5@20251001",
+            "bedrock-claude-haiku-4-5-20251001-v1",
+            "bedrock-nova-2-lite-v1",
+            "foundry-gpt-5-mini",
+            "imagen-4",
+        ]
+
+        for model in models:
+            with self.subTest(model=model):
+                inp, outp = llms.get_model_pricing(model)
+                self.assertIsNotNone(inp, f"Input price for {model} should not be None")
+                self.assertIsNotNone(
+                    outp, f"Output price for {model} should not be None"
+                )
+
+    @patch("youtube_to_docs.llms.PRICES", {"prices": [{"id": "gpt-4"}]})
+    def test_get_model_pricing_not_found(self):
+        inp, outp = llms.get_model_pricing("non-existent-model")
+        self.assertIsNone(inp)
+        self.assertIsNone(outp)
+
+
+if __name__ == "__main__":
+    unittest.main()
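These tests likewise fix the shape of the `llms` helpers: `generate_summary(model, transcript, title, url)`, `extract_speakers(model, transcript)`, and `generate_qa(model, transcript, speakers)` each return `(text, input_tokens, output_tokens)`, and `get_model_pricing(model)` normalizes provider prefixes and aliases before returning `(input_price, output_price)` or `(None, None)`. A sketch tying them together (the per-million-token reading of the prices and the cost arithmetic are assumptions, not confirmed by this diff):

```python
# Sketch of the call shapes exercised by the tests above; not documented API.
# Assumption: prices from get_model_pricing are USD per million tokens.
from youtube_to_docs import llms

model = "gemini-pro"  # model id used throughout the tests
summary, in_tok, out_tok = llms.generate_summary(
    model, "transcript text", "Video Title", "https://example.com/watch"  # hypothetical URL
)

inp, outp = llms.get_model_pricing(model)
if inp is not None and outp is not None:
    cost = (in_tok * inp + out_tok * outp) / 1_000_000
    print(f"Summary cost estimate: ${cost:.4f}")
else:
    print("No pricing entry found for this model.")
```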