youtube-to-docs 0.0.18__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. youtube_to_docs-0.0.18/LICENSE +21 -0
  2. youtube_to_docs-0.0.18/PKG-INFO +43 -0
  3. youtube_to_docs-0.0.18/README.md +18 -0
  4. youtube_to_docs-0.0.18/pyproject.toml +75 -0
  5. youtube_to_docs-0.0.18/setup.cfg +4 -0
  6. youtube_to_docs-0.0.18/tests/test_infographic.py +220 -0
  7. youtube_to_docs-0.0.18/tests/test_llms.py +218 -0
  8. youtube_to_docs-0.0.18/tests/test_main.py +646 -0
  9. youtube_to_docs-0.0.18/tests/test_mcp_server.py +79 -0
  10. youtube_to_docs-0.0.18/tests/test_sharepoint.py +223 -0
  11. youtube_to_docs-0.0.18/tests/test_storage_upload.py +48 -0
  12. youtube_to_docs-0.0.18/tests/test_transcript.py +192 -0
  13. youtube_to_docs-0.0.18/tests/test_tts.py +185 -0
  14. youtube_to_docs-0.0.18/tests/test_tts_filtering.py +83 -0
  15. youtube_to_docs-0.0.18/tests/test_utils.py +153 -0
  16. youtube_to_docs-0.0.18/tests/test_video.py +106 -0
  17. youtube_to_docs-0.0.18/tests/test_workspace.py +95 -0
  18. youtube_to_docs-0.0.18/youtube_to_docs/__init__.py +0 -0
  19. youtube_to_docs-0.0.18/youtube_to_docs/infographic.py +231 -0
  20. youtube_to_docs-0.0.18/youtube_to_docs/llms.py +373 -0
  21. youtube_to_docs-0.0.18/youtube_to_docs/main.py +1287 -0
  22. youtube_to_docs-0.0.18/youtube_to_docs/mcp_server.py +79 -0
  23. youtube_to_docs-0.0.18/youtube_to_docs/models.py +14 -0
  24. youtube_to_docs-0.0.18/youtube_to_docs/prices.py +867 -0
  25. youtube_to_docs-0.0.18/youtube_to_docs/storage.py +1062 -0
  26. youtube_to_docs-0.0.18/youtube_to_docs/transcript.py +242 -0
  27. youtube_to_docs-0.0.18/youtube_to_docs/tts.py +287 -0
  28. youtube_to_docs-0.0.18/youtube_to_docs/utils.py +185 -0
  29. youtube_to_docs-0.0.18/youtube_to_docs/video.py +196 -0
  30. youtube_to_docs-0.0.18/youtube_to_docs.egg-info/PKG-INFO +43 -0
  31. youtube_to_docs-0.0.18/youtube_to_docs.egg-info/SOURCES.txt +33 -0
  32. youtube_to_docs-0.0.18/youtube_to_docs.egg-info/dependency_links.txt +1 -0
  33. youtube_to_docs-0.0.18/youtube_to_docs.egg-info/entry_points.txt +2 -0
  34. youtube_to_docs-0.0.18/youtube_to_docs.egg-info/requires.txt +16 -0
  35. youtube_to_docs-0.0.18/youtube_to_docs.egg-info/top_level.txt +1 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 DoIT - Artificial Intelligence
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,43 @@
1
+ Metadata-Version: 2.4
2
+ Name: youtube-to-docs
3
+ Version: 0.0.18
4
+ Summary: Convert YouTube videos into structured docs, summaries, audio, and visual assets for easier discovery
5
+ Requires-Python: >=3.14
6
+ Description-Content-Type: text/markdown
7
+ License-File: LICENSE
8
+ Requires-Dist: fastexcel>=0.12.0
9
+ Requires-Dist: google-api-python-client>=2.187.0
10
+ Requires-Dist: google-auth-oauthlib
11
+ Requires-Dist: google-genai
12
+ Requires-Dist: isodate>=0.7.2
13
+ Requires-Dist: mcp>=1.25.0
14
+ Requires-Dist: msal>=1.34.0
15
+ Requires-Dist: openai>=1.56.0
16
+ Requires-Dist: polars>=1.36.1
17
+ Requires-Dist: pypandoc>=1.16.2
18
+ Requires-Dist: pypandoc_binary>=1.16.2
19
+ Requires-Dist: requests>=2.32.5
20
+ Requires-Dist: static-ffmpeg>=2.13
21
+ Requires-Dist: xlsxwriter>=3.2.9
22
+ Requires-Dist: youtube-transcript-api>=1.2.3
23
+ Requires-Dist: yt-dlp>=2025.12.8
24
+ Dynamic: license-file
25
+
26
+ # youtube-to-docs
27
+ [![PyPI version](https://img.shields.io/pypi/v/youtube-to-docs.svg)](https://pypi.org/project/youtube-to-docs/)
28
+
29
+ Convert YouTube videos into structured docs, summaries, audio, and visual assets for easier discovery.
30
+
31
+ View all available CLI options:
32
+
33
+ ```bash
34
+ uvx youtube-to-docs --help
35
+ ```
36
+
37
+ Install as a Gemini CLI extension:
38
+
39
+ ```bash
40
+ gemini extensions install https://github.com/DoIT-Artificial-Intelligence/youtube-to-docs.git
41
+ ```
42
+
43
+ *Created with the help of AI. All artifacts have been checked and work as expected.*
@@ -0,0 +1,18 @@
1
+ # youtube-to-docs
2
+ [![PyPI version](https://img.shields.io/pypi/v/youtube-to-docs.svg)](https://pypi.org/project/youtube-to-docs/)
3
+
4
+ Convert YouTube videos into structured docs, summaries, audio, and visual assets for easier discovery.
5
+
6
+ View all available CLI options:
7
+
8
+ ```bash
9
+ uvx youtube-to-docs --help
10
+ ```
11
+
12
+ Install as a Gemini CLI extension:
13
+
14
+ ```bash
15
+ gemini extensions install https://github.com/DoIT-Artificial-Intelligence/youtube-to-docs.git
16
+ ```
17
+
18
+ *Created with the help of AI. All artifacts have been checked and work as expected.*
@@ -0,0 +1,75 @@
1
+ [build-system]
2
+ requires = ["setuptools"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [dependency-groups]
6
+ dev = [
7
+ "mkdocs>=1.6.1",
8
+ "mkdocs-material>=9.7.1",
9
+ ]
10
+ test = [
11
+ "pytest>=9.0.2",
12
+ "pytest-mock>=3.15.1",
13
+ ]
14
+
15
+ [project]
16
+ name = "youtube-to-docs"
17
+ version = "0.0.18"
18
+ description = "Convert YouTube videos into structured docs, summaries, audio, and visual assets for easier discovery"
19
+ readme = "README.md"
20
+ requires-python = ">=3.14"
21
+ dependencies = [
22
+ "fastexcel>=0.12.0",
23
+ "google-api-python-client>=2.187.0",
24
+ "google-auth-oauthlib",
25
+ "google-genai",
26
+ "isodate>=0.7.2",
27
+ "mcp>=1.25.0",
28
+ "msal>=1.34.0",
29
+ "openai>=1.56.0",
30
+ "polars>=1.36.1",
31
+ "pypandoc>=1.16.2",
32
+ "pypandoc_binary>=1.16.2",
33
+ "requests>=2.32.5",
34
+ "static-ffmpeg>=2.13",
35
+ "xlsxwriter>=3.2.9",
36
+ "youtube-transcript-api>=1.2.3",
37
+ "yt-dlp>=2025.12.8",
38
+ ]
39
+
40
+ [project.scripts]
41
+ youtube-to-docs = "youtube_to_docs.main:main"
42
+
43
+ [tool.setuptools.packages.find]
44
+ include = ["youtube_to_docs*"]
45
+ exclude = ["tests*", "docs*", "youtube-to-docs-artifacts*", "summary-files*", "transcript-files*", "audio-files*", "infographic-files*", "speaker-extraction-files*", "qa-files*", "video-files*"]
46
+
47
+ [tool.ruff]
48
+ # Match the project's Python version requirement
49
+ target-version = "py314"
50
+ line-length = 88
51
+
52
+ [tool.ruff.lint]
53
+ # Enable Pyflakes (`F`), pycodestyle (`E`, `W`), and isort (`I`)
54
+ select = ["E", "F", "I", "W"]
55
+ ignore = []
56
+
57
+ # Allow fix for all enabled rules (when `--fix` is provided).
58
+ fixable = ["ALL"]
59
+ unfixable = []
60
+
61
+ [tool.ruff.format]
62
+ # Use double quotes for strings.
63
+ quote-style = "double"
64
+ # Indent with spaces, rather than tabs.
65
+ indent-style = "space"
66
+ # Respect magic trailing commas.
67
+ skip-magic-trailing-comma = false
68
+ # Automatically detect the appropriate line ending.
69
+ line-ending = "auto"
70
+
71
+ [tool.ty.environment]
72
+ # Target Python 3.14 to match project requirements
73
+ python-version = "3.14"
74
+
75
+
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,220 @@
1
+ import os
2
+ import unittest
3
+ from unittest import mock
4
+ from unittest.mock import MagicMock, patch
5
+
6
+ from youtube_to_docs import infographic
7
+
8
+
9
class TestInfographic(unittest.TestCase):
    """Exercise ``infographic.generate_infographic`` with every backend mocked.

    Each test unpacks the result as ``(image_bytes, in_tok, out_tok)``. No real
    network or SDK call is made: the ``genai.Client`` class, ``requests.post``
    and the ``OpenAI`` class are patched inside ``youtube_to_docs.infographic``.
    """

    # Base64 text that decodes to b"fake_bytes"; shared by the Bedrock and
    # Foundry tests so the expected decoded payload is stated in one place.
    FAKE_IMAGE_B64 = "ZmFrZV9ieXRlcw=="

    def setUp(self):
        """Inject fake provider credentials into ``os.environ`` for one test."""
        self.env_patcher = patch.dict(
            os.environ,
            {
                "GEMINI_API_KEY": "fake_gemini_key",
                "AWS_BEARER_TOKEN_BEDROCK": "fake_bedrock_token",
                "AZURE_FOUNDRY_ENDPOINT": "fake_endpoint",
                "AZURE_FOUNDRY_API_KEY": "fake_foundry_key",
            },
        )
        self.env_patcher.start()
        # addCleanup (rather than tearDown) restores the environment even when
        # a later setUp step fails or a subclass overrides tearDown.
        self.addCleanup(self.env_patcher.stop)

    @staticmethod
    def _bedrock_response():
        """Return a mocked HTTP 200 response carrying one base64 image."""
        response = MagicMock()
        response.status_code = 200
        response.json.return_value = {"images": [TestInfographic.FAKE_IMAGE_B64]}
        return response

    def _assert_no_image(self, result):
        """Assert that ``result`` is ``(None, 0, 0)``: no image, no tokens."""
        image_bytes, in_tok, out_tok = result
        self.assertIsNone(image_bytes)
        self.assertEqual(in_tok, 0)
        self.assertEqual(out_tok, 0)

    @patch("youtube_to_docs.infographic.genai.Client")
    def test_generate_infographic_gemini(self, mock_client_cls):
        """A streamed chunk with inline data yields its bytes and token counts."""
        mock_client = mock_client_cls.return_value

        # One streamed chunk holding a single part with inline image data.
        mock_part = MagicMock()
        mock_part.inline_data.data = b"fake_gemini_bytes"
        mock_chunk = MagicMock()
        mock_chunk.candidates = [MagicMock(content=MagicMock(parts=[mock_part]))]
        mock_chunk.usage_metadata.prompt_token_count = 10
        mock_chunk.usage_metadata.candidates_token_count = 20
        mock_client.models.generate_content_stream.return_value = [mock_chunk]

        image_bytes, in_tok, out_tok = infographic.generate_infographic(
            "gemini-2.5-flash-image", "Summary text", "Video Title"
        )

        self.assertEqual(image_bytes, b"fake_gemini_bytes")
        self.assertEqual(in_tok, 10)
        self.assertEqual(out_tok, 20)
        mock_client.models.generate_content_stream.assert_called_once()

    @patch("youtube_to_docs.infographic.genai.Client")
    def test_generate_infographic_imagen(self, mock_client_cls):
        """An Imagen model goes through ``generate_images``; 0 in / 1000 out."""
        mock_client = mock_client_cls.return_value

        # Response shape read from generate_images: generated_images[i].image
        mock_image = MagicMock()
        mock_image.image.image_bytes = b"fake_imagen_bytes"
        mock_resp = MagicMock()
        mock_resp.generated_images = [mock_image]
        mock_client.models.generate_images.return_value = mock_resp

        image_bytes, in_tok, out_tok = infographic.generate_infographic(
            "imagen-4.0-generate-001", "Summary text", "Video Title"
        )

        self.assertEqual(image_bytes, b"fake_imagen_bytes")
        self.assertEqual(in_tok, 0)
        self.assertEqual(out_tok, 1000)
        mock_client.models.generate_images.assert_called_once()

    def test_generate_infographic_none_model(self):
        """Passing ``None`` as the model produces no image and zero tokens."""
        self._assert_no_image(
            infographic.generate_infographic(None, "Summary text", "Video Title")
        )

    def test_generate_infographic_unsupported_model(self):
        """An unrecognised model name produces no image and zero tokens."""
        self._assert_no_image(
            infographic.generate_infographic(
                "unsupported-model", "Summary text", "Video Title"
            )
        )

    @patch("youtube_to_docs.infographic.genai.Client")
    def test_generate_infographic_imagen_no_images(self, mock_client_cls):
        """An empty ``generated_images`` list produces no image and zero tokens."""
        mock_client = mock_client_cls.return_value
        mock_resp = MagicMock()
        mock_resp.generated_images = []
        mock_client.models.generate_images.return_value = mock_resp

        self._assert_no_image(
            infographic.generate_infographic(
                "imagen-4.0-generate-001", "Summary text", "Video Title"
            )
        )

    @patch("youtube_to_docs.infographic.genai.Client")
    def test_generate_infographic_gemini_no_data(self, mock_client_cls):
        """A stream without inline data or usage metadata yields no image."""
        mock_client = mock_client_cls.return_value

        # The only part has inline_data=None and the chunk has no usage info.
        mock_chunk = MagicMock()
        mock_chunk.candidates = [
            MagicMock(content=MagicMock(parts=[MagicMock(inline_data=None)]))
        ]
        mock_chunk.usage_metadata = None
        mock_client.models.generate_content_stream.return_value = [mock_chunk]

        self._assert_no_image(
            infographic.generate_infographic(
                "gemini-2.5-flash-image", "Summary text", "Video Title"
            )
        )

    @patch("youtube_to_docs.infographic.requests.post")
    def test_generate_infographic_bedrock(self, mock_post):
        """The Titan alias is mapped to its Amazon model id in the request URL."""
        mock_post.return_value = self._bedrock_response()

        image_bytes, in_tok, out_tok = infographic.generate_infographic(
            "bedrock-titan-image-generator-v2", "Summary text", "Video Title"
        )

        self.assertEqual(image_bytes, b"fake_bytes")
        self.assertEqual(in_tok, 0)
        self.assertEqual(out_tok, 1000)
        mock_post.assert_called_once()
        # The first positional argument of the POST must name the mapped id.
        self.assertIn(
            "amazon.titan-image-generator-v2:0", mock_post.call_args.args[0]
        )

    @patch("youtube_to_docs.infographic.requests.post")
    def test_generate_infographic_bedrock_nova(self, mock_post):
        """The Nova Canvas alias is mapped to its Amazon id; 0 in / 4000 out."""
        mock_post.return_value = self._bedrock_response()

        image_bytes, in_tok, out_tok = infographic.generate_infographic(
            "bedrock-nova-canvas-v1", "Summary text", "Video Title"
        )

        self.assertEqual(image_bytes, b"fake_bytes")
        self.assertEqual(in_tok, 0)
        self.assertEqual(out_tok, 4000)
        mock_post.assert_called_once()
        # The first positional argument of the POST must name the mapped id.
        self.assertIn("amazon.nova-canvas-v1:0", mock_post.call_args.args[0])

    @patch("youtube_to_docs.infographic.requests.post")
    def test_generate_infographic_bedrock_with_suffix(self, mock_post):
        """A model name already ending in ``:0`` does not get the suffix twice."""
        mock_post.return_value = self._bedrock_response()

        image_bytes, in_tok, out_tok = infographic.generate_infographic(
            "bedrock-nova-canvas-v1:0", "Summary text", "Video Title"
        )

        self.assertEqual(image_bytes, b"fake_bytes")
        mock_post.assert_called_once()
        # Check that it didn't double the :0
        posted_to = mock_post.call_args.args[0]
        self.assertIn("amazon.nova-canvas-v1:0", posted_to)
        self.assertNotIn("amazon.nova-canvas-v1:0:0", posted_to)

    @patch("youtube_to_docs.infographic.requests.post")
    def test_generate_infographic_bedrock_skip_long_prompt(self, mock_post):
        """A 2000-character summary makes the Nova Canvas call get skipped."""
        # A response is configured so that an unexpected POST would succeed,
        # leaving assert_not_called as the check that detects the call.
        mock_post.return_value = self._bedrock_response()

        long_summary = "A" * 2000
        self._assert_no_image(
            infographic.generate_infographic(
                "amazon.nova-canvas-v1:0", long_summary, "Video Title"
            )
        )
        mock_post.assert_not_called()

    @patch("youtube_to_docs.infographic.OpenAI")
    def test_generate_infographic_foundry(self, mock_openai_cls):
        """A ``foundry-`` model calls ``images.generate`` without the prefix."""
        mock_client = mock_openai_cls.return_value
        mock_image = MagicMock()
        mock_image.b64_json = self.FAKE_IMAGE_B64
        mock_resp = MagicMock()
        mock_resp.data = [mock_image]
        mock_client.images.generate.return_value = mock_resp

        image_bytes, in_tok, out_tok = infographic.generate_infographic(
            "foundry-gpt-image-1.5", "Summary text", "Video Title"
        )

        self.assertEqual(image_bytes, b"fake_bytes")
        self.assertEqual(in_tok, 0)
        self.assertEqual(out_tok, 3400)
        mock_client.images.generate.assert_called_once_with(
            model="gpt-image-1.5",
            prompt=mock.ANY,
            n=1,
            size="1536x1024",
            response_format="b64_json",
        )


if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,218 @@
1
+ import os
2
+ import unittest
3
+ from unittest.mock import MagicMock, patch
4
+
5
+ from youtube_to_docs import llms
6
+
7
+
8
class TestLLMs(unittest.TestCase):
    """Exercise the text-generation helpers in ``llms`` with backends mocked.

    Each helper under test is unpacked as ``(text, in_tokens, out_tokens)``.
    The ``genai.Client`` class, ``requests.post``, ``google.auth.default`` and
    the ``OpenAI`` class are patched, so no test performs a real call.
    """

    def setUp(self):
        """Inject fake provider credentials into ``os.environ`` for one test."""
        self.env_patcher = patch.dict(
            os.environ,
            {
                "GEMINI_API_KEY": "fake_gemini_key",
                "PROJECT_ID": "fake_project_id",
                "AWS_BEARER_TOKEN_BEDROCK": "fake_bedrock_token",
                "AZURE_FOUNDRY_ENDPOINT": "https://fake.openai.azure.com/",
                "AZURE_FOUNDRY_API_KEY": "fake_foundry_key",
            },
        )
        self.env_patcher.start()
        # addCleanup (rather than tearDown) restores the environment even when
        # a later setUp step fails or a subclass overrides tearDown.
        self.addCleanup(self.env_patcher.stop)

    @staticmethod
    def _gemini_response(text, prompt_tokens, candidate_tokens):
        """Build a mocked ``generate_content`` response with usage metadata."""
        response = MagicMock()
        response.text = text
        response.usage_metadata.prompt_token_count = prompt_tokens
        response.usage_metadata.candidates_token_count = candidate_tokens
        return response

    def _assert_result(self, result, text, in_tokens, out_tokens):
        """Assert that ``result`` unpacks to the expected text and counts."""
        actual_text, actual_in, actual_out = result
        self.assertEqual(actual_text, text)
        self.assertEqual(actual_in, in_tokens)
        self.assertEqual(actual_out, out_tokens)

    @patch("youtube_to_docs.llms.genai.Client")
    def test_generate_summary_gemini(self, mock_client_cls):
        """A Gemini model returns the response text and its token counts."""
        mock_client = mock_client_cls.return_value
        mock_client.models.generate_content.return_value = self._gemini_response(
            "Gemini Summary", 100, 50
        )

        result = llms.generate_summary("gemini-pro", "transcript", "Title", "url")

        self._assert_result(result, "Gemini Summary", 100, 50)

    @patch("youtube_to_docs.llms.requests.post")
    @patch("google.auth.default")
    def test_generate_summary_vertex(self, mock_auth, mock_post):
        """A ``vertex-`` model reads text and usage from the POSTed JSON reply."""
        # google.auth.default returns a (credentials, project) pair.
        mock_creds = MagicMock()
        mock_creds.token = "fake_token"
        mock_creds.expired = False
        mock_auth.return_value = (mock_creds, "proj")

        mock_resp = MagicMock()
        mock_resp.status_code = 200
        mock_resp.json.return_value = {
            "content": [{"text": "Vertex Summary"}],
            "usage": {"input_tokens": 100, "output_tokens": 50},
        }
        mock_post.return_value = mock_resp

        result = llms.generate_summary(
            "vertex-claude-3-5", "transcript", "Title", "url"
        )

        self._assert_result(result, "Vertex Summary", 100, 50)

    @patch("youtube_to_docs.llms.requests.post")
    def test_generate_summary_bedrock(self, mock_post):
        """A ``bedrock-`` model reads text and usage from the POSTed JSON reply."""
        mock_resp = MagicMock()
        mock_resp.status_code = 200
        mock_resp.json.return_value = {
            "output": {"message": {"content": [{"text": "Bedrock Summary"}]}},
            "usage": {"inputTokens": 100, "outputTokens": 50},
        }
        mock_post.return_value = mock_resp

        result = llms.generate_summary(
            "bedrock-claude-3-5", "transcript", "Title", "url"
        )

        self._assert_result(result, "Bedrock Summary", 100, 50)

    @patch("youtube_to_docs.llms.OpenAI")
    def test_generate_summary_foundry(self, mock_openai):
        """A ``foundry-`` model reads text and usage from a chat completion."""
        mock_client = mock_openai.return_value
        mock_completion = MagicMock()
        # MagicMock returns the same child for every choices[...] lookup, so
        # the content set here is what the code under test reads back.
        mock_completion.choices[0].message.content = "Foundry Summary"
        mock_completion.usage.prompt_tokens = 100
        mock_completion.usage.completion_tokens = 50
        mock_client.chat.completions.create.return_value = mock_completion

        result = llms.generate_summary(
            "foundry-gpt-4", "transcript", "Title", "url"
        )

        self._assert_result(result, "Foundry Summary", 100, 50)

    @patch("youtube_to_docs.llms.genai.Client")
    def test_extract_speakers_gemini(self, mock_client_cls):
        """``extract_speakers`` returns the model text and its token counts."""
        speaker_text = "Speaker 1 (Expert)\nSpeaker 2 (UNKNOWN)"
        mock_client = mock_client_cls.return_value
        mock_client.models.generate_content.return_value = self._gemini_response(
            speaker_text, 120, 30
        )

        result = llms.extract_speakers("gemini-pro", "transcript content")

        self._assert_result(result, speaker_text, 120, 30)

    @patch("youtube_to_docs.llms.genai.Client")
    def test_generate_qa_gemini(self, mock_client_cls):
        """``generate_qa`` prepends a ``question number`` column to the table."""
        mock_client = mock_client_cls.return_value
        mock_client.models.generate_content.return_value = self._gemini_response(
            "| Q | A |\n|---|---|\n| Q1 | A1 |", 150, 60
        )

        result = llms.generate_qa(
            "gemini-pro", "transcript content", "Speaker 1, Speaker 2"
        )

        # Expecting the added column
        expected_qa = "| question number | Q | A |\n|---|---|---|\n| 1 | Q1 | A1 |"
        self._assert_result(result, expected_qa, 150, 60)
131
+
132
+
133
class TestPricing(unittest.TestCase):
    """Check ``llms.get_model_pricing`` against patched ``PRICES`` tables.

    Every test replaces ``youtube_to_docs.llms.PRICES`` with a small literal
    table, so the assertions do not depend on the packaged price data.
    """

    # Minimal table with a single priced model, used by the lookup tests.
    _GPT4_ONLY = {"prices": [{"id": "gpt-4", "input": 30.0, "output": 60.0}]}

    # Table with one alias entry pointing at a priced model id.
    _HAIKU_WITH_ALIAS = {
        "prices": [{"id": "claude-4.5-haiku", "input": 1.0, "output": 5.0}],
        "aliases": {"claude-haiku-4-5": "claude-4.5-haiku"},
    }

    # Table covering one model per provider plus the aliases they rely on.
    _MULTI_PROVIDER = {
        "prices": [
            {"id": "claude-4.5-haiku", "input": 1.0, "output": 5.0},
            {"id": "gemini-3-flash-preview", "input": 0.5, "output": 3.0},
            {"id": "gpt-5-mini", "input": 0.25, "output": 2.0},
            {
                "id": "amazon-nova-2-lite",
                "input": 0.3,
                "output": 2.5,
            },
            {
                "id": "imagen-4",
                "input": 0.0,
                "output": 40.0,
            },
        ],
        "aliases": {
            "claude-haiku-4-5": "claude-4.5-haiku",
            "nova-2-lite": "amazon-nova-2-lite",
        },
    }

    @patch("youtube_to_docs.llms.PRICES", _GPT4_ONLY)
    def test_get_model_pricing_found(self):
        """An id present in the table returns its input and output prices."""
        input_price, output_price = llms.get_model_pricing("gpt-4")
        self.assertEqual(input_price, 30.0)
        self.assertEqual(output_price, 60.0)

    @patch("youtube_to_docs.llms.PRICES", _GPT4_ONLY)
    def test_get_model_pricing_normalized(self):
        """A ``vertex-`` prefixed name resolves to the unprefixed table entry."""
        input_price, output_price = llms.get_model_pricing("vertex-gpt-4")
        self.assertEqual(input_price, 30.0)
        self.assertEqual(output_price, 60.0)

    @patch("youtube_to_docs.llms.PRICES", _HAIKU_WITH_ALIAS)
    def test_get_model_pricing_aliased(self):
        """Test that aliases (like claude-haiku-4-5 -> claude-4.5-haiku) work."""
        # This model name normalizes to 'claude-haiku-4-5'
        # which should alias to 'claude-4.5-haiku'
        input_price, output_price = llms.get_model_pricing(
            "bedrock-claude-haiku-4-5-20251001-v1"
        )
        self.assertEqual(input_price, 1.0)
        self.assertEqual(output_price, 5.0)

    @patch("youtube_to_docs.llms.PRICES", _MULTI_PROVIDER)
    def test_get_model_pricing_specific_models(self):
        """Test getting pricing for specific models requested by user."""
        requested_models = (
            "gemini-3-flash-preview",
            "vertex-claude-haiku-4-5@20251001",
            "bedrock-claude-haiku-4-5-20251001-v1",
            "bedrock-nova-2-lite-v1",
            "foundry-gpt-5-mini",
            "imagen-4",
        )

        for model in requested_models:
            # subTest reports each failing model separately instead of
            # stopping at the first one.
            with self.subTest(model=model):
                input_price, output_price = llms.get_model_pricing(model)
                self.assertIsNotNone(
                    input_price, f"Input price for {model} should not be None"
                )
                self.assertIsNotNone(
                    output_price, f"Output price for {model} should not be None"
                )

    @patch("youtube_to_docs.llms.PRICES", {"prices": [{"id": "gpt-4"}]})
    def test_get_model_pricing_not_found(self):
        """A name absent from the table returns ``None`` for both prices."""
        input_price, output_price = llms.get_model_pricing("non-existent-model")
        self.assertIsNone(input_price)
        self.assertIsNone(output_price)


if __name__ == "__main__":
    unittest.main()