webscout 8.2.2__py3-none-any.whl → 8.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of webscout might be problematic. Click here for more details.

Files changed (306) hide show
  1. webscout/AIauto.py +112 -22
  2. webscout/AIbase.py +144 -7
  3. webscout/AIutel.py +249 -131
  4. webscout/Bard.py +579 -206
  5. webscout/DWEBS.py +78 -35
  6. webscout/__init__.py +0 -1
  7. webscout/cli.py +256 -0
  8. webscout/conversation.py +307 -436
  9. webscout/exceptions.py +23 -0
  10. webscout/prompt_manager.py +56 -42
  11. webscout/version.py +1 -1
  12. webscout/webscout_search.py +65 -47
  13. webscout/webscout_search_async.py +81 -126
  14. webscout/yep_search.py +93 -43
  15. {webscout-8.2.2.dist-info → webscout-8.2.7.dist-info}/METADATA +172 -52
  16. webscout-8.2.7.dist-info/RECORD +26 -0
  17. {webscout-8.2.2.dist-info → webscout-8.2.7.dist-info}/WHEEL +1 -1
  18. webscout-8.2.7.dist-info/entry_points.txt +3 -0
  19. webscout-8.2.7.dist-info/top_level.txt +1 -0
  20. inferno/__init__.py +0 -6
  21. inferno/__main__.py +0 -9
  22. inferno/cli.py +0 -6
  23. webscout/Extra/GitToolkit/__init__.py +0 -10
  24. webscout/Extra/GitToolkit/gitapi/__init__.py +0 -12
  25. webscout/Extra/GitToolkit/gitapi/repository.py +0 -195
  26. webscout/Extra/GitToolkit/gitapi/user.py +0 -96
  27. webscout/Extra/GitToolkit/gitapi/utils.py +0 -62
  28. webscout/Extra/YTToolkit/YTdownloader.py +0 -957
  29. webscout/Extra/YTToolkit/__init__.py +0 -3
  30. webscout/Extra/YTToolkit/transcriber.py +0 -476
  31. webscout/Extra/YTToolkit/ytapi/__init__.py +0 -6
  32. webscout/Extra/YTToolkit/ytapi/channel.py +0 -307
  33. webscout/Extra/YTToolkit/ytapi/errors.py +0 -13
  34. webscout/Extra/YTToolkit/ytapi/extras.py +0 -45
  35. webscout/Extra/YTToolkit/ytapi/https.py +0 -88
  36. webscout/Extra/YTToolkit/ytapi/patterns.py +0 -61
  37. webscout/Extra/YTToolkit/ytapi/playlist.py +0 -59
  38. webscout/Extra/YTToolkit/ytapi/pool.py +0 -8
  39. webscout/Extra/YTToolkit/ytapi/query.py +0 -40
  40. webscout/Extra/YTToolkit/ytapi/stream.py +0 -63
  41. webscout/Extra/YTToolkit/ytapi/utils.py +0 -62
  42. webscout/Extra/YTToolkit/ytapi/video.py +0 -232
  43. webscout/Extra/__init__.py +0 -7
  44. webscout/Extra/autocoder/__init__.py +0 -9
  45. webscout/Extra/autocoder/autocoder.py +0 -849
  46. webscout/Extra/autocoder/autocoder_utiles.py +0 -332
  47. webscout/Extra/gguf.py +0 -682
  48. webscout/Extra/tempmail/__init__.py +0 -28
  49. webscout/Extra/tempmail/async_utils.py +0 -141
  50. webscout/Extra/tempmail/base.py +0 -161
  51. webscout/Extra/tempmail/cli.py +0 -187
  52. webscout/Extra/tempmail/emailnator.py +0 -84
  53. webscout/Extra/tempmail/mail_tm.py +0 -361
  54. webscout/Extra/tempmail/temp_mail_io.py +0 -292
  55. webscout/Extra/weather.py +0 -194
  56. webscout/Extra/weather_ascii.py +0 -76
  57. webscout/LLM.py +0 -442
  58. webscout/Litlogger/__init__.py +0 -67
  59. webscout/Litlogger/core/__init__.py +0 -6
  60. webscout/Litlogger/core/level.py +0 -23
  61. webscout/Litlogger/core/logger.py +0 -165
  62. webscout/Litlogger/handlers/__init__.py +0 -12
  63. webscout/Litlogger/handlers/console.py +0 -33
  64. webscout/Litlogger/handlers/file.py +0 -143
  65. webscout/Litlogger/handlers/network.py +0 -173
  66. webscout/Litlogger/styles/__init__.py +0 -7
  67. webscout/Litlogger/styles/colors.py +0 -249
  68. webscout/Litlogger/styles/formats.py +0 -458
  69. webscout/Litlogger/styles/text.py +0 -87
  70. webscout/Litlogger/utils/__init__.py +0 -6
  71. webscout/Litlogger/utils/detectors.py +0 -153
  72. webscout/Litlogger/utils/formatters.py +0 -200
  73. webscout/Local/__init__.py +0 -12
  74. webscout/Local/__main__.py +0 -9
  75. webscout/Local/api.py +0 -576
  76. webscout/Local/cli.py +0 -516
  77. webscout/Local/config.py +0 -75
  78. webscout/Local/llm.py +0 -287
  79. webscout/Local/model_manager.py +0 -253
  80. webscout/Local/server.py +0 -721
  81. webscout/Local/utils.py +0 -93
  82. webscout/Provider/AI21.py +0 -177
  83. webscout/Provider/AISEARCH/DeepFind.py +0 -250
  84. webscout/Provider/AISEARCH/ISou.py +0 -256
  85. webscout/Provider/AISEARCH/Perplexity.py +0 -359
  86. webscout/Provider/AISEARCH/__init__.py +0 -10
  87. webscout/Provider/AISEARCH/felo_search.py +0 -228
  88. webscout/Provider/AISEARCH/genspark_search.py +0 -208
  89. webscout/Provider/AISEARCH/hika_search.py +0 -194
  90. webscout/Provider/AISEARCH/iask_search.py +0 -436
  91. webscout/Provider/AISEARCH/monica_search.py +0 -246
  92. webscout/Provider/AISEARCH/scira_search.py +0 -324
  93. webscout/Provider/AISEARCH/webpilotai_search.py +0 -281
  94. webscout/Provider/Aitopia.py +0 -292
  95. webscout/Provider/AllenAI.py +0 -413
  96. webscout/Provider/Andi.py +0 -228
  97. webscout/Provider/Blackboxai.py +0 -229
  98. webscout/Provider/C4ai.py +0 -432
  99. webscout/Provider/ChatGPTClone.py +0 -226
  100. webscout/Provider/ChatGPTES.py +0 -237
  101. webscout/Provider/ChatGPTGratis.py +0 -194
  102. webscout/Provider/Chatify.py +0 -175
  103. webscout/Provider/Cloudflare.py +0 -273
  104. webscout/Provider/Cohere.py +0 -208
  105. webscout/Provider/DeepSeek.py +0 -196
  106. webscout/Provider/Deepinfra.py +0 -297
  107. webscout/Provider/ElectronHub.py +0 -709
  108. webscout/Provider/ExaAI.py +0 -261
  109. webscout/Provider/ExaChat.py +0 -342
  110. webscout/Provider/Free2GPT.py +0 -241
  111. webscout/Provider/GPTWeb.py +0 -193
  112. webscout/Provider/Gemini.py +0 -169
  113. webscout/Provider/GithubChat.py +0 -367
  114. webscout/Provider/Glider.py +0 -211
  115. webscout/Provider/Groq.py +0 -670
  116. webscout/Provider/HF_space/__init__.py +0 -0
  117. webscout/Provider/HF_space/qwen_qwen2.py +0 -206
  118. webscout/Provider/HeckAI.py +0 -233
  119. webscout/Provider/HuggingFaceChat.py +0 -462
  120. webscout/Provider/Hunyuan.py +0 -272
  121. webscout/Provider/Jadve.py +0 -266
  122. webscout/Provider/Koboldai.py +0 -381
  123. webscout/Provider/LambdaChat.py +0 -392
  124. webscout/Provider/Llama.py +0 -200
  125. webscout/Provider/Llama3.py +0 -204
  126. webscout/Provider/Marcus.py +0 -148
  127. webscout/Provider/Netwrck.py +0 -228
  128. webscout/Provider/OLLAMA.py +0 -396
  129. webscout/Provider/OPENAI/__init__.py +0 -25
  130. webscout/Provider/OPENAI/base.py +0 -46
  131. webscout/Provider/OPENAI/c4ai.py +0 -367
  132. webscout/Provider/OPENAI/chatgpt.py +0 -549
  133. webscout/Provider/OPENAI/chatgptclone.py +0 -460
  134. webscout/Provider/OPENAI/deepinfra.py +0 -272
  135. webscout/Provider/OPENAI/e2b.py +0 -1350
  136. webscout/Provider/OPENAI/exaai.py +0 -404
  137. webscout/Provider/OPENAI/exachat.py +0 -433
  138. webscout/Provider/OPENAI/freeaichat.py +0 -352
  139. webscout/Provider/OPENAI/glider.py +0 -316
  140. webscout/Provider/OPENAI/heckai.py +0 -337
  141. webscout/Provider/OPENAI/llmchatco.py +0 -327
  142. webscout/Provider/OPENAI/netwrck.py +0 -348
  143. webscout/Provider/OPENAI/opkfc.py +0 -488
  144. webscout/Provider/OPENAI/scirachat.py +0 -463
  145. webscout/Provider/OPENAI/sonus.py +0 -294
  146. webscout/Provider/OPENAI/standardinput.py +0 -425
  147. webscout/Provider/OPENAI/textpollinations.py +0 -285
  148. webscout/Provider/OPENAI/toolbaz.py +0 -405
  149. webscout/Provider/OPENAI/typegpt.py +0 -346
  150. webscout/Provider/OPENAI/uncovrAI.py +0 -455
  151. webscout/Provider/OPENAI/utils.py +0 -211
  152. webscout/Provider/OPENAI/venice.py +0 -413
  153. webscout/Provider/OPENAI/wisecat.py +0 -381
  154. webscout/Provider/OPENAI/writecream.py +0 -156
  155. webscout/Provider/OPENAI/x0gpt.py +0 -371
  156. webscout/Provider/OPENAI/yep.py +0 -327
  157. webscout/Provider/OpenGPT.py +0 -199
  158. webscout/Provider/Openai.py +0 -496
  159. webscout/Provider/PI.py +0 -344
  160. webscout/Provider/Perplexitylabs.py +0 -415
  161. webscout/Provider/Phind.py +0 -535
  162. webscout/Provider/PizzaGPT.py +0 -198
  163. webscout/Provider/QwenLM.py +0 -254
  164. webscout/Provider/Reka.py +0 -214
  165. webscout/Provider/StandardInput.py +0 -278
  166. webscout/Provider/TTI/AiForce/__init__.py +0 -22
  167. webscout/Provider/TTI/AiForce/async_aiforce.py +0 -224
  168. webscout/Provider/TTI/AiForce/sync_aiforce.py +0 -245
  169. webscout/Provider/TTI/FreeAIPlayground/__init__.py +0 -9
  170. webscout/Provider/TTI/FreeAIPlayground/async_freeaiplayground.py +0 -181
  171. webscout/Provider/TTI/FreeAIPlayground/sync_freeaiplayground.py +0 -180
  172. webscout/Provider/TTI/ImgSys/__init__.py +0 -23
  173. webscout/Provider/TTI/ImgSys/async_imgsys.py +0 -202
  174. webscout/Provider/TTI/ImgSys/sync_imgsys.py +0 -195
  175. webscout/Provider/TTI/MagicStudio/__init__.py +0 -2
  176. webscout/Provider/TTI/MagicStudio/async_magicstudio.py +0 -111
  177. webscout/Provider/TTI/MagicStudio/sync_magicstudio.py +0 -109
  178. webscout/Provider/TTI/Nexra/__init__.py +0 -22
  179. webscout/Provider/TTI/Nexra/async_nexra.py +0 -286
  180. webscout/Provider/TTI/Nexra/sync_nexra.py +0 -258
  181. webscout/Provider/TTI/PollinationsAI/__init__.py +0 -23
  182. webscout/Provider/TTI/PollinationsAI/async_pollinations.py +0 -311
  183. webscout/Provider/TTI/PollinationsAI/sync_pollinations.py +0 -265
  184. webscout/Provider/TTI/__init__.py +0 -12
  185. webscout/Provider/TTI/aiarta/__init__.py +0 -2
  186. webscout/Provider/TTI/aiarta/async_aiarta.py +0 -482
  187. webscout/Provider/TTI/aiarta/sync_aiarta.py +0 -440
  188. webscout/Provider/TTI/artbit/__init__.py +0 -22
  189. webscout/Provider/TTI/artbit/async_artbit.py +0 -155
  190. webscout/Provider/TTI/artbit/sync_artbit.py +0 -148
  191. webscout/Provider/TTI/fastflux/__init__.py +0 -22
  192. webscout/Provider/TTI/fastflux/async_fastflux.py +0 -261
  193. webscout/Provider/TTI/fastflux/sync_fastflux.py +0 -252
  194. webscout/Provider/TTI/huggingface/__init__.py +0 -22
  195. webscout/Provider/TTI/huggingface/async_huggingface.py +0 -199
  196. webscout/Provider/TTI/huggingface/sync_huggingface.py +0 -195
  197. webscout/Provider/TTI/piclumen/__init__.py +0 -23
  198. webscout/Provider/TTI/piclumen/async_piclumen.py +0 -268
  199. webscout/Provider/TTI/piclumen/sync_piclumen.py +0 -233
  200. webscout/Provider/TTI/pixelmuse/__init__.py +0 -4
  201. webscout/Provider/TTI/pixelmuse/async_pixelmuse.py +0 -249
  202. webscout/Provider/TTI/pixelmuse/sync_pixelmuse.py +0 -182
  203. webscout/Provider/TTI/talkai/__init__.py +0 -4
  204. webscout/Provider/TTI/talkai/async_talkai.py +0 -229
  205. webscout/Provider/TTI/talkai/sync_talkai.py +0 -207
  206. webscout/Provider/TTS/__init__.py +0 -7
  207. webscout/Provider/TTS/deepgram.py +0 -156
  208. webscout/Provider/TTS/elevenlabs.py +0 -111
  209. webscout/Provider/TTS/gesserit.py +0 -127
  210. webscout/Provider/TTS/murfai.py +0 -113
  211. webscout/Provider/TTS/parler.py +0 -111
  212. webscout/Provider/TTS/speechma.py +0 -180
  213. webscout/Provider/TTS/streamElements.py +0 -333
  214. webscout/Provider/TTS/utils.py +0 -280
  215. webscout/Provider/TeachAnything.py +0 -187
  216. webscout/Provider/TextPollinationsAI.py +0 -231
  217. webscout/Provider/TwoAI.py +0 -199
  218. webscout/Provider/Venice.py +0 -219
  219. webscout/Provider/VercelAI.py +0 -234
  220. webscout/Provider/WebSim.py +0 -228
  221. webscout/Provider/WiseCat.py +0 -196
  222. webscout/Provider/Writecream.py +0 -211
  223. webscout/Provider/WritingMate.py +0 -197
  224. webscout/Provider/Youchat.py +0 -330
  225. webscout/Provider/__init__.py +0 -198
  226. webscout/Provider/ai4chat.py +0 -202
  227. webscout/Provider/aimathgpt.py +0 -189
  228. webscout/Provider/akashgpt.py +0 -342
  229. webscout/Provider/askmyai.py +0 -158
  230. webscout/Provider/asksteve.py +0 -203
  231. webscout/Provider/bagoodex.py +0 -145
  232. webscout/Provider/cerebras.py +0 -242
  233. webscout/Provider/chatglm.py +0 -205
  234. webscout/Provider/cleeai.py +0 -213
  235. webscout/Provider/copilot.py +0 -428
  236. webscout/Provider/elmo.py +0 -234
  237. webscout/Provider/freeaichat.py +0 -271
  238. webscout/Provider/gaurish.py +0 -244
  239. webscout/Provider/geminiapi.py +0 -208
  240. webscout/Provider/geminiprorealtime.py +0 -160
  241. webscout/Provider/granite.py +0 -187
  242. webscout/Provider/hermes.py +0 -219
  243. webscout/Provider/julius.py +0 -223
  244. webscout/Provider/koala.py +0 -268
  245. webscout/Provider/labyrinth.py +0 -340
  246. webscout/Provider/learnfastai.py +0 -266
  247. webscout/Provider/lepton.py +0 -194
  248. webscout/Provider/llama3mitril.py +0 -180
  249. webscout/Provider/llamatutor.py +0 -192
  250. webscout/Provider/llmchat.py +0 -213
  251. webscout/Provider/llmchatco.py +0 -311
  252. webscout/Provider/meta.py +0 -794
  253. webscout/Provider/multichat.py +0 -325
  254. webscout/Provider/promptrefine.py +0 -193
  255. webscout/Provider/scira_chat.py +0 -277
  256. webscout/Provider/scnet.py +0 -187
  257. webscout/Provider/searchchat.py +0 -293
  258. webscout/Provider/sonus.py +0 -208
  259. webscout/Provider/talkai.py +0 -194
  260. webscout/Provider/toolbaz.py +0 -320
  261. webscout/Provider/turboseek.py +0 -219
  262. webscout/Provider/tutorai.py +0 -252
  263. webscout/Provider/typefully.py +0 -280
  264. webscout/Provider/typegpt.py +0 -232
  265. webscout/Provider/uncovr.py +0 -312
  266. webscout/Provider/x0gpt.py +0 -256
  267. webscout/Provider/yep.py +0 -376
  268. webscout/litagent/__init__.py +0 -29
  269. webscout/litagent/agent.py +0 -455
  270. webscout/litagent/constants.py +0 -60
  271. webscout/litprinter/__init__.py +0 -59
  272. webscout/scout/__init__.py +0 -8
  273. webscout/scout/core/__init__.py +0 -7
  274. webscout/scout/core/crawler.py +0 -140
  275. webscout/scout/core/scout.py +0 -568
  276. webscout/scout/core/search_result.py +0 -96
  277. webscout/scout/core/text_analyzer.py +0 -63
  278. webscout/scout/core/text_utils.py +0 -277
  279. webscout/scout/core/web_analyzer.py +0 -52
  280. webscout/scout/core.py +0 -881
  281. webscout/scout/element.py +0 -460
  282. webscout/scout/parsers/__init__.py +0 -69
  283. webscout/scout/parsers/html5lib_parser.py +0 -172
  284. webscout/scout/parsers/html_parser.py +0 -236
  285. webscout/scout/parsers/lxml_parser.py +0 -178
  286. webscout/scout/utils.py +0 -37
  287. webscout/swiftcli/__init__.py +0 -809
  288. webscout/zeroart/__init__.py +0 -55
  289. webscout/zeroart/base.py +0 -60
  290. webscout/zeroart/effects.py +0 -99
  291. webscout/zeroart/fonts.py +0 -816
  292. webscout-8.2.2.dist-info/RECORD +0 -309
  293. webscout-8.2.2.dist-info/entry_points.txt +0 -5
  294. webscout-8.2.2.dist-info/top_level.txt +0 -3
  295. webstoken/__init__.py +0 -30
  296. webstoken/classifier.py +0 -189
  297. webstoken/keywords.py +0 -216
  298. webstoken/language.py +0 -128
  299. webstoken/ner.py +0 -164
  300. webstoken/normalizer.py +0 -35
  301. webstoken/processor.py +0 -77
  302. webstoken/sentiment.py +0 -206
  303. webstoken/stemmer.py +0 -73
  304. webstoken/tagger.py +0 -60
  305. webstoken/tokenizer.py +0 -158
  306. {webscout-8.2.2.dist-info → webscout-8.2.7.dist-info/licenses}/LICENSE.md +0 -0
@@ -1,3 +0,0 @@
1
- from .YTdownloader import *
2
- from .transcriber import *
3
- from .ytapi import *
@@ -1,476 +0,0 @@
1
- """
2
- >>> from webscout import YTTranscriber
3
- >>> transcript = YTTranscriber.get_transcript('https://www.youtube.com/watch?v=dQw4w9WgXcQ')
4
- >>> print(transcript)
5
- {'text': 'Never gonna give you up', 'start': 0.0, 'duration': 4.5}
6
-
7
- """
8
-
9
- import requests
10
- import http.cookiejar as cookiejar
11
- import json
12
- from xml.etree import ElementTree
13
- import re
14
- import html
15
- from typing import List, Dict, Union, Optional
16
- from functools import lru_cache #
17
- from concurrent.futures import ThreadPoolExecutor
18
- from webscout.exceptions import *
19
-
20
# URL template for a video's watch page; ``video_id`` is filled in via str.format.
WATCH_URL = 'https://www.youtube.com/watch?v={video_id}'
# Size of the thread pool that YTTranscriber creates as a class attribute.
MAX_WORKERS = 4
22
-
23
class YTTranscriber:
    """Retrieve transcripts for YouTube videos.

    >>> transcript = YTTranscriber.get_transcript('https://youtu.be/dQw4w9WgXcQ')
    >>> print(transcript[0]['text'])
    'Never gonna give you up'
    """

    # Lazily created requests.Session shared by every call (see _get_session).
    _session = None
    _executor = ThreadPoolExecutor(max_workers=MAX_WORKERS)

    @classmethod
    def _get_session(cls):
        """Return the shared HTTP session, creating it on first use."""
        if cls._session is None:
            cls._session = requests.Session()
            cls._session.headers.update({
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            })
        return cls._session

    @classmethod
    def get_transcript(cls, video_url: str, languages: Optional[str] = 'en',
                       proxies: Dict[str, str] = None,
                       cookies: str = None,
                       preserve_formatting: bool = False) -> List[Dict[str, Union[str, float]]]:
        """
        Retrieves the transcript for a given YouTube video URL.

        Results are memoised (up to 100 distinct argument combinations).

        Args:
            video_url (str): YouTube video URL (supports various formats).
            languages (str, optional): Language code for the transcript.
                If None, fetches the auto-generated transcript.
                Defaults to 'en'.
            proxies (Dict[str, str], optional): Proxies to use for the request. Defaults to None.
            cookies (str, optional): Path to the cookie file. Defaults to None.
            preserve_formatting (bool, optional): Whether to preserve formatting tags. Defaults to False.

        Returns:
            List[Dict[str, Union[str, float]]]: A list of dictionaries, each containing:
                - 'text': The transcribed text.
                - 'start': The start time of the text segment (in seconds).
                - 'duration': The duration of the text segment (in seconds).

        Raises:
            TranscriptRetrievalError: If there's an error retrieving the transcript.
        """
        # lru_cache needs hashable arguments; a proxies dict is not hashable,
        # so it is frozen into a sorted tuple of items before the cached call.
        proxy_items = tuple(sorted(proxies.items())) if proxies else None
        cached = cls._get_transcript_cached(
            video_url, languages, proxy_items, cookies, preserve_formatting
        )
        # Hand out copies so that callers mutating the result cannot corrupt
        # the cached entry.
        return [dict(entry) for entry in cached]

    @classmethod
    @lru_cache(maxsize=100)
    def _get_transcript_cached(cls, video_url, languages, proxy_items, cookies, preserve_formatting):
        """Cached worker behind get_transcript; every argument must be hashable."""
        video_id = cls._extract_video_id(video_url)
        http_client = cls._get_session()

        if proxy_items:
            # The session is shared, so these proxies stay in effect for later calls.
            http_client.proxies.update(dict(proxy_items))

        if cookies:
            # Merge the loaded cookies into the session jar; the loaded jar
            # used to be discarded, so the cookie file had no effect.
            http_client.cookies.update(cls._load_cookies(cookies, video_id))

        transcript_list = TranscriptListFetcher(http_client).fetch(video_id)
        if languages:
            transcript = transcript_list.find_transcript([languages])
        else:
            # No language requested: take the auto-generated transcript, as the
            # public docstring promises.
            transcript = transcript_list.find_generated_transcript(['any'])

        return transcript.fetch(preserve_formatting)

    @staticmethod
    def _extract_video_id(video_url: str) -> str:
        """Extracts the video ID from different YouTube URL formats.

        Raises:
            InvalidVideoIdError: If no 11-character video ID can be found.
        """
        patterns = [
            r'(?:v=|\/)([0-9A-Za-z_-]{11}).*',
            r'youtu\.be\/([0-9A-Za-z_-]{11})',
            r'youtube\.com\/embed\/([0-9A-Za-z_-]{11})'
        ]

        for pattern in patterns:
            match = re.search(pattern, video_url)
            if match:
                return match.group(1)

        # A bare 11-character ID is accepted as-is.
        if re.match(r'^[0-9A-Za-z_-]{11}$', video_url):
            return video_url

        raise InvalidVideoIdError(video_url)

    @staticmethod
    def _load_cookies(cookies: str, video_id: str) -> cookiejar.MozillaCookieJar:
        """Load a Mozilla/Netscape-format cookie file and return the jar.

        Raises:
            CookiePathInvalidError: If the file is missing or cannot be parsed.
        """
        try:
            cj = cookiejar.MozillaCookieJar(cookies)
            cj.load()
        except (cookiejar.LoadError, FileNotFoundError) as err:
            raise CookiePathInvalidError(video_id) from err
        return cj
113
-
114
class TranscriptListFetcher:
    """Downloads a video's watch page and turns its caption data into a TranscriptList."""

    def __init__(self, http_client: "requests.Session"):
        """Store the HTTP session used for all requests."""
        self._http_client = http_client

    def fetch(self, video_id: str):
        """Fetch the watch page for ``video_id`` and build a TranscriptList from it."""
        page = self._fetch_video_html(video_id)
        captions = self._extract_captions_json(page, video_id)
        return TranscriptList.build(self._http_client, video_id, captions)

    def _extract_captions_json(self, html: str, video_id: str) -> dict:
        """Return the ``playerCaptionsTracklistRenderer`` dict embedded in the page.

        Raises:
            InvalidVideoIdError: No caption data and ``video_id`` looks like a URL.
            TooManyRequestsError: No caption data and the page shows a reCAPTCHA.
            VideoUnavailableError: No caption data and no playability status.
            TranscriptsDisabledError: Caption data is absent or lists no tracks.
        """
        pieces = html.split('"captions":')

        if len(pieces) > 1:
            # The captions object runs up to the following "videoDetails" key.
            raw_json = pieces[1].split(',"videoDetails')[0].replace('\n', '')
            captions_json = json.loads(raw_json).get('playerCaptionsTracklistRenderer')
            if captions_json is None or 'captionTracks' not in captions_json:
                raise TranscriptsDisabledError(video_id)
            return captions_json

        # No captions marker at all: work out the most specific reason.
        if video_id.startswith(('http://', 'https://')):
            raise InvalidVideoIdError(video_id)
        if 'class="g-recaptcha"' in html:
            raise TooManyRequestsError(video_id)
        if '"playabilityStatus":' not in html:
            raise VideoUnavailableError(video_id)
        raise TranscriptsDisabledError(video_id)

    def _create_consent_cookie(self, html, video_id):
        """Set the CONSENT cookie from the ``v`` form value found in the consent page."""
        found = re.search('name="v" value="(.*?)"', html)
        if found is None:
            raise FailedToCreateConsentCookieError(video_id)
        consent_value = 'YES+' + found.group(1)
        self._http_client.cookies.set('CONSENT', consent_value, domain='.youtube.com')

    def _fetch_video_html(self, video_id):
        """Fetch the watch page, answering the consent form once if it is shown."""
        consent_marker = 'action="https://consent.youtube.com/s"'
        page = self._fetch_html(video_id)
        if consent_marker not in page:
            return page

        self._create_consent_cookie(page, video_id)
        page = self._fetch_html(video_id)
        if consent_marker in page:
            # Still on the consent page after setting the cookie.
            raise FailedToCreateConsentCookieError(video_id)
        return page

    def _fetch_html(self, video_id):
        """GET the watch page and return its HTML-unescaped text."""
        url = WATCH_URL.format(video_id=video_id)
        response = self._http_client.get(url, headers={'Accept-Language': 'en-US'})
        checked = _raise_http_errors(response, video_id)
        return html.unescape(checked.text)
172
-
173
-
174
class TranscriptList:
    """All transcripts available for one video, grouped by how they were created.

    >>> transcript_list = TranscriptList.build(http_client, video_id, captions_json)
    >>> transcript = transcript_list.find_transcript(['en'])
    >>> print(transcript)
    en ("English")[TRANSLATABLE]
    """

    def __init__(self, video_id, manually_created_transcripts, generated_transcripts, translation_languages):
        """Store the video ID, the two language-code -> Transcript dicts and the translation languages."""
        self.video_id = video_id
        self._manually_created_transcripts = manually_created_transcripts
        self._generated_transcripts = generated_transcripts
        self._translation_languages = translation_languages

    @staticmethod
    def build(http_client, video_id, captions_json):
        """
        Factory method for TranscriptList.

        :param http_client: http client which is used to make the transcript retrieving http calls
        :type http_client: requests.Session
        :param video_id: the id of the video this TranscriptList is for
        :type video_id: str
        :param captions_json: the JSON parsed from the YouTube pages static HTML
        :type captions_json: dict
        :return: the created TranscriptList
        :rtype TranscriptList:
        """
        translation_languages = [
            {
                'language': translation_language['languageName']['simpleText'],
                'language_code': translation_language['languageCode'],
            } for translation_language in captions_json.get('translationLanguages', [])
        ]

        manually_created_transcripts = {}
        generated_transcripts = {}

        for caption in captions_json['captionTracks']:
            # Tracks of kind 'asr' are the automatically generated ones.
            is_generated = caption.get('kind', '') == 'asr'
            transcript_dict = generated_transcripts if is_generated else manually_created_transcripts

            transcript_dict[caption['languageCode']] = Transcript(
                http_client,
                video_id,
                caption['baseUrl'],
                caption['name']['simpleText'],
                caption['languageCode'],
                is_generated,
                translation_languages if caption.get('isTranslatable', False) else [],
            )

        return TranscriptList(
            video_id,
            manually_created_transcripts,
            generated_transcripts,
            translation_languages,
        )

    def __iter__(self):
        """Iterate over manually created transcripts first, then generated ones."""
        return iter(list(self._manually_created_transcripts.values()) + list(self._generated_transcripts.values()))

    @staticmethod
    def _normalize_language_codes(language_codes):
        """Return ``language_codes`` as a list; None becomes [] and a bare string becomes [string]."""
        if language_codes is None:
            return []
        if isinstance(language_codes, str):
            # A bare string would otherwise be searched character by character.
            return [language_codes]
        return list(language_codes)

    def find_transcript(self, language_codes):
        """
        Finds a transcript for a given language code. If no language is provided, it will
        return the auto-generated transcript.

        :param language_codes: A list of language codes in a descending priority. ``'any'``
            selects the first available transcript. None or an empty list selects the
            auto-generated transcript.
        :type language_codes: list[str]
        :return: the found Transcript
        :rtype Transcript:
        :raises: NoTranscriptFound
        """
        codes = self._normalize_language_codes(language_codes)
        if not codes:
            # Previously None crashed with TypeError on the membership test below.
            return self.find_generated_transcript(['any'])
        if 'any' in codes:
            for transcript in self:
                return transcript
        return self._find_transcript(codes, [self._manually_created_transcripts, self._generated_transcripts])

    def find_generated_transcript(self, language_codes):
        """
        Finds an automatically generated transcript for a given language code.

        :param language_codes: A list of language codes in a descending priority. For example, if this is set to
            ['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if
            it fails to do so.
        :type language_codes: list[str]
        :return: the found Transcript
        :rtype Transcript:
        :raises: NoTranscriptFound
        """
        codes = self._normalize_language_codes(language_codes)
        if 'any' in codes:
            for transcript in self:
                if transcript.is_generated:
                    return transcript
        return self._find_transcript(codes, [self._generated_transcripts])

    def find_manually_created_transcript(self, language_codes):
        """
        Finds a manually created transcript for a given language code.

        :param language_codes: A list of language codes in a descending priority. For example, if this is set to
            ['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if
            it fails to do so.
        :type language_codes: list[str]
        :return: the found Transcript
        :rtype Transcript:
        :raises: NoTranscriptFound
        """
        return self._find_transcript(language_codes, [self._manually_created_transcripts])

    def _find_transcript(self, language_codes, transcript_dicts):
        """Return the first transcript matching any code, trying the dicts in order for each code."""
        codes = self._normalize_language_codes(language_codes)
        for language_code in codes:
            for transcript_dict in transcript_dicts:
                if language_code in transcript_dict:
                    return transcript_dict[language_code]

        raise NoTranscriptFoundError(
            self.video_id,
            codes,
            self
        )

    def __str__(self):
        """Return a multi-line summary of the available transcripts and translation languages."""
        return (
            'For this video ({video_id}) transcripts are available in the following languages:\n\n'
            '(MANUALLY CREATED)\n'
            '{available_manually_created_transcript_languages}\n\n'
            '(GENERATED)\n'
            '{available_generated_transcripts}\n\n'
            '(TRANSLATION LANGUAGES)\n'
            '{available_translation_languages}'
        ).format(
            video_id=self.video_id,
            available_manually_created_transcript_languages=self._get_language_description(
                str(transcript) for transcript in self._manually_created_transcripts.values()
            ),
            available_generated_transcripts=self._get_language_description(
                str(transcript) for transcript in self._generated_transcripts.values()
            ),
            available_translation_languages=self._get_language_description(
                '{language_code} ("{language}")'.format(
                    language=translation_language['language'],
                    language_code=translation_language['language_code'],
                ) for translation_language in self._translation_languages
            )
        )

    def _get_language_description(self, transcript_strings):
        """Format the strings as a bulleted list, or 'None' when there are none."""
        description = '\n'.join(' - {transcript}'.format(transcript=transcript) for transcript in transcript_strings)
        return description if description else 'None'
327
-
328
-
329
class Transcript:
    """A single transcript track of a video, fetched on demand.

    >>> transcript = transcript_list.find_transcript(['en'])
    >>> print(transcript.language)
    'English'
    >>> if transcript.is_translatable:
    ...     es_transcript = transcript.translate('es')
    ...     print(es_transcript.language)
    'Spanish'
    """

    def __init__(self, http_client, video_id, url, language, language_code, is_generated, translation_languages):
        """Record the track's metadata and index its translation languages by code."""
        self._http_client = http_client
        self.video_id = video_id
        self._url = url
        self.language = language
        self.language_code = language_code
        self.is_generated = is_generated
        self.translation_languages = translation_languages

        # language_code -> language name, used by translate().
        by_code = {}
        for entry in translation_languages:
            by_code[entry['language_code']] = entry['language']
        self._translation_languages_dict = by_code

    def fetch(self, preserve_formatting=False):
        """Download this transcript and parse it into a list of segments.

        Args:
            preserve_formatting (bool): Passed on to TranscriptParser. Defaults to False.

        Returns:
            list: Whatever TranscriptParser.parse returns for the response text.
        """
        response = self._http_client.get(self._url, headers={'Accept-Language': 'en-US'})
        body = _raise_http_errors(response, self.video_id).text
        parser = TranscriptParser(preserve_formatting=preserve_formatting)
        return parser.parse(body)

    def __str__(self):
        """Return ``code ("Language")`` followed by ``[TRANSLATABLE]`` when applicable."""
        if self.is_translatable:
            suffix = '[TRANSLATABLE]'
        else:
            suffix = ''
        template = '{language_code} ("{language}"){translation_description}'
        return template.format(
            language=self.language,
            language_code=self.language_code,
            translation_description=suffix,
        )

    @property
    def is_translatable(self):
        """True when at least one translation language is available."""
        return len(self.translation_languages) > 0

    def translate(self, language_code):
        """Return a new Transcript for this track translated into ``language_code``.

        Args:
            language_code (str): Target language code.

        Returns:
            Transcript: A generated, non-translatable transcript whose URL carries ``tlang``.

        Raises:
            NotTranslatableError: If this transcript has no translation languages.
            TranslationLanguageNotAvailableError: If ``language_code`` is not among them.
        """
        if not self.is_translatable:
            raise NotTranslatableError(self.video_id)

        target_language = self._translation_languages_dict.get(language_code)
        if language_code not in self._translation_languages_dict:
            raise TranslationLanguageNotAvailableError(self.video_id)

        translated_url = '{url}&tlang={language_code}'.format(url=self._url, language_code=language_code)
        return Transcript(
            self._http_client,
            self.video_id,
            translated_url,
            target_language,
            language_code,
            True,
            [],
        )
410
-
411
-
412
class TranscriptParser:
    """Turns transcript XML into a list of ``text`` / ``start`` / ``duration`` dicts.

    >>> parser = TranscriptParser(preserve_formatting=True)
    >>> data = parser.parse(xml_data)
    >>> print(data[0])
    {'text': 'Never gonna give you up', 'start': 0.0, 'duration': 4.5}
    """

    # Tags that are left in the text when preserve_formatting is enabled.
    _FORMATTING_TAGS = [
        'strong',
        'em',
        'b',
        'i',
        'mark',
        'small',
        'del',
        'ins',
        'sub',
        'sup',
    ]

    def __init__(self, preserve_formatting=False):
        """Compile the tag-stripping regex for the chosen mode."""
        self._html_regex = self._get_html_regex(preserve_formatting)

    def _get_html_regex(self, preserve_formatting):
        """Return the compiled pattern matching the tags that must be removed."""
        if not preserve_formatting:
            # Strip every tag.
            return re.compile(r'<[^>]*>', re.IGNORECASE)

        # Strip tags whose name is not one of the formatting tags.
        allowed = '|'.join(self._FORMATTING_TAGS)
        pattern = r'<\/?(?!\/?(' + allowed + r')\b).*?\b>'
        return re.compile(pattern, re.IGNORECASE)

    def parse(self, plain_data):
        """Parse the XML string; child elements without text are skipped."""
        # NOTE(review): xml.etree is not hardened against malicious XML — confirm
        # the input source is trusted.
        segments = []
        for node in ElementTree.fromstring(plain_data):
            if node.text is None:
                continue
            cleaned = self._html_regex.sub('', html.unescape(node.text))
            segments.append({
                'text': cleaned,
                'start': float(node.attrib['start']),
                # A missing 'dur' attribute counts as zero duration.
                'duration': float(node.attrib.get('dur', '0.0')),
            })
        return segments
459
-
460
-
461
- def _raise_http_errors(response, video_id):
462
- """Handle those HTTP errors with style! 🛠️"""
463
- try:
464
- response.raise_for_status()
465
- return response
466
- except requests.exceptions.HTTPError as error:
467
- raise YouTubeRequestFailedError(video_id, error)
468
-
469
-
470
if __name__ == "__main__":
    # Demo: fetch a transcript for a sample video and pretty-print it.
    from rich import print

    demo_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
    # languages=None is documented on get_transcript as requesting the
    # auto-generated transcript.
    demo_transcript = YTTranscriber.get_transcript(demo_url, languages=None)
    print("Here's what we got! 🔥")
    print(demo_transcript)
@@ -1,6 +0,0 @@
1
- from .errors import *
2
- from .video import Video
3
- from .query import Search
4
- from .extras import Extras
5
- from .channel import Channel
6
- from .playlist import Playlist