webscout 8.2.9__py3-none-any.whl → 2026.1.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (413) hide show
  1. webscout/AIauto.py +524 -251
  2. webscout/AIbase.py +247 -319
  3. webscout/AIutel.py +68 -703
  4. webscout/Bard.py +1072 -1026
  5. webscout/Extra/GitToolkit/__init__.py +10 -10
  6. webscout/Extra/GitToolkit/gitapi/__init__.py +20 -12
  7. webscout/Extra/GitToolkit/gitapi/gist.py +142 -0
  8. webscout/Extra/GitToolkit/gitapi/organization.py +91 -0
  9. webscout/Extra/GitToolkit/gitapi/repository.py +308 -195
  10. webscout/Extra/GitToolkit/gitapi/search.py +162 -0
  11. webscout/Extra/GitToolkit/gitapi/trending.py +236 -0
  12. webscout/Extra/GitToolkit/gitapi/user.py +128 -96
  13. webscout/Extra/GitToolkit/gitapi/utils.py +82 -62
  14. webscout/Extra/YTToolkit/README.md +443 -375
  15. webscout/Extra/YTToolkit/YTdownloader.py +953 -957
  16. webscout/Extra/YTToolkit/__init__.py +3 -3
  17. webscout/Extra/YTToolkit/transcriber.py +595 -476
  18. webscout/Extra/YTToolkit/ytapi/README.md +230 -44
  19. webscout/Extra/YTToolkit/ytapi/__init__.py +22 -6
  20. webscout/Extra/YTToolkit/ytapi/captions.py +190 -0
  21. webscout/Extra/YTToolkit/ytapi/channel.py +302 -307
  22. webscout/Extra/YTToolkit/ytapi/errors.py +13 -13
  23. webscout/Extra/YTToolkit/ytapi/extras.py +178 -118
  24. webscout/Extra/YTToolkit/ytapi/hashtag.py +120 -0
  25. webscout/Extra/YTToolkit/ytapi/https.py +89 -88
  26. webscout/Extra/YTToolkit/ytapi/patterns.py +61 -61
  27. webscout/Extra/YTToolkit/ytapi/playlist.py +59 -59
  28. webscout/Extra/YTToolkit/ytapi/pool.py +8 -8
  29. webscout/Extra/YTToolkit/ytapi/query.py +143 -40
  30. webscout/Extra/YTToolkit/ytapi/shorts.py +122 -0
  31. webscout/Extra/YTToolkit/ytapi/stream.py +68 -63
  32. webscout/Extra/YTToolkit/ytapi/suggestions.py +97 -0
  33. webscout/Extra/YTToolkit/ytapi/utils.py +66 -62
  34. webscout/Extra/YTToolkit/ytapi/video.py +403 -232
  35. webscout/Extra/__init__.py +2 -3
  36. webscout/Extra/gguf.py +1298 -684
  37. webscout/Extra/tempmail/README.md +487 -487
  38. webscout/Extra/tempmail/__init__.py +28 -28
  39. webscout/Extra/tempmail/async_utils.py +143 -141
  40. webscout/Extra/tempmail/base.py +172 -161
  41. webscout/Extra/tempmail/cli.py +191 -187
  42. webscout/Extra/tempmail/emailnator.py +88 -84
  43. webscout/Extra/tempmail/mail_tm.py +378 -361
  44. webscout/Extra/tempmail/temp_mail_io.py +304 -292
  45. webscout/Extra/weather.py +196 -194
  46. webscout/Extra/weather_ascii.py +17 -15
  47. webscout/Provider/AISEARCH/PERPLEXED_search.py +175 -0
  48. webscout/Provider/AISEARCH/Perplexity.py +292 -333
  49. webscout/Provider/AISEARCH/README.md +106 -279
  50. webscout/Provider/AISEARCH/__init__.py +16 -9
  51. webscout/Provider/AISEARCH/brave_search.py +298 -0
  52. webscout/Provider/AISEARCH/iask_search.py +357 -410
  53. webscout/Provider/AISEARCH/monica_search.py +200 -220
  54. webscout/Provider/AISEARCH/webpilotai_search.py +242 -255
  55. webscout/Provider/Algion.py +413 -0
  56. webscout/Provider/Andi.py +74 -69
  57. webscout/Provider/Apriel.py +313 -0
  58. webscout/Provider/Ayle.py +323 -0
  59. webscout/Provider/ChatSandbox.py +329 -342
  60. webscout/Provider/ClaudeOnline.py +365 -0
  61. webscout/Provider/Cohere.py +232 -208
  62. webscout/Provider/DeepAI.py +367 -0
  63. webscout/Provider/Deepinfra.py +467 -340
  64. webscout/Provider/EssentialAI.py +217 -0
  65. webscout/Provider/ExaAI.py +274 -261
  66. webscout/Provider/Gemini.py +175 -169
  67. webscout/Provider/GithubChat.py +385 -369
  68. webscout/Provider/Gradient.py +286 -0
  69. webscout/Provider/Groq.py +556 -801
  70. webscout/Provider/HadadXYZ.py +323 -0
  71. webscout/Provider/HeckAI.py +392 -375
  72. webscout/Provider/HuggingFace.py +387 -0
  73. webscout/Provider/IBM.py +340 -0
  74. webscout/Provider/Jadve.py +317 -291
  75. webscout/Provider/K2Think.py +306 -0
  76. webscout/Provider/Koboldai.py +221 -384
  77. webscout/Provider/Netwrck.py +273 -270
  78. webscout/Provider/Nvidia.py +310 -0
  79. webscout/Provider/OPENAI/DeepAI.py +489 -0
  80. webscout/Provider/OPENAI/K2Think.py +423 -0
  81. webscout/Provider/OPENAI/PI.py +463 -0
  82. webscout/Provider/OPENAI/README.md +890 -952
  83. webscout/Provider/OPENAI/TogetherAI.py +405 -0
  84. webscout/Provider/OPENAI/TwoAI.py +255 -357
  85. webscout/Provider/OPENAI/__init__.py +148 -40
  86. webscout/Provider/OPENAI/ai4chat.py +348 -293
  87. webscout/Provider/OPENAI/akashgpt.py +436 -0
  88. webscout/Provider/OPENAI/algion.py +303 -0
  89. webscout/Provider/OPENAI/{exachat.py → ayle.py} +365 -444
  90. webscout/Provider/OPENAI/base.py +253 -249
  91. webscout/Provider/OPENAI/cerebras.py +296 -0
  92. webscout/Provider/OPENAI/chatgpt.py +870 -556
  93. webscout/Provider/OPENAI/chatsandbox.py +233 -173
  94. webscout/Provider/OPENAI/deepinfra.py +403 -322
  95. webscout/Provider/OPENAI/e2b.py +2370 -1414
  96. webscout/Provider/OPENAI/elmo.py +278 -0
  97. webscout/Provider/OPENAI/exaai.py +452 -417
  98. webscout/Provider/OPENAI/freeassist.py +446 -0
  99. webscout/Provider/OPENAI/gradient.py +448 -0
  100. webscout/Provider/OPENAI/groq.py +380 -364
  101. webscout/Provider/OPENAI/hadadxyz.py +292 -0
  102. webscout/Provider/OPENAI/heckai.py +333 -308
  103. webscout/Provider/OPENAI/huggingface.py +321 -0
  104. webscout/Provider/OPENAI/ibm.py +425 -0
  105. webscout/Provider/OPENAI/llmchat.py +253 -0
  106. webscout/Provider/OPENAI/llmchatco.py +378 -335
  107. webscout/Provider/OPENAI/meta.py +541 -0
  108. webscout/Provider/OPENAI/netwrck.py +374 -357
  109. webscout/Provider/OPENAI/nvidia.py +317 -0
  110. webscout/Provider/OPENAI/oivscode.py +348 -287
  111. webscout/Provider/OPENAI/openrouter.py +328 -0
  112. webscout/Provider/OPENAI/pydantic_imports.py +1 -172
  113. webscout/Provider/OPENAI/sambanova.py +397 -0
  114. webscout/Provider/OPENAI/sonus.py +305 -304
  115. webscout/Provider/OPENAI/textpollinations.py +370 -339
  116. webscout/Provider/OPENAI/toolbaz.py +375 -413
  117. webscout/Provider/OPENAI/typefully.py +419 -355
  118. webscout/Provider/OPENAI/typliai.py +279 -0
  119. webscout/Provider/OPENAI/utils.py +314 -318
  120. webscout/Provider/OPENAI/wisecat.py +359 -387
  121. webscout/Provider/OPENAI/writecream.py +185 -163
  122. webscout/Provider/OPENAI/x0gpt.py +462 -365
  123. webscout/Provider/OPENAI/zenmux.py +380 -0
  124. webscout/Provider/OpenRouter.py +386 -0
  125. webscout/Provider/Openai.py +337 -496
  126. webscout/Provider/PI.py +443 -429
  127. webscout/Provider/QwenLM.py +346 -254
  128. webscout/Provider/STT/__init__.py +28 -0
  129. webscout/Provider/STT/base.py +303 -0
  130. webscout/Provider/STT/elevenlabs.py +264 -0
  131. webscout/Provider/Sambanova.py +317 -0
  132. webscout/Provider/TTI/README.md +69 -82
  133. webscout/Provider/TTI/__init__.py +37 -7
  134. webscout/Provider/TTI/base.py +147 -64
  135. webscout/Provider/TTI/claudeonline.py +393 -0
  136. webscout/Provider/TTI/magicstudio.py +292 -201
  137. webscout/Provider/TTI/miragic.py +180 -0
  138. webscout/Provider/TTI/pollinations.py +331 -221
  139. webscout/Provider/TTI/together.py +334 -0
  140. webscout/Provider/TTI/utils.py +14 -11
  141. webscout/Provider/TTS/README.md +186 -192
  142. webscout/Provider/TTS/__init__.py +43 -10
  143. webscout/Provider/TTS/base.py +523 -159
  144. webscout/Provider/TTS/deepgram.py +286 -156
  145. webscout/Provider/TTS/elevenlabs.py +189 -111
  146. webscout/Provider/TTS/freetts.py +218 -0
  147. webscout/Provider/TTS/murfai.py +288 -113
  148. webscout/Provider/TTS/openai_fm.py +364 -129
  149. webscout/Provider/TTS/parler.py +203 -111
  150. webscout/Provider/TTS/qwen.py +334 -0
  151. webscout/Provider/TTS/sherpa.py +286 -0
  152. webscout/Provider/TTS/speechma.py +693 -580
  153. webscout/Provider/TTS/streamElements.py +275 -333
  154. webscout/Provider/TTS/utils.py +280 -280
  155. webscout/Provider/TextPollinationsAI.py +331 -308
  156. webscout/Provider/TogetherAI.py +450 -0
  157. webscout/Provider/TwoAI.py +309 -475
  158. webscout/Provider/TypliAI.py +311 -305
  159. webscout/Provider/UNFINISHED/ChatHub.py +219 -209
  160. webscout/Provider/{OPENAI/glider.py → UNFINISHED/ChutesAI.py} +331 -326
  161. webscout/Provider/{GizAI.py → UNFINISHED/GizAI.py} +300 -295
  162. webscout/Provider/{Marcus.py → UNFINISHED/Marcus.py} +218 -198
  163. webscout/Provider/UNFINISHED/Qodo.py +481 -0
  164. webscout/Provider/{MCPCore.py → UNFINISHED/XenAI.py} +330 -315
  165. webscout/Provider/UNFINISHED/Youchat.py +347 -330
  166. webscout/Provider/UNFINISHED/aihumanizer.py +41 -0
  167. webscout/Provider/UNFINISHED/grammerchecker.py +37 -0
  168. webscout/Provider/UNFINISHED/liner.py +342 -0
  169. webscout/Provider/UNFINISHED/liner_api_request.py +246 -263
  170. webscout/Provider/{samurai.py → UNFINISHED/samurai.py} +231 -224
  171. webscout/Provider/WiseCat.py +256 -233
  172. webscout/Provider/WrDoChat.py +390 -370
  173. webscout/Provider/__init__.py +115 -174
  174. webscout/Provider/ai4chat.py +181 -174
  175. webscout/Provider/akashgpt.py +330 -335
  176. webscout/Provider/cerebras.py +397 -290
  177. webscout/Provider/cleeai.py +236 -213
  178. webscout/Provider/elmo.py +291 -283
  179. webscout/Provider/geminiapi.py +343 -208
  180. webscout/Provider/julius.py +245 -223
  181. webscout/Provider/learnfastai.py +333 -325
  182. webscout/Provider/llama3mitril.py +230 -215
  183. webscout/Provider/llmchat.py +308 -258
  184. webscout/Provider/llmchatco.py +321 -306
  185. webscout/Provider/meta.py +996 -801
  186. webscout/Provider/oivscode.py +332 -309
  187. webscout/Provider/searchchat.py +316 -292
  188. webscout/Provider/sonus.py +264 -258
  189. webscout/Provider/toolbaz.py +359 -353
  190. webscout/Provider/turboseek.py +332 -266
  191. webscout/Provider/typefully.py +262 -202
  192. webscout/Provider/x0gpt.py +332 -299
  193. webscout/__init__.py +31 -39
  194. webscout/__main__.py +5 -5
  195. webscout/cli.py +585 -524
  196. webscout/client.py +1497 -70
  197. webscout/conversation.py +140 -436
  198. webscout/exceptions.py +383 -362
  199. webscout/litagent/__init__.py +29 -29
  200. webscout/litagent/agent.py +492 -455
  201. webscout/litagent/constants.py +60 -60
  202. webscout/models.py +505 -181
  203. webscout/optimizers.py +74 -420
  204. webscout/prompt_manager.py +376 -288
  205. webscout/sanitize.py +1514 -0
  206. webscout/scout/README.md +452 -404
  207. webscout/scout/__init__.py +8 -8
  208. webscout/scout/core/__init__.py +7 -7
  209. webscout/scout/core/crawler.py +330 -210
  210. webscout/scout/core/scout.py +800 -607
  211. webscout/scout/core/search_result.py +51 -96
  212. webscout/scout/core/text_analyzer.py +64 -63
  213. webscout/scout/core/text_utils.py +412 -277
  214. webscout/scout/core/web_analyzer.py +54 -52
  215. webscout/scout/element.py +872 -478
  216. webscout/scout/parsers/__init__.py +70 -69
  217. webscout/scout/parsers/html5lib_parser.py +182 -172
  218. webscout/scout/parsers/html_parser.py +238 -236
  219. webscout/scout/parsers/lxml_parser.py +203 -178
  220. webscout/scout/utils.py +38 -37
  221. webscout/search/__init__.py +47 -0
  222. webscout/search/base.py +201 -0
  223. webscout/search/bing_main.py +45 -0
  224. webscout/search/brave_main.py +92 -0
  225. webscout/search/duckduckgo_main.py +57 -0
  226. webscout/search/engines/__init__.py +127 -0
  227. webscout/search/engines/bing/__init__.py +15 -0
  228. webscout/search/engines/bing/base.py +35 -0
  229. webscout/search/engines/bing/images.py +114 -0
  230. webscout/search/engines/bing/news.py +96 -0
  231. webscout/search/engines/bing/suggestions.py +36 -0
  232. webscout/search/engines/bing/text.py +109 -0
  233. webscout/search/engines/brave/__init__.py +19 -0
  234. webscout/search/engines/brave/base.py +47 -0
  235. webscout/search/engines/brave/images.py +213 -0
  236. webscout/search/engines/brave/news.py +353 -0
  237. webscout/search/engines/brave/suggestions.py +318 -0
  238. webscout/search/engines/brave/text.py +167 -0
  239. webscout/search/engines/brave/videos.py +364 -0
  240. webscout/search/engines/duckduckgo/__init__.py +25 -0
  241. webscout/search/engines/duckduckgo/answers.py +80 -0
  242. webscout/search/engines/duckduckgo/base.py +189 -0
  243. webscout/search/engines/duckduckgo/images.py +100 -0
  244. webscout/search/engines/duckduckgo/maps.py +183 -0
  245. webscout/search/engines/duckduckgo/news.py +70 -0
  246. webscout/search/engines/duckduckgo/suggestions.py +22 -0
  247. webscout/search/engines/duckduckgo/text.py +221 -0
  248. webscout/search/engines/duckduckgo/translate.py +48 -0
  249. webscout/search/engines/duckduckgo/videos.py +80 -0
  250. webscout/search/engines/duckduckgo/weather.py +84 -0
  251. webscout/search/engines/mojeek.py +61 -0
  252. webscout/search/engines/wikipedia.py +77 -0
  253. webscout/search/engines/yahoo/__init__.py +41 -0
  254. webscout/search/engines/yahoo/answers.py +19 -0
  255. webscout/search/engines/yahoo/base.py +34 -0
  256. webscout/search/engines/yahoo/images.py +323 -0
  257. webscout/search/engines/yahoo/maps.py +19 -0
  258. webscout/search/engines/yahoo/news.py +258 -0
  259. webscout/search/engines/yahoo/suggestions.py +140 -0
  260. webscout/search/engines/yahoo/text.py +273 -0
  261. webscout/search/engines/yahoo/translate.py +19 -0
  262. webscout/search/engines/yahoo/videos.py +302 -0
  263. webscout/search/engines/yahoo/weather.py +220 -0
  264. webscout/search/engines/yandex.py +67 -0
  265. webscout/search/engines/yep/__init__.py +13 -0
  266. webscout/search/engines/yep/base.py +34 -0
  267. webscout/search/engines/yep/images.py +101 -0
  268. webscout/search/engines/yep/suggestions.py +38 -0
  269. webscout/search/engines/yep/text.py +99 -0
  270. webscout/search/http_client.py +172 -0
  271. webscout/search/results.py +141 -0
  272. webscout/search/yahoo_main.py +57 -0
  273. webscout/search/yep_main.py +48 -0
  274. webscout/server/__init__.py +48 -0
  275. webscout/server/config.py +78 -0
  276. webscout/server/exceptions.py +69 -0
  277. webscout/server/providers.py +286 -0
  278. webscout/server/request_models.py +131 -0
  279. webscout/server/request_processing.py +404 -0
  280. webscout/server/routes.py +642 -0
  281. webscout/server/server.py +351 -0
  282. webscout/server/ui_templates.py +1171 -0
  283. webscout/swiftcli/__init__.py +79 -95
  284. webscout/swiftcli/core/__init__.py +7 -7
  285. webscout/swiftcli/core/cli.py +574 -297
  286. webscout/swiftcli/core/context.py +98 -104
  287. webscout/swiftcli/core/group.py +268 -241
  288. webscout/swiftcli/decorators/__init__.py +28 -28
  289. webscout/swiftcli/decorators/command.py +243 -221
  290. webscout/swiftcli/decorators/options.py +247 -220
  291. webscout/swiftcli/decorators/output.py +392 -252
  292. webscout/swiftcli/exceptions.py +21 -21
  293. webscout/swiftcli/plugins/__init__.py +9 -9
  294. webscout/swiftcli/plugins/base.py +134 -135
  295. webscout/swiftcli/plugins/manager.py +269 -269
  296. webscout/swiftcli/utils/__init__.py +58 -59
  297. webscout/swiftcli/utils/formatting.py +251 -252
  298. webscout/swiftcli/utils/parsing.py +368 -267
  299. webscout/update_checker.py +280 -136
  300. webscout/utils.py +28 -14
  301. webscout/version.py +2 -1
  302. webscout/version.py.bak +3 -0
  303. webscout/zeroart/__init__.py +218 -135
  304. webscout/zeroart/base.py +70 -66
  305. webscout/zeroart/effects.py +155 -101
  306. webscout/zeroart/fonts.py +1799 -1239
  307. webscout-2026.1.19.dist-info/METADATA +638 -0
  308. webscout-2026.1.19.dist-info/RECORD +312 -0
  309. {webscout-8.2.9.dist-info → webscout-2026.1.19.dist-info}/WHEEL +1 -1
  310. {webscout-8.2.9.dist-info → webscout-2026.1.19.dist-info}/entry_points.txt +1 -1
  311. webscout/DWEBS.py +0 -520
  312. webscout/Extra/Act.md +0 -309
  313. webscout/Extra/GitToolkit/gitapi/README.md +0 -110
  314. webscout/Extra/autocoder/__init__.py +0 -9
  315. webscout/Extra/autocoder/autocoder.py +0 -1105
  316. webscout/Extra/autocoder/autocoder_utiles.py +0 -332
  317. webscout/Extra/gguf.md +0 -430
  318. webscout/Extra/weather.md +0 -281
  319. webscout/Litlogger/README.md +0 -10
  320. webscout/Litlogger/__init__.py +0 -15
  321. webscout/Litlogger/formats.py +0 -4
  322. webscout/Litlogger/handlers.py +0 -103
  323. webscout/Litlogger/levels.py +0 -13
  324. webscout/Litlogger/logger.py +0 -92
  325. webscout/Provider/AI21.py +0 -177
  326. webscout/Provider/AISEARCH/DeepFind.py +0 -254
  327. webscout/Provider/AISEARCH/felo_search.py +0 -202
  328. webscout/Provider/AISEARCH/genspark_search.py +0 -324
  329. webscout/Provider/AISEARCH/hika_search.py +0 -186
  330. webscout/Provider/AISEARCH/scira_search.py +0 -298
  331. webscout/Provider/Aitopia.py +0 -316
  332. webscout/Provider/AllenAI.py +0 -440
  333. webscout/Provider/Blackboxai.py +0 -791
  334. webscout/Provider/ChatGPTClone.py +0 -237
  335. webscout/Provider/ChatGPTGratis.py +0 -194
  336. webscout/Provider/Cloudflare.py +0 -324
  337. webscout/Provider/ExaChat.py +0 -358
  338. webscout/Provider/Flowith.py +0 -217
  339. webscout/Provider/FreeGemini.py +0 -250
  340. webscout/Provider/Glider.py +0 -225
  341. webscout/Provider/HF_space/__init__.py +0 -0
  342. webscout/Provider/HF_space/qwen_qwen2.py +0 -206
  343. webscout/Provider/HuggingFaceChat.py +0 -469
  344. webscout/Provider/Hunyuan.py +0 -283
  345. webscout/Provider/LambdaChat.py +0 -411
  346. webscout/Provider/Llama3.py +0 -259
  347. webscout/Provider/Nemotron.py +0 -218
  348. webscout/Provider/OLLAMA.py +0 -396
  349. webscout/Provider/OPENAI/BLACKBOXAI.py +0 -766
  350. webscout/Provider/OPENAI/Cloudflare.py +0 -378
  351. webscout/Provider/OPENAI/FreeGemini.py +0 -283
  352. webscout/Provider/OPENAI/NEMOTRON.py +0 -232
  353. webscout/Provider/OPENAI/Qwen3.py +0 -283
  354. webscout/Provider/OPENAI/api.py +0 -969
  355. webscout/Provider/OPENAI/c4ai.py +0 -373
  356. webscout/Provider/OPENAI/chatgptclone.py +0 -494
  357. webscout/Provider/OPENAI/copilot.py +0 -242
  358. webscout/Provider/OPENAI/flowith.py +0 -162
  359. webscout/Provider/OPENAI/freeaichat.py +0 -359
  360. webscout/Provider/OPENAI/mcpcore.py +0 -389
  361. webscout/Provider/OPENAI/multichat.py +0 -376
  362. webscout/Provider/OPENAI/opkfc.py +0 -496
  363. webscout/Provider/OPENAI/scirachat.py +0 -477
  364. webscout/Provider/OPENAI/standardinput.py +0 -433
  365. webscout/Provider/OPENAI/typegpt.py +0 -364
  366. webscout/Provider/OPENAI/uncovrAI.py +0 -463
  367. webscout/Provider/OPENAI/venice.py +0 -431
  368. webscout/Provider/OPENAI/yep.py +0 -382
  369. webscout/Provider/OpenGPT.py +0 -209
  370. webscout/Provider/Perplexitylabs.py +0 -415
  371. webscout/Provider/Reka.py +0 -214
  372. webscout/Provider/StandardInput.py +0 -290
  373. webscout/Provider/TTI/aiarta.py +0 -365
  374. webscout/Provider/TTI/artbit.py +0 -0
  375. webscout/Provider/TTI/fastflux.py +0 -200
  376. webscout/Provider/TTI/piclumen.py +0 -203
  377. webscout/Provider/TTI/pixelmuse.py +0 -225
  378. webscout/Provider/TTS/gesserit.py +0 -128
  379. webscout/Provider/TTS/sthir.py +0 -94
  380. webscout/Provider/TeachAnything.py +0 -229
  381. webscout/Provider/UNFINISHED/puterjs.py +0 -635
  382. webscout/Provider/UNFINISHED/test_lmarena.py +0 -119
  383. webscout/Provider/Venice.py +0 -258
  384. webscout/Provider/VercelAI.py +0 -253
  385. webscout/Provider/Writecream.py +0 -246
  386. webscout/Provider/WritingMate.py +0 -269
  387. webscout/Provider/asksteve.py +0 -220
  388. webscout/Provider/chatglm.py +0 -215
  389. webscout/Provider/copilot.py +0 -425
  390. webscout/Provider/freeaichat.py +0 -285
  391. webscout/Provider/granite.py +0 -235
  392. webscout/Provider/hermes.py +0 -266
  393. webscout/Provider/koala.py +0 -170
  394. webscout/Provider/lmarena.py +0 -198
  395. webscout/Provider/multichat.py +0 -364
  396. webscout/Provider/scira_chat.py +0 -299
  397. webscout/Provider/scnet.py +0 -243
  398. webscout/Provider/talkai.py +0 -194
  399. webscout/Provider/typegpt.py +0 -289
  400. webscout/Provider/uncovr.py +0 -368
  401. webscout/Provider/yep.py +0 -389
  402. webscout/litagent/Readme.md +0 -276
  403. webscout/litprinter/__init__.py +0 -59
  404. webscout/swiftcli/Readme.md +0 -323
  405. webscout/tempid.py +0 -128
  406. webscout/webscout_search.py +0 -1184
  407. webscout/webscout_search_async.py +0 -654
  408. webscout/yep_search.py +0 -347
  409. webscout/zeroart/README.md +0 -89
  410. webscout-8.2.9.dist-info/METADATA +0 -1033
  411. webscout-8.2.9.dist-info/RECORD +0 -289
  412. {webscout-8.2.9.dist-info → webscout-2026.1.19.dist-info}/licenses/LICENSE.md +0 -0
  413. {webscout-8.2.9.dist-info → webscout-2026.1.19.dist-info}/top_level.txt +0 -0
@@ -1,476 +1,595 @@
1
- """
2
- >>> from webscout import YTTranscriber
3
- >>> transcript = YTTranscriber.get_transcript('https://www.youtube.com/watch?v=dQw4w9WgXcQ')
4
- >>> print(transcript)
5
- {'text': 'Never gonna give you up', 'start': 0.0, 'duration': 4.5}
6
-
7
- """
8
-
9
- import requests
10
- import http.cookiejar as cookiejar
11
- import json
12
- from xml.etree import ElementTree
13
- import re
14
- import html
15
- from typing import List, Dict, Union, Optional
16
- from functools import lru_cache #
17
- from concurrent.futures import ThreadPoolExecutor
18
- from webscout.exceptions import *
19
-
20
- WATCH_URL = 'https://www.youtube.com/watch?v={video_id}'
21
- MAX_WORKERS = 4
22
-
23
- class YTTranscriber:
24
- """Transcribe YouTube videos with style! 🎤
25
-
26
- >>> transcript = YTTranscriber.get_transcript('https://youtu.be/dQw4w9WgXcQ')
27
- >>> print(transcript[0]['text'])
28
- 'Never gonna give you up'
29
- """
30
-
31
- _session = None
32
- _executor = ThreadPoolExecutor(max_workers=MAX_WORKERS)
33
-
34
- @classmethod
35
- def _get_session(cls):
36
- if cls._session is None:
37
- cls._session = requests.Session()
38
- cls._session.headers.update({
39
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
40
- })
41
- return cls._session
42
-
43
- @classmethod
44
- @lru_cache(maxsize=100)
45
- def get_transcript(cls, video_url: str, languages: Optional[str] = 'en',
46
- proxies: Dict[str, str] = None,
47
- cookies: str = None,
48
- preserve_formatting: bool = False) -> List[Dict[str, Union[str, float]]]:
49
- """
50
- Retrieves the transcript for a given YouTube video URL.
51
-
52
- Args:
53
- video_url (str): YouTube video URL (supports various formats).
54
- languages (str, optional): Language code for the transcript.
55
- If None, fetches the auto-generated transcript.
56
- Defaults to 'en'.
57
- proxies (Dict[str, str], optional): Proxies to use for the request. Defaults to None.
58
- cookies (str, optional): Path to the cookie file. Defaults to None.
59
- preserve_formatting (bool, optional): Whether to preserve formatting tags. Defaults to False.
60
-
61
- Returns:
62
- List[Dict[str, Union[str, float]]]: A list of dictionaries, each containing:
63
- - 'text': The transcribed text.
64
- - 'start': The start time of the text segment (in seconds).
65
- - 'duration': The duration of the text segment (in seconds).
66
-
67
- Raises:
68
- TranscriptRetrievalError: If there's an error retrieving the transcript.
69
- """
70
- video_id = cls._extract_video_id(video_url)
71
- http_client = cls._get_session()
72
-
73
- if proxies:
74
- http_client.proxies.update(proxies)
75
-
76
- if cookies:
77
- cls._load_cookies(cookies, video_id)
78
-
79
- transcript_list = TranscriptListFetcher(http_client).fetch(video_id)
80
- language_codes = [languages] if languages else None
81
- transcript = transcript_list.find_transcript(language_codes)
82
-
83
- return transcript.fetch(preserve_formatting)
84
-
85
- @staticmethod
86
- def _extract_video_id(video_url: str) -> str:
87
- """Extracts the video ID from different YouTube URL formats."""
88
- patterns = [
89
- r'(?:v=|\/)([0-9A-Za-z_-]{11}).*',
90
- r'youtu\.be\/([0-9A-Za-z_-]{11})',
91
- r'youtube\.com\/embed\/([0-9A-Za-z_-]{11})'
92
- ]
93
-
94
- for pattern in patterns:
95
- match = re.search(pattern, video_url)
96
- if match:
97
- return match.group(1)
98
-
99
- if re.match(r'^[0-9A-Za-z_-]{11}$', video_url):
100
- return video_url
101
-
102
- raise InvalidVideoIdError(video_url)
103
-
104
- @staticmethod
105
- def _load_cookies(cookies: str, video_id: str) -> None:
106
- """Loads cookies from a file."""
107
- try:
108
- cj = cookiejar.MozillaCookieJar(cookies)
109
- cj.load()
110
- return cj
111
- except (cookiejar.LoadError, FileNotFoundError):
112
- raise CookiePathInvalidError(video_id)
113
-
114
- class TranscriptListFetcher:
115
- """Fetches the list of transcripts for a YouTube video."""
116
-
117
- def __init__(self, http_client: requests.Session):
118
- """Initializes TranscriptListFetcher."""
119
- self._http_client = http_client
120
-
121
- def fetch(self, video_id: str):
122
- """Fetches and returns a TranscriptList."""
123
- return TranscriptList.build(
124
- self._http_client,
125
- video_id,
126
- self._extract_captions_json(self._fetch_video_html(video_id), video_id),
127
- )
128
-
129
- def _extract_captions_json(self, html: str, video_id: str) -> dict:
130
- """Extracts the captions JSON data from the video's HTML."""
131
- splitted_html = html.split('"captions":')
132
-
133
- if len(splitted_html) <= 1:
134
- if video_id.startswith('http://') or video_id.startswith('https://'):
135
- raise InvalidVideoIdError(video_id)
136
- if 'class="g-recaptcha"' in html:
137
- raise TooManyRequestsError(video_id)
138
- if '"playabilityStatus":' not in html:
139
- raise VideoUnavailableError(video_id)
140
-
141
- raise TranscriptsDisabledError(video_id)
142
-
143
- captions_json = json.loads(
144
- splitted_html[1].split(',"videoDetails')[0].replace('\n', '')
145
- ).get('playerCaptionsTracklistRenderer')
146
- if captions_json is None:
147
- raise TranscriptsDisabledError(video_id)
148
-
149
- if 'captionTracks' not in captions_json:
150
- raise TranscriptsDisabledError(video_id)
151
-
152
- return captions_json
153
-
154
- def _create_consent_cookie(self, html, video_id):
155
- match = re.search('name="v" value="(.*?)"', html)
156
- if match is None:
157
- raise FailedToCreateConsentCookieError(video_id)
158
- self._http_client.cookies.set('CONSENT', 'YES+' + match.group(1), domain='.youtube.com')
159
-
160
- def _fetch_video_html(self, video_id):
161
- html = self._fetch_html(video_id)
162
- if 'action="https://consent.youtube.com/s"' in html:
163
- self._create_consent_cookie(html, video_id)
164
- html = self._fetch_html(video_id)
165
- if 'action="https://consent.youtube.com/s"' in html:
166
- raise FailedToCreateConsentCookieError(video_id)
167
- return html
168
-
169
- def _fetch_html(self, video_id):
170
- response = self._http_client.get(WATCH_URL.format(video_id=video_id), headers={'Accept-Language': 'en-US'})
171
- return html.unescape(_raise_http_errors(response, video_id).text)
172
-
173
-
174
- class TranscriptList:
175
- """
176
- >>> transcript_list = TranscriptList.build(http_client, video_id, captions_json)
177
- >>> transcript = transcript_list.find_transcript(['en'])
178
- >>> print(transcript)
179
- en ("English")[TRANSLATABLE]
180
- """
181
-
182
- def __init__(self, video_id, manually_created_transcripts, generated_transcripts, translation_languages):
183
- """Init that transcript list with all the good stuff! 💯"""
184
- self.video_id = video_id
185
- self._manually_created_transcripts = manually_created_transcripts
186
- self._generated_transcripts = generated_transcripts
187
- self._translation_languages = translation_languages
188
-
189
- @staticmethod
190
- def build(http_client, video_id, captions_json):
191
- """
192
- Factory method for TranscriptList.
193
-
194
- :param http_client: http client which is used to make the transcript retrieving http calls
195
- :type http_client: requests.Session
196
- :param video_id: the id of the video this TranscriptList is for
197
- :type video_id: str
198
- :param captions_json: the JSON parsed from the YouTube pages static HTML
199
- :type captions_json: dict
200
- :return: the created TranscriptList
201
- :rtype TranscriptList:
202
- """
203
- translation_languages = [
204
- {
205
- 'language': translation_language['languageName']['simpleText'],
206
- 'language_code': translation_language['languageCode'],
207
- } for translation_language in captions_json.get('translationLanguages', [])
208
- ]
209
-
210
- manually_created_transcripts = {}
211
- generated_transcripts = {}
212
-
213
- for caption in captions_json['captionTracks']:
214
- if caption.get('kind', '') == 'asr':
215
- transcript_dict = generated_transcripts
216
- else:
217
- transcript_dict = manually_created_transcripts
218
-
219
- transcript_dict[caption['languageCode']] = Transcript(
220
- http_client,
221
- video_id,
222
- caption['baseUrl'],
223
- caption['name']['simpleText'],
224
- caption['languageCode'],
225
- caption.get('kind', '') == 'asr',
226
- translation_languages if caption.get('isTranslatable', False) else [],
227
- )
228
-
229
- return TranscriptList(
230
- video_id,
231
- manually_created_transcripts,
232
- generated_transcripts,
233
- translation_languages,
234
- )
235
-
236
- def __iter__(self):
237
- return iter(list(self._manually_created_transcripts.values()) + list(self._generated_transcripts.values()))
238
-
239
- def find_transcript(self, language_codes):
240
- """
241
- Finds a transcript for a given language code. If no language is provided, it will
242
- return the auto-generated transcript.
243
-
244
- :param language_codes: A list of language codes in a descending priority.
245
- :type languages: list[str]
246
- :return: the found Transcript
247
- :rtype Transcript:
248
- :raises: NoTranscriptFound
249
- """
250
- if 'any' in language_codes:
251
- for transcript in self:
252
- return transcript
253
- return self._find_transcript(language_codes, [self._manually_created_transcripts, self._generated_transcripts])
254
-
255
- def find_generated_transcript(self, language_codes):
256
- """
257
- Finds an automatically generated transcript for a given language code.
258
-
259
- :param language_codes: A list of language codes in a descending priority. For example, if this is set to
260
- ['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if
261
- it fails to do so.
262
- :type languages: list[str]
263
- :return: the found Transcript
264
- :rtype Transcript:
265
- :raises: NoTranscriptFound
266
- """
267
- if 'any' in language_codes:
268
- for transcript in self:
269
- if transcript.is_generated:
270
- return transcript
271
- return self._find_transcript(language_codes, [self._generated_transcripts])
272
-
273
- def find_manually_created_transcript(self, language_codes):
274
- """
275
- Finds a manually created transcript for a given language code.
276
-
277
- :param language_codes: A list of language codes in a descending priority. For example, if this is set to
278
- ['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if
279
- it fails to do so.
280
- :type languages: list[str]
281
- :return: the found Transcript
282
- :rtype Transcript:
283
- :raises: NoTranscriptFound
284
- """
285
- return self._find_transcript(language_codes, [self._manually_created_transcripts])
286
-
287
- def _find_transcript(self, language_codes, transcript_dicts):
288
- for language_code in language_codes:
289
- for transcript_dict in transcript_dicts:
290
- if language_code in transcript_dict:
291
- return transcript_dict[language_code]
292
-
293
- raise NoTranscriptFoundError(
294
- self.video_id,
295
- language_codes,
296
- self
297
- )
298
-
299
- def __str__(self):
300
- return (
301
- 'For this video ({video_id}) transcripts are available in the following languages:\n\n'
302
- '(MANUALLY CREATED)\n'
303
- '{available_manually_created_transcript_languages}\n\n'
304
- '(GENERATED)\n'
305
- '{available_generated_transcripts}\n\n'
306
- '(TRANSLATION LANGUAGES)\n'
307
- '{available_translation_languages}'
308
- ).format(
309
- video_id=self.video_id,
310
- available_manually_created_transcript_languages=self._get_language_description(
311
- str(transcript) for transcript in self._manually_created_transcripts.values()
312
- ),
313
- available_generated_transcripts=self._get_language_description(
314
- str(transcript) for transcript in self._generated_transcripts.values()
315
- ),
316
- available_translation_languages=self._get_language_description(
317
- '{language_code} ("{language}")'.format(
318
- language=translation_language['language'],
319
- language_code=translation_language['language_code'],
320
- ) for translation_language in self._translation_languages
321
- )
322
- )
323
-
324
- def _get_language_description(self, transcript_strings):
325
- description = '\n'.join(' - {transcript}'.format(transcript=transcript) for transcript in transcript_strings)
326
- return description if description else 'None'
327
-
328
-
329
- class Transcript:
330
- """Your personal transcript handler! 🎭
331
-
332
- >>> transcript = transcript_list.find_transcript(['en'])
333
- >>> print(transcript.language)
334
- 'English'
335
- >>> if transcript.is_translatable:
336
- ... es_transcript = transcript.translate('es')
337
- ... print(es_transcript.language)
338
- 'Spanish'
339
- """
340
-
341
- def __init__(self, http_client, video_id, url, language, language_code, is_generated, translation_languages):
342
- """Initialize with all the goodies! 🎁"""
343
- self._http_client = http_client
344
- self.video_id = video_id
345
- self._url = url
346
- self.language = language
347
- self.language_code = language_code
348
- self.is_generated = is_generated
349
- self.translation_languages = translation_languages
350
- self._translation_languages_dict = {
351
- translation_language['language_code']: translation_language['language']
352
- for translation_language in translation_languages
353
- }
354
-
355
- def fetch(self, preserve_formatting=False):
356
- """Get that transcript data! 🎯
357
-
358
- Args:
359
- preserve_formatting (bool): Keep HTML formatting? Default is nah fam.
360
-
361
- Returns:
362
- list: That sweet transcript data with text, start time, and duration! 📝
363
- """
364
- response = self._http_client.get(self._url, headers={'Accept-Language': 'en-US'})
365
- return TranscriptParser(preserve_formatting=preserve_formatting).parse(
366
- _raise_http_errors(response, self.video_id).text,
367
- )
368
-
369
- def __str__(self):
370
- """String representation looking clean! 💅"""
371
- return '{language_code} ("{language}"){translation_description}'.format(
372
- language=self.language,
373
- language_code=self.language_code,
374
- translation_description='[TRANSLATABLE]' if self.is_translatable else ''
375
- )
376
-
377
- @property
378
- def is_translatable(self):
379
- """Can we translate this? 🌍"""
380
- return len(self.translation_languages) > 0
381
-
382
- def translate(self, language_code):
383
- """Translate to another language! 🌎
384
-
385
- Args:
386
- language_code (str): Which language you want fam?
387
-
388
- Returns:
389
- Transcript: A fresh transcript in your requested language! 🔄
390
-
391
- Raises:
392
- NotTranslatableError: If we can't translate this one 😢
393
- TranslationLanguageNotAvailableError: If that language isn't available 🚫
394
- """
395
- if not self.is_translatable:
396
- raise NotTranslatableError(self.video_id)
397
-
398
- if language_code not in self._translation_languages_dict:
399
- raise TranslationLanguageNotAvailableError(self.video_id)
400
-
401
- return Transcript(
402
- self._http_client,
403
- self.video_id,
404
- '{url}&tlang={language_code}'.format(url=self._url, language_code=language_code),
405
- self._translation_languages_dict[language_code],
406
- language_code,
407
- True,
408
- [],
409
- )
410
-
411
-
412
- class TranscriptParser:
413
- """Parsing those transcripts like a pro! 🎯
414
-
415
- >>> parser = TranscriptParser(preserve_formatting=True)
416
- >>> data = parser.parse(xml_data)
417
- >>> print(data[0])
418
- {'text': 'Never gonna give you up', 'start': 0.0, 'duration': 4.5}
419
- """
420
-
421
- _FORMATTING_TAGS = [
422
- 'strong', # For that extra emphasis 💪
423
- 'em', # When you need that italic swag 🎨
424
- 'b', # Bold and beautiful 💯
425
- 'i', # More italic vibes ✨
426
- 'mark', # Highlight that text 🌟
427
- 'small', # Keep it lowkey 🤫
428
- 'del', # Strike it out ⚡
429
- 'ins', # Insert new stuff 🆕
430
- 'sub', # Subscript gang 📉
431
- 'sup', # Superscript squad 📈
432
- ]
433
-
434
- def __init__(self, preserve_formatting=False):
435
- """Get ready to parse with style! 🎨"""
436
- self._html_regex = self._get_html_regex(preserve_formatting)
437
-
438
- def _get_html_regex(self, preserve_formatting):
439
- """Get that regex pattern ready! 🎯"""
440
- if preserve_formatting:
441
- formats_regex = '|'.join(self._FORMATTING_TAGS)
442
- formats_regex = r'<\/?(?!\/?(' + formats_regex + r')\b).*?\b>'
443
- html_regex = re.compile(formats_regex, re.IGNORECASE)
444
- else:
445
- html_regex = re.compile(r'<[^>]*>', re.IGNORECASE)
446
- return html_regex
447
-
448
- def parse(self, plain_data):
449
- """Parse that XML data into something beautiful! ✨"""
450
- return [
451
- {
452
- 'text': re.sub(self._html_regex, '', html.unescape(xml_element.text)),
453
- 'start': float(xml_element.attrib['start']),
454
- 'duration': float(xml_element.attrib.get('dur', '0.0')),
455
- }
456
- for xml_element in ElementTree.fromstring(plain_data)
457
- if xml_element.text is not None
458
- ]
459
-
460
-
461
- def _raise_http_errors(response, video_id):
462
- """Handle those HTTP errors with style! 🛠️"""
463
- try:
464
- response.raise_for_status()
465
- return response
466
- except requests.exceptions.HTTPError as error:
467
- raise YouTubeRequestFailedError(video_id, error)
468
-
469
-
470
- if __name__ == "__main__":
471
- # Let's get this party started! 🎉
472
- from rich import print
473
- video_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
474
- transcript = YTTranscriber.get_transcript(video_url, languages=None)
475
- print("Here's what we got! 🔥")
476
- print(transcript)
1
+ """
2
+ >>> from webscout import YTTranscriber
3
+ >>> transcript = YTTranscriber.get_transcript('https://www.youtube.com/watch?v=dQw4w9WgXcQ')
4
+ >>> print(transcript)
5
+ {'text': 'Never gonna give you up', 'start': 0.0, 'duration': 4.5}
6
+
7
+ """
8
+
9
+ import html
10
+ import http.cookiejar as cookiejar
11
+ import re
12
+ from concurrent.futures import ThreadPoolExecutor
13
+ from functools import lru_cache
14
+ from typing import Dict, List, Optional, Union
15
+ from xml.etree import ElementTree
16
+
17
+ from curl_cffi.requests import Session
18
+
19
+ from webscout.exceptions import (
20
+ CookiePathInvalidError,
21
+ FailedToCreateConsentCookieError,
22
+ InvalidVideoIdError,
23
+ NoTranscriptFoundError,
24
+ NotTranslatableError,
25
+ TooManyRequestsError,
26
+ TranscriptRetrievalError,
27
+ TranscriptsDisabledError,
28
+ TranslationLanguageNotAvailableError,
29
+ VideoUnavailableError,
30
+ YouTubeRequestFailedError,
31
+ )
32
+ from webscout.litagent import LitAgent
33
+
34
+ # YouTube API settings
35
+ WATCH_URL = 'https://www.youtube.com/watch?v={video_id}'
36
+ INNERTUBE_API_URL = "https://www.youtube.com/youtubei/v1/player?key={api_key}"
37
+ INNERTUBE_CONTEXT = {"client": {"clientName": "ANDROID", "clientVersion": "20.10.38"}}
38
+ MAX_WORKERS = 4
39
+
40
+
41
+ class YTTranscriber:
42
+ """Transcribe YouTube videos with style! 🎤
43
+
44
+ >>> transcript = YTTranscriber.get_transcript('https://youtu.be/dQw4w9WgXcQ')
45
+ >>> print(transcript[0]['text'])
46
+ 'Never gonna give you up'
47
+ """
48
+
49
+ _session = None
50
+ _executor = ThreadPoolExecutor(max_workers=MAX_WORKERS)
51
+
52
+ @classmethod
53
+ def _get_session(cls):
54
+ if cls._session is None:
55
+ cls._session = Session()
56
+ cls._session.headers.update({
57
+ 'User-Agent': LitAgent().random()
58
+ })
59
+ return cls._session
60
+
61
+ @classmethod
62
+ @lru_cache(maxsize=100)
63
+ def get_transcript(cls, video_url: str, languages: Optional[str] = 'en',
64
+ proxies: Optional[Dict[str, str]] = None,
65
+ cookies: Optional[str] = None,
66
+ preserve_formatting: bool = False) -> List[Dict[str, Union[str, float]]]:
67
+ """
68
+ Retrieves the transcript for a given YouTube video URL.
69
+
70
+ Args:
71
+ video_url (str): YouTube video URL (supports various formats).
72
+ languages (str, optional): Language code for the transcript.
73
+ If None, fetches the first available transcript.
74
+ Defaults to 'en'.
75
+ proxies (Dict[str, str], optional): Proxies to use for the request. Defaults to None.
76
+ cookies (str, optional): Path to the cookie file. Defaults to None.
77
+ preserve_formatting (bool, optional): Whether to preserve formatting tags. Defaults to False.
78
+
79
+ Returns:
80
+ List[Dict[str, Union[str, float]]]: A list of dictionaries, each containing:
81
+ - 'text': The transcribed text.
82
+ - 'start': The start time of the text segment (in seconds).
83
+ - 'duration': The duration of the text segment (in seconds).
84
+
85
+ Raises:
86
+ TranscriptRetrievalError: If there's an error retrieving the transcript.
87
+ """
88
+ video_id = cls._extract_video_id(video_url)
89
+ http_client = cls._get_session()
90
+
91
+ if proxies:
92
+ http_client.proxies.update(proxies)
93
+
94
+ if cookies:
95
+ cls._load_cookies(cookies, video_id)
96
+
97
+ transcript_list = TranscriptListFetcher(http_client).fetch(video_id)
98
+ language_codes = [languages] if languages else None
99
+ transcript = transcript_list.find_transcript(language_codes)
100
+
101
+ return transcript.fetch(preserve_formatting)
102
+
103
+ @staticmethod
104
+ def _extract_video_id(video_url: str) -> str:
105
+ """Extracts the video ID from different YouTube URL formats."""
106
+ patterns = [
107
+ r'(?:v=|\/)([0-9A-Za-z_-]{11}).*',
108
+ r'youtu\.be\/([0-9A-Za-z_-]{11})',
109
+ r'youtube\.com\/embed\/([0-9A-Za-z_-]{11})',
110
+ r'youtube\.com\/shorts\/([0-9A-Za-z_-]{11})'
111
+ ]
112
+
113
+ for pattern in patterns:
114
+ match = re.search(pattern, video_url)
115
+ if match:
116
+ return match.group(1)
117
+
118
+ if re.match(r'^[0-9A-Za-z_-]{11}$', video_url):
119
+ return video_url
120
+
121
+ raise InvalidVideoIdError(video_url)
122
+
123
+ @staticmethod
124
+ def _load_cookies(cookies: str, video_id: str) -> cookiejar.MozillaCookieJar:
125
+ """Loads cookies from a file."""
126
+ try:
127
+ cj = cookiejar.MozillaCookieJar(cookies)
128
+ cj.load()
129
+ return cj
130
+ except (cookiejar.LoadError, FileNotFoundError):
131
+ raise CookiePathInvalidError(video_id)
132
+
133
+
134
+ class TranscriptListFetcher:
135
+ """Fetches the list of transcripts for a YouTube video using InnerTube API."""
136
+
137
+ def __init__(self, http_client: Session):
138
+ """Initializes TranscriptListFetcher."""
139
+ self._http_client = http_client
140
+
141
+ def fetch(self, video_id: str):
142
+ """Fetches and returns a TranscriptList."""
143
+ captions_json = self._fetch_captions_json(video_id)
144
+ return TranscriptList.build(
145
+ self._http_client,
146
+ video_id,
147
+ captions_json,
148
+ )
149
+
150
+ def _fetch_captions_json(self, video_id: str) -> dict:
151
+ """Fetches captions JSON using InnerTube API."""
152
+ # First get the HTML to extract the API key
153
+ video_html = self._fetch_video_html(video_id)
154
+ api_key = self._extract_innertube_api_key(video_html, video_id)
155
+
156
+ # Use InnerTube API to get video data
157
+ innertube_data = self._fetch_innertube_data(video_id, api_key)
158
+ return self._extract_captions_from_innertube(innertube_data, video_id)
159
+
160
+ def _extract_innertube_api_key(self, html_content: str, video_id: str) -> str:
161
+ """Extracts the InnerTube API key from HTML."""
162
+ pattern = r'"INNERTUBE_API_KEY":\s*"([a-zA-Z0-9_-]+)"'
163
+ match = re.search(pattern, html_content)
164
+ if match and len(match.groups()) == 1:
165
+ return match.group(1)
166
+
167
+ # Check for IP block
168
+ if 'class="g-recaptcha"' in html_content:
169
+ raise TooManyRequestsError(video_id)
170
+
171
+ raise TranscriptRetrievalError(video_id, "Could not extract InnerTube API key")
172
+
173
+ def _fetch_innertube_data(self, video_id: str, api_key: str) -> dict:
174
+ """Fetches video data from InnerTube API."""
175
+ response = self._http_client.post(
176
+ INNERTUBE_API_URL.format(api_key=api_key),
177
+ json={
178
+ "context": INNERTUBE_CONTEXT,
179
+ "videoId": video_id,
180
+ },
181
+ )
182
+ return _raise_http_errors(response, video_id).json()
183
+
184
+ def _extract_captions_from_innertube(self, innertube_data: dict, video_id: str) -> dict:
185
+ """Extracts captions JSON from InnerTube API response."""
186
+ # Check playability status
187
+ playability_status = innertube_data.get("playabilityStatus", {})
188
+ status = playability_status.get("status")
189
+
190
+ if status == "ERROR":
191
+ reason = playability_status.get("reason", "Unknown error")
192
+ if "unavailable" in reason.lower():
193
+ raise VideoUnavailableError(video_id)
194
+ raise TranscriptRetrievalError(video_id, reason)
195
+
196
+ if status == "LOGIN_REQUIRED":
197
+ reason = playability_status.get("reason", "")
198
+ if "bot" in reason.lower():
199
+ raise TooManyRequestsError(video_id)
200
+ if "age" in reason.lower() or "inappropriate" in reason.lower():
201
+ raise TranscriptRetrievalError(video_id, "Video is age-restricted")
202
+ raise TranscriptRetrievalError(video_id, reason or "Login required")
203
+
204
+ # Get captions
205
+ captions = innertube_data.get("captions", {})
206
+ captions_json = captions.get("playerCaptionsTracklistRenderer")
207
+
208
+ if captions_json is None or "captionTracks" not in captions_json:
209
+ raise TranscriptsDisabledError(video_id)
210
+
211
+ return captions_json
212
+
213
+ def _create_consent_cookie(self, html_content, video_id):
214
+ match = re.search('name="v" value="(.*?)"', html_content)
215
+ if match is None:
216
+ raise FailedToCreateConsentCookieError(video_id)
217
+ self._http_client.cookies.set('CONSENT', 'YES+' + match.group(1), domain='.youtube.com')
218
+
219
+ def _fetch_video_html(self, video_id):
220
+ html_content = self._fetch_html(video_id)
221
+ if 'action="https://consent.youtube.com/s"' in html_content:
222
+ self._create_consent_cookie(html_content, video_id)
223
+ html_content = self._fetch_html(video_id)
224
+ if 'action="https://consent.youtube.com/s"' in html_content:
225
+ raise FailedToCreateConsentCookieError(video_id)
226
+ return html_content
227
+
228
+ def _fetch_html(self, video_id):
229
+ response = self._http_client.get(WATCH_URL.format(video_id=video_id), headers={'Accept-Language': 'en-US'})
230
+ return html.unescape(_raise_http_errors(response, video_id).text)
231
+
232
+
233
+ class TranscriptList:
234
+ """
235
+ >>> transcript_list = TranscriptList.build(http_client, video_id, captions_json)
236
+ >>> transcript = transcript_list.find_transcript(['en'])
237
+ >>> print(transcript)
238
+ en ("English")[TRANSLATABLE]
239
+ """
240
+
241
+ def __init__(self, video_id, manually_created_transcripts, generated_transcripts, translation_languages):
242
+ """Init that transcript list with all the good stuff! 💯"""
243
+ self.video_id = video_id
244
+ self._manually_created_transcripts = manually_created_transcripts
245
+ self._generated_transcripts = generated_transcripts
246
+ self._translation_languages = translation_languages
247
+
248
+ @staticmethod
249
+ def build(http_client, video_id, captions_json):
250
+ """
251
+ Factory method for TranscriptList.
252
+
253
+ :param http_client: http client which is used to make the transcript retrieving http calls
254
+ :type http_client: Session
255
+ :param video_id: the id of the video this TranscriptList is for
256
+ :type video_id: str
257
+ :param captions_json: the JSON parsed from the YouTube API
258
+ :type captions_json: dict
259
+ :return: the created TranscriptList
260
+ :rtype TranscriptList:
261
+ """
262
+ # Handle both old format (simpleText) and new format (runs)
263
+ translation_languages = []
264
+ for tl in captions_json.get('translationLanguages', []):
265
+ lang_name = tl.get('languageName', {})
266
+ if isinstance(lang_name, dict):
267
+ # Try new format first (runs), then old format (simpleText)
268
+ if 'runs' in lang_name:
269
+ name = lang_name['runs'][0]['text']
270
+ elif 'simpleText' in lang_name:
271
+ name = lang_name['simpleText']
272
+ else:
273
+ name = tl.get('languageCode', 'Unknown')
274
+ else:
275
+ name = str(lang_name)
276
+ translation_languages.append({
277
+ 'language': name,
278
+ 'language_code': tl['languageCode'],
279
+ })
280
+
281
+ manually_created_transcripts = {}
282
+ generated_transcripts = {}
283
+
284
+ for caption in captions_json['captionTracks']:
285
+ if caption.get('kind', '') == 'asr':
286
+ transcript_dict = generated_transcripts
287
+ else:
288
+ transcript_dict = manually_created_transcripts
289
+
290
+ # Extract caption name - handle both formats
291
+ caption_name = caption.get('name', {})
292
+ if isinstance(caption_name, dict):
293
+ if 'runs' in caption_name:
294
+ name = caption_name['runs'][0]['text']
295
+ elif 'simpleText' in caption_name:
296
+ name = caption_name['simpleText']
297
+ else:
298
+ name = caption.get('languageCode', 'Unknown')
299
+ else:
300
+ name = str(caption_name) if caption_name else caption.get('languageCode', 'Unknown')
301
+
302
+ # Remove &fmt=srv3 from URL as it can cause issues
303
+ base_url = caption['baseUrl'].replace("&fmt=srv3", "")
304
+
305
+ transcript_dict[caption['languageCode']] = Transcript(
306
+ http_client,
307
+ video_id,
308
+ base_url,
309
+ name,
310
+ caption['languageCode'],
311
+ caption.get('kind', '') == 'asr',
312
+ translation_languages if caption.get('isTranslatable', False) else [],
313
+ )
314
+
315
+ return TranscriptList(
316
+ video_id,
317
+ manually_created_transcripts,
318
+ generated_transcripts,
319
+ translation_languages,
320
+ )
321
+
322
+ def __iter__(self):
323
+ return iter(list(self._manually_created_transcripts.values()) + list(self._generated_transcripts.values()))
324
+
325
+ def find_transcript(self, language_codes):
326
+ """
327
+ Finds a transcript for a given language code. If no language is provided, it will
328
+ return the first available transcript.
329
+
330
+ :param language_codes: A list of language codes in a descending priority.
331
+ :type languages: list[str]
332
+ :return: the found Transcript
333
+ :rtype Transcript:
334
+ :raises: NoTranscriptFound
335
+ """
336
+ if not language_codes:
337
+ language_codes = ['any']
338
+
339
+ if 'any' in language_codes:
340
+ for transcript in self:
341
+ return transcript
342
+ return self._find_transcript(language_codes, [self._manually_created_transcripts, self._generated_transcripts])
343
+
344
+ def find_generated_transcript(self, language_codes):
345
+ """
346
+ Finds an automatically generated transcript for a given language code.
347
+
348
+ :param language_codes: A list of language codes in a descending priority. For example, if this is set to
349
+ ['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if
350
+ it fails to do so.
351
+ :type languages: list[str]
352
+ :return: the found Transcript
353
+ :rtype Transcript:
354
+ :raises: NoTranscriptFound
355
+ """
356
+ if not language_codes:
357
+ language_codes = ['any']
358
+
359
+ if 'any' in language_codes:
360
+ for transcript in self:
361
+ if transcript.is_generated:
362
+ return transcript
363
+ return self._find_transcript(language_codes, [self._generated_transcripts])
364
+
365
+ def find_manually_created_transcript(self, language_codes):
366
+ """
367
+ Finds a manually created transcript for a given language code.
368
+
369
+ :param language_codes: A list of language codes in a descending priority. For example, if this is set to
370
+ ['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if
371
+ it fails to do so.
372
+ :type languages: list[str]
373
+ :return: the found Transcript
374
+ :rtype Transcript:
375
+ :raises: NoTranscriptFound
376
+ """
377
+ if not language_codes:
378
+ language_codes = ['any']
379
+ return self._find_transcript(language_codes, [self._manually_created_transcripts])
380
+
381
+ def _find_transcript(self, language_codes, transcript_dicts):
382
+ for language_code in language_codes:
383
+ for transcript_dict in transcript_dicts:
384
+ if language_code in transcript_dict:
385
+ return transcript_dict[language_code]
386
+
387
+ raise NoTranscriptFoundError(
388
+ self.video_id,
389
+ language_codes,
390
+ self
391
+ )
392
+
393
+ def __str__(self):
394
+ return (
395
+ 'For this video ({video_id}) transcripts are available in the following languages:\n\n'
396
+ '(MANUALLY CREATED)\n'
397
+ '{available_manually_created_transcript_languages}\n\n'
398
+ '(GENERATED)\n'
399
+ '{available_generated_transcripts}\n\n'
400
+ '(TRANSLATION LANGUAGES)\n'
401
+ '{available_translation_languages}'
402
+ ).format(
403
+ video_id=self.video_id,
404
+ available_manually_created_transcript_languages=self._get_language_description(
405
+ str(transcript) for transcript in self._manually_created_transcripts.values()
406
+ ),
407
+ available_generated_transcripts=self._get_language_description(
408
+ str(transcript) for transcript in self._generated_transcripts.values()
409
+ ),
410
+ available_translation_languages=self._get_language_description(
411
+ '{language_code} ("{language}")'.format(
412
+ language=translation_language['language'],
413
+ language_code=translation_language['language_code'],
414
+ ) for translation_language in self._translation_languages
415
+ )
416
+ )
417
+
418
+ def _get_language_description(self, transcript_strings):
419
+ description = '\n'.join(' - {transcript}'.format(transcript=transcript) for transcript in transcript_strings)
420
+ return description if description else 'None'
421
+
422
+
423
+ class Transcript:
424
+ """Your personal transcript handler! 🎭
425
+
426
+ >>> transcript = transcript_list.find_transcript(['en'])
427
+ >>> print(transcript.language)
428
+ 'English'
429
+ >>> if transcript.is_translatable:
430
+ ... es_transcript = transcript.translate('es')
431
+ ... print(es_transcript.language)
432
+ 'Spanish'
433
+ """
434
+
435
+ def __init__(self, http_client, video_id, url, language, language_code, is_generated, translation_languages):
436
+ """Initialize with all the goodies! 🎁"""
437
+ self._http_client = http_client
438
+ self.video_id = video_id
439
+ self._url = url
440
+ self.language = language
441
+ self.language_code = language_code
442
+ self.is_generated = is_generated
443
+ self.translation_languages = translation_languages
444
+ self._translation_languages_dict = {
445
+ translation_language['language_code']: translation_language['language']
446
+ for translation_language in translation_languages
447
+ }
448
+
449
+ def fetch(self, preserve_formatting=False):
450
+ """Get that transcript data! 🎯
451
+
452
+ Args:
453
+ preserve_formatting (bool): Keep HTML formatting? Default is nah fam.
454
+
455
+ Returns:
456
+ list: That sweet transcript data with text, start time, and duration! 📝
457
+ """
458
+ response = self._http_client.get(self._url, headers={'Accept-Language': 'en-US'})
459
+ return TranscriptParser(preserve_formatting=preserve_formatting).parse(
460
+ _raise_http_errors(response, self.video_id).text,
461
+ )
462
+
463
+ def __str__(self):
464
+ """String representation looking clean! 💅"""
465
+ return '{language_code} ("{language}"){translation_description}'.format(
466
+ language=self.language,
467
+ language_code=self.language_code,
468
+ translation_description='[TRANSLATABLE]' if self.is_translatable else ''
469
+ )
470
+
471
+ @property
472
+ def is_translatable(self):
473
+ """Can we translate this? 🌍"""
474
+ return len(self.translation_languages) > 0
475
+
476
+ def translate(self, language_code):
477
+ """Translate to another language! 🌎
478
+
479
+ Args:
480
+ language_code (str): Which language you want fam?
481
+
482
+ Returns:
483
+ Transcript: A fresh transcript in your requested language! 🔄
484
+
485
+ Raises:
486
+ NotTranslatableError: If we can't translate this one 😢
487
+ TranslationLanguageNotAvailableError: If that language isn't available 🚫
488
+ """
489
+ if not self.is_translatable:
490
+ raise NotTranslatableError(self.video_id)
491
+
492
+ if language_code not in self._translation_languages_dict:
493
+ raise TranslationLanguageNotAvailableError(self.video_id)
494
+
495
+ return Transcript(
496
+ self._http_client,
497
+ self.video_id,
498
+ '{url}&tlang={language_code}'.format(url=self._url, language_code=language_code),
499
+ self._translation_languages_dict[language_code],
500
+ language_code,
501
+ True,
502
+ [],
503
+ )
504
+
505
+
506
+ class TranscriptParser:
507
+ """Parsing those transcripts like a pro! 🎯
508
+
509
+ >>> parser = TranscriptParser(preserve_formatting=True)
510
+ >>> data = parser.parse(xml_data)
511
+ >>> print(data[0])
512
+ {'text': 'Never gonna give you up', 'start': 0.0, 'duration': 4.5}
513
+ """
514
+
515
+ _FORMATTING_TAGS = [
516
+ 'strong', # For that extra emphasis 💪
517
+ 'em', # When you need that italic swag 🎨
518
+ 'b', # Bold and beautiful 💯
519
+ 'i', # More italic vibes ✨
520
+ 'mark', # Highlight that text 🌟
521
+ 'small', # Keep it lowkey 🤫
522
+ 'del', # Strike it out ⚡
523
+ 'ins', # Insert new stuff 🆕
524
+ 'sub', # Subscript gang 📉
525
+ 'sup', # Superscript squad 📈
526
+ ]
527
+
528
+ def __init__(self, preserve_formatting=False):
529
+ """Get ready to parse with style! 🎨"""
530
+ self._html_regex = self._get_html_regex(preserve_formatting)
531
+
532
+ def _get_html_regex(self, preserve_formatting):
533
+ """Get that regex pattern ready! 🎯"""
534
+ if preserve_formatting:
535
+ formats_regex = '|'.join(self._FORMATTING_TAGS)
536
+ formats_regex = r'<\/?(?!\/?(' + formats_regex + r')\b).*?\b>'
537
+ html_regex = re.compile(formats_regex, re.IGNORECASE)
538
+ else:
539
+ html_regex = re.compile(r'<[^>]*>', re.IGNORECASE)
540
+ return html_regex
541
+
542
+ def parse(self, plain_data):
543
+ """Parse that XML data into something beautiful! ✨"""
544
+ try:
545
+ return [
546
+ {
547
+ 'text': re.sub(self._html_regex, '', html.unescape(xml_element.text or '')),
548
+ 'start': float(xml_element.attrib['start']),
549
+ 'duration': float(xml_element.attrib.get('dur', '0.0')),
550
+ }
551
+ for xml_element in ElementTree.fromstring(plain_data)
552
+ if xml_element.text is not None
553
+ ]
554
+ except ElementTree.ParseError:
555
+ # If XML parsing fails, try to extract text manually
556
+ return self._fallback_parse(plain_data)
557
+
558
+ def _fallback_parse(self, plain_data):
559
+ """Fallback parsing method if XML parsing fails."""
560
+ results = []
561
+ # Try regex pattern matching
562
+ pattern = r'<text start="([^"]+)" dur="([^"]+)"[^>]*>([^<]*)</text>'
563
+ matches = re.findall(pattern, plain_data, re.DOTALL)
564
+
565
+ for start, dur, text in matches:
566
+ text = html.unescape(text)
567
+ text = re.sub(self._html_regex, '', text)
568
+ if text.strip():
569
+ results.append({
570
+ 'text': text.strip(),
571
+ 'start': float(start),
572
+ 'duration': float(dur),
573
+ })
574
+
575
+ return results
576
+
577
+
578
+ def _raise_http_errors(response, video_id):
579
+ """Handle those HTTP errors with style! 🛠️"""
580
+ try:
581
+ if response.status_code == 429:
582
+ raise TooManyRequestsError(video_id)
583
+ response.raise_for_status()
584
+ return response
585
+ except Exception as error:
586
+ raise YouTubeRequestFailedError(video_id, error)
587
+
588
+
589
+ if __name__ == "__main__":
590
+ # Let's get this party started! 🎉
591
+ from rich import print
592
+ video_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
593
+ transcript = YTTranscriber.get_transcript(video_url, languages=None)
594
+ print("Here's what we got! 🔥")
595
+ print(transcript[:5])