webscout 8.2.2__py3-none-any.whl → 2026.1.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (483) hide show
  1. webscout/AIauto.py +524 -143
  2. webscout/AIbase.py +247 -123
  3. webscout/AIutel.py +68 -132
  4. webscout/Bard.py +1072 -535
  5. webscout/Extra/GitToolkit/__init__.py +2 -2
  6. webscout/Extra/GitToolkit/gitapi/__init__.py +20 -12
  7. webscout/Extra/GitToolkit/gitapi/gist.py +142 -0
  8. webscout/Extra/GitToolkit/gitapi/organization.py +91 -0
  9. webscout/Extra/GitToolkit/gitapi/repository.py +308 -195
  10. webscout/Extra/GitToolkit/gitapi/search.py +162 -0
  11. webscout/Extra/GitToolkit/gitapi/trending.py +236 -0
  12. webscout/Extra/GitToolkit/gitapi/user.py +128 -96
  13. webscout/Extra/GitToolkit/gitapi/utils.py +82 -62
  14. webscout/Extra/YTToolkit/README.md +443 -0
  15. webscout/Extra/YTToolkit/YTdownloader.py +953 -957
  16. webscout/Extra/YTToolkit/__init__.py +3 -3
  17. webscout/Extra/YTToolkit/transcriber.py +595 -476
  18. webscout/Extra/YTToolkit/ytapi/README.md +230 -0
  19. webscout/Extra/YTToolkit/ytapi/__init__.py +22 -6
  20. webscout/Extra/YTToolkit/ytapi/captions.py +190 -0
  21. webscout/Extra/YTToolkit/ytapi/channel.py +302 -307
  22. webscout/Extra/YTToolkit/ytapi/errors.py +13 -13
  23. webscout/Extra/YTToolkit/ytapi/extras.py +178 -45
  24. webscout/Extra/YTToolkit/ytapi/hashtag.py +120 -0
  25. webscout/Extra/YTToolkit/ytapi/https.py +89 -88
  26. webscout/Extra/YTToolkit/ytapi/patterns.py +61 -61
  27. webscout/Extra/YTToolkit/ytapi/playlist.py +59 -59
  28. webscout/Extra/YTToolkit/ytapi/pool.py +8 -8
  29. webscout/Extra/YTToolkit/ytapi/query.py +143 -40
  30. webscout/Extra/YTToolkit/ytapi/shorts.py +122 -0
  31. webscout/Extra/YTToolkit/ytapi/stream.py +68 -63
  32. webscout/Extra/YTToolkit/ytapi/suggestions.py +97 -0
  33. webscout/Extra/YTToolkit/ytapi/utils.py +66 -62
  34. webscout/Extra/YTToolkit/ytapi/video.py +189 -18
  35. webscout/Extra/__init__.py +2 -3
  36. webscout/Extra/gguf.py +1298 -682
  37. webscout/Extra/tempmail/README.md +488 -0
  38. webscout/Extra/tempmail/__init__.py +28 -28
  39. webscout/Extra/tempmail/async_utils.py +143 -141
  40. webscout/Extra/tempmail/base.py +172 -161
  41. webscout/Extra/tempmail/cli.py +191 -187
  42. webscout/Extra/tempmail/emailnator.py +88 -84
  43. webscout/Extra/tempmail/mail_tm.py +378 -361
  44. webscout/Extra/tempmail/temp_mail_io.py +304 -292
  45. webscout/Extra/weather.py +196 -194
  46. webscout/Extra/weather_ascii.py +17 -15
  47. webscout/Provider/AISEARCH/PERPLEXED_search.py +175 -0
  48. webscout/Provider/AISEARCH/Perplexity.py +237 -304
  49. webscout/Provider/AISEARCH/README.md +106 -0
  50. webscout/Provider/AISEARCH/__init__.py +16 -10
  51. webscout/Provider/AISEARCH/brave_search.py +298 -0
  52. webscout/Provider/AISEARCH/iask_search.py +130 -209
  53. webscout/Provider/AISEARCH/monica_search.py +200 -246
  54. webscout/Provider/AISEARCH/webpilotai_search.py +242 -281
  55. webscout/Provider/Algion.py +413 -0
  56. webscout/Provider/Andi.py +74 -69
  57. webscout/Provider/Apriel.py +313 -0
  58. webscout/Provider/Ayle.py +323 -0
  59. webscout/Provider/ChatSandbox.py +329 -0
  60. webscout/Provider/ClaudeOnline.py +365 -0
  61. webscout/Provider/Cohere.py +232 -208
  62. webscout/Provider/DeepAI.py +367 -0
  63. webscout/Provider/Deepinfra.py +343 -173
  64. webscout/Provider/EssentialAI.py +217 -0
  65. webscout/Provider/ExaAI.py +274 -261
  66. webscout/Provider/Gemini.py +60 -54
  67. webscout/Provider/GithubChat.py +385 -367
  68. webscout/Provider/Gradient.py +286 -0
  69. webscout/Provider/Groq.py +556 -670
  70. webscout/Provider/HadadXYZ.py +323 -0
  71. webscout/Provider/HeckAI.py +392 -233
  72. webscout/Provider/HuggingFace.py +387 -0
  73. webscout/Provider/IBM.py +340 -0
  74. webscout/Provider/Jadve.py +317 -266
  75. webscout/Provider/K2Think.py +306 -0
  76. webscout/Provider/Koboldai.py +221 -381
  77. webscout/Provider/Netwrck.py +273 -228
  78. webscout/Provider/Nvidia.py +310 -0
  79. webscout/Provider/OPENAI/DeepAI.py +489 -0
  80. webscout/Provider/OPENAI/K2Think.py +423 -0
  81. webscout/Provider/OPENAI/PI.py +463 -0
  82. webscout/Provider/OPENAI/README.md +890 -0
  83. webscout/Provider/OPENAI/TogetherAI.py +405 -0
  84. webscout/Provider/OPENAI/TwoAI.py +255 -0
  85. webscout/Provider/OPENAI/__init__.py +148 -25
  86. webscout/Provider/OPENAI/ai4chat.py +348 -0
  87. webscout/Provider/OPENAI/akashgpt.py +436 -0
  88. webscout/Provider/OPENAI/algion.py +303 -0
  89. webscout/Provider/OPENAI/ayle.py +365 -0
  90. webscout/Provider/OPENAI/base.py +253 -46
  91. webscout/Provider/OPENAI/cerebras.py +296 -0
  92. webscout/Provider/OPENAI/chatgpt.py +514 -193
  93. webscout/Provider/OPENAI/chatsandbox.py +233 -0
  94. webscout/Provider/OPENAI/deepinfra.py +403 -272
  95. webscout/Provider/OPENAI/e2b.py +2370 -1350
  96. webscout/Provider/OPENAI/elmo.py +278 -0
  97. webscout/Provider/OPENAI/exaai.py +186 -138
  98. webscout/Provider/OPENAI/freeassist.py +446 -0
  99. webscout/Provider/OPENAI/gradient.py +448 -0
  100. webscout/Provider/OPENAI/groq.py +380 -0
  101. webscout/Provider/OPENAI/hadadxyz.py +292 -0
  102. webscout/Provider/OPENAI/heckai.py +100 -104
  103. webscout/Provider/OPENAI/huggingface.py +321 -0
  104. webscout/Provider/OPENAI/ibm.py +425 -0
  105. webscout/Provider/OPENAI/llmchat.py +253 -0
  106. webscout/Provider/OPENAI/llmchatco.py +378 -327
  107. webscout/Provider/OPENAI/meta.py +541 -0
  108. webscout/Provider/OPENAI/netwrck.py +110 -84
  109. webscout/Provider/OPENAI/nvidia.py +317 -0
  110. webscout/Provider/OPENAI/oivscode.py +348 -0
  111. webscout/Provider/OPENAI/openrouter.py +328 -0
  112. webscout/Provider/OPENAI/pydantic_imports.py +1 -0
  113. webscout/Provider/OPENAI/sambanova.py +397 -0
  114. webscout/Provider/OPENAI/sonus.py +126 -115
  115. webscout/Provider/OPENAI/textpollinations.py +218 -133
  116. webscout/Provider/OPENAI/toolbaz.py +136 -166
  117. webscout/Provider/OPENAI/typefully.py +419 -0
  118. webscout/Provider/OPENAI/typliai.py +279 -0
  119. webscout/Provider/OPENAI/utils.py +314 -211
  120. webscout/Provider/OPENAI/wisecat.py +103 -125
  121. webscout/Provider/OPENAI/writecream.py +185 -156
  122. webscout/Provider/OPENAI/x0gpt.py +227 -136
  123. webscout/Provider/OPENAI/zenmux.py +380 -0
  124. webscout/Provider/OpenRouter.py +386 -0
  125. webscout/Provider/Openai.py +337 -496
  126. webscout/Provider/PI.py +443 -344
  127. webscout/Provider/QwenLM.py +346 -254
  128. webscout/Provider/STT/__init__.py +28 -0
  129. webscout/Provider/STT/base.py +303 -0
  130. webscout/Provider/STT/elevenlabs.py +264 -0
  131. webscout/Provider/Sambanova.py +317 -0
  132. webscout/Provider/TTI/README.md +69 -0
  133. webscout/Provider/TTI/__init__.py +37 -12
  134. webscout/Provider/TTI/base.py +147 -0
  135. webscout/Provider/TTI/claudeonline.py +393 -0
  136. webscout/Provider/TTI/magicstudio.py +292 -0
  137. webscout/Provider/TTI/miragic.py +180 -0
  138. webscout/Provider/TTI/pollinations.py +331 -0
  139. webscout/Provider/TTI/together.py +334 -0
  140. webscout/Provider/TTI/utils.py +14 -0
  141. webscout/Provider/TTS/README.md +186 -0
  142. webscout/Provider/TTS/__init__.py +43 -7
  143. webscout/Provider/TTS/base.py +523 -0
  144. webscout/Provider/TTS/deepgram.py +286 -156
  145. webscout/Provider/TTS/elevenlabs.py +189 -111
  146. webscout/Provider/TTS/freetts.py +218 -0
  147. webscout/Provider/TTS/murfai.py +288 -113
  148. webscout/Provider/TTS/openai_fm.py +364 -0
  149. webscout/Provider/TTS/parler.py +203 -111
  150. webscout/Provider/TTS/qwen.py +334 -0
  151. webscout/Provider/TTS/sherpa.py +286 -0
  152. webscout/Provider/TTS/speechma.py +693 -180
  153. webscout/Provider/TTS/streamElements.py +275 -333
  154. webscout/Provider/TTS/utils.py +280 -280
  155. webscout/Provider/TextPollinationsAI.py +221 -121
  156. webscout/Provider/TogetherAI.py +450 -0
  157. webscout/Provider/TwoAI.py +309 -199
  158. webscout/Provider/TypliAI.py +311 -0
  159. webscout/Provider/UNFINISHED/ChatHub.py +219 -0
  160. webscout/Provider/{OPENAI/glider.py → UNFINISHED/ChutesAI.py} +160 -145
  161. webscout/Provider/UNFINISHED/GizAI.py +300 -0
  162. webscout/Provider/UNFINISHED/Marcus.py +218 -0
  163. webscout/Provider/UNFINISHED/Qodo.py +481 -0
  164. webscout/Provider/UNFINISHED/XenAI.py +330 -0
  165. webscout/Provider/{Youchat.py → UNFINISHED/Youchat.py} +64 -47
  166. webscout/Provider/UNFINISHED/aihumanizer.py +41 -0
  167. webscout/Provider/UNFINISHED/grammerchecker.py +37 -0
  168. webscout/Provider/UNFINISHED/liner.py +342 -0
  169. webscout/Provider/UNFINISHED/liner_api_request.py +246 -0
  170. webscout/Provider/UNFINISHED/samurai.py +231 -0
  171. webscout/Provider/WiseCat.py +256 -196
  172. webscout/Provider/WrDoChat.py +390 -0
  173. webscout/Provider/__init__.py +115 -198
  174. webscout/Provider/ai4chat.py +181 -202
  175. webscout/Provider/akashgpt.py +330 -342
  176. webscout/Provider/cerebras.py +397 -242
  177. webscout/Provider/cleeai.py +236 -213
  178. webscout/Provider/elmo.py +291 -234
  179. webscout/Provider/geminiapi.py +343 -208
  180. webscout/Provider/julius.py +245 -223
  181. webscout/Provider/learnfastai.py +333 -266
  182. webscout/Provider/llama3mitril.py +230 -180
  183. webscout/Provider/llmchat.py +308 -213
  184. webscout/Provider/llmchatco.py +321 -311
  185. webscout/Provider/meta.py +996 -794
  186. webscout/Provider/oivscode.py +332 -0
  187. webscout/Provider/searchchat.py +316 -293
  188. webscout/Provider/sonus.py +264 -208
  189. webscout/Provider/toolbaz.py +359 -320
  190. webscout/Provider/turboseek.py +332 -219
  191. webscout/Provider/typefully.py +262 -280
  192. webscout/Provider/x0gpt.py +332 -256
  193. webscout/__init__.py +31 -38
  194. webscout/__main__.py +5 -5
  195. webscout/cli.py +585 -293
  196. webscout/client.py +1497 -0
  197. webscout/conversation.py +140 -565
  198. webscout/exceptions.py +383 -339
  199. webscout/litagent/__init__.py +29 -29
  200. webscout/litagent/agent.py +492 -455
  201. webscout/litagent/constants.py +60 -60
  202. webscout/models.py +505 -181
  203. webscout/optimizers.py +32 -378
  204. webscout/prompt_manager.py +376 -274
  205. webscout/sanitize.py +1514 -0
  206. webscout/scout/README.md +452 -0
  207. webscout/scout/__init__.py +8 -8
  208. webscout/scout/core/__init__.py +7 -7
  209. webscout/scout/core/crawler.py +330 -140
  210. webscout/scout/core/scout.py +800 -568
  211. webscout/scout/core/search_result.py +51 -96
  212. webscout/scout/core/text_analyzer.py +64 -63
  213. webscout/scout/core/text_utils.py +412 -277
  214. webscout/scout/core/web_analyzer.py +54 -52
  215. webscout/scout/element.py +872 -460
  216. webscout/scout/parsers/__init__.py +70 -69
  217. webscout/scout/parsers/html5lib_parser.py +182 -172
  218. webscout/scout/parsers/html_parser.py +238 -236
  219. webscout/scout/parsers/lxml_parser.py +203 -178
  220. webscout/scout/utils.py +38 -37
  221. webscout/search/__init__.py +47 -0
  222. webscout/search/base.py +201 -0
  223. webscout/search/bing_main.py +45 -0
  224. webscout/search/brave_main.py +92 -0
  225. webscout/search/duckduckgo_main.py +57 -0
  226. webscout/search/engines/__init__.py +127 -0
  227. webscout/search/engines/bing/__init__.py +15 -0
  228. webscout/search/engines/bing/base.py +35 -0
  229. webscout/search/engines/bing/images.py +114 -0
  230. webscout/search/engines/bing/news.py +96 -0
  231. webscout/search/engines/bing/suggestions.py +36 -0
  232. webscout/search/engines/bing/text.py +109 -0
  233. webscout/search/engines/brave/__init__.py +19 -0
  234. webscout/search/engines/brave/base.py +47 -0
  235. webscout/search/engines/brave/images.py +213 -0
  236. webscout/search/engines/brave/news.py +353 -0
  237. webscout/search/engines/brave/suggestions.py +318 -0
  238. webscout/search/engines/brave/text.py +167 -0
  239. webscout/search/engines/brave/videos.py +364 -0
  240. webscout/search/engines/duckduckgo/__init__.py +25 -0
  241. webscout/search/engines/duckduckgo/answers.py +80 -0
  242. webscout/search/engines/duckduckgo/base.py +189 -0
  243. webscout/search/engines/duckduckgo/images.py +100 -0
  244. webscout/search/engines/duckduckgo/maps.py +183 -0
  245. webscout/search/engines/duckduckgo/news.py +70 -0
  246. webscout/search/engines/duckduckgo/suggestions.py +22 -0
  247. webscout/search/engines/duckduckgo/text.py +221 -0
  248. webscout/search/engines/duckduckgo/translate.py +48 -0
  249. webscout/search/engines/duckduckgo/videos.py +80 -0
  250. webscout/search/engines/duckduckgo/weather.py +84 -0
  251. webscout/search/engines/mojeek.py +61 -0
  252. webscout/search/engines/wikipedia.py +77 -0
  253. webscout/search/engines/yahoo/__init__.py +41 -0
  254. webscout/search/engines/yahoo/answers.py +19 -0
  255. webscout/search/engines/yahoo/base.py +34 -0
  256. webscout/search/engines/yahoo/images.py +323 -0
  257. webscout/search/engines/yahoo/maps.py +19 -0
  258. webscout/search/engines/yahoo/news.py +258 -0
  259. webscout/search/engines/yahoo/suggestions.py +140 -0
  260. webscout/search/engines/yahoo/text.py +273 -0
  261. webscout/search/engines/yahoo/translate.py +19 -0
  262. webscout/search/engines/yahoo/videos.py +302 -0
  263. webscout/search/engines/yahoo/weather.py +220 -0
  264. webscout/search/engines/yandex.py +67 -0
  265. webscout/search/engines/yep/__init__.py +13 -0
  266. webscout/search/engines/yep/base.py +34 -0
  267. webscout/search/engines/yep/images.py +101 -0
  268. webscout/search/engines/yep/suggestions.py +38 -0
  269. webscout/search/engines/yep/text.py +99 -0
  270. webscout/search/http_client.py +172 -0
  271. webscout/search/results.py +141 -0
  272. webscout/search/yahoo_main.py +57 -0
  273. webscout/search/yep_main.py +48 -0
  274. webscout/server/__init__.py +48 -0
  275. webscout/server/config.py +78 -0
  276. webscout/server/exceptions.py +69 -0
  277. webscout/server/providers.py +286 -0
  278. webscout/server/request_models.py +131 -0
  279. webscout/server/request_processing.py +404 -0
  280. webscout/server/routes.py +642 -0
  281. webscout/server/server.py +351 -0
  282. webscout/server/ui_templates.py +1171 -0
  283. webscout/swiftcli/__init__.py +79 -809
  284. webscout/swiftcli/core/__init__.py +7 -0
  285. webscout/swiftcli/core/cli.py +574 -0
  286. webscout/swiftcli/core/context.py +98 -0
  287. webscout/swiftcli/core/group.py +268 -0
  288. webscout/swiftcli/decorators/__init__.py +28 -0
  289. webscout/swiftcli/decorators/command.py +243 -0
  290. webscout/swiftcli/decorators/options.py +247 -0
  291. webscout/swiftcli/decorators/output.py +392 -0
  292. webscout/swiftcli/exceptions.py +21 -0
  293. webscout/swiftcli/plugins/__init__.py +9 -0
  294. webscout/swiftcli/plugins/base.py +134 -0
  295. webscout/swiftcli/plugins/manager.py +269 -0
  296. webscout/swiftcli/utils/__init__.py +58 -0
  297. webscout/swiftcli/utils/formatting.py +251 -0
  298. webscout/swiftcli/utils/parsing.py +368 -0
  299. webscout/update_checker.py +280 -136
  300. webscout/utils.py +28 -14
  301. webscout/version.py +2 -1
  302. webscout/version.py.bak +3 -0
  303. webscout/zeroart/__init__.py +218 -55
  304. webscout/zeroart/base.py +70 -60
  305. webscout/zeroart/effects.py +155 -99
  306. webscout/zeroart/fonts.py +1799 -816
  307. webscout-2026.1.19.dist-info/METADATA +638 -0
  308. webscout-2026.1.19.dist-info/RECORD +312 -0
  309. {webscout-8.2.2.dist-info → webscout-2026.1.19.dist-info}/WHEEL +1 -1
  310. webscout-2026.1.19.dist-info/entry_points.txt +4 -0
  311. webscout-2026.1.19.dist-info/top_level.txt +1 -0
  312. inferno/__init__.py +0 -6
  313. inferno/__main__.py +0 -9
  314. inferno/cli.py +0 -6
  315. webscout/DWEBS.py +0 -477
  316. webscout/Extra/autocoder/__init__.py +0 -9
  317. webscout/Extra/autocoder/autocoder.py +0 -849
  318. webscout/Extra/autocoder/autocoder_utiles.py +0 -332
  319. webscout/LLM.py +0 -442
  320. webscout/Litlogger/__init__.py +0 -67
  321. webscout/Litlogger/core/__init__.py +0 -6
  322. webscout/Litlogger/core/level.py +0 -23
  323. webscout/Litlogger/core/logger.py +0 -165
  324. webscout/Litlogger/handlers/__init__.py +0 -12
  325. webscout/Litlogger/handlers/console.py +0 -33
  326. webscout/Litlogger/handlers/file.py +0 -143
  327. webscout/Litlogger/handlers/network.py +0 -173
  328. webscout/Litlogger/styles/__init__.py +0 -7
  329. webscout/Litlogger/styles/colors.py +0 -249
  330. webscout/Litlogger/styles/formats.py +0 -458
  331. webscout/Litlogger/styles/text.py +0 -87
  332. webscout/Litlogger/utils/__init__.py +0 -6
  333. webscout/Litlogger/utils/detectors.py +0 -153
  334. webscout/Litlogger/utils/formatters.py +0 -200
  335. webscout/Local/__init__.py +0 -12
  336. webscout/Local/__main__.py +0 -9
  337. webscout/Local/api.py +0 -576
  338. webscout/Local/cli.py +0 -516
  339. webscout/Local/config.py +0 -75
  340. webscout/Local/llm.py +0 -287
  341. webscout/Local/model_manager.py +0 -253
  342. webscout/Local/server.py +0 -721
  343. webscout/Local/utils.py +0 -93
  344. webscout/Provider/AI21.py +0 -177
  345. webscout/Provider/AISEARCH/DeepFind.py +0 -250
  346. webscout/Provider/AISEARCH/ISou.py +0 -256
  347. webscout/Provider/AISEARCH/felo_search.py +0 -228
  348. webscout/Provider/AISEARCH/genspark_search.py +0 -208
  349. webscout/Provider/AISEARCH/hika_search.py +0 -194
  350. webscout/Provider/AISEARCH/scira_search.py +0 -324
  351. webscout/Provider/Aitopia.py +0 -292
  352. webscout/Provider/AllenAI.py +0 -413
  353. webscout/Provider/Blackboxai.py +0 -229
  354. webscout/Provider/C4ai.py +0 -432
  355. webscout/Provider/ChatGPTClone.py +0 -226
  356. webscout/Provider/ChatGPTES.py +0 -237
  357. webscout/Provider/ChatGPTGratis.py +0 -194
  358. webscout/Provider/Chatify.py +0 -175
  359. webscout/Provider/Cloudflare.py +0 -273
  360. webscout/Provider/DeepSeek.py +0 -196
  361. webscout/Provider/ElectronHub.py +0 -709
  362. webscout/Provider/ExaChat.py +0 -342
  363. webscout/Provider/Free2GPT.py +0 -241
  364. webscout/Provider/GPTWeb.py +0 -193
  365. webscout/Provider/Glider.py +0 -211
  366. webscout/Provider/HF_space/__init__.py +0 -0
  367. webscout/Provider/HF_space/qwen_qwen2.py +0 -206
  368. webscout/Provider/HuggingFaceChat.py +0 -462
  369. webscout/Provider/Hunyuan.py +0 -272
  370. webscout/Provider/LambdaChat.py +0 -392
  371. webscout/Provider/Llama.py +0 -200
  372. webscout/Provider/Llama3.py +0 -204
  373. webscout/Provider/Marcus.py +0 -148
  374. webscout/Provider/OLLAMA.py +0 -396
  375. webscout/Provider/OPENAI/c4ai.py +0 -367
  376. webscout/Provider/OPENAI/chatgptclone.py +0 -460
  377. webscout/Provider/OPENAI/exachat.py +0 -433
  378. webscout/Provider/OPENAI/freeaichat.py +0 -352
  379. webscout/Provider/OPENAI/opkfc.py +0 -488
  380. webscout/Provider/OPENAI/scirachat.py +0 -463
  381. webscout/Provider/OPENAI/standardinput.py +0 -425
  382. webscout/Provider/OPENAI/typegpt.py +0 -346
  383. webscout/Provider/OPENAI/uncovrAI.py +0 -455
  384. webscout/Provider/OPENAI/venice.py +0 -413
  385. webscout/Provider/OPENAI/yep.py +0 -327
  386. webscout/Provider/OpenGPT.py +0 -199
  387. webscout/Provider/Perplexitylabs.py +0 -415
  388. webscout/Provider/Phind.py +0 -535
  389. webscout/Provider/PizzaGPT.py +0 -198
  390. webscout/Provider/Reka.py +0 -214
  391. webscout/Provider/StandardInput.py +0 -278
  392. webscout/Provider/TTI/AiForce/__init__.py +0 -22
  393. webscout/Provider/TTI/AiForce/async_aiforce.py +0 -224
  394. webscout/Provider/TTI/AiForce/sync_aiforce.py +0 -245
  395. webscout/Provider/TTI/FreeAIPlayground/__init__.py +0 -9
  396. webscout/Provider/TTI/FreeAIPlayground/async_freeaiplayground.py +0 -181
  397. webscout/Provider/TTI/FreeAIPlayground/sync_freeaiplayground.py +0 -180
  398. webscout/Provider/TTI/ImgSys/__init__.py +0 -23
  399. webscout/Provider/TTI/ImgSys/async_imgsys.py +0 -202
  400. webscout/Provider/TTI/ImgSys/sync_imgsys.py +0 -195
  401. webscout/Provider/TTI/MagicStudio/__init__.py +0 -2
  402. webscout/Provider/TTI/MagicStudio/async_magicstudio.py +0 -111
  403. webscout/Provider/TTI/MagicStudio/sync_magicstudio.py +0 -109
  404. webscout/Provider/TTI/Nexra/__init__.py +0 -22
  405. webscout/Provider/TTI/Nexra/async_nexra.py +0 -286
  406. webscout/Provider/TTI/Nexra/sync_nexra.py +0 -258
  407. webscout/Provider/TTI/PollinationsAI/__init__.py +0 -23
  408. webscout/Provider/TTI/PollinationsAI/async_pollinations.py +0 -311
  409. webscout/Provider/TTI/PollinationsAI/sync_pollinations.py +0 -265
  410. webscout/Provider/TTI/aiarta/__init__.py +0 -2
  411. webscout/Provider/TTI/aiarta/async_aiarta.py +0 -482
  412. webscout/Provider/TTI/aiarta/sync_aiarta.py +0 -440
  413. webscout/Provider/TTI/artbit/__init__.py +0 -22
  414. webscout/Provider/TTI/artbit/async_artbit.py +0 -155
  415. webscout/Provider/TTI/artbit/sync_artbit.py +0 -148
  416. webscout/Provider/TTI/fastflux/__init__.py +0 -22
  417. webscout/Provider/TTI/fastflux/async_fastflux.py +0 -261
  418. webscout/Provider/TTI/fastflux/sync_fastflux.py +0 -252
  419. webscout/Provider/TTI/huggingface/__init__.py +0 -22
  420. webscout/Provider/TTI/huggingface/async_huggingface.py +0 -199
  421. webscout/Provider/TTI/huggingface/sync_huggingface.py +0 -195
  422. webscout/Provider/TTI/piclumen/__init__.py +0 -23
  423. webscout/Provider/TTI/piclumen/async_piclumen.py +0 -268
  424. webscout/Provider/TTI/piclumen/sync_piclumen.py +0 -233
  425. webscout/Provider/TTI/pixelmuse/__init__.py +0 -4
  426. webscout/Provider/TTI/pixelmuse/async_pixelmuse.py +0 -249
  427. webscout/Provider/TTI/pixelmuse/sync_pixelmuse.py +0 -182
  428. webscout/Provider/TTI/talkai/__init__.py +0 -4
  429. webscout/Provider/TTI/talkai/async_talkai.py +0 -229
  430. webscout/Provider/TTI/talkai/sync_talkai.py +0 -207
  431. webscout/Provider/TTS/gesserit.py +0 -127
  432. webscout/Provider/TeachAnything.py +0 -187
  433. webscout/Provider/Venice.py +0 -219
  434. webscout/Provider/VercelAI.py +0 -234
  435. webscout/Provider/WebSim.py +0 -228
  436. webscout/Provider/Writecream.py +0 -211
  437. webscout/Provider/WritingMate.py +0 -197
  438. webscout/Provider/aimathgpt.py +0 -189
  439. webscout/Provider/askmyai.py +0 -158
  440. webscout/Provider/asksteve.py +0 -203
  441. webscout/Provider/bagoodex.py +0 -145
  442. webscout/Provider/chatglm.py +0 -205
  443. webscout/Provider/copilot.py +0 -428
  444. webscout/Provider/freeaichat.py +0 -271
  445. webscout/Provider/gaurish.py +0 -244
  446. webscout/Provider/geminiprorealtime.py +0 -160
  447. webscout/Provider/granite.py +0 -187
  448. webscout/Provider/hermes.py +0 -219
  449. webscout/Provider/koala.py +0 -268
  450. webscout/Provider/labyrinth.py +0 -340
  451. webscout/Provider/lepton.py +0 -194
  452. webscout/Provider/llamatutor.py +0 -192
  453. webscout/Provider/multichat.py +0 -325
  454. webscout/Provider/promptrefine.py +0 -193
  455. webscout/Provider/scira_chat.py +0 -277
  456. webscout/Provider/scnet.py +0 -187
  457. webscout/Provider/talkai.py +0 -194
  458. webscout/Provider/tutorai.py +0 -252
  459. webscout/Provider/typegpt.py +0 -232
  460. webscout/Provider/uncovr.py +0 -312
  461. webscout/Provider/yep.py +0 -376
  462. webscout/litprinter/__init__.py +0 -59
  463. webscout/scout/core.py +0 -881
  464. webscout/tempid.py +0 -128
  465. webscout/webscout_search.py +0 -1346
  466. webscout/webscout_search_async.py +0 -877
  467. webscout/yep_search.py +0 -297
  468. webscout-8.2.2.dist-info/METADATA +0 -734
  469. webscout-8.2.2.dist-info/RECORD +0 -309
  470. webscout-8.2.2.dist-info/entry_points.txt +0 -5
  471. webscout-8.2.2.dist-info/top_level.txt +0 -3
  472. webstoken/__init__.py +0 -30
  473. webstoken/classifier.py +0 -189
  474. webstoken/keywords.py +0 -216
  475. webstoken/language.py +0 -128
  476. webstoken/ner.py +0 -164
  477. webstoken/normalizer.py +0 -35
  478. webstoken/processor.py +0 -77
  479. webstoken/sentiment.py +0 -206
  480. webstoken/stemmer.py +0 -73
  481. webstoken/tagger.py +0 -60
  482. webstoken/tokenizer.py +0 -158
  483. {webscout-8.2.2.dist-info → webscout-2026.1.19.dist-info/licenses}/LICENSE.md +0 -0
@@ -1,140 +1,330 @@
1
- """
2
- Scout Crawler Module
3
- """
4
-
5
- import concurrent.futures
6
- import urllib.parse
7
- from typing import Union, List, Dict
8
- import requests
9
-
10
- from .scout import Scout
11
-
12
- class ScoutCrawler:
13
- """
14
- Advanced web crawling utility for Scout library.
15
- """
16
- def __init__(self, base_url: str, max_pages: int = 50, tags_to_remove: List[str] = None):
17
- """
18
- Initialize the web crawler.
19
-
20
- Args:
21
- base_url (str): Starting URL to crawl
22
- max_pages (int, optional): Maximum number of pages to crawl
23
- tags_to_remove (List[str], optional): List of tags to remove
24
- """
25
- self.base_url = base_url
26
- self.max_pages = max_pages
27
- self.tags_to_remove = tags_to_remove if tags_to_remove is not None else ["script", "style", "header", "footer", "nav", "aside", "form", "button"]
28
- self.visited_urls = set()
29
- self.crawled_pages = []
30
-
31
- def _is_valid_url(self, url: str) -> bool:
32
- """
33
- Check if a URL is valid and within the same domain.
34
-
35
- Args:
36
- url (str): URL to validate
37
-
38
- Returns:
39
- bool: Whether the URL is valid
40
- """
41
- try:
42
- parsed_base = urllib.parse.urlparse(self.base_url)
43
- parsed_url = urllib.parse.urlparse(url)
44
-
45
- return (
46
- parsed_url.scheme in ['http', 'https'] and
47
- parsed_base.netloc == parsed_url.netloc and
48
- len(self.visited_urls) < self.max_pages
49
- )
50
- except Exception:
51
- return False
52
-
53
- def _crawl_page(self, url: str, depth: int = 0) -> Dict[str, Union[str, List[str]]]:
54
- """
55
- Crawl a single page and extract information.
56
-
57
- Args:
58
- url (str): URL to crawl
59
- depth (int, optional): Current crawl depth
60
-
61
- Returns:
62
- Dict[str, Union[str, List[str]]]: Crawled page information
63
- """
64
- if url in self.visited_urls:
65
- return {}
66
-
67
- try:
68
- response = requests.get(url, timeout=10)
69
- response.raise_for_status()
70
-
71
- scout = Scout(response.content, features='lxml')
72
-
73
- title_result = scout.find('title')
74
- title = title_result[0].get_text() if title_result else ''
75
-
76
- visible_text = scout._soup.get_text(strip=True)
77
-
78
- for tag in scout._soup(self.tags_to_remove):
79
- tag.extract()
80
-
81
- page_info = {
82
- 'url': url,
83
- 'title': title,
84
- 'links': [
85
- urllib.parse.urljoin(url, link.get('href'))
86
- for link in scout.find_all('a', href=True)
87
- if self._is_valid_url(urllib.parse.urljoin(url, link.get('href')))
88
- ],
89
- 'text': visible_text,
90
- 'depth': depth
91
- }
92
-
93
- self.visited_urls.add(url)
94
- self.crawled_pages.append(page_info)
95
-
96
- return page_info
97
- except Exception as e:
98
- print(f"Error crawling {url}: {e}")
99
- return {}
100
-
101
- def crawl(self) -> List[Dict[str, Union[str, List[str]]]]:
102
- """
103
- Start web crawling from base URL.
104
-
105
- Returns:
106
- List[Dict[str, Union[str, List[str]]]]: List of crawled pages
107
- """
108
- with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
109
- futures = {executor.submit(self._crawl_page, self.base_url, 0)}
110
-
111
- while futures:
112
- done, futures = concurrent.futures.wait(
113
- futures, return_when=concurrent.futures.FIRST_COMPLETED
114
- )
115
-
116
- for future in done:
117
- page_info = future.result()
118
-
119
- if len(self.visited_urls) >= self.max_pages:
120
- break
121
-
122
- submitted_links = set() # New set to track submitted links
123
- for link in page_info.get('links', []):
124
- if (
125
- len(self.visited_urls) < self.max_pages and
126
- link not in self.visited_urls
127
- ):
128
- if link not in submitted_links: # Check against submitted links
129
- submitted_links.add(link) # Add to submitted links
130
- futures.add(
131
- executor.submit(
132
- self._crawl_page,
133
- link,
134
- page_info.get('depth', 0) + 1
135
- )
136
- )
137
- if len(self.visited_urls) >= self.max_pages:
138
- break
139
-
140
- return self.crawled_pages
1
+ """
2
+ Scout Crawler Module - Ultra Advanced Web Crawling System
3
+ """
4
+
5
+ import concurrent.futures
6
+ import hashlib
7
+ import time
8
+ import urllib.parse
9
+ from dataclasses import dataclass
10
+ from datetime import datetime
11
+ from typing import Any, Dict, List, Optional, Set, Union
12
+ from urllib import robotparser
13
+
14
+ try:
15
+ from webscout.litagent import LitAgent
16
+ except ImportError:
17
+ LitAgent: Any = None
18
+
19
+ try:
20
+ from curl_cffi.requests import Session
21
+ except ImportError:
22
+ import requests
23
+ Session: Any = requests.Session
24
+
25
+ from ..parsers import ParserRegistry
26
+ from .scout import Scout
27
+
28
+
29
+ @dataclass
30
+ class CrawlConfig:
31
+ """Configuration for the crawler."""
32
+ max_pages: int = 1000
33
+ max_depth: int = 10
34
+ delay: float = 0.5
35
+ obey_robots: bool = True
36
+ crawl_subdomains: bool = True
37
+ max_workers: int = 10
38
+ timeout: int = 30
39
+ retry_attempts: int = 3
40
+ include_external_links: bool = False
41
+ extract_metadata: bool = True
42
+ extract_structured_data: bool = True
43
+ extract_semantic_content: bool = True
44
+
45
+
46
+ @dataclass
47
+ class PageData:
48
+ """Comprehensive page data for LLM training."""
49
+ url: str
50
+ title: str
51
+ text: str
52
+ clean_text: str
53
+ markdown_text: str
54
+ links: List[str]
55
+ internal_links: List[str]
56
+ external_links: List[str]
57
+ metadata: Dict[str, Any]
58
+ structured_data: Dict[str, Any]
59
+ semantic_content: Dict[str, Any]
60
+ headers: Dict[str, str]
61
+ status_code: int
62
+ content_type: str
63
+ language: str
64
+ timestamp: str
65
+ depth: int
66
+ word_count: int
67
+
68
+
69
+ class ScoutCrawler:
70
+ """
71
+ Ultra-advanced web crawling utility optimized for LLM data collection.
72
+ """
73
def __init__(self, base_url: str, max_pages: int = 50, tags_to_remove: Optional[List[str]] = None, session: Optional[Any] = None, delay: float = 0.5, obey_robots: bool = True, allowed_domains: Optional[List[str]] = None):
    """
    Prepare crawler state, session headers, domain limits and robots rules.

    Args:
        base_url (str): Starting URL to crawl
        max_pages (int, optional): Maximum number of pages to crawl
        tags_to_remove (List[str], optional): Tag names stripped from each
            page before text extraction; defaults to ``script`` and ``style``
        session (Any, optional): Object exposing ``headers`` and
            ``get(url, timeout=...)``; a new ``Session`` is created when
            omitted (or falsy)
        delay (float, optional): Minimum number of seconds kept between
            two page requests
        obey_robots (bool, optional): When true, ``/robots.txt`` is fetched
            once here and consulted when validating URLs
        allowed_domains (List[str], optional): Hosts the crawler may visit;
            defaults to the network location of ``base_url``
    """
    self.base_url = base_url
    self.max_pages = max_pages
    if tags_to_remove is None:
        tags_to_remove = ["script", "style"]
    self.tags_to_remove = tags_to_remove
    self.visited_urls = set()
    self.crawled_pages = []
    self.session = session or Session()

    # LitAgent is optional in minimal installs; substitute a tiny stand-in
    # that offers the two methods used below.
    if LitAgent is None:
        class _SimpleAgent:
            def generate_fingerprint(self) -> Dict[str, str]:
                return {"user_agent": "Mozilla/5.0"}

            def chrome(self) -> str:
                return "Mozilla/5.0"

        self.agent = _SimpleAgent()
    else:
        self.agent = LitAgent()

    # Translate the fingerprint's snake_case keys into HTTP header names.
    fingerprint = self.agent.generate_fingerprint()
    headers: Dict[str, str] = {}
    if isinstance(fingerprint, dict):
        for raw_key, raw_value in fingerprint.items():
            if raw_key == "user_agent":
                header_name = "User-Agent"
            else:
                header_name = raw_key.replace("_", "-").title()
            headers[header_name] = str(raw_value)

    try:
        self.session.headers.update(headers)
    except Exception:
        # Header containers without a usable update(): assign one by one,
        # ignoring any entry the container rejects.
        for header_name, header_value in headers.items():
            try:
                self.session.headers[header_name] = header_value
            except Exception:
                pass

    # Guarantee that some User-Agent is sent (best effort).
    try:
        self.session.headers.setdefault("User-Agent", self.agent.chrome())
    except Exception:
        pass

    self.delay = delay
    self.obey_robots = obey_robots
    available_parsers = ParserRegistry.list_parsers()
    self.features = "lxml" if "lxml" in available_parsers else "html.parser"

    # Domain bookkeeping: full netloc plus its last two dot-separated labels.
    self.base_netloc = urllib.parse.urlparse(base_url).netloc
    labels = self.base_netloc.split('.')
    if len(labels) > 1:
        self.base_domain = '.'.join(labels[-2:])
    else:
        self.base_domain = self.base_netloc

    self.allowed_domains = allowed_domains if allowed_domains else [self.base_netloc]
    self.last_request_time = 0
    self.url_hashes = set()

    # robots stays None unless robots.txt was requested, fetched with a 200
    # and parsed without error.
    self.robots = None
    if obey_robots:
        parser = robotparser.RobotFileParser()
        robots_url = urllib.parse.urljoin(base_url, '/robots.txt')
        try:
            # Fetch through the session so the configured headers/UA apply.
            robots_resp = self.session.get(robots_url, timeout=5)
            if robots_resp.status_code == 200:
                parser.parse(robots_resp.text.splitlines())
                self.robots = parser
        except Exception:
            self.robots = None
156
+
157
+ def _normalize_url(self, url: str) -> str:
158
+ """Normalize URL by removing fragments and trailing slashes."""
159
+ url = url.split('#')[0]
160
+ return url.rstrip('/')
161
+
162
+ def _is_valid_url(self, url: str) -> bool:
163
+ """
164
+ Check if a URL is valid and within allowed domains.
165
+ """
166
+ try:
167
+ parsed_url = urllib.parse.urlparse(url)
168
+ if parsed_url.scheme not in ["http", "https"]:
169
+ return False
170
+
171
+ # Secure domain check
172
+ target_netloc = parsed_url.netloc.lower()
173
+ is_allowed = False
174
+ for allowed in self.allowed_domains:
175
+ if target_netloc == allowed.lower() or target_netloc.endswith('.' + allowed.lower()):
176
+ is_allowed = True
177
+ break
178
+
179
+ if not is_allowed:
180
+ return False
181
+
182
+ if self.obey_robots and self.robots:
183
+ # Ensure we pass a str user-agent to robotparser.can_fetch
184
+ ua = str(self.session.headers.get("User-Agent", "*"))
185
+ return self.robots.can_fetch(ua, url)
186
+ return True
187
+ except Exception:
188
+ return False
189
+
190
+ def _is_duplicate(self, url: str) -> bool:
191
+ norm = self._normalize_url(url)
192
+ url_hash = hashlib.md5(norm.encode()).hexdigest()
193
+ if url_hash in self.url_hashes:
194
+ return True
195
+ self.url_hashes.add(url_hash)
196
+ return False
197
+
198
+ def _extract_main_text(self, soup):
199
+ # Try to extract main content (simple heuristic)
200
+ main = soup.find('main')
201
+ if main:
202
+ return main.get_text(separator=" ", strip=True)
203
+ article = soup.find('article')
204
+ if article:
205
+ return article.get_text(separator=" ", strip=True)
206
+ # fallback to body
207
+ body = soup.find('body')
208
+ if body:
209
+ return body.get_text(separator=" ", strip=True)
210
+ return soup.get_text(separator=" ", strip=True)
211
+
212
+ def _crawl_page(self, url: str, depth: int = 0) -> Dict[str, Any]:
213
+ """
214
+ Crawl a single page and extract information.
215
+
216
+ Args:
217
+ url (str): URL to crawl
218
+ depth (int, optional): Current crawl depth
219
+
220
+ Returns:
221
+ Dict[str, Any]: Crawled page information
222
+ """
223
+ if url in self.visited_urls or self._is_duplicate(url):
224
+ return {}
225
+ # Log URL to crawl
226
+ print(f"Attempting to crawl URL: {url} (depth: {depth})")
227
+
228
+ # Throttle requests
229
+ now = time.time()
230
+ if self.last_request_time:
231
+ elapsed = now - self.last_request_time
232
+ if elapsed < self.delay:
233
+ time.sleep(self.delay - elapsed)
234
+ self.last_request_time = time.time()
235
+ try:
236
+ response = self.session.get(url, timeout=10)
237
+ response.raise_for_status()
238
+ if not response.headers.get('Content-Type', '').startswith('text/html'):
239
+ return {}
240
+ scout = Scout(response.content, features=self.features)
241
+ title_tag = scout.find("title")
242
+ title = title_tag.get_text() if title_tag else ""
243
+
244
+ # Remove only script and style tags before extracting text
245
+ for tag_name in self.tags_to_remove:
246
+ for tag in scout._soup.find_all(tag_name):
247
+ tag.decompose()
248
+
249
+ visible_text = self._extract_main_text(scout._soup)
250
+
251
+ # Extract links from header, footer, nav, etc.
252
+ essential_links = []
253
+ for essential_tag in ['header', 'nav', 'footer']:
254
+ elements = scout.find_all(essential_tag)
255
+ for element in elements:
256
+ links = element.find_all('a', href=True)
257
+ essential_links.extend(
258
+ urllib.parse.urljoin(url, link.get('href'))
259
+ for link in links
260
+ if link.get('href') and self._is_valid_url(urllib.parse.urljoin(url, link.get('href')))
261
+ )
262
+
263
+ all_links = [
264
+ urllib.parse.urljoin(url, link.get('href'))
265
+ for link in scout.find_all('a', href=True)
266
+ if self._is_valid_url(urllib.parse.urljoin(url, link.get('href')))
267
+ ]
268
+
269
+ combined_links = list(set(all_links + essential_links))
270
+
271
+ page_info = {
272
+ 'url': url,
273
+ 'title': title,
274
+ 'links': combined_links,
275
+ 'text': visible_text,
276
+ 'depth': depth,
277
+ 'timestamp': datetime.now().isoformat(),
278
+ 'headers': dict(response.headers),
279
+ }
280
+ self.visited_urls.add(url)
281
+ self.crawled_pages.append(page_info)
282
+ return page_info
283
+ except Exception as e:
284
+ print(f"Error crawling {url}: {e}")
285
+ return {}
286
+
287
+ def crawl(self):
288
+ """
289
+ Start web crawling from base URL and yield each crawled page in real time.
290
+
291
+ Yields:
292
+ Dict[str, Union[str, List[str]]]: Crawled page information
293
+ """
294
+ with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
295
+ futures = {executor.submit(self._crawl_page, self.base_url, 0)}
296
+ submitted_links: Set[str] = set()
297
+
298
+ while futures:
299
+ if self.max_pages is not None and len(self.visited_urls) >= self.max_pages:
300
+ break
301
+ done, not_done = concurrent.futures.wait(
302
+ futures, return_when=concurrent.futures.FIRST_COMPLETED
303
+ )
304
+ futures = not_done
305
+
306
+ for future in done:
307
+ page_info = future.result()
308
+
309
+ if page_info:
310
+ yield page_info
311
+
312
+ if self.max_pages is not None and len(self.visited_urls) >= self.max_pages:
313
+ return
314
+
315
+ for link in page_info.get("links", []):
316
+ if (
317
+ (self.max_pages is None or len(self.visited_urls) < self.max_pages)
318
+ and link not in self.visited_urls
319
+ and link not in submitted_links
320
+ ):
321
+ submitted_links.add(link)
322
+ futures.add(
323
+ executor.submit(
324
+ self._crawl_page,
325
+ link,
326
+ int(page_info.get("depth", 0)) + 1,
327
+ )
328
+ )
329
+ else:
330
+ print("No page info retrieved from crawling")