webscout 8.2.2__py3-none-any.whl → 2026.1.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (483)
  1. webscout/AIauto.py +524 -143
  2. webscout/AIbase.py +247 -123
  3. webscout/AIutel.py +68 -132
  4. webscout/Bard.py +1072 -535
  5. webscout/Extra/GitToolkit/__init__.py +2 -2
  6. webscout/Extra/GitToolkit/gitapi/__init__.py +20 -12
  7. webscout/Extra/GitToolkit/gitapi/gist.py +142 -0
  8. webscout/Extra/GitToolkit/gitapi/organization.py +91 -0
  9. webscout/Extra/GitToolkit/gitapi/repository.py +308 -195
  10. webscout/Extra/GitToolkit/gitapi/search.py +162 -0
  11. webscout/Extra/GitToolkit/gitapi/trending.py +236 -0
  12. webscout/Extra/GitToolkit/gitapi/user.py +128 -96
  13. webscout/Extra/GitToolkit/gitapi/utils.py +82 -62
  14. webscout/Extra/YTToolkit/README.md +443 -0
  15. webscout/Extra/YTToolkit/YTdownloader.py +953 -957
  16. webscout/Extra/YTToolkit/__init__.py +3 -3
  17. webscout/Extra/YTToolkit/transcriber.py +595 -476
  18. webscout/Extra/YTToolkit/ytapi/README.md +230 -0
  19. webscout/Extra/YTToolkit/ytapi/__init__.py +22 -6
  20. webscout/Extra/YTToolkit/ytapi/captions.py +190 -0
  21. webscout/Extra/YTToolkit/ytapi/channel.py +302 -307
  22. webscout/Extra/YTToolkit/ytapi/errors.py +13 -13
  23. webscout/Extra/YTToolkit/ytapi/extras.py +178 -45
  24. webscout/Extra/YTToolkit/ytapi/hashtag.py +120 -0
  25. webscout/Extra/YTToolkit/ytapi/https.py +89 -88
  26. webscout/Extra/YTToolkit/ytapi/patterns.py +61 -61
  27. webscout/Extra/YTToolkit/ytapi/playlist.py +59 -59
  28. webscout/Extra/YTToolkit/ytapi/pool.py +8 -8
  29. webscout/Extra/YTToolkit/ytapi/query.py +143 -40
  30. webscout/Extra/YTToolkit/ytapi/shorts.py +122 -0
  31. webscout/Extra/YTToolkit/ytapi/stream.py +68 -63
  32. webscout/Extra/YTToolkit/ytapi/suggestions.py +97 -0
  33. webscout/Extra/YTToolkit/ytapi/utils.py +66 -62
  34. webscout/Extra/YTToolkit/ytapi/video.py +189 -18
  35. webscout/Extra/__init__.py +2 -3
  36. webscout/Extra/gguf.py +1298 -682
  37. webscout/Extra/tempmail/README.md +488 -0
  38. webscout/Extra/tempmail/__init__.py +28 -28
  39. webscout/Extra/tempmail/async_utils.py +143 -141
  40. webscout/Extra/tempmail/base.py +172 -161
  41. webscout/Extra/tempmail/cli.py +191 -187
  42. webscout/Extra/tempmail/emailnator.py +88 -84
  43. webscout/Extra/tempmail/mail_tm.py +378 -361
  44. webscout/Extra/tempmail/temp_mail_io.py +304 -292
  45. webscout/Extra/weather.py +196 -194
  46. webscout/Extra/weather_ascii.py +17 -15
  47. webscout/Provider/AISEARCH/PERPLEXED_search.py +175 -0
  48. webscout/Provider/AISEARCH/Perplexity.py +237 -304
  49. webscout/Provider/AISEARCH/README.md +106 -0
  50. webscout/Provider/AISEARCH/__init__.py +16 -10
  51. webscout/Provider/AISEARCH/brave_search.py +298 -0
  52. webscout/Provider/AISEARCH/iask_search.py +130 -209
  53. webscout/Provider/AISEARCH/monica_search.py +200 -246
  54. webscout/Provider/AISEARCH/webpilotai_search.py +242 -281
  55. webscout/Provider/Algion.py +413 -0
  56. webscout/Provider/Andi.py +74 -69
  57. webscout/Provider/Apriel.py +313 -0
  58. webscout/Provider/Ayle.py +323 -0
  59. webscout/Provider/ChatSandbox.py +329 -0
  60. webscout/Provider/ClaudeOnline.py +365 -0
  61. webscout/Provider/Cohere.py +232 -208
  62. webscout/Provider/DeepAI.py +367 -0
  63. webscout/Provider/Deepinfra.py +343 -173
  64. webscout/Provider/EssentialAI.py +217 -0
  65. webscout/Provider/ExaAI.py +274 -261
  66. webscout/Provider/Gemini.py +60 -54
  67. webscout/Provider/GithubChat.py +385 -367
  68. webscout/Provider/Gradient.py +286 -0
  69. webscout/Provider/Groq.py +556 -670
  70. webscout/Provider/HadadXYZ.py +323 -0
  71. webscout/Provider/HeckAI.py +392 -233
  72. webscout/Provider/HuggingFace.py +387 -0
  73. webscout/Provider/IBM.py +340 -0
  74. webscout/Provider/Jadve.py +317 -266
  75. webscout/Provider/K2Think.py +306 -0
  76. webscout/Provider/Koboldai.py +221 -381
  77. webscout/Provider/Netwrck.py +273 -228
  78. webscout/Provider/Nvidia.py +310 -0
  79. webscout/Provider/OPENAI/DeepAI.py +489 -0
  80. webscout/Provider/OPENAI/K2Think.py +423 -0
  81. webscout/Provider/OPENAI/PI.py +463 -0
  82. webscout/Provider/OPENAI/README.md +890 -0
  83. webscout/Provider/OPENAI/TogetherAI.py +405 -0
  84. webscout/Provider/OPENAI/TwoAI.py +255 -0
  85. webscout/Provider/OPENAI/__init__.py +148 -25
  86. webscout/Provider/OPENAI/ai4chat.py +348 -0
  87. webscout/Provider/OPENAI/akashgpt.py +436 -0
  88. webscout/Provider/OPENAI/algion.py +303 -0
  89. webscout/Provider/OPENAI/ayle.py +365 -0
  90. webscout/Provider/OPENAI/base.py +253 -46
  91. webscout/Provider/OPENAI/cerebras.py +296 -0
  92. webscout/Provider/OPENAI/chatgpt.py +514 -193
  93. webscout/Provider/OPENAI/chatsandbox.py +233 -0
  94. webscout/Provider/OPENAI/deepinfra.py +403 -272
  95. webscout/Provider/OPENAI/e2b.py +2370 -1350
  96. webscout/Provider/OPENAI/elmo.py +278 -0
  97. webscout/Provider/OPENAI/exaai.py +186 -138
  98. webscout/Provider/OPENAI/freeassist.py +446 -0
  99. webscout/Provider/OPENAI/gradient.py +448 -0
  100. webscout/Provider/OPENAI/groq.py +380 -0
  101. webscout/Provider/OPENAI/hadadxyz.py +292 -0
  102. webscout/Provider/OPENAI/heckai.py +100 -104
  103. webscout/Provider/OPENAI/huggingface.py +321 -0
  104. webscout/Provider/OPENAI/ibm.py +425 -0
  105. webscout/Provider/OPENAI/llmchat.py +253 -0
  106. webscout/Provider/OPENAI/llmchatco.py +378 -327
  107. webscout/Provider/OPENAI/meta.py +541 -0
  108. webscout/Provider/OPENAI/netwrck.py +110 -84
  109. webscout/Provider/OPENAI/nvidia.py +317 -0
  110. webscout/Provider/OPENAI/oivscode.py +348 -0
  111. webscout/Provider/OPENAI/openrouter.py +328 -0
  112. webscout/Provider/OPENAI/pydantic_imports.py +1 -0
  113. webscout/Provider/OPENAI/sambanova.py +397 -0
  114. webscout/Provider/OPENAI/sonus.py +126 -115
  115. webscout/Provider/OPENAI/textpollinations.py +218 -133
  116. webscout/Provider/OPENAI/toolbaz.py +136 -166
  117. webscout/Provider/OPENAI/typefully.py +419 -0
  118. webscout/Provider/OPENAI/typliai.py +279 -0
  119. webscout/Provider/OPENAI/utils.py +314 -211
  120. webscout/Provider/OPENAI/wisecat.py +103 -125
  121. webscout/Provider/OPENAI/writecream.py +185 -156
  122. webscout/Provider/OPENAI/x0gpt.py +227 -136
  123. webscout/Provider/OPENAI/zenmux.py +380 -0
  124. webscout/Provider/OpenRouter.py +386 -0
  125. webscout/Provider/Openai.py +337 -496
  126. webscout/Provider/PI.py +443 -344
  127. webscout/Provider/QwenLM.py +346 -254
  128. webscout/Provider/STT/__init__.py +28 -0
  129. webscout/Provider/STT/base.py +303 -0
  130. webscout/Provider/STT/elevenlabs.py +264 -0
  131. webscout/Provider/Sambanova.py +317 -0
  132. webscout/Provider/TTI/README.md +69 -0
  133. webscout/Provider/TTI/__init__.py +37 -12
  134. webscout/Provider/TTI/base.py +147 -0
  135. webscout/Provider/TTI/claudeonline.py +393 -0
  136. webscout/Provider/TTI/magicstudio.py +292 -0
  137. webscout/Provider/TTI/miragic.py +180 -0
  138. webscout/Provider/TTI/pollinations.py +331 -0
  139. webscout/Provider/TTI/together.py +334 -0
  140. webscout/Provider/TTI/utils.py +14 -0
  141. webscout/Provider/TTS/README.md +186 -0
  142. webscout/Provider/TTS/__init__.py +43 -7
  143. webscout/Provider/TTS/base.py +523 -0
  144. webscout/Provider/TTS/deepgram.py +286 -156
  145. webscout/Provider/TTS/elevenlabs.py +189 -111
  146. webscout/Provider/TTS/freetts.py +218 -0
  147. webscout/Provider/TTS/murfai.py +288 -113
  148. webscout/Provider/TTS/openai_fm.py +364 -0
  149. webscout/Provider/TTS/parler.py +203 -111
  150. webscout/Provider/TTS/qwen.py +334 -0
  151. webscout/Provider/TTS/sherpa.py +286 -0
  152. webscout/Provider/TTS/speechma.py +693 -180
  153. webscout/Provider/TTS/streamElements.py +275 -333
  154. webscout/Provider/TTS/utils.py +280 -280
  155. webscout/Provider/TextPollinationsAI.py +221 -121
  156. webscout/Provider/TogetherAI.py +450 -0
  157. webscout/Provider/TwoAI.py +309 -199
  158. webscout/Provider/TypliAI.py +311 -0
  159. webscout/Provider/UNFINISHED/ChatHub.py +219 -0
  160. webscout/Provider/{OPENAI/glider.py → UNFINISHED/ChutesAI.py} +160 -145
  161. webscout/Provider/UNFINISHED/GizAI.py +300 -0
  162. webscout/Provider/UNFINISHED/Marcus.py +218 -0
  163. webscout/Provider/UNFINISHED/Qodo.py +481 -0
  164. webscout/Provider/UNFINISHED/XenAI.py +330 -0
  165. webscout/Provider/{Youchat.py → UNFINISHED/Youchat.py} +64 -47
  166. webscout/Provider/UNFINISHED/aihumanizer.py +41 -0
  167. webscout/Provider/UNFINISHED/grammerchecker.py +37 -0
  168. webscout/Provider/UNFINISHED/liner.py +342 -0
  169. webscout/Provider/UNFINISHED/liner_api_request.py +246 -0
  170. webscout/Provider/UNFINISHED/samurai.py +231 -0
  171. webscout/Provider/WiseCat.py +256 -196
  172. webscout/Provider/WrDoChat.py +390 -0
  173. webscout/Provider/__init__.py +115 -198
  174. webscout/Provider/ai4chat.py +181 -202
  175. webscout/Provider/akashgpt.py +330 -342
  176. webscout/Provider/cerebras.py +397 -242
  177. webscout/Provider/cleeai.py +236 -213
  178. webscout/Provider/elmo.py +291 -234
  179. webscout/Provider/geminiapi.py +343 -208
  180. webscout/Provider/julius.py +245 -223
  181. webscout/Provider/learnfastai.py +333 -266
  182. webscout/Provider/llama3mitril.py +230 -180
  183. webscout/Provider/llmchat.py +308 -213
  184. webscout/Provider/llmchatco.py +321 -311
  185. webscout/Provider/meta.py +996 -794
  186. webscout/Provider/oivscode.py +332 -0
  187. webscout/Provider/searchchat.py +316 -293
  188. webscout/Provider/sonus.py +264 -208
  189. webscout/Provider/toolbaz.py +359 -320
  190. webscout/Provider/turboseek.py +332 -219
  191. webscout/Provider/typefully.py +262 -280
  192. webscout/Provider/x0gpt.py +332 -256
  193. webscout/__init__.py +31 -38
  194. webscout/__main__.py +5 -5
  195. webscout/cli.py +585 -293
  196. webscout/client.py +1497 -0
  197. webscout/conversation.py +140 -565
  198. webscout/exceptions.py +383 -339
  199. webscout/litagent/__init__.py +29 -29
  200. webscout/litagent/agent.py +492 -455
  201. webscout/litagent/constants.py +60 -60
  202. webscout/models.py +505 -181
  203. webscout/optimizers.py +32 -378
  204. webscout/prompt_manager.py +376 -274
  205. webscout/sanitize.py +1514 -0
  206. webscout/scout/README.md +452 -0
  207. webscout/scout/__init__.py +8 -8
  208. webscout/scout/core/__init__.py +7 -7
  209. webscout/scout/core/crawler.py +330 -140
  210. webscout/scout/core/scout.py +800 -568
  211. webscout/scout/core/search_result.py +51 -96
  212. webscout/scout/core/text_analyzer.py +64 -63
  213. webscout/scout/core/text_utils.py +412 -277
  214. webscout/scout/core/web_analyzer.py +54 -52
  215. webscout/scout/element.py +872 -460
  216. webscout/scout/parsers/__init__.py +70 -69
  217. webscout/scout/parsers/html5lib_parser.py +182 -172
  218. webscout/scout/parsers/html_parser.py +238 -236
  219. webscout/scout/parsers/lxml_parser.py +203 -178
  220. webscout/scout/utils.py +38 -37
  221. webscout/search/__init__.py +47 -0
  222. webscout/search/base.py +201 -0
  223. webscout/search/bing_main.py +45 -0
  224. webscout/search/brave_main.py +92 -0
  225. webscout/search/duckduckgo_main.py +57 -0
  226. webscout/search/engines/__init__.py +127 -0
  227. webscout/search/engines/bing/__init__.py +15 -0
  228. webscout/search/engines/bing/base.py +35 -0
  229. webscout/search/engines/bing/images.py +114 -0
  230. webscout/search/engines/bing/news.py +96 -0
  231. webscout/search/engines/bing/suggestions.py +36 -0
  232. webscout/search/engines/bing/text.py +109 -0
  233. webscout/search/engines/brave/__init__.py +19 -0
  234. webscout/search/engines/brave/base.py +47 -0
  235. webscout/search/engines/brave/images.py +213 -0
  236. webscout/search/engines/brave/news.py +353 -0
  237. webscout/search/engines/brave/suggestions.py +318 -0
  238. webscout/search/engines/brave/text.py +167 -0
  239. webscout/search/engines/brave/videos.py +364 -0
  240. webscout/search/engines/duckduckgo/__init__.py +25 -0
  241. webscout/search/engines/duckduckgo/answers.py +80 -0
  242. webscout/search/engines/duckduckgo/base.py +189 -0
  243. webscout/search/engines/duckduckgo/images.py +100 -0
  244. webscout/search/engines/duckduckgo/maps.py +183 -0
  245. webscout/search/engines/duckduckgo/news.py +70 -0
  246. webscout/search/engines/duckduckgo/suggestions.py +22 -0
  247. webscout/search/engines/duckduckgo/text.py +221 -0
  248. webscout/search/engines/duckduckgo/translate.py +48 -0
  249. webscout/search/engines/duckduckgo/videos.py +80 -0
  250. webscout/search/engines/duckduckgo/weather.py +84 -0
  251. webscout/search/engines/mojeek.py +61 -0
  252. webscout/search/engines/wikipedia.py +77 -0
  253. webscout/search/engines/yahoo/__init__.py +41 -0
  254. webscout/search/engines/yahoo/answers.py +19 -0
  255. webscout/search/engines/yahoo/base.py +34 -0
  256. webscout/search/engines/yahoo/images.py +323 -0
  257. webscout/search/engines/yahoo/maps.py +19 -0
  258. webscout/search/engines/yahoo/news.py +258 -0
  259. webscout/search/engines/yahoo/suggestions.py +140 -0
  260. webscout/search/engines/yahoo/text.py +273 -0
  261. webscout/search/engines/yahoo/translate.py +19 -0
  262. webscout/search/engines/yahoo/videos.py +302 -0
  263. webscout/search/engines/yahoo/weather.py +220 -0
  264. webscout/search/engines/yandex.py +67 -0
  265. webscout/search/engines/yep/__init__.py +13 -0
  266. webscout/search/engines/yep/base.py +34 -0
  267. webscout/search/engines/yep/images.py +101 -0
  268. webscout/search/engines/yep/suggestions.py +38 -0
  269. webscout/search/engines/yep/text.py +99 -0
  270. webscout/search/http_client.py +172 -0
  271. webscout/search/results.py +141 -0
  272. webscout/search/yahoo_main.py +57 -0
  273. webscout/search/yep_main.py +48 -0
  274. webscout/server/__init__.py +48 -0
  275. webscout/server/config.py +78 -0
  276. webscout/server/exceptions.py +69 -0
  277. webscout/server/providers.py +286 -0
  278. webscout/server/request_models.py +131 -0
  279. webscout/server/request_processing.py +404 -0
  280. webscout/server/routes.py +642 -0
  281. webscout/server/server.py +351 -0
  282. webscout/server/ui_templates.py +1171 -0
  283. webscout/swiftcli/__init__.py +79 -809
  284. webscout/swiftcli/core/__init__.py +7 -0
  285. webscout/swiftcli/core/cli.py +574 -0
  286. webscout/swiftcli/core/context.py +98 -0
  287. webscout/swiftcli/core/group.py +268 -0
  288. webscout/swiftcli/decorators/__init__.py +28 -0
  289. webscout/swiftcli/decorators/command.py +243 -0
  290. webscout/swiftcli/decorators/options.py +247 -0
  291. webscout/swiftcli/decorators/output.py +392 -0
  292. webscout/swiftcli/exceptions.py +21 -0
  293. webscout/swiftcli/plugins/__init__.py +9 -0
  294. webscout/swiftcli/plugins/base.py +134 -0
  295. webscout/swiftcli/plugins/manager.py +269 -0
  296. webscout/swiftcli/utils/__init__.py +58 -0
  297. webscout/swiftcli/utils/formatting.py +251 -0
  298. webscout/swiftcli/utils/parsing.py +368 -0
  299. webscout/update_checker.py +280 -136
  300. webscout/utils.py +28 -14
  301. webscout/version.py +2 -1
  302. webscout/version.py.bak +3 -0
  303. webscout/zeroart/__init__.py +218 -55
  304. webscout/zeroart/base.py +70 -60
  305. webscout/zeroart/effects.py +155 -99
  306. webscout/zeroart/fonts.py +1799 -816
  307. webscout-2026.1.19.dist-info/METADATA +638 -0
  308. webscout-2026.1.19.dist-info/RECORD +312 -0
  309. {webscout-8.2.2.dist-info → webscout-2026.1.19.dist-info}/WHEEL +1 -1
  310. webscout-2026.1.19.dist-info/entry_points.txt +4 -0
  311. webscout-2026.1.19.dist-info/top_level.txt +1 -0
  312. inferno/__init__.py +0 -6
  313. inferno/__main__.py +0 -9
  314. inferno/cli.py +0 -6
  315. webscout/DWEBS.py +0 -477
  316. webscout/Extra/autocoder/__init__.py +0 -9
  317. webscout/Extra/autocoder/autocoder.py +0 -849
  318. webscout/Extra/autocoder/autocoder_utiles.py +0 -332
  319. webscout/LLM.py +0 -442
  320. webscout/Litlogger/__init__.py +0 -67
  321. webscout/Litlogger/core/__init__.py +0 -6
  322. webscout/Litlogger/core/level.py +0 -23
  323. webscout/Litlogger/core/logger.py +0 -165
  324. webscout/Litlogger/handlers/__init__.py +0 -12
  325. webscout/Litlogger/handlers/console.py +0 -33
  326. webscout/Litlogger/handlers/file.py +0 -143
  327. webscout/Litlogger/handlers/network.py +0 -173
  328. webscout/Litlogger/styles/__init__.py +0 -7
  329. webscout/Litlogger/styles/colors.py +0 -249
  330. webscout/Litlogger/styles/formats.py +0 -458
  331. webscout/Litlogger/styles/text.py +0 -87
  332. webscout/Litlogger/utils/__init__.py +0 -6
  333. webscout/Litlogger/utils/detectors.py +0 -153
  334. webscout/Litlogger/utils/formatters.py +0 -200
  335. webscout/Local/__init__.py +0 -12
  336. webscout/Local/__main__.py +0 -9
  337. webscout/Local/api.py +0 -576
  338. webscout/Local/cli.py +0 -516
  339. webscout/Local/config.py +0 -75
  340. webscout/Local/llm.py +0 -287
  341. webscout/Local/model_manager.py +0 -253
  342. webscout/Local/server.py +0 -721
  343. webscout/Local/utils.py +0 -93
  344. webscout/Provider/AI21.py +0 -177
  345. webscout/Provider/AISEARCH/DeepFind.py +0 -250
  346. webscout/Provider/AISEARCH/ISou.py +0 -256
  347. webscout/Provider/AISEARCH/felo_search.py +0 -228
  348. webscout/Provider/AISEARCH/genspark_search.py +0 -208
  349. webscout/Provider/AISEARCH/hika_search.py +0 -194
  350. webscout/Provider/AISEARCH/scira_search.py +0 -324
  351. webscout/Provider/Aitopia.py +0 -292
  352. webscout/Provider/AllenAI.py +0 -413
  353. webscout/Provider/Blackboxai.py +0 -229
  354. webscout/Provider/C4ai.py +0 -432
  355. webscout/Provider/ChatGPTClone.py +0 -226
  356. webscout/Provider/ChatGPTES.py +0 -237
  357. webscout/Provider/ChatGPTGratis.py +0 -194
  358. webscout/Provider/Chatify.py +0 -175
  359. webscout/Provider/Cloudflare.py +0 -273
  360. webscout/Provider/DeepSeek.py +0 -196
  361. webscout/Provider/ElectronHub.py +0 -709
  362. webscout/Provider/ExaChat.py +0 -342
  363. webscout/Provider/Free2GPT.py +0 -241
  364. webscout/Provider/GPTWeb.py +0 -193
  365. webscout/Provider/Glider.py +0 -211
  366. webscout/Provider/HF_space/__init__.py +0 -0
  367. webscout/Provider/HF_space/qwen_qwen2.py +0 -206
  368. webscout/Provider/HuggingFaceChat.py +0 -462
  369. webscout/Provider/Hunyuan.py +0 -272
  370. webscout/Provider/LambdaChat.py +0 -392
  371. webscout/Provider/Llama.py +0 -200
  372. webscout/Provider/Llama3.py +0 -204
  373. webscout/Provider/Marcus.py +0 -148
  374. webscout/Provider/OLLAMA.py +0 -396
  375. webscout/Provider/OPENAI/c4ai.py +0 -367
  376. webscout/Provider/OPENAI/chatgptclone.py +0 -460
  377. webscout/Provider/OPENAI/exachat.py +0 -433
  378. webscout/Provider/OPENAI/freeaichat.py +0 -352
  379. webscout/Provider/OPENAI/opkfc.py +0 -488
  380. webscout/Provider/OPENAI/scirachat.py +0 -463
  381. webscout/Provider/OPENAI/standardinput.py +0 -425
  382. webscout/Provider/OPENAI/typegpt.py +0 -346
  383. webscout/Provider/OPENAI/uncovrAI.py +0 -455
  384. webscout/Provider/OPENAI/venice.py +0 -413
  385. webscout/Provider/OPENAI/yep.py +0 -327
  386. webscout/Provider/OpenGPT.py +0 -199
  387. webscout/Provider/Perplexitylabs.py +0 -415
  388. webscout/Provider/Phind.py +0 -535
  389. webscout/Provider/PizzaGPT.py +0 -198
  390. webscout/Provider/Reka.py +0 -214
  391. webscout/Provider/StandardInput.py +0 -278
  392. webscout/Provider/TTI/AiForce/__init__.py +0 -22
  393. webscout/Provider/TTI/AiForce/async_aiforce.py +0 -224
  394. webscout/Provider/TTI/AiForce/sync_aiforce.py +0 -245
  395. webscout/Provider/TTI/FreeAIPlayground/__init__.py +0 -9
  396. webscout/Provider/TTI/FreeAIPlayground/async_freeaiplayground.py +0 -181
  397. webscout/Provider/TTI/FreeAIPlayground/sync_freeaiplayground.py +0 -180
  398. webscout/Provider/TTI/ImgSys/__init__.py +0 -23
  399. webscout/Provider/TTI/ImgSys/async_imgsys.py +0 -202
  400. webscout/Provider/TTI/ImgSys/sync_imgsys.py +0 -195
  401. webscout/Provider/TTI/MagicStudio/__init__.py +0 -2
  402. webscout/Provider/TTI/MagicStudio/async_magicstudio.py +0 -111
  403. webscout/Provider/TTI/MagicStudio/sync_magicstudio.py +0 -109
  404. webscout/Provider/TTI/Nexra/__init__.py +0 -22
  405. webscout/Provider/TTI/Nexra/async_nexra.py +0 -286
  406. webscout/Provider/TTI/Nexra/sync_nexra.py +0 -258
  407. webscout/Provider/TTI/PollinationsAI/__init__.py +0 -23
  408. webscout/Provider/TTI/PollinationsAI/async_pollinations.py +0 -311
  409. webscout/Provider/TTI/PollinationsAI/sync_pollinations.py +0 -265
  410. webscout/Provider/TTI/aiarta/__init__.py +0 -2
  411. webscout/Provider/TTI/aiarta/async_aiarta.py +0 -482
  412. webscout/Provider/TTI/aiarta/sync_aiarta.py +0 -440
  413. webscout/Provider/TTI/artbit/__init__.py +0 -22
  414. webscout/Provider/TTI/artbit/async_artbit.py +0 -155
  415. webscout/Provider/TTI/artbit/sync_artbit.py +0 -148
  416. webscout/Provider/TTI/fastflux/__init__.py +0 -22
  417. webscout/Provider/TTI/fastflux/async_fastflux.py +0 -261
  418. webscout/Provider/TTI/fastflux/sync_fastflux.py +0 -252
  419. webscout/Provider/TTI/huggingface/__init__.py +0 -22
  420. webscout/Provider/TTI/huggingface/async_huggingface.py +0 -199
  421. webscout/Provider/TTI/huggingface/sync_huggingface.py +0 -195
  422. webscout/Provider/TTI/piclumen/__init__.py +0 -23
  423. webscout/Provider/TTI/piclumen/async_piclumen.py +0 -268
  424. webscout/Provider/TTI/piclumen/sync_piclumen.py +0 -233
  425. webscout/Provider/TTI/pixelmuse/__init__.py +0 -4
  426. webscout/Provider/TTI/pixelmuse/async_pixelmuse.py +0 -249
  427. webscout/Provider/TTI/pixelmuse/sync_pixelmuse.py +0 -182
  428. webscout/Provider/TTI/talkai/__init__.py +0 -4
  429. webscout/Provider/TTI/talkai/async_talkai.py +0 -229
  430. webscout/Provider/TTI/talkai/sync_talkai.py +0 -207
  431. webscout/Provider/TTS/gesserit.py +0 -127
  432. webscout/Provider/TeachAnything.py +0 -187
  433. webscout/Provider/Venice.py +0 -219
  434. webscout/Provider/VercelAI.py +0 -234
  435. webscout/Provider/WebSim.py +0 -228
  436. webscout/Provider/Writecream.py +0 -211
  437. webscout/Provider/WritingMate.py +0 -197
  438. webscout/Provider/aimathgpt.py +0 -189
  439. webscout/Provider/askmyai.py +0 -158
  440. webscout/Provider/asksteve.py +0 -203
  441. webscout/Provider/bagoodex.py +0 -145
  442. webscout/Provider/chatglm.py +0 -205
  443. webscout/Provider/copilot.py +0 -428
  444. webscout/Provider/freeaichat.py +0 -271
  445. webscout/Provider/gaurish.py +0 -244
  446. webscout/Provider/geminiprorealtime.py +0 -160
  447. webscout/Provider/granite.py +0 -187
  448. webscout/Provider/hermes.py +0 -219
  449. webscout/Provider/koala.py +0 -268
  450. webscout/Provider/labyrinth.py +0 -340
  451. webscout/Provider/lepton.py +0 -194
  452. webscout/Provider/llamatutor.py +0 -192
  453. webscout/Provider/multichat.py +0 -325
  454. webscout/Provider/promptrefine.py +0 -193
  455. webscout/Provider/scira_chat.py +0 -277
  456. webscout/Provider/scnet.py +0 -187
  457. webscout/Provider/talkai.py +0 -194
  458. webscout/Provider/tutorai.py +0 -252
  459. webscout/Provider/typegpt.py +0 -232
  460. webscout/Provider/uncovr.py +0 -312
  461. webscout/Provider/yep.py +0 -376
  462. webscout/litprinter/__init__.py +0 -59
  463. webscout/scout/core.py +0 -881
  464. webscout/tempid.py +0 -128
  465. webscout/webscout_search.py +0 -1346
  466. webscout/webscout_search_async.py +0 -877
  467. webscout/yep_search.py +0 -297
  468. webscout-8.2.2.dist-info/METADATA +0 -734
  469. webscout-8.2.2.dist-info/RECORD +0 -309
  470. webscout-8.2.2.dist-info/entry_points.txt +0 -5
  471. webscout-8.2.2.dist-info/top_level.txt +0 -3
  472. webstoken/__init__.py +0 -30
  473. webstoken/classifier.py +0 -189
  474. webstoken/keywords.py +0 -216
  475. webstoken/language.py +0 -128
  476. webstoken/ner.py +0 -164
  477. webstoken/normalizer.py +0 -35
  478. webstoken/processor.py +0 -77
  479. webstoken/sentiment.py +0 -206
  480. webstoken/stemmer.py +0 -73
  481. webstoken/tagger.py +0 -60
  482. webstoken/tokenizer.py +0 -158
  483. {webscout-8.2.2.dist-info → webscout-2026.1.19.dist-info/licenses}/LICENSE.md +0 -0
webscout/scout/core.py DELETED
@@ -1,881 +0,0 @@
-"""
-Scout - The next-gen web scraping library! 🚀
-A powerful, flexible, and performant HTML parsing library.
-Enhanced with advanced features and intelligent parsing.
-"""
-
-import re
-import json
-import hashlib
-import unicodedata
-import requests
-from markdownify import MarkdownConverter
-import concurrent.futures
-import urllib.parse
-from collections import Counter
-from typing import Union, List, Dict, Optional, Callable, Any, Iterator, Set
-
-from .parsers.html_parser import HTMLParser
-from .parsers.lxml_parser import LXMLParser
-from .parsers.html5lib_parser import HTML5Parser
-from .element import Tag, NavigableString
-from .utils import decode_markup
-
-class ScoutTextAnalyzer:
-    """
-    Advanced text analysis and processing utility.
-    """
-    @staticmethod
-    def tokenize(text: str, lowercase=True, remove_punctuation=True) -> List[str]:
-        """
-        Tokenize text into words.
-
-        Args:
-            text (str): Input text
-            lowercase (bool, optional): Convert to lowercase
-            remove_punctuation (bool, optional): Remove punctuation
-
-        Returns:
-            List[str]: List of tokens
-        """
-        if lowercase:
-            text = text.lower()
-
-        if remove_punctuation:
-            text = re.sub(r'[^\w\s]', '', text)
-
-        return text.split()
-
-    @staticmethod
-    def count_words(text: str) -> Dict[str, int]:
-        """
-        Count word frequencies.
-
-        Args:
-            text (str): Input text
-
-        Returns:
-            Dict[str, int]: Word frequency dictionary
-        """
-        return dict(Counter(ScoutTextAnalyzer.tokenize(text)))
-
-    @staticmethod
-    def extract_entities(text: str) -> Dict[str, Set[str]]:
-        """
-        Extract named entities from text.
-
-        Args:
-            text (str): Input text
-
-        Returns:
-            Dict[str, Set[str]]: Extracted entities
-        """
-        entities = {
-            'emails': set(re.findall(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', text)),
-            'urls': set(re.findall(r'https?://\S+', text)),
-            'phones': set(re.findall(r'\b\d{3}[-.]?\d{3}[-.]?\d{4}\b', text)),
-            'dates': set(re.findall(r'\b\d{1,2}[-/]\d{1,2}[-/]\d{2,4}\b', text))
-        }
-        return entities
-
-class ScoutWebAnalyzer:
-    """
-    Advanced web content analysis utility.
-    """
-    @staticmethod
-    def analyze_page_structure(scout_obj: 'Scout') -> Dict[str, Any]:
-        """
-        Analyze the structure of a web page.
-
-        Args:
-            scout_obj (Scout): Parsed Scout object
-
-        Returns:
-            Dict[str, Any]: Page structure analysis
-        """
-        analysis = {
-            'tag_distribution': {},
-            'class_distribution': {},
-            'id_distribution': {},
-            'depth_analysis': {}
-        }
-
-        # Tag distribution
-        for tag in scout_obj.find_all():
-            analysis['tag_distribution'][tag.name] = analysis['tag_distribution'].get(tag.name, 0) + 1
-
-        # Class distribution
-        for tag in scout_obj.find_all(attrs={'class': True}):
-            for cls in tag.get('class', []):
-                analysis['class_distribution'][cls] = analysis['class_distribution'].get(cls, 0) + 1
-
-        # ID distribution
-        for tag in scout_obj.find_all(attrs={'id': True}):
-            analysis['id_distribution'][tag.get('id')] = analysis['id_distribution'].get(tag.get('id'), 0) + 1
-
-        # Depth analysis
-        def _analyze_depth(tag, current_depth=0):
-            analysis['depth_analysis'][current_depth] = analysis['depth_analysis'].get(current_depth, 0) + 1
-            for child in tag.contents:
-                if isinstance(child, Tag):
-                    _analyze_depth(child, current_depth + 1)
-
-        _analyze_depth(scout_obj._soup)
-
-        return analysis
-
-class ScoutSearchResult:
-    """
-    Represents a search result with advanced querying capabilities.
-    Enhanced with more intelligent filtering and processing.
-    """
-    def __init__(self, results: List[Tag]):
-        """
-        Initialize a search result collection.
-
-        Args:
-            results (List[Tag]): List of matching tags
-        """
-        self._results = results
-
-    def __len__(self) -> int:
-        return len(self._results)
-
-    def __iter__(self) -> Iterator[Tag]:
-        return iter(self._results)
-
-    def __getitem__(self, index: Union[int, slice]) -> Union[Tag, List[Tag]]:
-        return self._results[index]
-
-    def texts(self, separator=' ', strip=True) -> List[str]:
-        """
-        Extract texts from all results.
-
-        Args:
-            separator (str, optional): Text separator
-            strip (bool, optional): Strip whitespace
-
-        Returns:
-            List[str]: List of extracted texts
-        """
-        return [tag.get_text(separator, strip) for tag in self._results]
-
-    def attrs(self, attr_name: str) -> List[Any]:
-        """
-        Extract a specific attribute from all results.
-
-        Args:
-            attr_name (str): Attribute name to extract
-
-        Returns:
-            List[Any]: List of attribute values
-        """
-        return [tag.get(attr_name) for tag in self._results]
-
-    def filter(self, predicate: Callable[[Tag], bool]) -> 'ScoutSearchResult':
-        """
-        Filter results using a predicate function.
-
-        Args:
-            predicate (Callable[[Tag], bool]): Filtering function
-
-        Returns:
-            ScoutSearchResult: Filtered search results
-        """
-        return ScoutSearchResult([tag for tag in self._results if predicate(tag)])
-
-    def map(self, transform: Callable[[Tag], Any]) -> List[Any]:
-        """
-        Transform results using a mapping function.
-
-        Args:
-            transform (Callable[[Tag], Any]): Transformation function
-
-        Returns:
-            List[Any]: Transformed results
-        """
-        return [transform(tag) for tag in self._results]
-
-    def analyze_text(self) -> Dict[str, Any]:
-        """
-        Perform text analysis on search results.
-
-        Returns:
-            Dict[str, Any]: Text analysis results
-        """
-        texts = self.texts(strip=True)
-        full_text = ' '.join(texts)
-
-        return {
-            'total_results': len(self._results),
-            'word_count': ScoutTextAnalyzer.count_words(full_text),
-            'entities': ScoutTextAnalyzer.extract_entities(full_text)
-        }
-
-class ScoutCrawler:
-    """
-    Advanced web crawling utility for Scout library.
-    """
-    def __init__(self, base_url: str, max_depth: int = 3, max_pages: int = 50):
-        """
-        Initialize the web crawler.
-
-        Args:
-            base_url (str): Starting URL to crawl
-            max_depth (int, optional): Maximum crawl depth
-            max_pages (int, optional): Maximum number of pages to crawl
-        """
-        self.base_url = base_url
-        self.max_depth = max_depth
-        self.max_pages = max_pages
-        self.visited_urls = set()
-        self.crawled_pages = []
-
-    def _is_valid_url(self, url: str) -> bool:
-        """
-        Check if a URL is valid and within the same domain.
-
-        Args:
-            url (str): URL to validate
-
-        Returns:
-            bool: Whether the URL is valid
-        """
-        try:
-            parsed_base = urllib.parse.urlparse(self.base_url)
-            parsed_url = urllib.parse.urlparse(url)
-
-            return (
-                parsed_url.scheme in ['http', 'https'] and
-                parsed_base.netloc == parsed_url.netloc and
-                len(self.visited_urls) < self.max_pages
-            )
-        except Exception:
-            return False
-
-    def _crawl_page(self, url: str, depth: int = 0) -> Dict[str, Union[str, List[str]]]:
-        """
-        Crawl a single page and extract information.
-
-        Args:
-            url (str): URL to crawl
-            depth (int, optional): Current crawl depth
-
-        Returns:
-            Dict[str, Union[str, List[str]]]: Crawled page information
-        """
-        if depth > self.max_depth or url in self.visited_urls:
-            return {}
-
-        try:
-            response = requests.get(url, timeout=10)
-            response.raise_for_status()
-
-            scout = Scout(response.text, features='lxml')
-
-            page_info = {
-                'url': url,
-                'title': scout.find('title').get_text() if scout.find('title') else '',
-                'links': [
-                    urllib.parse.urljoin(url, link.get('href'))
-                    for link in scout.find_all('a', href=True)
-                    if self._is_valid_url(urllib.parse.urljoin(url, link.get('href')))
-                ],
-                'text': scout.get_text(),
-                'markdown': scout.to_markdown()
-            }
-
-            self.visited_urls.add(url)
-            self.crawled_pages.append(page_info)
-
-            return page_info
-        except Exception as e:
-            print(f"Error crawling {url}: {e}")
-            return {}
-
-    def crawl(self) -> List[Dict[str, Union[str, List[str]]]]:
-        """
-        Start web crawling from base URL.
-
-        Returns:
-            List[Dict[str, Union[str, List[str]]]]: List of crawled pages
-        """
-        with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
-            futures = {executor.submit(self._crawl_page, self.base_url, 0)}
-
-            while futures:
-                done, futures = concurrent.futures.wait(
-                    futures, return_when=concurrent.futures.FIRST_COMPLETED
-                )
-
-                for future in done:
-                    page_info = future.result()
-
-                    for link in page_info.get('links', []):
-                        if link not in self.visited_urls:
-                            futures.add(
-                                executor.submit(
-                                    self._crawl_page,
-                                    link,
-                                    self.visited_urls.count(self.base_url) + 1
-                                )
-                            )
-
-        return self.crawled_pages
-
-class Scout:
-    """
-    Scout - Making web scraping a breeze! 🌊
-    A comprehensive HTML parsing and traversal library.
-    Enhanced with advanced features and intelligent parsing.
-    """
-
-    # Available parsers - choose your weapon! ⚔️
-    PARSERS = {
-        'html.parser': HTMLParser,
-        'lxml': LXMLParser,
-        'html5lib': HTML5Parser,
-        'lxml-xml': LXMLParser, # For XML parsing
-    }
-
-    def __init__(self, markup="", features='html.parser', from_encoding=None, **kwargs):
-        """
-        Initialize Scout with HTML content.
-
-        Args:
-            markup (str): HTML content to parse
-            features (str): Parser to use ('html.parser', 'lxml', 'html5lib', 'lxml-xml')
-            from_encoding (str): Source encoding (if known)
-            **kwargs: Additional parsing options
-        """
-        # Intelligent markup handling
-        self.markup = self._preprocess_markup(markup, from_encoding)
-        self.features = features
-        self.from_encoding = from_encoding
-
-        # Get the right parser for the job
-        if features not in self.PARSERS:
-            raise ValueError(
-                f"Invalid parser '{features}'! Choose from: {', '.join(self.PARSERS.keys())}"
-            )
-
-        parser_class = self.PARSERS[features]
-        self.parser = parser_class()
-
-        # Parse that HTML! 🎯
-        self._soup = self.parser.parse(self.markup)
-
-        # BeautifulSoup-like attributes
-        self.name = self._soup.name if hasattr(self._soup, 'name') else None
-        self.attrs = self._soup.attrs if hasattr(self._soup, 'attrs') else {}
-
-        # Advanced parsing options
-        self._cache = {}
-
-        # Text and web analyzers
-        self.text_analyzer = ScoutTextAnalyzer()
-        self.web_analyzer = ScoutWebAnalyzer()
-
-    def normalize_text(self, text: str, form='NFKD') -> str:
-        """
-        Normalize text using Unicode normalization.
-
-        Args:
-            text (str): Input text
-            form (str, optional): Normalization form
-
-        Returns:
-            str: Normalized text
-        """
-        return unicodedata.normalize(form, text)
-
-    def url_parse(self, url: str) -> Dict[str, str]:
-        """
-        Parse and analyze a URL.
-
-        Args:
-            url (str): URL to parse
-
-        Returns:
-            Dict[str, str]: Parsed URL components
-        """
-        parsed = urllib.parse.urlparse(url)
-        return {
-            'scheme': parsed.scheme,
-            'netloc': parsed.netloc,
-            'path': parsed.path,
-            'params': parsed.params,
-            'query': parsed.query,
-            'fragment': parsed.fragment
-        }
-
-    def analyze_page_structure(self) -> Dict[str, Any]:
-        """
-        Analyze the structure of the parsed page.
-
-        Returns:
-            Dict[str, Any]: Page structure analysis
-        """
-        return self.web_analyzer.analyze_page_structure(self)
-
-    def analyze_text(self, text: Optional[str] = None) -> Dict[str, Any]:
-        """
-        Perform advanced text analysis.
-
-        Args:
-            text (str, optional): Text to analyze. If None, uses page text.
-
-        Returns:
-            Dict[str, Any]: Text analysis results
-        """
-        if text is None:
-            text = self.get_text()
-
-        return {
-            'word_count': self.text_analyzer.count_words(text),
-            'entities': self.text_analyzer.extract_entities(text),
-            'tokens': self.text_analyzer.tokenize(text)
-        }
-
-    def extract_semantic_info(self) -> Dict[str, Any]:
-        """
-        Extract semantic information from the document.
-
-        Returns:
-            Dict[str, Any]: Semantic information
-        """
-        semantic_info = {
-            'headings': {
-                'h1': [h.get_text(strip=True) for h in self.find_all('h1')],
-                'h2': [h.get_text(strip=True) for h in self.find_all('h2')],
-                'h3': [h.get_text(strip=True) for h in self.find_all('h3')]
-            },
-            'lists': {
-                'ul': [ul.find_all('li') for ul in self.find_all('ul')],
-                'ol': [ol.find_all('li') for ol in self.find_all('ol')]
-            },
-            'tables': {
-                'count': len(self.find_all('table')),
-                'headers': [table.find_all('th') for table in self.find_all('table')]
-            }
-        }
-        return semantic_info
-
-    def cache(self, key: str, value: Any = None) -> Any:
-        """
-        Manage a cache for parsed content.
-
-        Args:
-            key (str): Cache key
-            value (Any, optional): Value to cache
-
-        Returns:
-            Any: Cached value or None
-        """
-        if value is not None:
-            self._cache[key] = value
-        return self._cache.get(key)
-
-    def hash_content(self, method='md5') -> str:
-        """
-        Generate a hash of the parsed content.
-
-        Args:
-            method (str, optional): Hashing method
-
-        Returns:
-            str: Content hash
-        """
-        hash_methods = {
-            'md5': hashlib.md5,
-            'sha1': hashlib.sha1,
-            'sha256': hashlib.sha256
-        }
-
-        if method not in hash_methods:
-            raise ValueError(f"Unsupported hash method: {method}")
-
-        hasher = hash_methods[method]()
-        hasher.update(str(self._soup).encode('utf-8'))
-        return hasher.hexdigest()
-
-    def extract_links(self, base_url: Optional[str] = None) -> List[Dict[str, str]]:
-        """
-        Extract all links from the document.
-
-        Args:
-            base_url (str, optional): Base URL for resolving relative links
-
-        Returns:
-            List[Dict[str, str]]: List of link dictionaries
-        """
-        links = []
-        for link in self.find_all(['a', 'link']):
-            href = link.get('href')
-            if href:
-                # Resolve relative URLs if base_url is provided
-                if base_url and not href.startswith(('http://', 'https://', '//')):
-                    href = f"{base_url.rstrip('/')}/{href.lstrip('/')}"
-
-                links.append({
-                    'href': href,
-                    'text': link.get_text(strip=True),
-                    'rel': link.get('rel', [None])[0],
-                    'type': link.get('type')
-                })
-        return links
-
-    def extract_metadata(self) -> Dict[str, Any]:
-        """
-        Extract metadata from HTML document.
-
-        Returns:
-            Dict[str, Any]: Extracted metadata
-        """
-        metadata = {
-            'title': self.find('title').get_text() if self.find('title') else None,
-            'description': self.find('meta', attrs={'name': 'description'}).get('content') if self.find('meta', attrs={'name': 'description'}) else None,
-            'keywords': self.find('meta', attrs={'name': 'keywords'}).get('content', '').split(',') if self.find('meta', attrs={'name': 'keywords'}) else [],
-            'og_metadata': {},
-            'twitter_metadata': {}
-        }
-
-        # Open Graph metadata
-        for meta in self.find_all('meta', attrs={'property': re.compile(r'^og:')}):
-            key = meta.get('property')[3:]
-            metadata['og_metadata'][key] = meta.get('content')
-
-        # Twitter Card metadata
-        for meta in self.find_all('meta', attrs={'name': re.compile(r'^twitter:')}):
-            key = meta.get('name')[8:]
-            metadata['twitter_metadata'][key] = meta.get('content')
-
-        return metadata
-
-    def to_json(self, indent=2) -> str:
-        """
-        Convert parsed content to JSON.
-
-        Args:
-            indent (int, optional): JSON indentation
-
-        Returns:
-            str: JSON representation of the document
-        """
-        def _tag_to_dict(tag):
-            if isinstance(tag, NavigableString):
-                return str(tag)
-
-            result = {
-                'name': tag.name,
-                'attrs': tag.attrs,
-                'text': tag.get_text(strip=True)
-            }
-
-            if tag.contents:
-                result['children'] = [_tag_to_dict(child) for child in tag.contents]
-
-            return result
-
-        return json.dumps(_tag_to_dict(self._soup), indent=indent)
-
-    def find(self, name=None, attrs={}, recursive=True, text=None, **kwargs) -> Optional[Tag]:
-        """
-        Find the first matching element.
-
-        Args:
-            name (str, optional): Tag name to search for
-            attrs (dict, optional): Attributes to match
-            recursive (bool, optional): Search recursively
-            text (str, optional): Text content to match
-
-        Returns:
-            Tag or None: First matching element
-        """
-        result = self._soup.find(name, attrs, recursive, text, **kwargs)
-        return ScoutSearchResult([result]) if result else ScoutSearchResult([])
-
-    def find_all(self, name=None, attrs={}, recursive=True, text=None, limit=None, **kwargs) -> ScoutSearchResult:
-        """
-        Find all matching elements.
-
-        Args:
-            name (str, optional): Tag name to search for
-            attrs (dict, optional): Attributes to match
-            recursive (bool, optional): Search recursively
-            text (str, optional): Text content to match
-            limit (int, optional): Maximum number of results
-
-        Returns:
-            ScoutSearchResult: List of matching elements
-        """
-        results = self._soup.find_all(name, attrs, recursive, text, limit, **kwargs)
-        return ScoutSearchResult(results)
-
-    def find_parent(self, name=None, attrs={}, **kwargs):
-        """
-        Find the first parent matching given criteria.
-
-        Args:
-            name (str, optional): Tag name to search for
-            attrs (dict, optional): Attributes to match
-
-        Returns:
-            Tag or None: First matching parent
-        """
-        current = self._soup.parent
-        while current:
-            if (name is None or current.name == name) and \
-               all(current.get(k) == v for k, v in attrs.items()):
-                return current
-            current = current.parent
-        return None
-
-    def find_parents(self, name=None, attrs={}, limit=None, **kwargs):
-        """
-        Find all parents matching given criteria.
-
-        Args:
-            name (str, optional): Tag name to search for
-            attrs (dict, optional): Attributes to match
-            limit (int, optional): Maximum number of results
-
-        Returns:
-            List[Tag]: List of matching parents
-        """
-        parents = []
-        current = self._soup.parent
-        while current and (limit is None or len(parents) < limit):
-            if (name is None or current.name == name) and \
-               all(current.get(k) == v for k, v in attrs.items()):
-                parents.append(current)
-            current = current.parent
-        return parents
-
-    def find_next_sibling(self, name=None, attrs={}, **kwargs):
-        """
-        Find the next sibling matching given criteria.
-
-        Args:
-            name (str, optional): Tag name to search for
-            attrs (dict, optional): Attributes to match
-
-        Returns:
-            Tag or None: First matching next sibling
-        """
-        if not self._soup.parent:
-            return None
-
-        siblings = self._soup.parent.contents
-        try:
-            current_index = siblings.index(self._soup)
-            for sibling in siblings[current_index + 1:]:
-                if isinstance(sibling, Tag):
-                    if (name is None or sibling.name == name) and \
-                       all(sibling.get(k) == v for k, v in attrs.items()):
-                        return sibling
-        except ValueError:
-            pass
-        return None
-
-    def find_next_siblings(self, name=None, attrs={}, limit=None, **kwargs):
-        """
-        Find all next siblings matching given criteria.
-
-        Args:
-            name (str, optional): Tag name to search for
-            attrs (dict, optional): Attributes to match
-            limit (int, optional): Maximum number of results
-
-        Returns:
-            List[Tag]: List of matching next siblings
-        """
-        if not self._soup.parent:
-            return []
-
-        siblings = []
-        siblings_list = self._soup.parent.contents
-        try:
-            current_index = siblings_list.index(self._soup)
-            for sibling in siblings_list[current_index + 1:]:
-                if isinstance(sibling, Tag):
-                    if (name is None or sibling.name == name) and \
-                       all(sibling.get(k) == v for k, v in attrs.items()):
-                        siblings.append(sibling)
-                        if limit and len(siblings) == limit:
-                            break
-        except ValueError:
-            pass
-        return siblings
-
-    def select(self, selector: str) -> List[Tag]:
-        """
-        Select elements using CSS selector.
-
-        Args:
-            selector (str): CSS selector string
-
-        Returns:
-            List[Tag]: List of matching elements
-        """
-        return self._soup.select(selector)
-
-    def select_one(self, selector: str) -> Optional[Tag]:
-        """
-        Select the first element matching the CSS selector.
-
-        Args:
-            selector (str): CSS selector string
-
-        Returns:
-            Tag or None: First matching element
-        """
-        return self._soup.select_one(selector)
-
-    def get_text(self, separator=' ', strip=False, types=None) -> str:
-        """
-        Extract all text from the parsed document.
-
-        Args:
-            separator (str, optional): Text separator
-            strip (bool, optional): Strip whitespace
-            types (list, optional): Types of content to extract
-
-        Returns:
-            str: Extracted text
-        """
-        return self._soup.get_text(separator, strip, types)
-
-    def prettify(self, formatter='minimal') -> str:
-        """
-        Return a formatted, pretty-printed version of the HTML.
-
-        Args:
-            formatter (str, optional): Formatting style
-
-        Returns:
-            str: Prettified HTML
-        """
-        return self._soup.prettify(formatter)
-
-    def decompose(self, tag: Tag = None) -> None:
-        """
-        Remove a tag and its contents from the document.
-
-        Args:
-            tag (Tag, optional): Tag to remove. If None, removes the root tag.
-        """
-        if tag is None:
-            tag = self._soup
-        tag.decompose()
-
-    def extract(self, tag: Tag = None) -> Tag:
-        """
-        Remove a tag from the document and return it.
-
-        Args:
-            tag (Tag, optional): Tag to extract. If None, extracts the root tag.
-
-        Returns:
-            Tag: Extracted tag
-        """
-        if tag is None:
-            tag = self._soup
-        return tag.extract()
-
-    def clear(self, tag: Tag = None) -> None:
-        """
-        Remove a tag's contents while keeping the tag itself.
-
-        Args:
-            tag (Tag, optional): Tag to clear. If None, clears the root tag.
-        """
-        if tag is None:
-            tag = self._soup
-        tag.clear()
-
-    def replace_with(self, old_tag: Tag, new_tag: Tag) -> None:
-        """
-        Replace one tag with another.
-
-        Args:
-            old_tag (Tag): Tag to replace
-            new_tag (Tag): Replacement tag
-        """
-        old_tag.replace_with(new_tag)
-
-    def encode(self, encoding='utf-8') -> bytes:
-        """
-        Encode the document to a specific encoding.
-
-        Args:
-            encoding (str, optional): Encoding to use
-
-        Returns:
-            bytes: Encoded document
-        """
-        return str(self._soup).encode(encoding)
-
-    def decode(self, encoding='utf-8') -> str:
-        """
-        Decode the document from a specific encoding.
-
-        Args:
-            encoding (str, optional): Encoding to use
-
-        Returns:
-            str: Decoded document
-        """
-        return str(self._soup)
-
-    def __str__(self) -> str:
-        """
-        String representation of the parsed document.
-
-        Returns:
-            str: HTML content
-        """
-        return str(self._soup)
-
-    def __repr__(self) -> str:
-        """
-        Detailed representation of the Scout object.
-
-        Returns:
-            str: Scout object description
-        """
-        return f"Scout(features='{self.features}', content_length={len(self.markup)})"
-
-    def _preprocess_markup(self, markup: str, encoding: Optional[str] = None) -> str:
-        """
-        Preprocess markup before parsing.
-
-        Args:
-            markup (str): Input markup
-            encoding (str, optional): Encoding to use
-
-        Returns:
-            str: Preprocessed markup
-        """
-        # Decode markup
-        decoded_markup = decode_markup(markup, encoding)
-
-        # Basic HTML cleaning
-        # Remove comments, normalize whitespace, etc.
-        decoded_markup = re.sub(r'<!--.*?-->', '', decoded_markup, flags=re.DOTALL)
-        decoded_markup = re.sub(r'\s+', ' ', decoded_markup)
-
-        return decoded_markup
-
-    def to_markdown(self, heading_style='ATX') -> str:
-        """
-        Convert HTML to Markdown.
-
-        Args:
-            heading_style (str, optional): Markdown heading style
-
-        Returns:
-            str: Markdown representation of the document
-        """
-        converter = MarkdownConverter(heading_style=heading_style)
-        return converter.convert(str(self._soup))
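
For orientation, below is a minimal, hypothetical usage sketch of the Scout API removed above (webscout 8.2.2 and earlier). The import path, the sample HTML, and the availability of non-default parsers are assumptions made for illustration, not details recorded in this diff; in 2026.1.19 the equivalent functionality appears to live under webscout/scout/core/ (scout.py, crawler.py, search_result.py, text_analyzer.py, web_analyzer.py in the file list above).

# Hypothetical example against the deleted webscout/scout/core.py API (not part of the package diff)
from webscout.scout.core import Scout  # assumed import path for webscout <= 8.2.2

html = "<html><head><title>Demo</title></head><body><h1>Hi</h1><a href='/about'>About</a></body></html>"
page = Scout(html, features='html.parser')  # PARSERS: html.parser, lxml, html5lib, lxml-xml

links = page.find_all('a')       # returns a ScoutSearchResult wrapper around matching Tag objects
print(links.attrs('href'))       # ['/about']
print(links.texts())             # ['About']
print(page.get_text())           # plain text of the whole document
print(page.to_markdown())        # HTML -> Markdown via markdownify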