webscout 8.2.2__py3-none-any.whl → 2026.1.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (483) hide show
  1. webscout/AIauto.py +524 -143
  2. webscout/AIbase.py +247 -123
  3. webscout/AIutel.py +68 -132
  4. webscout/Bard.py +1072 -535
  5. webscout/Extra/GitToolkit/__init__.py +2 -2
  6. webscout/Extra/GitToolkit/gitapi/__init__.py +20 -12
  7. webscout/Extra/GitToolkit/gitapi/gist.py +142 -0
  8. webscout/Extra/GitToolkit/gitapi/organization.py +91 -0
  9. webscout/Extra/GitToolkit/gitapi/repository.py +308 -195
  10. webscout/Extra/GitToolkit/gitapi/search.py +162 -0
  11. webscout/Extra/GitToolkit/gitapi/trending.py +236 -0
  12. webscout/Extra/GitToolkit/gitapi/user.py +128 -96
  13. webscout/Extra/GitToolkit/gitapi/utils.py +82 -62
  14. webscout/Extra/YTToolkit/README.md +443 -0
  15. webscout/Extra/YTToolkit/YTdownloader.py +953 -957
  16. webscout/Extra/YTToolkit/__init__.py +3 -3
  17. webscout/Extra/YTToolkit/transcriber.py +595 -476
  18. webscout/Extra/YTToolkit/ytapi/README.md +230 -0
  19. webscout/Extra/YTToolkit/ytapi/__init__.py +22 -6
  20. webscout/Extra/YTToolkit/ytapi/captions.py +190 -0
  21. webscout/Extra/YTToolkit/ytapi/channel.py +302 -307
  22. webscout/Extra/YTToolkit/ytapi/errors.py +13 -13
  23. webscout/Extra/YTToolkit/ytapi/extras.py +178 -45
  24. webscout/Extra/YTToolkit/ytapi/hashtag.py +120 -0
  25. webscout/Extra/YTToolkit/ytapi/https.py +89 -88
  26. webscout/Extra/YTToolkit/ytapi/patterns.py +61 -61
  27. webscout/Extra/YTToolkit/ytapi/playlist.py +59 -59
  28. webscout/Extra/YTToolkit/ytapi/pool.py +8 -8
  29. webscout/Extra/YTToolkit/ytapi/query.py +143 -40
  30. webscout/Extra/YTToolkit/ytapi/shorts.py +122 -0
  31. webscout/Extra/YTToolkit/ytapi/stream.py +68 -63
  32. webscout/Extra/YTToolkit/ytapi/suggestions.py +97 -0
  33. webscout/Extra/YTToolkit/ytapi/utils.py +66 -62
  34. webscout/Extra/YTToolkit/ytapi/video.py +189 -18
  35. webscout/Extra/__init__.py +2 -3
  36. webscout/Extra/gguf.py +1298 -682
  37. webscout/Extra/tempmail/README.md +488 -0
  38. webscout/Extra/tempmail/__init__.py +28 -28
  39. webscout/Extra/tempmail/async_utils.py +143 -141
  40. webscout/Extra/tempmail/base.py +172 -161
  41. webscout/Extra/tempmail/cli.py +191 -187
  42. webscout/Extra/tempmail/emailnator.py +88 -84
  43. webscout/Extra/tempmail/mail_tm.py +378 -361
  44. webscout/Extra/tempmail/temp_mail_io.py +304 -292
  45. webscout/Extra/weather.py +196 -194
  46. webscout/Extra/weather_ascii.py +17 -15
  47. webscout/Provider/AISEARCH/PERPLEXED_search.py +175 -0
  48. webscout/Provider/AISEARCH/Perplexity.py +237 -304
  49. webscout/Provider/AISEARCH/README.md +106 -0
  50. webscout/Provider/AISEARCH/__init__.py +16 -10
  51. webscout/Provider/AISEARCH/brave_search.py +298 -0
  52. webscout/Provider/AISEARCH/iask_search.py +130 -209
  53. webscout/Provider/AISEARCH/monica_search.py +200 -246
  54. webscout/Provider/AISEARCH/webpilotai_search.py +242 -281
  55. webscout/Provider/Algion.py +413 -0
  56. webscout/Provider/Andi.py +74 -69
  57. webscout/Provider/Apriel.py +313 -0
  58. webscout/Provider/Ayle.py +323 -0
  59. webscout/Provider/ChatSandbox.py +329 -0
  60. webscout/Provider/ClaudeOnline.py +365 -0
  61. webscout/Provider/Cohere.py +232 -208
  62. webscout/Provider/DeepAI.py +367 -0
  63. webscout/Provider/Deepinfra.py +343 -173
  64. webscout/Provider/EssentialAI.py +217 -0
  65. webscout/Provider/ExaAI.py +274 -261
  66. webscout/Provider/Gemini.py +60 -54
  67. webscout/Provider/GithubChat.py +385 -367
  68. webscout/Provider/Gradient.py +286 -0
  69. webscout/Provider/Groq.py +556 -670
  70. webscout/Provider/HadadXYZ.py +323 -0
  71. webscout/Provider/HeckAI.py +392 -233
  72. webscout/Provider/HuggingFace.py +387 -0
  73. webscout/Provider/IBM.py +340 -0
  74. webscout/Provider/Jadve.py +317 -266
  75. webscout/Provider/K2Think.py +306 -0
  76. webscout/Provider/Koboldai.py +221 -381
  77. webscout/Provider/Netwrck.py +273 -228
  78. webscout/Provider/Nvidia.py +310 -0
  79. webscout/Provider/OPENAI/DeepAI.py +489 -0
  80. webscout/Provider/OPENAI/K2Think.py +423 -0
  81. webscout/Provider/OPENAI/PI.py +463 -0
  82. webscout/Provider/OPENAI/README.md +890 -0
  83. webscout/Provider/OPENAI/TogetherAI.py +405 -0
  84. webscout/Provider/OPENAI/TwoAI.py +255 -0
  85. webscout/Provider/OPENAI/__init__.py +148 -25
  86. webscout/Provider/OPENAI/ai4chat.py +348 -0
  87. webscout/Provider/OPENAI/akashgpt.py +436 -0
  88. webscout/Provider/OPENAI/algion.py +303 -0
  89. webscout/Provider/OPENAI/ayle.py +365 -0
  90. webscout/Provider/OPENAI/base.py +253 -46
  91. webscout/Provider/OPENAI/cerebras.py +296 -0
  92. webscout/Provider/OPENAI/chatgpt.py +514 -193
  93. webscout/Provider/OPENAI/chatsandbox.py +233 -0
  94. webscout/Provider/OPENAI/deepinfra.py +403 -272
  95. webscout/Provider/OPENAI/e2b.py +2370 -1350
  96. webscout/Provider/OPENAI/elmo.py +278 -0
  97. webscout/Provider/OPENAI/exaai.py +186 -138
  98. webscout/Provider/OPENAI/freeassist.py +446 -0
  99. webscout/Provider/OPENAI/gradient.py +448 -0
  100. webscout/Provider/OPENAI/groq.py +380 -0
  101. webscout/Provider/OPENAI/hadadxyz.py +292 -0
  102. webscout/Provider/OPENAI/heckai.py +100 -104
  103. webscout/Provider/OPENAI/huggingface.py +321 -0
  104. webscout/Provider/OPENAI/ibm.py +425 -0
  105. webscout/Provider/OPENAI/llmchat.py +253 -0
  106. webscout/Provider/OPENAI/llmchatco.py +378 -327
  107. webscout/Provider/OPENAI/meta.py +541 -0
  108. webscout/Provider/OPENAI/netwrck.py +110 -84
  109. webscout/Provider/OPENAI/nvidia.py +317 -0
  110. webscout/Provider/OPENAI/oivscode.py +348 -0
  111. webscout/Provider/OPENAI/openrouter.py +328 -0
  112. webscout/Provider/OPENAI/pydantic_imports.py +1 -0
  113. webscout/Provider/OPENAI/sambanova.py +397 -0
  114. webscout/Provider/OPENAI/sonus.py +126 -115
  115. webscout/Provider/OPENAI/textpollinations.py +218 -133
  116. webscout/Provider/OPENAI/toolbaz.py +136 -166
  117. webscout/Provider/OPENAI/typefully.py +419 -0
  118. webscout/Provider/OPENAI/typliai.py +279 -0
  119. webscout/Provider/OPENAI/utils.py +314 -211
  120. webscout/Provider/OPENAI/wisecat.py +103 -125
  121. webscout/Provider/OPENAI/writecream.py +185 -156
  122. webscout/Provider/OPENAI/x0gpt.py +227 -136
  123. webscout/Provider/OPENAI/zenmux.py +380 -0
  124. webscout/Provider/OpenRouter.py +386 -0
  125. webscout/Provider/Openai.py +337 -496
  126. webscout/Provider/PI.py +443 -344
  127. webscout/Provider/QwenLM.py +346 -254
  128. webscout/Provider/STT/__init__.py +28 -0
  129. webscout/Provider/STT/base.py +303 -0
  130. webscout/Provider/STT/elevenlabs.py +264 -0
  131. webscout/Provider/Sambanova.py +317 -0
  132. webscout/Provider/TTI/README.md +69 -0
  133. webscout/Provider/TTI/__init__.py +37 -12
  134. webscout/Provider/TTI/base.py +147 -0
  135. webscout/Provider/TTI/claudeonline.py +393 -0
  136. webscout/Provider/TTI/magicstudio.py +292 -0
  137. webscout/Provider/TTI/miragic.py +180 -0
  138. webscout/Provider/TTI/pollinations.py +331 -0
  139. webscout/Provider/TTI/together.py +334 -0
  140. webscout/Provider/TTI/utils.py +14 -0
  141. webscout/Provider/TTS/README.md +186 -0
  142. webscout/Provider/TTS/__init__.py +43 -7
  143. webscout/Provider/TTS/base.py +523 -0
  144. webscout/Provider/TTS/deepgram.py +286 -156
  145. webscout/Provider/TTS/elevenlabs.py +189 -111
  146. webscout/Provider/TTS/freetts.py +218 -0
  147. webscout/Provider/TTS/murfai.py +288 -113
  148. webscout/Provider/TTS/openai_fm.py +364 -0
  149. webscout/Provider/TTS/parler.py +203 -111
  150. webscout/Provider/TTS/qwen.py +334 -0
  151. webscout/Provider/TTS/sherpa.py +286 -0
  152. webscout/Provider/TTS/speechma.py +693 -180
  153. webscout/Provider/TTS/streamElements.py +275 -333
  154. webscout/Provider/TTS/utils.py +280 -280
  155. webscout/Provider/TextPollinationsAI.py +221 -121
  156. webscout/Provider/TogetherAI.py +450 -0
  157. webscout/Provider/TwoAI.py +309 -199
  158. webscout/Provider/TypliAI.py +311 -0
  159. webscout/Provider/UNFINISHED/ChatHub.py +219 -0
  160. webscout/Provider/{OPENAI/glider.py → UNFINISHED/ChutesAI.py} +160 -145
  161. webscout/Provider/UNFINISHED/GizAI.py +300 -0
  162. webscout/Provider/UNFINISHED/Marcus.py +218 -0
  163. webscout/Provider/UNFINISHED/Qodo.py +481 -0
  164. webscout/Provider/UNFINISHED/XenAI.py +330 -0
  165. webscout/Provider/{Youchat.py → UNFINISHED/Youchat.py} +64 -47
  166. webscout/Provider/UNFINISHED/aihumanizer.py +41 -0
  167. webscout/Provider/UNFINISHED/grammerchecker.py +37 -0
  168. webscout/Provider/UNFINISHED/liner.py +342 -0
  169. webscout/Provider/UNFINISHED/liner_api_request.py +246 -0
  170. webscout/Provider/UNFINISHED/samurai.py +231 -0
  171. webscout/Provider/WiseCat.py +256 -196
  172. webscout/Provider/WrDoChat.py +390 -0
  173. webscout/Provider/__init__.py +115 -198
  174. webscout/Provider/ai4chat.py +181 -202
  175. webscout/Provider/akashgpt.py +330 -342
  176. webscout/Provider/cerebras.py +397 -242
  177. webscout/Provider/cleeai.py +236 -213
  178. webscout/Provider/elmo.py +291 -234
  179. webscout/Provider/geminiapi.py +343 -208
  180. webscout/Provider/julius.py +245 -223
  181. webscout/Provider/learnfastai.py +333 -266
  182. webscout/Provider/llama3mitril.py +230 -180
  183. webscout/Provider/llmchat.py +308 -213
  184. webscout/Provider/llmchatco.py +321 -311
  185. webscout/Provider/meta.py +996 -794
  186. webscout/Provider/oivscode.py +332 -0
  187. webscout/Provider/searchchat.py +316 -293
  188. webscout/Provider/sonus.py +264 -208
  189. webscout/Provider/toolbaz.py +359 -320
  190. webscout/Provider/turboseek.py +332 -219
  191. webscout/Provider/typefully.py +262 -280
  192. webscout/Provider/x0gpt.py +332 -256
  193. webscout/__init__.py +31 -38
  194. webscout/__main__.py +5 -5
  195. webscout/cli.py +585 -293
  196. webscout/client.py +1497 -0
  197. webscout/conversation.py +140 -565
  198. webscout/exceptions.py +383 -339
  199. webscout/litagent/__init__.py +29 -29
  200. webscout/litagent/agent.py +492 -455
  201. webscout/litagent/constants.py +60 -60
  202. webscout/models.py +505 -181
  203. webscout/optimizers.py +32 -378
  204. webscout/prompt_manager.py +376 -274
  205. webscout/sanitize.py +1514 -0
  206. webscout/scout/README.md +452 -0
  207. webscout/scout/__init__.py +8 -8
  208. webscout/scout/core/__init__.py +7 -7
  209. webscout/scout/core/crawler.py +330 -140
  210. webscout/scout/core/scout.py +800 -568
  211. webscout/scout/core/search_result.py +51 -96
  212. webscout/scout/core/text_analyzer.py +64 -63
  213. webscout/scout/core/text_utils.py +412 -277
  214. webscout/scout/core/web_analyzer.py +54 -52
  215. webscout/scout/element.py +872 -460
  216. webscout/scout/parsers/__init__.py +70 -69
  217. webscout/scout/parsers/html5lib_parser.py +182 -172
  218. webscout/scout/parsers/html_parser.py +238 -236
  219. webscout/scout/parsers/lxml_parser.py +203 -178
  220. webscout/scout/utils.py +38 -37
  221. webscout/search/__init__.py +47 -0
  222. webscout/search/base.py +201 -0
  223. webscout/search/bing_main.py +45 -0
  224. webscout/search/brave_main.py +92 -0
  225. webscout/search/duckduckgo_main.py +57 -0
  226. webscout/search/engines/__init__.py +127 -0
  227. webscout/search/engines/bing/__init__.py +15 -0
  228. webscout/search/engines/bing/base.py +35 -0
  229. webscout/search/engines/bing/images.py +114 -0
  230. webscout/search/engines/bing/news.py +96 -0
  231. webscout/search/engines/bing/suggestions.py +36 -0
  232. webscout/search/engines/bing/text.py +109 -0
  233. webscout/search/engines/brave/__init__.py +19 -0
  234. webscout/search/engines/brave/base.py +47 -0
  235. webscout/search/engines/brave/images.py +213 -0
  236. webscout/search/engines/brave/news.py +353 -0
  237. webscout/search/engines/brave/suggestions.py +318 -0
  238. webscout/search/engines/brave/text.py +167 -0
  239. webscout/search/engines/brave/videos.py +364 -0
  240. webscout/search/engines/duckduckgo/__init__.py +25 -0
  241. webscout/search/engines/duckduckgo/answers.py +80 -0
  242. webscout/search/engines/duckduckgo/base.py +189 -0
  243. webscout/search/engines/duckduckgo/images.py +100 -0
  244. webscout/search/engines/duckduckgo/maps.py +183 -0
  245. webscout/search/engines/duckduckgo/news.py +70 -0
  246. webscout/search/engines/duckduckgo/suggestions.py +22 -0
  247. webscout/search/engines/duckduckgo/text.py +221 -0
  248. webscout/search/engines/duckduckgo/translate.py +48 -0
  249. webscout/search/engines/duckduckgo/videos.py +80 -0
  250. webscout/search/engines/duckduckgo/weather.py +84 -0
  251. webscout/search/engines/mojeek.py +61 -0
  252. webscout/search/engines/wikipedia.py +77 -0
  253. webscout/search/engines/yahoo/__init__.py +41 -0
  254. webscout/search/engines/yahoo/answers.py +19 -0
  255. webscout/search/engines/yahoo/base.py +34 -0
  256. webscout/search/engines/yahoo/images.py +323 -0
  257. webscout/search/engines/yahoo/maps.py +19 -0
  258. webscout/search/engines/yahoo/news.py +258 -0
  259. webscout/search/engines/yahoo/suggestions.py +140 -0
  260. webscout/search/engines/yahoo/text.py +273 -0
  261. webscout/search/engines/yahoo/translate.py +19 -0
  262. webscout/search/engines/yahoo/videos.py +302 -0
  263. webscout/search/engines/yahoo/weather.py +220 -0
  264. webscout/search/engines/yandex.py +67 -0
  265. webscout/search/engines/yep/__init__.py +13 -0
  266. webscout/search/engines/yep/base.py +34 -0
  267. webscout/search/engines/yep/images.py +101 -0
  268. webscout/search/engines/yep/suggestions.py +38 -0
  269. webscout/search/engines/yep/text.py +99 -0
  270. webscout/search/http_client.py +172 -0
  271. webscout/search/results.py +141 -0
  272. webscout/search/yahoo_main.py +57 -0
  273. webscout/search/yep_main.py +48 -0
  274. webscout/server/__init__.py +48 -0
  275. webscout/server/config.py +78 -0
  276. webscout/server/exceptions.py +69 -0
  277. webscout/server/providers.py +286 -0
  278. webscout/server/request_models.py +131 -0
  279. webscout/server/request_processing.py +404 -0
  280. webscout/server/routes.py +642 -0
  281. webscout/server/server.py +351 -0
  282. webscout/server/ui_templates.py +1171 -0
  283. webscout/swiftcli/__init__.py +79 -809
  284. webscout/swiftcli/core/__init__.py +7 -0
  285. webscout/swiftcli/core/cli.py +574 -0
  286. webscout/swiftcli/core/context.py +98 -0
  287. webscout/swiftcli/core/group.py +268 -0
  288. webscout/swiftcli/decorators/__init__.py +28 -0
  289. webscout/swiftcli/decorators/command.py +243 -0
  290. webscout/swiftcli/decorators/options.py +247 -0
  291. webscout/swiftcli/decorators/output.py +392 -0
  292. webscout/swiftcli/exceptions.py +21 -0
  293. webscout/swiftcli/plugins/__init__.py +9 -0
  294. webscout/swiftcli/plugins/base.py +134 -0
  295. webscout/swiftcli/plugins/manager.py +269 -0
  296. webscout/swiftcli/utils/__init__.py +58 -0
  297. webscout/swiftcli/utils/formatting.py +251 -0
  298. webscout/swiftcli/utils/parsing.py +368 -0
  299. webscout/update_checker.py +280 -136
  300. webscout/utils.py +28 -14
  301. webscout/version.py +2 -1
  302. webscout/version.py.bak +3 -0
  303. webscout/zeroart/__init__.py +218 -55
  304. webscout/zeroart/base.py +70 -60
  305. webscout/zeroart/effects.py +155 -99
  306. webscout/zeroart/fonts.py +1799 -816
  307. webscout-2026.1.19.dist-info/METADATA +638 -0
  308. webscout-2026.1.19.dist-info/RECORD +312 -0
  309. {webscout-8.2.2.dist-info → webscout-2026.1.19.dist-info}/WHEEL +1 -1
  310. webscout-2026.1.19.dist-info/entry_points.txt +4 -0
  311. webscout-2026.1.19.dist-info/top_level.txt +1 -0
  312. inferno/__init__.py +0 -6
  313. inferno/__main__.py +0 -9
  314. inferno/cli.py +0 -6
  315. webscout/DWEBS.py +0 -477
  316. webscout/Extra/autocoder/__init__.py +0 -9
  317. webscout/Extra/autocoder/autocoder.py +0 -849
  318. webscout/Extra/autocoder/autocoder_utiles.py +0 -332
  319. webscout/LLM.py +0 -442
  320. webscout/Litlogger/__init__.py +0 -67
  321. webscout/Litlogger/core/__init__.py +0 -6
  322. webscout/Litlogger/core/level.py +0 -23
  323. webscout/Litlogger/core/logger.py +0 -165
  324. webscout/Litlogger/handlers/__init__.py +0 -12
  325. webscout/Litlogger/handlers/console.py +0 -33
  326. webscout/Litlogger/handlers/file.py +0 -143
  327. webscout/Litlogger/handlers/network.py +0 -173
  328. webscout/Litlogger/styles/__init__.py +0 -7
  329. webscout/Litlogger/styles/colors.py +0 -249
  330. webscout/Litlogger/styles/formats.py +0 -458
  331. webscout/Litlogger/styles/text.py +0 -87
  332. webscout/Litlogger/utils/__init__.py +0 -6
  333. webscout/Litlogger/utils/detectors.py +0 -153
  334. webscout/Litlogger/utils/formatters.py +0 -200
  335. webscout/Local/__init__.py +0 -12
  336. webscout/Local/__main__.py +0 -9
  337. webscout/Local/api.py +0 -576
  338. webscout/Local/cli.py +0 -516
  339. webscout/Local/config.py +0 -75
  340. webscout/Local/llm.py +0 -287
  341. webscout/Local/model_manager.py +0 -253
  342. webscout/Local/server.py +0 -721
  343. webscout/Local/utils.py +0 -93
  344. webscout/Provider/AI21.py +0 -177
  345. webscout/Provider/AISEARCH/DeepFind.py +0 -250
  346. webscout/Provider/AISEARCH/ISou.py +0 -256
  347. webscout/Provider/AISEARCH/felo_search.py +0 -228
  348. webscout/Provider/AISEARCH/genspark_search.py +0 -208
  349. webscout/Provider/AISEARCH/hika_search.py +0 -194
  350. webscout/Provider/AISEARCH/scira_search.py +0 -324
  351. webscout/Provider/Aitopia.py +0 -292
  352. webscout/Provider/AllenAI.py +0 -413
  353. webscout/Provider/Blackboxai.py +0 -229
  354. webscout/Provider/C4ai.py +0 -432
  355. webscout/Provider/ChatGPTClone.py +0 -226
  356. webscout/Provider/ChatGPTES.py +0 -237
  357. webscout/Provider/ChatGPTGratis.py +0 -194
  358. webscout/Provider/Chatify.py +0 -175
  359. webscout/Provider/Cloudflare.py +0 -273
  360. webscout/Provider/DeepSeek.py +0 -196
  361. webscout/Provider/ElectronHub.py +0 -709
  362. webscout/Provider/ExaChat.py +0 -342
  363. webscout/Provider/Free2GPT.py +0 -241
  364. webscout/Provider/GPTWeb.py +0 -193
  365. webscout/Provider/Glider.py +0 -211
  366. webscout/Provider/HF_space/__init__.py +0 -0
  367. webscout/Provider/HF_space/qwen_qwen2.py +0 -206
  368. webscout/Provider/HuggingFaceChat.py +0 -462
  369. webscout/Provider/Hunyuan.py +0 -272
  370. webscout/Provider/LambdaChat.py +0 -392
  371. webscout/Provider/Llama.py +0 -200
  372. webscout/Provider/Llama3.py +0 -204
  373. webscout/Provider/Marcus.py +0 -148
  374. webscout/Provider/OLLAMA.py +0 -396
  375. webscout/Provider/OPENAI/c4ai.py +0 -367
  376. webscout/Provider/OPENAI/chatgptclone.py +0 -460
  377. webscout/Provider/OPENAI/exachat.py +0 -433
  378. webscout/Provider/OPENAI/freeaichat.py +0 -352
  379. webscout/Provider/OPENAI/opkfc.py +0 -488
  380. webscout/Provider/OPENAI/scirachat.py +0 -463
  381. webscout/Provider/OPENAI/standardinput.py +0 -425
  382. webscout/Provider/OPENAI/typegpt.py +0 -346
  383. webscout/Provider/OPENAI/uncovrAI.py +0 -455
  384. webscout/Provider/OPENAI/venice.py +0 -413
  385. webscout/Provider/OPENAI/yep.py +0 -327
  386. webscout/Provider/OpenGPT.py +0 -199
  387. webscout/Provider/Perplexitylabs.py +0 -415
  388. webscout/Provider/Phind.py +0 -535
  389. webscout/Provider/PizzaGPT.py +0 -198
  390. webscout/Provider/Reka.py +0 -214
  391. webscout/Provider/StandardInput.py +0 -278
  392. webscout/Provider/TTI/AiForce/__init__.py +0 -22
  393. webscout/Provider/TTI/AiForce/async_aiforce.py +0 -224
  394. webscout/Provider/TTI/AiForce/sync_aiforce.py +0 -245
  395. webscout/Provider/TTI/FreeAIPlayground/__init__.py +0 -9
  396. webscout/Provider/TTI/FreeAIPlayground/async_freeaiplayground.py +0 -181
  397. webscout/Provider/TTI/FreeAIPlayground/sync_freeaiplayground.py +0 -180
  398. webscout/Provider/TTI/ImgSys/__init__.py +0 -23
  399. webscout/Provider/TTI/ImgSys/async_imgsys.py +0 -202
  400. webscout/Provider/TTI/ImgSys/sync_imgsys.py +0 -195
  401. webscout/Provider/TTI/MagicStudio/__init__.py +0 -2
  402. webscout/Provider/TTI/MagicStudio/async_magicstudio.py +0 -111
  403. webscout/Provider/TTI/MagicStudio/sync_magicstudio.py +0 -109
  404. webscout/Provider/TTI/Nexra/__init__.py +0 -22
  405. webscout/Provider/TTI/Nexra/async_nexra.py +0 -286
  406. webscout/Provider/TTI/Nexra/sync_nexra.py +0 -258
  407. webscout/Provider/TTI/PollinationsAI/__init__.py +0 -23
  408. webscout/Provider/TTI/PollinationsAI/async_pollinations.py +0 -311
  409. webscout/Provider/TTI/PollinationsAI/sync_pollinations.py +0 -265
  410. webscout/Provider/TTI/aiarta/__init__.py +0 -2
  411. webscout/Provider/TTI/aiarta/async_aiarta.py +0 -482
  412. webscout/Provider/TTI/aiarta/sync_aiarta.py +0 -440
  413. webscout/Provider/TTI/artbit/__init__.py +0 -22
  414. webscout/Provider/TTI/artbit/async_artbit.py +0 -155
  415. webscout/Provider/TTI/artbit/sync_artbit.py +0 -148
  416. webscout/Provider/TTI/fastflux/__init__.py +0 -22
  417. webscout/Provider/TTI/fastflux/async_fastflux.py +0 -261
  418. webscout/Provider/TTI/fastflux/sync_fastflux.py +0 -252
  419. webscout/Provider/TTI/huggingface/__init__.py +0 -22
  420. webscout/Provider/TTI/huggingface/async_huggingface.py +0 -199
  421. webscout/Provider/TTI/huggingface/sync_huggingface.py +0 -195
  422. webscout/Provider/TTI/piclumen/__init__.py +0 -23
  423. webscout/Provider/TTI/piclumen/async_piclumen.py +0 -268
  424. webscout/Provider/TTI/piclumen/sync_piclumen.py +0 -233
  425. webscout/Provider/TTI/pixelmuse/__init__.py +0 -4
  426. webscout/Provider/TTI/pixelmuse/async_pixelmuse.py +0 -249
  427. webscout/Provider/TTI/pixelmuse/sync_pixelmuse.py +0 -182
  428. webscout/Provider/TTI/talkai/__init__.py +0 -4
  429. webscout/Provider/TTI/talkai/async_talkai.py +0 -229
  430. webscout/Provider/TTI/talkai/sync_talkai.py +0 -207
  431. webscout/Provider/TTS/gesserit.py +0 -127
  432. webscout/Provider/TeachAnything.py +0 -187
  433. webscout/Provider/Venice.py +0 -219
  434. webscout/Provider/VercelAI.py +0 -234
  435. webscout/Provider/WebSim.py +0 -228
  436. webscout/Provider/Writecream.py +0 -211
  437. webscout/Provider/WritingMate.py +0 -197
  438. webscout/Provider/aimathgpt.py +0 -189
  439. webscout/Provider/askmyai.py +0 -158
  440. webscout/Provider/asksteve.py +0 -203
  441. webscout/Provider/bagoodex.py +0 -145
  442. webscout/Provider/chatglm.py +0 -205
  443. webscout/Provider/copilot.py +0 -428
  444. webscout/Provider/freeaichat.py +0 -271
  445. webscout/Provider/gaurish.py +0 -244
  446. webscout/Provider/geminiprorealtime.py +0 -160
  447. webscout/Provider/granite.py +0 -187
  448. webscout/Provider/hermes.py +0 -219
  449. webscout/Provider/koala.py +0 -268
  450. webscout/Provider/labyrinth.py +0 -340
  451. webscout/Provider/lepton.py +0 -194
  452. webscout/Provider/llamatutor.py +0 -192
  453. webscout/Provider/multichat.py +0 -325
  454. webscout/Provider/promptrefine.py +0 -193
  455. webscout/Provider/scira_chat.py +0 -277
  456. webscout/Provider/scnet.py +0 -187
  457. webscout/Provider/talkai.py +0 -194
  458. webscout/Provider/tutorai.py +0 -252
  459. webscout/Provider/typegpt.py +0 -232
  460. webscout/Provider/uncovr.py +0 -312
  461. webscout/Provider/yep.py +0 -376
  462. webscout/litprinter/__init__.py +0 -59
  463. webscout/scout/core.py +0 -881
  464. webscout/tempid.py +0 -128
  465. webscout/webscout_search.py +0 -1346
  466. webscout/webscout_search_async.py +0 -877
  467. webscout/yep_search.py +0 -297
  468. webscout-8.2.2.dist-info/METADATA +0 -734
  469. webscout-8.2.2.dist-info/RECORD +0 -309
  470. webscout-8.2.2.dist-info/entry_points.txt +0 -5
  471. webscout-8.2.2.dist-info/top_level.txt +0 -3
  472. webstoken/__init__.py +0 -30
  473. webstoken/classifier.py +0 -189
  474. webstoken/keywords.py +0 -216
  475. webstoken/language.py +0 -128
  476. webstoken/ner.py +0 -164
  477. webstoken/normalizer.py +0 -35
  478. webstoken/processor.py +0 -77
  479. webstoken/sentiment.py +0 -206
  480. webstoken/stemmer.py +0 -73
  481. webstoken/tagger.py +0 -60
  482. webstoken/tokenizer.py +0 -158
  483. {webscout-8.2.2.dist-info → webscout-2026.1.19.dist-info/licenses}/LICENSE.md +0 -0
webscout/scout/element.py CHANGED
@@ -1,460 +1,872 @@
1
- """
2
- Scout Element Module - Advanced HTML Element Representation
3
- """
4
-
5
- import re
6
- from typing import Optional, List, Dict, Union, Any
7
-
8
- class NavigableString(str):
9
- """
10
- A string that knows its place in the document tree.
11
- Mimics BeautifulSoup's NavigableString for better compatibility.
12
- """
13
- def __new__(cls, text: str):
14
- """
15
- Create a new NavigableString instance.
16
-
17
- Args:
18
- text (str): String content
19
- """
20
- return str.__new__(cls, text)
21
-
22
- def __init__(self, text: str):
23
- """
24
- Initialize a navigable string.
25
-
26
- Args:
27
- text (str): String content
28
- """
29
- self.parent = None
30
-
31
- def __repr__(self):
32
- """String representation."""
33
- return f"NavigableString({super().__repr__()})"
34
-
35
- def __add__(self, other):
36
- """
37
- Allow concatenation of NavigableString with other strings.
38
-
39
- Args:
40
- other (str): String to concatenate
41
-
42
- Returns:
43
- str: Concatenated string
44
- """
45
- return str(self) + str(other)
46
-
47
- def strip(self, chars=None):
48
- """
49
- Strip whitespace or specified characters.
50
-
51
- Args:
52
- chars (str, optional): Characters to strip
53
-
54
- Returns:
55
- str: Stripped string
56
- """
57
- return NavigableString(super().strip(chars))
58
-
59
- class Tag:
60
- """
61
- Represents an HTML tag with advanced traversal and manipulation capabilities.
62
- Enhanced to closely mimic BeautifulSoup's Tag class.
63
- """
64
- def __init__(self, name: str, attrs: Dict[str, str] = None):
65
- """
66
- Initialize a Tag with name and attributes.
67
-
68
- Args:
69
- name (str): Tag name
70
- attrs (dict, optional): Tag attributes
71
- """
72
- self.name = name
73
- self.attrs = attrs or {}
74
- self.contents = []
75
- self.parent = None
76
- self.string = None # For single string content
77
-
78
- def __str__(self):
79
- """String representation of the tag."""
80
- return self.decode_contents()
81
-
82
- def __repr__(self):
83
- """Detailed representation of the tag."""
84
- return f"<{self.name} {self.attrs}>"
85
-
86
- def __call__(self, *args, **kwargs):
87
- """
88
- Allows calling find_all directly on the tag.
89
- Mimics BeautifulSoup's behavior.
90
- """
91
- return self.find_all(*args, **kwargs)
92
-
93
- def __contains__(self, item):
94
- """
95
- Check if an item is in the tag's contents.
96
-
97
- Args:
98
- item: Item to search for
99
-
100
- Returns:
101
- bool: True if item is in contents, False otherwise
102
- """
103
- return item in self.contents
104
-
105
- def __getitem__(self, key):
106
- """
107
- Get an attribute value using dictionary-like access.
108
-
109
- Args:
110
- key (str): Attribute name
111
-
112
- Returns:
113
- Any: Attribute value
114
- """
115
- return self.attrs[key]
116
-
117
- def __iter__(self):
118
- """
119
- Iterate through tag's contents.
120
-
121
- Returns:
122
- Iterator: Contents of the tag
123
- """
124
- return iter(self.contents)
125
-
126
- def __eq__(self, other):
127
- """
128
- Compare tags based on name and attributes.
129
-
130
- Args:
131
- other (Tag): Tag to compare
132
-
133
- Returns:
134
- bool: True if tags are equivalent
135
- """
136
- if not isinstance(other, Tag):
137
- return False
138
- return (
139
- self.name == other.name and
140
- self.attrs == other.attrs and
141
- str(self) == str(other)
142
- )
143
-
144
- def __hash__(self):
145
- """
146
- Generate a hash for the tag.
147
-
148
- Returns:
149
- int: Hash value
150
- """
151
- return hash((self.name, frozenset(self.attrs.items()), str(self)))
152
-
153
- def find(self, name=None, attrs={}, recursive=True, text=None, **kwargs) -> Optional['Tag']:
154
- """
155
- Find the first matching child element.
156
- Enhanced with more flexible matching.
157
-
158
- Args:
159
- name (str, optional): Tag name to search for
160
- attrs (dict, optional): Attributes to match
161
- recursive (bool, optional): Search recursively
162
- text (str, optional): Text content to match
163
-
164
- Returns:
165
- Tag or None: First matching element
166
- """
167
- results = self.find_all(name, attrs, recursive, text, limit=1, **kwargs)
168
- return results[0] if results else None
169
-
170
- def find_all(self, name=None, attrs={}, recursive=True, text=None, limit=None, **kwargs) -> List['Tag']:
171
- """
172
- Find all matching child elements.
173
- Enhanced with more flexible matching and BeautifulSoup-like features.
174
-
175
- Args:
176
- name (str, optional): Tag name to search for
177
- attrs (dict, optional): Attributes to match
178
- recursive (bool, optional): Search recursively
179
- text (str, optional): Text content to match
180
- limit (int, optional): Maximum number of results
181
-
182
- Returns:
183
- List[Tag]: List of matching elements
184
- """
185
- results = []
186
-
187
- def _match(tag):
188
- # Check tag name with case-insensitive and regex support
189
- if name:
190
- if isinstance(name, str):
191
- if tag.name.lower() != name.lower():
192
- return False
193
- elif isinstance(name, re.Pattern):
194
- if not name.search(tag.name):
195
- return False
196
-
197
- # Check attributes with more flexible matching
198
- for k, v in attrs.items():
199
- # Handle special attribute matching
200
- if k == 'class':
201
- tag_classes = tag.get('class', [])
202
- if isinstance(v, str) and v not in tag_classes:
203
- return False
204
- elif isinstance(v, list) and not all(cls in tag_classes for cls in v):
205
- return False
206
- elif k == 'id':
207
- if tag.get('id') != v:
208
- return False
209
- else:
210
- # Regex or exact match for other attributes
211
- tag_attr = tag.attrs.get(k)
212
- if isinstance(v, re.Pattern):
213
- if not v.search(str(tag_attr)):
214
- return False
215
- elif tag_attr != v:
216
- return False
217
-
218
- # Check text content
219
- if text:
220
- tag_text = tag.get_text(strip=True)
221
- if isinstance(text, str) and text.lower() not in tag_text.lower():
222
- return False
223
- elif isinstance(text, re.Pattern) and not text.search(tag_text):
224
- return False
225
-
226
- return True
227
-
228
- def _search(element):
229
- if _match(element):
230
- results.append(element)
231
- if limit and len(results) == limit:
232
- return
233
-
234
- if recursive:
235
- for child in element.contents:
236
- if isinstance(child, Tag):
237
- _search(child)
238
-
239
- _search(self)
240
- return results
241
-
242
def select(self, selector: str) -> List['Tag']:
    """
    Select elements with a minimal CSS-like selector.

    The selector is split on whitespace. The first part is tested against
    the direct children of this tag; every further part is tested against
    the direct children of the previous match. A part may be ``.class``,
    ``#id``, ``tag[attr]`` / ``tag[attr=value]`` or a bare tag name.

    Args:
        selector (str): CSS selector string

    Returns:
        List[Tag]: Elements matching the last part of the selector
    """
    steps = re.split(r'\s+', selector.strip())
    found = []

    def _matches(node, step):
        # Class selector: membership test against the tag's class value
        if step.startswith('.'):
            return step[1:] in node.get('class', [])
        # ID selector
        if step.startswith('#'):
            return node.get('id') == step[1:]
        # Anything without a bracket pair is a plain tag name
        if '[' not in step or ']' not in step:
            return node.name == step
        # Attribute selector of the form tag[attr] or tag[attr=value]
        parsed = re.match(r'(\w+)\[([^=]+)(?:=(.+))?\]', step)
        if parsed is None:
            return None
        tag_name, attr, value = parsed.groups()
        if tag_name and node.name != tag_name:
            return False
        if value:
            return node.get(attr) == value.strip("'\"")
        return attr in node.attrs

    def _descend(node, remaining):
        if not remaining:
            found.append(node)
            return
        head, tail = remaining[0], remaining[1:]
        if not _matches(node, head):
            return
        if not tail:
            found.append(node)
            return
        for sub in node.contents:
            if isinstance(sub, Tag):
                _descend(sub, tail)

    for top in self.contents:
        if isinstance(top, Tag):
            _descend(top, steps)

    return found
301
-
302
def select_one(self, selector: str) -> Optional['Tag']:
    """
    Return the first element produced by :meth:`select` for ``selector``.

    Args:
        selector (str): CSS selector string

    Returns:
        Tag or None: First matching element, or None when nothing matches
    """
    matches = self.select(selector)
    if not matches:
        return None
    return matches[0]
314
-
315
def get_text(self, separator=' ', strip=False, types=None) -> str:
    """
    Extract text from the tag and its descendants.

    Args:
        separator (str, optional): String placed between the text of
            neighbouring children
        strip (bool, optional): Strip leading/trailing whitespace from the result
        types (list, optional): When given, only direct children whose exact
            type is listed contribute to the result

    Returns:
        str: Extracted text with runs of blank lines collapsed to one newline
    """
    pieces = []
    for content in self.contents:
        # Optional filter on the exact type of each direct child
        if types is not None and type(content) not in types:
            continue
        if isinstance(content, str):
            # Covers NavigableString (a str subclass) and plain str children,
            # which were previously dropped from the output.
            pieces.append(str(content))
        elif isinstance(content, Tag):
            pieces.append(content.get_text(separator, strip))

    text = separator.join(pieces)
    text = re.sub(r'\n\n+', '\n', text)  # Replace multiple newlines with single newlines
    return text.strip() if strip else text
340
-
341
def find_text(self, pattern: Union[str, re.Pattern], **kwargs) -> Optional[str]:
    """
    Look for ``pattern`` in the text returned by :meth:`get_text`.

    Args:
        pattern (str or re.Pattern): Substring or compiled pattern to look for
        **kwargs: Additional arguments for get_text()

    Returns:
        str or None: The substring itself, the first regex match, or None
    """
    haystack = self.get_text(**kwargs)

    if isinstance(pattern, re.Pattern):
        hit = pattern.search(haystack)
        return hit.group(0) if hit else None
    if isinstance(pattern, str):
        return pattern if pattern in haystack else None
    return None
359
-
360
def replace_text(self, old: Union[str, re.Pattern], new: str, **kwargs) -> str:
    """
    Return the tag's text with ``old`` replaced by ``new``.

    The tree itself is not modified; only the string returned by
    :meth:`get_text` is transformed.

    Args:
        old (str or re.Pattern): Substring or compiled pattern to replace
        new (str): Replacement text
        **kwargs: Additional arguments for get_text()

    Returns:
        str: Modified text (None when ``old`` is neither a str nor a pattern)
    """
    source = self.get_text(**kwargs)

    if isinstance(old, re.Pattern):
        return old.sub(new, source)
    if isinstance(old, str):
        return source.replace(old, new)
    return None
378
-
379
def get(self, key: str, default: Any = None) -> Any:
    """
    Look up an attribute of the tag without raising on a missing key.

    Args:
        key (str): Attribute name
        default (Any, optional): Value returned when the attribute is absent

    Returns:
        Any: Attribute value or default
    """
    attributes = self.attrs
    return attributes.get(key, default)
391
-
392
def decompose(self) -> None:
    """
    Remove the tag and its contents from the document.

    The tag is located in its parent by identity rather than equality, so a
    sibling that merely compares equal is never removed in its place. The
    parent link is cleared afterwards so the detached tag does not keep
    pointing into the tree.
    """
    parent = self.parent
    if not parent:
        return
    siblings = parent.contents
    for position, sibling in enumerate(siblings):
        if sibling is self:
            del siblings[position]
            break
    self.parent = None
396
-
397
def extract(self) -> 'Tag':
    """
    Detach the tag from the document via :meth:`decompose` and hand it back.

    Returns:
        Tag: This tag, after it has been removed from its parent
    """
    detached = self
    detached.decompose()
    return detached
406
-
407
def clear(self) -> None:
    """Empty the tag's contents list in place."""
    del self.contents[:]
410
-
411
def replace_with(self, new_tag: 'Tag') -> None:
    """
    Replace this tag with another tag.

    The tag's slot in the parent is found by identity, so an earlier sibling
    that merely compares equal is never replaced by mistake. After the swap
    this tag's parent link is cleared.

    Args:
        new_tag (Tag): Tag to replace the current tag

    Raises:
        ValueError: If the tag has a parent but is not in its contents
    """
    parent = self.parent
    if not parent:
        return
    for position, sibling in enumerate(parent.contents):
        if sibling is self:
            parent.contents[position] = new_tag
            new_tag.parent = parent
            self.parent = None
            return
    raise ValueError("tag is not present in its parent's contents")
422
-
423
def decode_contents(self, eventual_encoding='utf-8') -> str:
    """
    Concatenate the string form of every child of the tag.

    Args:
        eventual_encoding (str, optional): Accepted for API compatibility;
            not used by this implementation

    Returns:
        str: Decoded contents
    """
    return ''.join(map(str, self.contents))
434
-
435
def prettify(self, formatter='minimal') -> str:
    """
    Render the tag and its subtree as indented markup, one node per line.

    Args:
        formatter (str, optional): Accepted for API compatibility; not used

    Returns:
        str: Prettified tag representation
    """
    def _render(node, depth=0):
        pad = ' ' * depth
        attributes = ''.join(f' {k}="{v}"' for k, v in node.attrs.items())
        lines = [f'{pad}<{node.name}{attributes}>\n']

        for item in node.contents:
            if isinstance(item, Tag):
                lines.append(_render(item, depth + 2))
            else:
                lines.append(' ' * (depth + 2) + str(item) + '\n')

        lines.append(f'{pad}</{node.name}>\n')
        return ''.join(lines)

    return _render(self)
1
+ """
2
+ Scout Element Module - Advanced HTML Element Representation
3
+ """
4
+
5
+ import re
6
+ from typing import Any, Dict, List, Optional, Union
7
+
8
+
9
class NavigableString(str):
    """
    A str subclass that records which tag it belongs to.

    Behaves like an ordinary string and additionally carries a ``parent``
    attribute, mirroring BS4's NavigableString.
    """

    # Owning Tag, or None while the string is detached
    parent: Optional["Tag"]

    def __new__(cls, text: str):
        """
        Build the underlying str value.

        Args:
            text (str): String content
        """
        return super().__new__(cls, text)

    def __init__(self, text: str):
        """
        Start out detached from any tag.

        Args:
            text (str): String content (already consumed by ``__new__``)
        """
        self.parent = None

    def __repr__(self):
        """Debug representation wrapping the plain str repr."""
        plain = str.__repr__(self)
        return f"NavigableString({plain})"

    def __add__(self, other):
        """
        Concatenate with another value, coercing both sides with ``str()``.

        Args:
            other: Value to append

        Returns:
            str: Plain string holding the concatenation
        """
        left = str(self)
        right = str(other)
        return left + right

    def strip(self, chars=None):
        """
        Strip whitespace or the given characters from both ends.

        Args:
            chars (str, optional): Characters to strip

        Returns:
            NavigableString: New, detached string with the stripped content
        """
        stripped = str.strip(self, chars)
        return NavigableString(stripped)
63
+
64
+
65
+ class Tag:
66
+ """
67
+ Represents an HTML tag with advanced traversal and manipulation capabilities.
68
+ Enhanced to closely mimic BS4's Tag class.
69
+ """
70
+
71
def __init__(self, name: str, attrs: Optional[Dict[str, str]] = None):
    """
    Set up an empty, detached tag.

    Args:
        name (str): Tag name
        attrs (dict, optional): Tag attributes; a fresh dict is used when
            omitted or empty
    """
    self.name = name
    self.attrs: Dict[str, str] = attrs if attrs else {}
    # Child nodes in document order
    self.contents: List[Union["Tag", NavigableString, str]] = []
    # Enclosing tag, None while detached
    self.parent: Optional["Tag"] = None
    # Explicitly assigned single-string content, if any
    self._string: Optional[str] = None
84
+
85
def __str__(self):
    """Return the tag's decoded contents as its string form."""
    rendered = self.decode_contents()
    return rendered
88
+
89
def __repr__(self):
    """Debug representation showing the tag name and its attribute dict."""
    return "<{} {}>".format(self.name, self.attrs)
92
+
93
def __call__(self, *args, **kwargs):
    """
    Make ``tag(...)`` a shorthand for ``tag.find_all(...)``, as in BS4.

    All positional and keyword arguments are forwarded unchanged.
    """
    finder = self.find_all
    return finder(*args, **kwargs)
99
+
100
def __contains__(self, item):
    """
    Report whether ``item`` is one of the tag's direct children.

    Args:
        item: Item to search for

    Returns:
        bool: True if item is in contents, False otherwise
    """
    children = self.contents
    return item in children
111
+
112
def __getitem__(self, key):
    """
    Dictionary-style attribute access: ``tag["href"]``.

    Args:
        key (str): Attribute name

    Returns:
        Any: Attribute value

    Raises:
        KeyError: If the attribute is not present
    """
    attributes = self.attrs
    return attributes[key]
123
+
124
def __iter__(self):
    """
    Iterate over the tag's direct children.

    Returns:
        Iterator: Iterator over ``self.contents``
    """
    children = self.contents
    return iter(children)
132
+
133
def __eq__(self, other):
    """
    Structural equality: same name, same attributes, same string form.

    Args:
        other (Tag): Tag to compare

    Returns:
        bool: True if both are tags and compare equal on all three aspects
    """
    if not isinstance(other, Tag):
        return False
    if self.name != other.name or self.attrs != other.attrs:
        return False
    return str(self) == str(other)
146
+
147
def __hash__(self):
    """
    Generate a hash consistent with ``__eq__`` (name, attributes, string form).

    Attribute values may be lists (for example a list of classes), which are
    unhashable; they are converted to tuples first so hashing a tag never
    raises TypeError. Tags with only string attributes hash as before.

    Returns:
        int: Hash value
    """
    def _freeze(value):
        # Lists/tuples become tuples (recursively) so they can be hashed
        if isinstance(value, (list, tuple)):
            return tuple(_freeze(item) for item in value)
        return value

    frozen_attrs = frozenset((key, _freeze(value)) for key, value in self.attrs.items())
    return hash((self.name, frozen_attrs, str(self)))
155
+
156
def find(
    self, name=None, attrs=None, recursive=True, text=None, limit=None, class_=None, **kwargs
) -> Optional["Tag"]:
    """
    Find the first matching element.

    Delegates to :meth:`find_all` with ``limit=1``. When both ``attrs["class"]``
    and ``class_`` are given, the two are merged into one list of required
    classes. Neither ``attrs`` nor any list inside it is modified.

    Args:
        name (str, optional): Tag name to search for
        attrs (dict, optional): Attributes to match
        recursive (bool, optional): Search recursively
        text (str, optional): Text content to match
        limit (int, optional): Accepted for signature compatibility; ignored
        class_ (str | list, optional): Class or classes the element must have
        **kwargs: Forwarded to find_all()

    Returns:
        Tag or None: First matching element
    """
    def _as_class_list(value):
        # Always returns a new list so the caller's data is never mutated
        if isinstance(value, list):
            return list(value)
        return [part.strip() for part in re.split(r"[ ,]+", str(value)) if part.strip()]

    criteria = dict(attrs) if attrs else {}
    if class_ is not None:
        if "class" in criteria:
            criteria["class"] = _as_class_list(criteria["class"]) + _as_class_list(class_)
        else:
            criteria["class"] = class_

    matches = self.find_all(name, criteria, recursive, text, limit=1, **kwargs)
    return matches[0] if matches else None
196
+
197
def find_all(
    self, name=None, attrs=None, recursive=True, text=None, limit=None, class_=None, **kwargs
) -> List["Tag"]:
    """
    Find all matching elements, starting with this tag itself.

    Args:
        name (str | re.Pattern | list | tuple, optional): Tag name filter.
            Strings and sequences compare case-insensitively; "*" matches any tag.
        attrs (dict, optional): Attribute filters. A value may be a string
            (exact match), a compiled pattern, True (attribute present) or
            False (attribute absent). "class" accepts a string, list or pattern.
        recursive (bool, optional): Search all descendants when True; only
            this tag and its direct children when False.
        text (str | re.Pattern, optional): Text the element must contain / match
        limit (int, optional): Maximum number of results
        class_ (str | list | re.Pattern, optional): Shortcut for attrs["class"]
        **kwargs: Additional attribute filters, e.g. ``id="main"`` or ``href=True``

    Returns:
        List[Tag]: Matching elements in document order
    """
    # Build the attribute criteria once instead of once per visited tag.
    # Keyword arguments act as attribute filters (BS4 style); class_ wins
    # over any "class" entry supplied another way.
    criteria = dict(attrs) if attrs else {}
    criteria.update(kwargs)
    if class_ is not None:
        criteria["class"] = class_

    def _split_classes(value):
        return [c.strip() for c in re.split(r"[ ,]+", value) if c.strip()]

    def _name_ok(tag):
        if not name:
            return True
        if isinstance(name, str):
            return name == "*" or tag.name.lower() == name.lower()
        if isinstance(name, re.Pattern):
            return bool(name.search(tag.name))
        if isinstance(name, (list, tuple)):
            return tag.name.lower() in [n.lower() for n in name]
        return True

    def _class_ok(actual, wanted):
        # Normalise the tag's class value to a list of whole class names
        if isinstance(actual, str):
            actual = _split_classes(actual)
        elif not isinstance(actual, list):
            actual = []
        if isinstance(wanted, str):
            return all(cls in actual for cls in _split_classes(wanted))
        if isinstance(wanted, list):
            return all(cls in actual for cls in wanted)
        if isinstance(wanted, re.Pattern):
            return any(wanted.search(cls) for cls in actual)
        return False

    def _attr_ok(actual, wanted):
        if wanted is True:
            return actual is not None
        if wanted is False:
            return actual is None
        if isinstance(wanted, re.Pattern):
            return actual is not None and bool(wanted.search(str(actual)))
        return actual == wanted

    def _text_ok(tag):
        if not text:
            return True
        tag_text = tag.get_text(strip=True)
        if isinstance(text, str):
            return text in tag_text
        if isinstance(text, re.Pattern):
            return bool(text.search(tag_text))
        return True

    def _match(tag):
        if not _name_ok(tag):
            return False
        for key, wanted in criteria.items():
            actual = tag.attrs.get(key)
            ok = _class_ok(actual, wanted) if key == "class" else _attr_ok(actual, wanted)
            if not ok:
                return False
        return _text_ok(tag)

    results = []

    def _full():
        return bool(limit) and len(results) >= limit

    def _walk(element):
        # Pre-order walk; returns True as soon as the limit is reached so
        # every enclosing level stops as well.
        for child in element.contents:
            if not isinstance(child, Tag):
                continue
            if _match(child):
                results.append(child)
                if _full():
                    return True
            if recursive and _walk(child):
                return True
        return False

    if _match(self):
        results.append(self)
    if not _full():
        _walk(self)
    return results
299
+
300
def select(self, selector: str) -> List["Tag"]:
    """
    Select elements using a CSS selector.

    Supported syntax:
    - Tag selectors: 'p', 'div'
    - Class selectors: '.class', 'p.class', '.class1.class2'
    - ID selectors: '#id', 'div#id'
    - Attribute selectors: '[attr]', '[attr=value]'
    - Descendant combinator: 'div p'
    - Child combinator: 'div > p' (also 'div>p'), freely mixed with descendants
    - Selector groups: 'h1, h2'

    Parsing and matching of each compound selector is delegated to
    ``_parse_selector_components`` / ``_match_selector_components`` so there
    is a single implementation of those rules. The first compound selector
    may match this tag itself; later ones match strict descendants/children.

    Args:
        selector (str): CSS selector string

    Returns:
        List[Tag]: Matching elements without duplicates
    """
    # --- Tokenise: whitespace, '>' and ',' only count outside [...] ---
    groups = []
    tokens = []
    buffer = []
    depth = 0
    for ch in selector.strip():
        if ch == "[":
            depth += 1
        elif ch == "]" and depth:
            depth -= 1
        if depth == 0 and (ch.isspace() or ch in ">,"):
            if buffer:
                tokens.append("".join(buffer))
                buffer = []
            if ch == ">":
                tokens.append(">")
            elif ch == ",":
                groups.append(tokens)
                tokens = []
        else:
            buffer.append(ch)
    if buffer:
        tokens.append("".join(buffer))
    groups.append(tokens)

    # An empty selector keeps its historical meaning: an unconstrained match.
    groups = [group for group in groups if group] or [[""]]

    def _evaluate(group):
        # Turn the token list into (combinator, compound selector) steps
        steps = []
        combinator = " "
        for token in group:
            if token == ">":
                combinator = ">"
            else:
                steps.append((combinator, token))
                combinator = " "
        if not steps:
            return []

        first_combinator, first_selector = steps[0]
        components = self._parse_selector_components(first_selector)
        if first_combinator == ">":
            current = [
                child
                for child in self.contents
                if isinstance(child, Tag) and self._match_selector_components(child, components)
            ]
        else:
            current = self._find_all_matching_in_tree(self, components)

        for combinator, compound in steps[1:]:
            components = self._parse_selector_components(compound)
            following = []
            seen = set()
            for anchor in current:
                for child in anchor.contents:
                    if not isinstance(child, Tag):
                        continue
                    if combinator == ">":
                        hits = [child] if self._match_selector_components(child, components) else []
                    else:
                        # Searching from each child keeps the anchor itself out
                        hits = self._find_all_matching_in_tree(child, components)
                    for hit in hits:
                        if id(hit) not in seen:
                            seen.add(id(hit))
                            following.append(hit)
            current = following
        return current

    results = []
    seen_ids = set()
    for group in groups:
        for element in _evaluate(group):
            if id(element) not in seen_ids:
                seen_ids.add(id(element))
                results.append(element)
    return results
413
+
414
def _select_with_descendant_combinator(self, parts: List[str]) -> List["Tag"]:
    """
    Handle the descendant combinator (space).

    The first part may match this tag or anything below it. Every further
    part must match a strict descendant of a previous match, so a lone
    ``div`` does not satisfy ``div div``. Each element is returned at most
    once even when it sits under several matching ancestors.

    Args:
        parts (List[str]): Compound selectors, outermost first

    Returns:
        List[Tag]: Elements matching the last part
    """
    if not parts:
        return []

    current = self._find_all_matching_in_tree(
        self, self._parse_selector_components(parts[0])
    )

    for part in parts[1:]:
        components = self._parse_selector_components(part)
        following = []
        seen = set()
        for ancestor in current:
            # Start below the ancestor so it cannot match as its own descendant
            for child in ancestor.contents:
                if not isinstance(child, Tag):
                    continue
                for hit in self._find_all_matching_in_tree(child, components):
                    if id(hit) not in seen:
                        seen.add(id(hit))
                        following.append(hit)
        current = following

    return current
435
+
436
def _select_with_child_combinator(self, parts: List[str]) -> List["Tag"]:
    """
    Handle the child combinator (>).

    The first part may match this tag or anything below it. Each further
    part must match a direct child of a previous match, at every level of
    the chain (previously only the second part was restricted to children).

    Args:
        parts (List[str]): Compound selectors, outermost first

    Returns:
        List[Tag]: Elements matching the last part
    """
    if not parts:
        return []

    current = self._find_all_matching_in_tree(
        self, self._parse_selector_components(parts[0])
    )

    for part in parts[1:]:
        components = self._parse_selector_components(part)
        current = [
            child
            for parent in current
            for child in parent.contents
            if isinstance(child, Tag) and self._match_selector_components(child, components)
        ]

    return current
476
+
477
def _parse_selector_components(self, simple_sel: str) -> dict:
    """
    Parse a simple selector like 'p.class#id[attr=value]' into components.

    Attribute expressions are extracted first and removed before looking for
    ids and classes, so characters inside an attribute value (for example
    the '.' in ``[href="page.html"]`` or the '#' in ``[href="#top"]``) are
    not mistaken for class or id selectors.

    Args:
        simple_sel (str): Selector without combinators

    Returns:
        dict: ``{"tag": str | None, "id": str | None, "classes": list,
        "attrs": dict}``; an attrs value of None means "attribute must exist"
    """
    attributes: Dict[str, Optional[str]] = {}
    for expression in re.findall(r"\[([^\]]+)\]", simple_sel):
        attr_name, has_value, attr_value = expression.partition("=")
        attributes[attr_name.strip()] = attr_value.strip("'\"") if has_value else None

    # Everything outside [...] carries the tag / id / class information
    bare = re.sub(r"\[[^\]]*\]", "", simple_sel)

    tag_match = re.match(r"^([a-zA-Z][\w-]*)", simple_sel)
    id_matches = re.findall(r"#([\w-]+)", bare)

    return {
        "tag": tag_match.group(1) if tag_match else None,
        "id": id_matches[0] if id_matches else None,
        "classes": re.findall(r"\.([\w-]+)", bare),
        "attrs": attributes,
    }
508
+
509
def _match_selector_components(self, tag: "Tag", components: dict) -> bool:
    """
    Check if a tag matches the parsed selector components.

    Tag names are compared case-insensitively, matching the behaviour of
    find_all() and of CSS type selectors in HTML.

    Args:
        tag (Tag): Candidate element
        components (dict): Output of _parse_selector_components()

    Returns:
        bool: True when the tag satisfies every component
    """
    # Check tag name
    wanted_tag = components["tag"]
    if wanted_tag and str(tag.name).lower() != wanted_tag.lower():
        return False

    # Check ID
    if components["id"] and tag.get("id") != components["id"]:
        return False

    # Check classes; the attribute may be a space separated string or a list
    tag_classes = tag.get("class", "")
    if isinstance(tag_classes, str):
        tag_classes = tag_classes.split()
    elif not isinstance(tag_classes, list):
        tag_classes = [str(tag_classes)] if tag_classes else []
    if any(cls not in tag_classes for cls in components["classes"]):
        return False

    # Check attributes: None means "must exist", otherwise exact value match
    for attr_name, attr_value in components["attrs"].items():
        if attr_value is None:
            if attr_name not in tag.attrs:
                return False
        elif tag.get(attr_name) != attr_value:
            return False

    return True
542
+
543
def _find_all_matching_in_tree(self, element: "Tag", components: dict) -> List["Tag"]:
    """
    Collect ``element`` and every tag below it that matches ``components``.

    Args:
        element (Tag): Root of the subtree to scan (tested as well)
        components (dict): Parsed selector components

    Returns:
        List[Tag]: Matching tags in document (pre-order) order
    """
    found = []
    pending = [element]
    while pending:
        node = pending.pop()
        if self._match_selector_components(node, components):
            found.append(node)
        # Push children reversed so the leftmost child is visited first
        pending.extend(child for child in reversed(node.contents) if isinstance(child, Tag))
    return found
557
+
558
def select_one(self, selector: str) -> Optional["Tag"]:
    """
    Return the first element produced by :meth:`select` for ``selector``.

    Args:
        selector (str): CSS selector string

    Returns:
        Tag or None: First matching element, or None when nothing matches
    """
    matches = self.select(selector)
    if not matches:
        return None
    return matches[0]
570
+
571
def get_text(self, separator=" ", strip=False, types=None) -> str:
    """
    Extract text from the tag and its descendants.

    Args:
        separator (str, optional): String placed between the text of
            neighbouring children
        strip (bool, optional): Strip leading/trailing whitespace from the result
        types (list, optional): When given, only direct children whose exact
            type is listed contribute to the result

    Returns:
        str: Extracted text with runs of blank lines collapsed to one newline
    """
    pieces = []
    for content in self.contents:
        # Optional filter on the exact type of each direct child
        if types is not None and type(content) not in types:
            continue
        if isinstance(content, str):
            # Covers NavigableString (a str subclass) and plain str children,
            # which ``contents`` may hold and which were previously dropped.
            pieces.append(str(content))
        elif isinstance(content, Tag):
            pieces.append(content.get_text(separator, strip))

    text = separator.join(pieces)
    text = re.sub(r"\n\n+", "\n", text)  # Replace multiple newlines with single newlines
    return text.strip() if strip else text
596
+
597
def find_text(self, pattern: Union[str, re.Pattern], **kwargs) -> Optional[str]:
    """
    Look for ``pattern`` in the text returned by :meth:`get_text`.

    Args:
        pattern (str or re.Pattern): Substring or compiled pattern to look for
        **kwargs: Additional arguments for get_text()

    Returns:
        str or None: The substring itself, the first regex match, or None
    """
    haystack = self.get_text(**kwargs)

    if isinstance(pattern, re.Pattern):
        hit = pattern.search(haystack)
        return hit.group(0) if hit else None
    if isinstance(pattern, str):
        return pattern if pattern in haystack else None
    return None
615
+
616
def replace_text(self, old: Union[str, re.Pattern], new: str, **kwargs) -> str:
    """
    Return the tag's text with ``old`` replaced by ``new``.

    The tree itself is not modified; only the string returned by
    :meth:`get_text` is transformed.

    Args:
        old (str or re.Pattern): Substring or compiled pattern to replace
        new (str): Replacement text
        **kwargs: Additional arguments for get_text()

    Returns:
        str: Modified text (None when ``old`` is neither a str nor a pattern)
    """
    source = self.get_text(**kwargs)

    if isinstance(old, re.Pattern):
        return old.sub(new, source)
    if isinstance(old, str):
        return source.replace(old, new)
    return None
634
+
635
def get(self, key: str, default: Any = None) -> Any:
    """
    Look up an attribute of the tag without raising on a missing key.

    Args:
        key (str): Attribute name
        default (Any, optional): Value returned when the attribute is absent

    Returns:
        Any: Attribute value or default
    """
    attributes = self.attrs
    return attributes.get(key, default)
647
+
648
def decompose(self) -> None:
    """
    Remove the tag and its contents from the document.

    Tag equality is structural (name, attributes, string form), so the tag
    is located in its parent by identity; otherwise an earlier sibling that
    merely compares equal would be removed instead. The parent link is
    cleared afterwards so the detached tag no longer points into the tree.
    """
    parent = self.parent
    if not parent:
        return
    siblings = parent.contents
    for position, sibling in enumerate(siblings):
        if sibling is self:
            del siblings[position]
            break
    self.parent = None
652
+
653
def extract(self) -> "Tag":
    """
    Detach the tag from the document via :meth:`decompose` and hand it back.

    Returns:
        Tag: This tag, after it has been removed from its parent
    """
    detached = self
    detached.decompose()
    return detached
662
+
663
def clear(self) -> None:
    """Empty the tag's contents list in place."""
    del self.contents[:]
666
+
667
@property
def string(self) -> Optional[str]:
    """
    String content of the tag.

    Returns the explicitly assigned ``_string`` when there is one, otherwise
    the combined text of the tag's contents.
    """
    if self._string is None:
        return self.get_text()
    return self._string

@property
def text(self) -> str:
    """BS4 compatible alias for :meth:`get_text` with default arguments."""
    return self.get_text()

@string.setter
def string(self, value: Optional[str]) -> None:
    """
    Replace the tag's contents with a single string.

    Existing children are cleared first; ``None`` leaves the tag empty.

    Args:
        value (str | None): New string content
    """
    self._string = value
    self.clear()
    if value is None:
        return
    self.append(value)
693
+
694
+ def append(self, new_child: Union["Tag", NavigableString, str]) -> None:
695
+ """Append a new child to this tag with error handling."""
696
+ if isinstance(new_child, str):
697
+ new_child = NavigableString(new_child)
698
+ if hasattr(new_child, "parent"):
699
+ new_child.parent = self
700
+ self.contents.append(new_child)
701
+
702
+ def extend(self, new_children: List[Union["Tag", NavigableString, str]]) -> None:
703
+ """Extend the contents of this tag with a list of new children."""
704
+ for child in new_children:
705
+ self.append(child)
706
+
707
+ def insert(self, index: int, new_child: Union["Tag", NavigableString, str]) -> None:
708
+ """Insert a new child at the given index with error handling."""
709
+ if isinstance(new_child, str):
710
+ new_child = NavigableString(new_child)
711
+ if hasattr(new_child, "parent"):
712
+ new_child.parent = self
713
+ self.contents.insert(index, new_child)
714
+
715
def replace_with(self, new_tag: "Tag") -> None:
    """
    Replace this tag with another tag.

    Tag equality is structural, so this tag's slot in the parent is found by
    identity; otherwise an earlier sibling that merely compares equal would
    be replaced instead. Nothing happens when the tag has no parent or is
    not among its parent's contents. After a successful swap this tag's
    parent link is cleared.

    Args:
        new_tag (Tag): Tag that takes this tag's place
    """
    parent = self.parent
    if not parent:
        return
    for position, sibling in enumerate(parent.contents):
        if sibling is self:
            parent.contents[position] = new_tag
            new_tag.parent = parent
            self.parent = None
            return
724
+
725
def wrap(self, wrapper_tag: "Tag") -> "Tag":
    """
    Wrap this tag in another tag.

    The wrapper takes this tag's position in the parent (located by
    identity, because tag equality is structural) and this tag becomes the
    wrapper's last child.

    Args:
        wrapper_tag (Tag): Tag that will enclose this tag

    Returns:
        Tag: The wrapper tag

    Raises:
        ValueError: If the tag has a parent but is not in its contents
    """
    parent = self.parent
    if parent:
        for position, sibling in enumerate(parent.contents):
            if sibling is self:
                parent.contents[position] = wrapper_tag
                break
        else:
            raise ValueError("tag is not present in its parent's contents")
        wrapper_tag.parent = parent
    else:
        wrapper_tag.parent = None
    wrapper_tag.contents.append(self)
    self.parent = wrapper_tag
    return wrapper_tag
736
+
737
def unwrap(self) -> None:
    """
    Remove this tag but keep its contents in the parent.

    The children take this tag's place in the parent in their original
    order. The tag's position is located by identity (tag equality is
    structural), and the swap is done with one slice assignment so no
    equality-based ``remove`` can delete the wrong node. Does nothing when
    the tag has no parent.

    Raises:
        ValueError: If the tag has a parent but is not in its contents
    """
    parent = self.parent
    if not parent:
        return

    siblings = parent.contents
    for position, sibling in enumerate(siblings):
        if sibling is self:
            break
    else:
        raise ValueError("tag is not present in its parent's contents")

    children = list(self.contents)
    for child in children:
        # Plain str children carry no parent link
        if hasattr(child, "parent"):
            child.parent = parent
    siblings[position:position + 1] = children

    self.parent = None
    self.contents = []
748
+
749
def insert_before(self, new_element: "Tag") -> None:
    """
    Insert a tag or string immediately before this tag.

    This tag is located in its parent by identity, since tag equality is
    structural and an equal sibling could otherwise be picked. Plain strings
    are wrapped in a NavigableString so they can carry a parent link. Does
    nothing when this tag has no parent.

    Args:
        new_element (Tag | NavigableString | str): Node to insert

    Raises:
        ValueError: If the tag has a parent but is not in its contents
    """
    parent = self.parent
    if not parent:
        return
    for position, sibling in enumerate(parent.contents):
        if sibling is self:
            break
    else:
        raise ValueError("tag is not present in its parent's contents")

    if isinstance(new_element, str) and not isinstance(new_element, NavigableString):
        new_element = NavigableString(new_element)
    new_element.parent = parent
    parent.contents.insert(position, new_element)
755
+
756
def insert_after(self, new_element: "Tag") -> None:
    """
    Insert a tag or string immediately after this tag.

    This tag is located in its parent by identity, since tag equality is
    structural and an equal sibling could otherwise be picked. Plain strings
    are wrapped in a NavigableString so they can carry a parent link. Does
    nothing when this tag has no parent.

    Args:
        new_element (Tag | NavigableString | str): Node to insert

    Raises:
        ValueError: If the tag has a parent but is not in its contents
    """
    parent = self.parent
    if not parent:
        return
    for position, sibling in enumerate(parent.contents):
        if sibling is self:
            break
    else:
        raise ValueError("tag is not present in its parent's contents")

    if isinstance(new_element, str) and not isinstance(new_element, NavigableString):
        new_element = NavigableString(new_element)
    new_element.parent = parent
    parent.contents.insert(position + 1, new_element)
762
+
763
@property
def descendants(self):
    """
    Generate every node below this tag in document (pre-order) order.

    Strings are yielded as they are; tags are yielded and then descended into.
    """
    pending = [iter(self.contents)]
    while pending:
        for node in pending[-1]:
            yield node
            if isinstance(node, Tag):
                # Descend first; the parent's iterator resumes afterwards
                pending.append(iter(node.contents))
                break
        else:
            pending.pop()
770
+
771
@property
def parents(self):
    """Generate the chain of ancestors, nearest parent first."""
    ancestor = self
    while True:
        ancestor = ancestor.parent
        if not ancestor:
            return
        yield ancestor
778
+
779
@property
def next_element(self):
    """
    Return the next element in document order.

    That is the first child when the tag has contents; otherwise the next
    sibling of the tag or of its closest ancestor that has one. Positions
    are looked up by identity, because tag equality is structural and an
    equal earlier sibling would otherwise be taken for this tag.

    Returns:
        Tag | NavigableString | str | None: Next node, or None at the end

    Raises:
        ValueError: If a node on the way up is missing from its parent's contents
    """
    if self.contents:
        return self.contents[0]

    current = self
    while current.parent:
        siblings = current.parent.contents
        for position, sibling in enumerate(siblings):
            if sibling is current:
                break
        else:
            raise ValueError("tag is not present in its parent's contents")
        if position + 1 < len(siblings):
            return siblings[position + 1]
        current = current.parent
    return None
791
+
792
@property
def previous_element(self):
    """
    Return the previous element in document order.

    That is the deepest last descendant of the preceding sibling, or the
    parent when this tag is the first child. The tag's position is looked up
    by identity, because tag equality is structural and an equal earlier
    sibling would otherwise be taken for this tag.

    Returns:
        Tag | NavigableString | str | None: Previous node, or None for a
        detached tag

    Raises:
        ValueError: If the tag has a parent but is not in its contents
    """
    parent = self.parent
    if not parent:
        return None

    for position, sibling in enumerate(parent.contents):
        if sibling is self:
            break
    else:
        raise ValueError("tag is not present in its parent's contents")

    if position == 0:
        return parent

    previous = parent.contents[position - 1]
    # Only tags have (non-empty) contents; strings end the descent
    while getattr(previous, "contents", None):
        previous = previous.contents[-1]
    return previous
804
+
805
def decode_contents(self, eventual_encoding="utf-8") -> str:
    """
    Concatenate the string form of every child of the tag.

    Args:
        eventual_encoding (str, optional): Accepted for API compatibility;
            not used by this implementation

    Returns:
        str: Decoded contents
    """
    return "".join(map(str, self.contents))
816
+
817
def prettify(self, formatter="minimal") -> str:
    """
    Render the tag and its subtree as indented markup, one node per line.

    List-valued attributes are joined with spaces, void elements without
    contents are written as ``<name />`` and whitespace-only strings are
    left out.

    Args:
        formatter (str, optional): Accepted for API compatibility; not used

    Returns:
        str: Prettified tag representation
    """
    # Elements rendered in self-closing form when they have no contents
    void_elements = frozenset(
        (
            "br", "img", "input", "hr", "meta", "link", "base",
            "area", "col", "embed", "keygen", "source", "track", "wbr",
        )
    )

    def _render(node, depth=0):
        pad = " " * depth
        pieces = [f"{pad}<{node.name}"]
        for key, value in node.attrs.items():
            if isinstance(value, list):
                value = " ".join(value)
            pieces.append(f' {key}="{value}"')

        if node.name.lower() in void_elements and not node.contents:
            pieces.append(" />\n")
            return "".join(pieces)

        pieces.append(">\n")

        child_pad = " " * (depth + 2)
        for item in node.contents:
            if isinstance(item, Tag):
                pieces.append(_render(item, depth + 2))
            elif str(item).strip():
                pieces.append(child_pad + str(item) + "\n")

        pieces.append(f"{pad}</{node.name}>\n")
        return "".join(pieces)

    return _render(self)