webscout 8.2.9__py3-none-any.whl → 2026.1.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (413) hide show
  1. webscout/AIauto.py +524 -251
  2. webscout/AIbase.py +247 -319
  3. webscout/AIutel.py +68 -703
  4. webscout/Bard.py +1072 -1026
  5. webscout/Extra/GitToolkit/__init__.py +10 -10
  6. webscout/Extra/GitToolkit/gitapi/__init__.py +20 -12
  7. webscout/Extra/GitToolkit/gitapi/gist.py +142 -0
  8. webscout/Extra/GitToolkit/gitapi/organization.py +91 -0
  9. webscout/Extra/GitToolkit/gitapi/repository.py +308 -195
  10. webscout/Extra/GitToolkit/gitapi/search.py +162 -0
  11. webscout/Extra/GitToolkit/gitapi/trending.py +236 -0
  12. webscout/Extra/GitToolkit/gitapi/user.py +128 -96
  13. webscout/Extra/GitToolkit/gitapi/utils.py +82 -62
  14. webscout/Extra/YTToolkit/README.md +443 -375
  15. webscout/Extra/YTToolkit/YTdownloader.py +953 -957
  16. webscout/Extra/YTToolkit/__init__.py +3 -3
  17. webscout/Extra/YTToolkit/transcriber.py +595 -476
  18. webscout/Extra/YTToolkit/ytapi/README.md +230 -44
  19. webscout/Extra/YTToolkit/ytapi/__init__.py +22 -6
  20. webscout/Extra/YTToolkit/ytapi/captions.py +190 -0
  21. webscout/Extra/YTToolkit/ytapi/channel.py +302 -307
  22. webscout/Extra/YTToolkit/ytapi/errors.py +13 -13
  23. webscout/Extra/YTToolkit/ytapi/extras.py +178 -118
  24. webscout/Extra/YTToolkit/ytapi/hashtag.py +120 -0
  25. webscout/Extra/YTToolkit/ytapi/https.py +89 -88
  26. webscout/Extra/YTToolkit/ytapi/patterns.py +61 -61
  27. webscout/Extra/YTToolkit/ytapi/playlist.py +59 -59
  28. webscout/Extra/YTToolkit/ytapi/pool.py +8 -8
  29. webscout/Extra/YTToolkit/ytapi/query.py +143 -40
  30. webscout/Extra/YTToolkit/ytapi/shorts.py +122 -0
  31. webscout/Extra/YTToolkit/ytapi/stream.py +68 -63
  32. webscout/Extra/YTToolkit/ytapi/suggestions.py +97 -0
  33. webscout/Extra/YTToolkit/ytapi/utils.py +66 -62
  34. webscout/Extra/YTToolkit/ytapi/video.py +403 -232
  35. webscout/Extra/__init__.py +2 -3
  36. webscout/Extra/gguf.py +1298 -684
  37. webscout/Extra/tempmail/README.md +487 -487
  38. webscout/Extra/tempmail/__init__.py +28 -28
  39. webscout/Extra/tempmail/async_utils.py +143 -141
  40. webscout/Extra/tempmail/base.py +172 -161
  41. webscout/Extra/tempmail/cli.py +191 -187
  42. webscout/Extra/tempmail/emailnator.py +88 -84
  43. webscout/Extra/tempmail/mail_tm.py +378 -361
  44. webscout/Extra/tempmail/temp_mail_io.py +304 -292
  45. webscout/Extra/weather.py +196 -194
  46. webscout/Extra/weather_ascii.py +17 -15
  47. webscout/Provider/AISEARCH/PERPLEXED_search.py +175 -0
  48. webscout/Provider/AISEARCH/Perplexity.py +292 -333
  49. webscout/Provider/AISEARCH/README.md +106 -279
  50. webscout/Provider/AISEARCH/__init__.py +16 -9
  51. webscout/Provider/AISEARCH/brave_search.py +298 -0
  52. webscout/Provider/AISEARCH/iask_search.py +357 -410
  53. webscout/Provider/AISEARCH/monica_search.py +200 -220
  54. webscout/Provider/AISEARCH/webpilotai_search.py +242 -255
  55. webscout/Provider/Algion.py +413 -0
  56. webscout/Provider/Andi.py +74 -69
  57. webscout/Provider/Apriel.py +313 -0
  58. webscout/Provider/Ayle.py +323 -0
  59. webscout/Provider/ChatSandbox.py +329 -342
  60. webscout/Provider/ClaudeOnline.py +365 -0
  61. webscout/Provider/Cohere.py +232 -208
  62. webscout/Provider/DeepAI.py +367 -0
  63. webscout/Provider/Deepinfra.py +467 -340
  64. webscout/Provider/EssentialAI.py +217 -0
  65. webscout/Provider/ExaAI.py +274 -261
  66. webscout/Provider/Gemini.py +175 -169
  67. webscout/Provider/GithubChat.py +385 -369
  68. webscout/Provider/Gradient.py +286 -0
  69. webscout/Provider/Groq.py +556 -801
  70. webscout/Provider/HadadXYZ.py +323 -0
  71. webscout/Provider/HeckAI.py +392 -375
  72. webscout/Provider/HuggingFace.py +387 -0
  73. webscout/Provider/IBM.py +340 -0
  74. webscout/Provider/Jadve.py +317 -291
  75. webscout/Provider/K2Think.py +306 -0
  76. webscout/Provider/Koboldai.py +221 -384
  77. webscout/Provider/Netwrck.py +273 -270
  78. webscout/Provider/Nvidia.py +310 -0
  79. webscout/Provider/OPENAI/DeepAI.py +489 -0
  80. webscout/Provider/OPENAI/K2Think.py +423 -0
  81. webscout/Provider/OPENAI/PI.py +463 -0
  82. webscout/Provider/OPENAI/README.md +890 -952
  83. webscout/Provider/OPENAI/TogetherAI.py +405 -0
  84. webscout/Provider/OPENAI/TwoAI.py +255 -357
  85. webscout/Provider/OPENAI/__init__.py +148 -40
  86. webscout/Provider/OPENAI/ai4chat.py +348 -293
  87. webscout/Provider/OPENAI/akashgpt.py +436 -0
  88. webscout/Provider/OPENAI/algion.py +303 -0
  89. webscout/Provider/OPENAI/{exachat.py → ayle.py} +365 -444
  90. webscout/Provider/OPENAI/base.py +253 -249
  91. webscout/Provider/OPENAI/cerebras.py +296 -0
  92. webscout/Provider/OPENAI/chatgpt.py +870 -556
  93. webscout/Provider/OPENAI/chatsandbox.py +233 -173
  94. webscout/Provider/OPENAI/deepinfra.py +403 -322
  95. webscout/Provider/OPENAI/e2b.py +2370 -1414
  96. webscout/Provider/OPENAI/elmo.py +278 -0
  97. webscout/Provider/OPENAI/exaai.py +452 -417
  98. webscout/Provider/OPENAI/freeassist.py +446 -0
  99. webscout/Provider/OPENAI/gradient.py +448 -0
  100. webscout/Provider/OPENAI/groq.py +380 -364
  101. webscout/Provider/OPENAI/hadadxyz.py +292 -0
  102. webscout/Provider/OPENAI/heckai.py +333 -308
  103. webscout/Provider/OPENAI/huggingface.py +321 -0
  104. webscout/Provider/OPENAI/ibm.py +425 -0
  105. webscout/Provider/OPENAI/llmchat.py +253 -0
  106. webscout/Provider/OPENAI/llmchatco.py +378 -335
  107. webscout/Provider/OPENAI/meta.py +541 -0
  108. webscout/Provider/OPENAI/netwrck.py +374 -357
  109. webscout/Provider/OPENAI/nvidia.py +317 -0
  110. webscout/Provider/OPENAI/oivscode.py +348 -287
  111. webscout/Provider/OPENAI/openrouter.py +328 -0
  112. webscout/Provider/OPENAI/pydantic_imports.py +1 -172
  113. webscout/Provider/OPENAI/sambanova.py +397 -0
  114. webscout/Provider/OPENAI/sonus.py +305 -304
  115. webscout/Provider/OPENAI/textpollinations.py +370 -339
  116. webscout/Provider/OPENAI/toolbaz.py +375 -413
  117. webscout/Provider/OPENAI/typefully.py +419 -355
  118. webscout/Provider/OPENAI/typliai.py +279 -0
  119. webscout/Provider/OPENAI/utils.py +314 -318
  120. webscout/Provider/OPENAI/wisecat.py +359 -387
  121. webscout/Provider/OPENAI/writecream.py +185 -163
  122. webscout/Provider/OPENAI/x0gpt.py +462 -365
  123. webscout/Provider/OPENAI/zenmux.py +380 -0
  124. webscout/Provider/OpenRouter.py +386 -0
  125. webscout/Provider/Openai.py +337 -496
  126. webscout/Provider/PI.py +443 -429
  127. webscout/Provider/QwenLM.py +346 -254
  128. webscout/Provider/STT/__init__.py +28 -0
  129. webscout/Provider/STT/base.py +303 -0
  130. webscout/Provider/STT/elevenlabs.py +264 -0
  131. webscout/Provider/Sambanova.py +317 -0
  132. webscout/Provider/TTI/README.md +69 -82
  133. webscout/Provider/TTI/__init__.py +37 -7
  134. webscout/Provider/TTI/base.py +147 -64
  135. webscout/Provider/TTI/claudeonline.py +393 -0
  136. webscout/Provider/TTI/magicstudio.py +292 -201
  137. webscout/Provider/TTI/miragic.py +180 -0
  138. webscout/Provider/TTI/pollinations.py +331 -221
  139. webscout/Provider/TTI/together.py +334 -0
  140. webscout/Provider/TTI/utils.py +14 -11
  141. webscout/Provider/TTS/README.md +186 -192
  142. webscout/Provider/TTS/__init__.py +43 -10
  143. webscout/Provider/TTS/base.py +523 -159
  144. webscout/Provider/TTS/deepgram.py +286 -156
  145. webscout/Provider/TTS/elevenlabs.py +189 -111
  146. webscout/Provider/TTS/freetts.py +218 -0
  147. webscout/Provider/TTS/murfai.py +288 -113
  148. webscout/Provider/TTS/openai_fm.py +364 -129
  149. webscout/Provider/TTS/parler.py +203 -111
  150. webscout/Provider/TTS/qwen.py +334 -0
  151. webscout/Provider/TTS/sherpa.py +286 -0
  152. webscout/Provider/TTS/speechma.py +693 -580
  153. webscout/Provider/TTS/streamElements.py +275 -333
  154. webscout/Provider/TTS/utils.py +280 -280
  155. webscout/Provider/TextPollinationsAI.py +331 -308
  156. webscout/Provider/TogetherAI.py +450 -0
  157. webscout/Provider/TwoAI.py +309 -475
  158. webscout/Provider/TypliAI.py +311 -305
  159. webscout/Provider/UNFINISHED/ChatHub.py +219 -209
  160. webscout/Provider/{OPENAI/glider.py → UNFINISHED/ChutesAI.py} +331 -326
  161. webscout/Provider/{GizAI.py → UNFINISHED/GizAI.py} +300 -295
  162. webscout/Provider/{Marcus.py → UNFINISHED/Marcus.py} +218 -198
  163. webscout/Provider/UNFINISHED/Qodo.py +481 -0
  164. webscout/Provider/{MCPCore.py → UNFINISHED/XenAI.py} +330 -315
  165. webscout/Provider/UNFINISHED/Youchat.py +347 -330
  166. webscout/Provider/UNFINISHED/aihumanizer.py +41 -0
  167. webscout/Provider/UNFINISHED/grammerchecker.py +37 -0
  168. webscout/Provider/UNFINISHED/liner.py +342 -0
  169. webscout/Provider/UNFINISHED/liner_api_request.py +246 -263
  170. webscout/Provider/{samurai.py → UNFINISHED/samurai.py} +231 -224
  171. webscout/Provider/WiseCat.py +256 -233
  172. webscout/Provider/WrDoChat.py +390 -370
  173. webscout/Provider/__init__.py +115 -174
  174. webscout/Provider/ai4chat.py +181 -174
  175. webscout/Provider/akashgpt.py +330 -335
  176. webscout/Provider/cerebras.py +397 -290
  177. webscout/Provider/cleeai.py +236 -213
  178. webscout/Provider/elmo.py +291 -283
  179. webscout/Provider/geminiapi.py +343 -208
  180. webscout/Provider/julius.py +245 -223
  181. webscout/Provider/learnfastai.py +333 -325
  182. webscout/Provider/llama3mitril.py +230 -215
  183. webscout/Provider/llmchat.py +308 -258
  184. webscout/Provider/llmchatco.py +321 -306
  185. webscout/Provider/meta.py +996 -801
  186. webscout/Provider/oivscode.py +332 -309
  187. webscout/Provider/searchchat.py +316 -292
  188. webscout/Provider/sonus.py +264 -258
  189. webscout/Provider/toolbaz.py +359 -353
  190. webscout/Provider/turboseek.py +332 -266
  191. webscout/Provider/typefully.py +262 -202
  192. webscout/Provider/x0gpt.py +332 -299
  193. webscout/__init__.py +31 -39
  194. webscout/__main__.py +5 -5
  195. webscout/cli.py +585 -524
  196. webscout/client.py +1497 -70
  197. webscout/conversation.py +140 -436
  198. webscout/exceptions.py +383 -362
  199. webscout/litagent/__init__.py +29 -29
  200. webscout/litagent/agent.py +492 -455
  201. webscout/litagent/constants.py +60 -60
  202. webscout/models.py +505 -181
  203. webscout/optimizers.py +74 -420
  204. webscout/prompt_manager.py +376 -288
  205. webscout/sanitize.py +1514 -0
  206. webscout/scout/README.md +452 -404
  207. webscout/scout/__init__.py +8 -8
  208. webscout/scout/core/__init__.py +7 -7
  209. webscout/scout/core/crawler.py +330 -210
  210. webscout/scout/core/scout.py +800 -607
  211. webscout/scout/core/search_result.py +51 -96
  212. webscout/scout/core/text_analyzer.py +64 -63
  213. webscout/scout/core/text_utils.py +412 -277
  214. webscout/scout/core/web_analyzer.py +54 -52
  215. webscout/scout/element.py +872 -478
  216. webscout/scout/parsers/__init__.py +70 -69
  217. webscout/scout/parsers/html5lib_parser.py +182 -172
  218. webscout/scout/parsers/html_parser.py +238 -236
  219. webscout/scout/parsers/lxml_parser.py +203 -178
  220. webscout/scout/utils.py +38 -37
  221. webscout/search/__init__.py +47 -0
  222. webscout/search/base.py +201 -0
  223. webscout/search/bing_main.py +45 -0
  224. webscout/search/brave_main.py +92 -0
  225. webscout/search/duckduckgo_main.py +57 -0
  226. webscout/search/engines/__init__.py +127 -0
  227. webscout/search/engines/bing/__init__.py +15 -0
  228. webscout/search/engines/bing/base.py +35 -0
  229. webscout/search/engines/bing/images.py +114 -0
  230. webscout/search/engines/bing/news.py +96 -0
  231. webscout/search/engines/bing/suggestions.py +36 -0
  232. webscout/search/engines/bing/text.py +109 -0
  233. webscout/search/engines/brave/__init__.py +19 -0
  234. webscout/search/engines/brave/base.py +47 -0
  235. webscout/search/engines/brave/images.py +213 -0
  236. webscout/search/engines/brave/news.py +353 -0
  237. webscout/search/engines/brave/suggestions.py +318 -0
  238. webscout/search/engines/brave/text.py +167 -0
  239. webscout/search/engines/brave/videos.py +364 -0
  240. webscout/search/engines/duckduckgo/__init__.py +25 -0
  241. webscout/search/engines/duckduckgo/answers.py +80 -0
  242. webscout/search/engines/duckduckgo/base.py +189 -0
  243. webscout/search/engines/duckduckgo/images.py +100 -0
  244. webscout/search/engines/duckduckgo/maps.py +183 -0
  245. webscout/search/engines/duckduckgo/news.py +70 -0
  246. webscout/search/engines/duckduckgo/suggestions.py +22 -0
  247. webscout/search/engines/duckduckgo/text.py +221 -0
  248. webscout/search/engines/duckduckgo/translate.py +48 -0
  249. webscout/search/engines/duckduckgo/videos.py +80 -0
  250. webscout/search/engines/duckduckgo/weather.py +84 -0
  251. webscout/search/engines/mojeek.py +61 -0
  252. webscout/search/engines/wikipedia.py +77 -0
  253. webscout/search/engines/yahoo/__init__.py +41 -0
  254. webscout/search/engines/yahoo/answers.py +19 -0
  255. webscout/search/engines/yahoo/base.py +34 -0
  256. webscout/search/engines/yahoo/images.py +323 -0
  257. webscout/search/engines/yahoo/maps.py +19 -0
  258. webscout/search/engines/yahoo/news.py +258 -0
  259. webscout/search/engines/yahoo/suggestions.py +140 -0
  260. webscout/search/engines/yahoo/text.py +273 -0
  261. webscout/search/engines/yahoo/translate.py +19 -0
  262. webscout/search/engines/yahoo/videos.py +302 -0
  263. webscout/search/engines/yahoo/weather.py +220 -0
  264. webscout/search/engines/yandex.py +67 -0
  265. webscout/search/engines/yep/__init__.py +13 -0
  266. webscout/search/engines/yep/base.py +34 -0
  267. webscout/search/engines/yep/images.py +101 -0
  268. webscout/search/engines/yep/suggestions.py +38 -0
  269. webscout/search/engines/yep/text.py +99 -0
  270. webscout/search/http_client.py +172 -0
  271. webscout/search/results.py +141 -0
  272. webscout/search/yahoo_main.py +57 -0
  273. webscout/search/yep_main.py +48 -0
  274. webscout/server/__init__.py +48 -0
  275. webscout/server/config.py +78 -0
  276. webscout/server/exceptions.py +69 -0
  277. webscout/server/providers.py +286 -0
  278. webscout/server/request_models.py +131 -0
  279. webscout/server/request_processing.py +404 -0
  280. webscout/server/routes.py +642 -0
  281. webscout/server/server.py +351 -0
  282. webscout/server/ui_templates.py +1171 -0
  283. webscout/swiftcli/__init__.py +79 -95
  284. webscout/swiftcli/core/__init__.py +7 -7
  285. webscout/swiftcli/core/cli.py +574 -297
  286. webscout/swiftcli/core/context.py +98 -104
  287. webscout/swiftcli/core/group.py +268 -241
  288. webscout/swiftcli/decorators/__init__.py +28 -28
  289. webscout/swiftcli/decorators/command.py +243 -221
  290. webscout/swiftcli/decorators/options.py +247 -220
  291. webscout/swiftcli/decorators/output.py +392 -252
  292. webscout/swiftcli/exceptions.py +21 -21
  293. webscout/swiftcli/plugins/__init__.py +9 -9
  294. webscout/swiftcli/plugins/base.py +134 -135
  295. webscout/swiftcli/plugins/manager.py +269 -269
  296. webscout/swiftcli/utils/__init__.py +58 -59
  297. webscout/swiftcli/utils/formatting.py +251 -252
  298. webscout/swiftcli/utils/parsing.py +368 -267
  299. webscout/update_checker.py +280 -136
  300. webscout/utils.py +28 -14
  301. webscout/version.py +2 -1
  302. webscout/version.py.bak +3 -0
  303. webscout/zeroart/__init__.py +218 -135
  304. webscout/zeroart/base.py +70 -66
  305. webscout/zeroart/effects.py +155 -101
  306. webscout/zeroart/fonts.py +1799 -1239
  307. webscout-2026.1.19.dist-info/METADATA +638 -0
  308. webscout-2026.1.19.dist-info/RECORD +312 -0
  309. {webscout-8.2.9.dist-info → webscout-2026.1.19.dist-info}/WHEEL +1 -1
  310. {webscout-8.2.9.dist-info → webscout-2026.1.19.dist-info}/entry_points.txt +1 -1
  311. webscout/DWEBS.py +0 -520
  312. webscout/Extra/Act.md +0 -309
  313. webscout/Extra/GitToolkit/gitapi/README.md +0 -110
  314. webscout/Extra/autocoder/__init__.py +0 -9
  315. webscout/Extra/autocoder/autocoder.py +0 -1105
  316. webscout/Extra/autocoder/autocoder_utiles.py +0 -332
  317. webscout/Extra/gguf.md +0 -430
  318. webscout/Extra/weather.md +0 -281
  319. webscout/Litlogger/README.md +0 -10
  320. webscout/Litlogger/__init__.py +0 -15
  321. webscout/Litlogger/formats.py +0 -4
  322. webscout/Litlogger/handlers.py +0 -103
  323. webscout/Litlogger/levels.py +0 -13
  324. webscout/Litlogger/logger.py +0 -92
  325. webscout/Provider/AI21.py +0 -177
  326. webscout/Provider/AISEARCH/DeepFind.py +0 -254
  327. webscout/Provider/AISEARCH/felo_search.py +0 -202
  328. webscout/Provider/AISEARCH/genspark_search.py +0 -324
  329. webscout/Provider/AISEARCH/hika_search.py +0 -186
  330. webscout/Provider/AISEARCH/scira_search.py +0 -298
  331. webscout/Provider/Aitopia.py +0 -316
  332. webscout/Provider/AllenAI.py +0 -440
  333. webscout/Provider/Blackboxai.py +0 -791
  334. webscout/Provider/ChatGPTClone.py +0 -237
  335. webscout/Provider/ChatGPTGratis.py +0 -194
  336. webscout/Provider/Cloudflare.py +0 -324
  337. webscout/Provider/ExaChat.py +0 -358
  338. webscout/Provider/Flowith.py +0 -217
  339. webscout/Provider/FreeGemini.py +0 -250
  340. webscout/Provider/Glider.py +0 -225
  341. webscout/Provider/HF_space/__init__.py +0 -0
  342. webscout/Provider/HF_space/qwen_qwen2.py +0 -206
  343. webscout/Provider/HuggingFaceChat.py +0 -469
  344. webscout/Provider/Hunyuan.py +0 -283
  345. webscout/Provider/LambdaChat.py +0 -411
  346. webscout/Provider/Llama3.py +0 -259
  347. webscout/Provider/Nemotron.py +0 -218
  348. webscout/Provider/OLLAMA.py +0 -396
  349. webscout/Provider/OPENAI/BLACKBOXAI.py +0 -766
  350. webscout/Provider/OPENAI/Cloudflare.py +0 -378
  351. webscout/Provider/OPENAI/FreeGemini.py +0 -283
  352. webscout/Provider/OPENAI/NEMOTRON.py +0 -232
  353. webscout/Provider/OPENAI/Qwen3.py +0 -283
  354. webscout/Provider/OPENAI/api.py +0 -969
  355. webscout/Provider/OPENAI/c4ai.py +0 -373
  356. webscout/Provider/OPENAI/chatgptclone.py +0 -494
  357. webscout/Provider/OPENAI/copilot.py +0 -242
  358. webscout/Provider/OPENAI/flowith.py +0 -162
  359. webscout/Provider/OPENAI/freeaichat.py +0 -359
  360. webscout/Provider/OPENAI/mcpcore.py +0 -389
  361. webscout/Provider/OPENAI/multichat.py +0 -376
  362. webscout/Provider/OPENAI/opkfc.py +0 -496
  363. webscout/Provider/OPENAI/scirachat.py +0 -477
  364. webscout/Provider/OPENAI/standardinput.py +0 -433
  365. webscout/Provider/OPENAI/typegpt.py +0 -364
  366. webscout/Provider/OPENAI/uncovrAI.py +0 -463
  367. webscout/Provider/OPENAI/venice.py +0 -431
  368. webscout/Provider/OPENAI/yep.py +0 -382
  369. webscout/Provider/OpenGPT.py +0 -209
  370. webscout/Provider/Perplexitylabs.py +0 -415
  371. webscout/Provider/Reka.py +0 -214
  372. webscout/Provider/StandardInput.py +0 -290
  373. webscout/Provider/TTI/aiarta.py +0 -365
  374. webscout/Provider/TTI/artbit.py +0 -0
  375. webscout/Provider/TTI/fastflux.py +0 -200
  376. webscout/Provider/TTI/piclumen.py +0 -203
  377. webscout/Provider/TTI/pixelmuse.py +0 -225
  378. webscout/Provider/TTS/gesserit.py +0 -128
  379. webscout/Provider/TTS/sthir.py +0 -94
  380. webscout/Provider/TeachAnything.py +0 -229
  381. webscout/Provider/UNFINISHED/puterjs.py +0 -635
  382. webscout/Provider/UNFINISHED/test_lmarena.py +0 -119
  383. webscout/Provider/Venice.py +0 -258
  384. webscout/Provider/VercelAI.py +0 -253
  385. webscout/Provider/Writecream.py +0 -246
  386. webscout/Provider/WritingMate.py +0 -269
  387. webscout/Provider/asksteve.py +0 -220
  388. webscout/Provider/chatglm.py +0 -215
  389. webscout/Provider/copilot.py +0 -425
  390. webscout/Provider/freeaichat.py +0 -285
  391. webscout/Provider/granite.py +0 -235
  392. webscout/Provider/hermes.py +0 -266
  393. webscout/Provider/koala.py +0 -170
  394. webscout/Provider/lmarena.py +0 -198
  395. webscout/Provider/multichat.py +0 -364
  396. webscout/Provider/scira_chat.py +0 -299
  397. webscout/Provider/scnet.py +0 -243
  398. webscout/Provider/talkai.py +0 -194
  399. webscout/Provider/typegpt.py +0 -289
  400. webscout/Provider/uncovr.py +0 -368
  401. webscout/Provider/yep.py +0 -389
  402. webscout/litagent/Readme.md +0 -276
  403. webscout/litprinter/__init__.py +0 -59
  404. webscout/swiftcli/Readme.md +0 -323
  405. webscout/tempid.py +0 -128
  406. webscout/webscout_search.py +0 -1184
  407. webscout/webscout_search_async.py +0 -654
  408. webscout/yep_search.py +0 -347
  409. webscout/zeroart/README.md +0 -89
  410. webscout-8.2.9.dist-info/METADATA +0 -1033
  411. webscout-8.2.9.dist-info/RECORD +0 -289
  412. {webscout-8.2.9.dist-info → webscout-2026.1.19.dist-info}/licenses/LICENSE.md +0 -0
  413. {webscout-8.2.9.dist-info → webscout-2026.1.19.dist-info}/top_level.txt +0 -0
webscout/scout/element.py CHANGED
@@ -1,478 +1,872 @@
1
- """
2
- Scout Element Module - Advanced HTML Element Representation
3
- """
4
-
5
- import re
6
- from typing import Any, Dict, List, Optional, Union
7
-
8
-
9
- class NavigableString(str):
10
- """
11
- A string that knows its place in the document tree.
12
- Mimics BeautifulSoup's NavigableString for better compatibility.
13
- """
14
- def __new__(cls, text: str):
15
- """
16
- Create a new NavigableString instance.
17
-
18
- Args:
19
- text (str): String content
20
- """
21
- return str.__new__(cls, text)
22
-
23
- def __init__(self, text: str):
24
- """
25
- Initialize a navigable string.
26
-
27
- Args:
28
- text (str): String content
29
- """
30
- self.parent = None
31
-
32
- def __repr__(self):
33
- """String representation."""
34
- return f"NavigableString({super().__repr__()})"
35
-
36
- def __add__(self, other):
37
- """
38
- Allow concatenation of NavigableString with other strings.
39
-
40
- Args:
41
- other (str): String to concatenate
42
-
43
- Returns:
44
- str: Concatenated string
45
- """
46
- return str(self) + str(other)
47
-
48
- def strip(self, chars=None):
49
- """
50
- Strip whitespace or specified characters.
51
-
52
- Args:
53
- chars (str, optional): Characters to strip
54
-
55
- Returns:
56
- str: Stripped string
57
- """
58
- return NavigableString(super().strip(chars))
59
-
60
- class Tag:
61
- """
62
- Represents an HTML tag with advanced traversal and manipulation capabilities.
63
- Enhanced to closely mimic BeautifulSoup's Tag class.
64
- """
65
- def __init__(self, name: str, attrs: Dict[str, str] = None):
66
- """
67
- Initialize a Tag with name and attributes.
68
-
69
- Args:
70
- name (str): Tag name
71
- attrs (dict, optional): Tag attributes
72
- """
73
- self.name = name
74
- self.attrs = attrs or {}
75
- self.contents = []
76
- self.parent = None
77
- self.string = None # For single string content
78
-
79
- def __str__(self):
80
- """String representation of the tag."""
81
- return self.decode_contents()
82
-
83
- def __repr__(self):
84
- """Detailed representation of the tag."""
85
- return f"<{self.name} {self.attrs}>"
86
-
87
- def __call__(self, *args, **kwargs):
88
- """
89
- Allows calling find_all directly on the tag.
90
- Mimics BeautifulSoup's behavior.
91
- """
92
- return self.find_all(*args, **kwargs)
93
-
94
- def __contains__(self, item):
95
- """
96
- Check if an item is in the tag's contents.
97
-
98
- Args:
99
- item: Item to search for
100
-
101
- Returns:
102
- bool: True if item is in contents, False otherwise
103
- """
104
- return item in self.contents
105
-
106
- def __getitem__(self, key):
107
- """
108
- Get an attribute value using dictionary-like access.
109
-
110
- Args:
111
- key (str): Attribute name
112
-
113
- Returns:
114
- Any: Attribute value
115
- """
116
- return self.attrs[key]
117
-
118
- def __iter__(self):
119
- """
120
- Iterate through tag's contents.
121
-
122
- Returns:
123
- Iterator: Contents of the tag
124
- """
125
- return iter(self.contents)
126
-
127
- def __eq__(self, other):
128
- """
129
- Compare tags based on name and attributes.
130
-
131
- Args:
132
- other (Tag): Tag to compare
133
-
134
- Returns:
135
- bool: True if tags are equivalent
136
- """
137
- if not isinstance(other, Tag):
138
- return False
139
- return (
140
- self.name == other.name and
141
- self.attrs == other.attrs and
142
- str(self) == str(other)
143
- )
144
-
145
- def __hash__(self):
146
- """
147
- Generate a hash for the tag.
148
-
149
- Returns:
150
- int: Hash value
151
- """
152
- return hash((self.name, frozenset(self.attrs.items()), str(self)))
153
-
154
- def find(self, name=None, attrs={}, recursive=True, text=None, **kwargs) -> Optional['Tag']:
155
- """
156
- Find the first matching child element.
157
- Enhanced with more flexible matching.
158
-
159
- Args:
160
- name (str, optional): Tag name to search for
161
- attrs (dict, optional): Attributes to match
162
- recursive (bool, optional): Search recursively
163
- text (str, optional): Text content to match
164
-
165
- Returns:
166
- Tag or None: First matching element
167
- """
168
- results = self.find_all(name, attrs, recursive, text, limit=1, **kwargs)
169
- return results[0] if results else None
170
-
171
- def find_all(self, name=None, attrs={}, recursive=True, text=None, limit=None, **kwargs) -> List['Tag']:
172
- """
173
- Find all matching child elements.
174
- Enhanced with more flexible matching and BeautifulSoup-like features.
175
-
176
- Args:
177
- name (str, optional): Tag name to search for
178
- attrs (dict, optional): Attributes to match
179
- recursive (bool, optional): Search recursively
180
- text (str, optional): Text content to match
181
- limit (int, optional): Maximum number of results
182
-
183
- Returns:
184
- List[Tag]: List of matching elements
185
- """
186
- results = []
187
-
188
- def _match(tag):
189
- # Check tag name with case-insensitive and regex support
190
- if name:
191
- if isinstance(name, str):
192
- if tag.name.lower() != name.lower():
193
- return False
194
- elif isinstance(name, re.Pattern):
195
- if not name.search(tag.name):
196
- return False
197
-
198
- # Check attributes with more flexible matching
199
- for k, v in attrs.items():
200
- # Handle special attribute matching
201
- if k == 'class':
202
- tag_classes = tag.get('class', [])
203
- if isinstance(v, str) and v not in tag_classes:
204
- return False
205
- elif isinstance(v, list) and not all(cls in tag_classes for cls in v):
206
- return False
207
- elif k == 'id':
208
- if tag.get('id') != v:
209
- return False
210
- else:
211
- # Regex or exact match for other attributes
212
- tag_attr = tag.attrs.get(k)
213
- if v is True:
214
- if tag_attr is None:
215
- return False
216
- elif isinstance(v, re.Pattern):
217
- if tag_attr is None or not v.search(str(tag_attr)):
218
- return False
219
- elif tag_attr != v:
220
- return False
221
-
222
- # Check text content
223
- if text:
224
- tag_text = tag.get_text(strip=True)
225
- if isinstance(text, str) and text.lower() not in tag_text.lower():
226
- return False
227
- elif isinstance(text, re.Pattern) and not text.search(tag_text):
228
- return False
229
-
230
- return True
231
-
232
- def _search(element):
233
- if _match(element):
234
- results.append(element)
235
- if limit and len(results) == limit:
236
- return
237
-
238
- if recursive:
239
- for child in element.contents:
240
- if isinstance(child, Tag):
241
- _search(child)
242
-
243
- _search(self)
244
- return results
245
-
246
- def select(self, selector: str) -> List['Tag']:
247
- """
248
- Select elements using CSS selector.
249
- Enhanced to support more complex selectors.
250
-
251
- Args:
252
- selector (str): CSS selector string
253
-
254
- Returns:
255
- List[Tag]: List of matching elements
256
- """
257
- # More advanced CSS selector parsing
258
- # This is a simplified implementation and might need more robust parsing
259
- parts = re.split(r'\s+', selector.strip())
260
- results = []
261
-
262
- def _match_selector(tag, selector_part):
263
- # Support more complex selectors
264
- if selector_part.startswith('.'):
265
- # Class selector
266
- return selector_part[1:] in tag.get('class', [])
267
- elif selector_part.startswith('#'):
268
- # ID selector
269
- return tag.get('id') == selector_part[1:]
270
- elif '[' in selector_part and ']' in selector_part:
271
- # Attribute selector
272
- attr_match = re.match(r'(\w+)\[([^=]+)(?:=(.+))?\]', selector_part)
273
- if attr_match:
274
- tag_name, attr, value = attr_match.groups()
275
- if tag_name and tag.name != tag_name:
276
- return False
277
- if value:
278
- return tag.get(attr) == value.strip("'\"")
279
- return attr in tag.attrs
280
- else:
281
- # Tag selector
282
- return tag.name == selector_part
283
-
284
- def _recursive_select(element, selector_parts):
285
- if not selector_parts:
286
- results.append(element)
287
- return
288
-
289
- current_selector = selector_parts[0]
290
- remaining_selectors = selector_parts[1:]
291
-
292
- if _match_selector(element, current_selector):
293
- if not remaining_selectors:
294
- results.append(element)
295
- else:
296
- for child in element.contents:
297
- if isinstance(child, Tag):
298
- _recursive_select(child, remaining_selectors)
299
-
300
- for child in self.contents:
301
- if isinstance(child, Tag):
302
- _recursive_select(child, parts)
303
-
304
- return results
305
-
306
- def select_one(self, selector: str) -> Optional['Tag']:
307
- """
308
- Select the first element matching the CSS selector.
309
-
310
- Args:
311
- selector (str): CSS selector string
312
-
313
- Returns:
314
- Tag or None: First matching element
315
- """
316
- results = self.select(selector)
317
- return results[0] if results else None
318
-
319
- def get_text(self, separator=' ', strip=False, types=None) -> str:
320
- """
321
- Extract text from the tag and its descendants.
322
- Enhanced to support more flexible text extraction.
323
-
324
- Args:
325
- separator (str, optional): Text separator
326
- strip (bool, optional): Strip whitespace
327
- types (list, optional): Types of content to extract
328
-
329
- Returns:
330
- str: Extracted text
331
- """
332
- texts = []
333
- for content in self.contents:
334
- # Support filtering by content type
335
- if types is None or type(content) in types:
336
- if isinstance(content, NavigableString):
337
- texts.append(str(content))
338
- elif isinstance(content, Tag):
339
- texts.append(content.get_text(separator, strip))
340
-
341
- text = separator.join(texts)
342
- text = re.sub(r'\n\n+', '\n', text) # Replace multiple newlines with single newlines
343
- return text.strip() if strip else text
344
-
345
- def find_text(self, pattern: Union[str, re.Pattern], **kwargs) -> Optional[str]:
346
- """
347
- Find the first text matching a pattern.
348
-
349
- Args:
350
- pattern (str or re.Pattern): Pattern to match
351
- **kwargs: Additional arguments for get_text()
352
-
353
- Returns:
354
- str or None: First matching text
355
- """
356
- text = self.get_text(**kwargs)
357
-
358
- if isinstance(pattern, str):
359
- return pattern if pattern in text else None
360
- elif isinstance(pattern, re.Pattern):
361
- match = pattern.search(text)
362
- return match.group(0) if match else None
363
-
364
- def replace_text(self, old: Union[str, re.Pattern], new: str, **kwargs) -> str:
365
- """
366
- Replace text matching a pattern.
367
-
368
- Args:
369
- old (str or re.Pattern): Pattern to replace
370
- new (str): Replacement text
371
- **kwargs: Additional arguments for get_text()
372
-
373
- Returns:
374
- str: Modified text
375
- """
376
- text = self.get_text(**kwargs)
377
-
378
- if isinstance(old, str):
379
- return text.replace(old, new)
380
- elif isinstance(old, re.Pattern):
381
- return old.sub(new, text)
382
-
383
def get(self, key: str, default: Any = None) -> Any:
    """
    Look up an attribute of this tag.

    Args:
        key (str): Attribute name
        default (Any, optional): Value returned when the attribute is absent

    Returns:
        Any: The stored attribute value, or ``default``
    """
    attributes = self.attrs
    if key in attributes:
        return attributes[key]
    return default
395
-
396
def decompose(self) -> None:
    """Detach this tag (and therefore its contents) from its parent's contents."""
    owner = self.parent
    if not owner:
        # Already detached: nothing to remove.
        return
    owner.contents.remove(self)
400
-
401
def extract(self) -> 'Tag':
    """
    Detach this tag from the document and hand it back to the caller.

    Returns:
        Tag: This same tag object, after decompose() has been called on it
    """
    detached = self
    detached.decompose()
    return detached
410
-
411
def clear(self) -> None:
    """Empty this tag's contents list in place (the list object is kept)."""
    del self.contents[:]
414
-
415
def append(self, new_child: Union['Tag', NavigableString, str]) -> None:
    """
    Add ``new_child`` as the last child of this tag.

    Any ``str`` value is wrapped in a fresh NavigableString first so that it
    can carry a ``parent`` pointer.
    """
    child = NavigableString(new_child) if isinstance(new_child, str) else new_child
    child.parent = self
    self.contents.append(child)
421
-
422
def insert(self, index: int, new_child: Union['Tag', NavigableString, str]) -> None:
    """
    Place ``new_child`` at position ``index`` among this tag's children.

    Any ``str`` value is wrapped in a fresh NavigableString first so that it
    can carry a ``parent`` pointer.
    """
    child = NavigableString(new_child) if isinstance(new_child, str) else new_child
    child.parent = self
    self.contents.insert(index, child)
428
-
429
def replace_with(self, new_tag: 'Tag') -> None:
    """
    Put ``new_tag`` in this tag's slot inside the parent.

    Does nothing when this tag has no parent.

    Args:
        new_tag (Tag): Tag that takes over this tag's position
    """
    owner = self.parent
    if not owner:
        return
    siblings = owner.contents
    siblings[siblings.index(self)] = new_tag
    new_tag.parent = owner
440
-
441
def decode_contents(self, eventual_encoding='utf-8') -> str:
    """
    Concatenate the string form of every child of this tag.

    Args:
        eventual_encoding (str, optional): Accepted but not used by this
            implementation

    Returns:
        str: ``str()`` of each item in ``contents``, joined without separator
    """
    return ''.join(map(str, self.contents))
452
-
453
def prettify(self, formatter='minimal') -> str:
    """
    Render this tag and its subtree as indented markup, one node per line.

    Child tags are indented two spaces deeper than their parent; every other
    child is written with ``str()`` on its own line.

    Args:
        formatter (str, optional): Accepted but not used by this
            implementation

    Returns:
        str: Indented representation ending with a newline
    """
    def _render(node, depth=0):
        pad = ' ' * depth
        attr_text = ''.join(f' {key}="{value}"' for key, value in node.attrs.items())
        pieces = [f'{pad}<{node.name}{attr_text}>\n']

        child_pad = ' ' * (depth + 2)
        for item in node.contents:
            if isinstance(item, Tag):
                pieces.append(_render(item, depth + 2))
            else:
                pieces.append(child_pad + str(item) + '\n')

        pieces.append(f'{pad}</{node.name}>\n')
        return ''.join(pieces)

    return _render(self)
1
+ """
2
+ Scout Element Module - Advanced HTML Element Representation
3
+ """
4
+
5
+ import re
6
+ from typing import Any, Dict, List, Optional, Union
7
+
8
+
9
class NavigableString(str):
    """
    Text node of the document tree: a ``str`` that also records its parent.
    Mimics BS4's NavigableString for better compatibility.
    """

    # Owning Tag, or None while the string is not attached to a tree.
    parent: Optional["Tag"]

    def __new__(cls, text: str):
        """
        Build the underlying immutable string value.

        Args:
            text (str): String content
        """
        instance = str.__new__(cls, text)
        return instance

    def __init__(self, text: str):
        """
        Start out detached from any tag.

        Args:
            text (str): String content (already consumed by __new__)
        """
        self.parent = None

    def __repr__(self):
        """Debug representation wrapping the plain ``str`` repr."""
        return "NavigableString(" + str.__repr__(self) + ")"

    def __add__(self, other):
        """
        Concatenate with any object by way of its ``str()`` form.

        Args:
            other: Value to append; converted with ``str()``

        Returns:
            str: Plain (non-navigable) concatenated string
        """
        return "".join((str(self), str(other)))

    def strip(self, chars=None):
        """
        Strip whitespace or the given characters from both ends.

        Args:
            chars (str, optional): Characters to strip

        Returns:
            NavigableString: New, detached string holding the stripped text
        """
        trimmed = str.strip(self, chars)
        return NavigableString(trimmed)
63
+
64
+
65
class Tag:
    """
    Represents an HTML tag with advanced traversal and manipulation capabilities.
    Enhanced to closely mimic BS4's Tag class.

    Equality (``==``) is structural: two tags are equal when their name,
    attributes and ``str()`` form agree.  All tree-editing and navigation
    methods therefore locate a tag inside its parent by *identity*, never by
    equality, so that look-alike siblings are not confused with each other.
    """

    # Void elements that prettify() renders as "<name ... />" when empty.
    _SELF_CLOSING = frozenset(
        {
            "br",
            "img",
            "input",
            "hr",
            "meta",
            "link",
            "base",
            "area",
            "col",
            "embed",
            "keygen",
            "source",
            "track",
            "wbr",
        }
    )

    def __init__(self, name: str, attrs: Optional[Dict[str, str]] = None):
        """
        Initialize a Tag with name and attributes.

        Args:
            name (str): Tag name
            attrs (dict, optional): Tag attributes
        """
        self.name = name
        self.attrs: Dict[str, str] = attrs or {}
        self.contents: List[Union["Tag", "NavigableString", str]] = []
        self.parent: Optional["Tag"] = None
        self._string: Optional[str] = None  # For single string content

    def __str__(self):
        """String representation of the tag (its decoded contents)."""
        return self.decode_contents()

    def __repr__(self):
        """Detailed representation of the tag."""
        return f"<{self.name} {self.attrs}>"

    def __call__(self, *args, **kwargs):
        """
        Allows calling find_all directly on the tag.
        Mimics BS4's behavior.
        """
        return self.find_all(*args, **kwargs)

    def __contains__(self, item):
        """
        Check if an item is in the tag's contents.

        Args:
            item: Item to search for

        Returns:
            bool: True if item is in contents, False otherwise
        """
        return item in self.contents

    def __getitem__(self, key):
        """
        Get an attribute value using dictionary-like access.

        Args:
            key (str): Attribute name

        Returns:
            Any: Attribute value

        Raises:
            KeyError: If the attribute is not present
        """
        return self.attrs[key]

    def __iter__(self):
        """
        Iterate through tag's contents.

        Returns:
            Iterator: Contents of the tag
        """
        return iter(self.contents)

    def __eq__(self, other):
        """
        Compare tags based on name, attributes and string form.

        Args:
            other (Tag): Tag to compare

        Returns:
            bool: True if tags are equivalent
        """
        if not isinstance(other, Tag):
            return False
        return self.name == other.name and self.attrs == other.attrs and str(self) == str(other)

    def __hash__(self):
        """
        Generate a hash consistent with __eq__.

        Returns:
            int: Hash value
        """
        return hash((self.name, frozenset(self.attrs.items()), str(self)))

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _index_in_parent(self) -> int:
        """
        Return this tag's position in ``self.parent.contents`` by identity.

        ``list.index`` / ``list.remove`` use ``__eq__`` and would pick the
        first structurally-equal sibling instead of this exact object.

        Raises:
            ValueError: If the parent's contents do not hold this tag
        """
        for position, sibling in enumerate(self.parent.contents):
            if sibling is self:
                return position
        raise ValueError("tag is not present in its parent's contents")

    @staticmethod
    def _as_node(child):
        """Wrap a plain ``str`` in NavigableString; return other values unchanged."""
        if isinstance(child, str) and not isinstance(child, NavigableString):
            return NavigableString(child)
        return child

    @staticmethod
    def _split_classes(value) -> List[str]:
        """Return a new list of class names from a list/tuple or a space/comma separated value."""
        if isinstance(value, (list, tuple)):
            return list(value)
        return [part.strip() for part in re.split(r"[ ,]+", str(value)) if part.strip()]

    # ------------------------------------------------------------------
    # Searching
    # ------------------------------------------------------------------

    def find(
        self, name=None, attrs=None, recursive=True, text=None, limit=None, class_=None, **kwargs
    ) -> Optional["Tag"]:
        """
        Find the first matching element (this tag itself is also a candidate).

        Args:
            name (str, optional): Tag name to search for
            attrs (dict, optional): Attributes to match
            recursive (bool, optional): Search recursively
            text (str, optional): Text content to match
            limit: Accepted for signature compatibility; always treated as 1
            class_ (optional): Class filter, merged with ``attrs['class']``
            **kwargs: Extra attribute filters, forwarded to find_all()

        Returns:
            Tag or None: First matching element
        """
        # Work on copies so the caller's dict / class list are never mutated.
        merged = dict(attrs) if attrs else {}
        if class_ is not None:
            if "class" in merged:
                merged["class"] = self._split_classes(merged["class"]) + self._split_classes(
                    class_
                )
            else:
                merged["class"] = class_
        results = self.find_all(name, merged, recursive, text, limit=1, **kwargs)
        return results[0] if results else None

    def find_all(
        self, name=None, attrs=None, recursive=True, text=None, limit=None, class_=None, **kwargs
    ) -> List["Tag"]:
        """
        Find all matching elements, in document order.

        This tag itself is tested first, then its children.  With
        ``recursive=False`` only this tag and its direct children are tested.

        Args:
            name (str | re.Pattern | list, optional): Tag name(s) to match
                ("*" matches any tag; string comparison is case-insensitive)
            attrs (dict, optional): Attributes to match
            recursive (bool, optional): Search the whole subtree
            text (str | re.Pattern, optional): Text content to match
            limit (int, optional): Maximum number of results (falsy = no limit)
            class_ (optional): Class filter; overrides ``attrs['class']``
            **kwargs: Extra attribute filters (``href=True``, ``id="x"``);
                ``string=`` is accepted as an alias for ``text``

        Returns:
            List[Tag]: List of matching elements
        """
        results: List["Tag"] = []

        # Build the attribute filter once instead of once per visited tag.
        search_attrs = dict(attrs) if attrs else {}
        string_alias = kwargs.pop("string", None)
        if text is None:
            text = string_alias
        search_attrs.update(kwargs)
        if class_ is not None:
            search_attrs["class"] = class_

        name_list = [n.lower() for n in name] if isinstance(name, (list, tuple)) else None

        def _match(tag):
            # Check tag name with case-insensitive and regex support
            if name:
                if isinstance(name, str):
                    if name != "*" and tag.name.lower() != name.lower():
                        return False
                elif isinstance(name, re.Pattern):
                    if not name.search(tag.name):
                        return False
                elif name_list is not None:
                    if tag.name.lower() not in name_list:
                        return False

            for key, wanted in search_attrs.items():
                tag_attr = tag.attrs.get(key)

                if key == "class":
                    # Support multiple classes and whole-word matching
                    if isinstance(tag_attr, str):
                        tag_classes = [
                            c.strip() for c in re.split(r"[ ,]+", tag_attr) if c.strip()
                        ]
                    elif isinstance(tag_attr, list):
                        tag_classes = tag_attr
                    else:
                        tag_classes = []

                    if isinstance(wanted, str):
                        wanted_classes = [
                            c.strip() for c in re.split(r"[ ,]+", wanted) if c.strip()
                        ]
                        if not all(cls in tag_classes for cls in wanted_classes):
                            return False
                    elif isinstance(wanted, (list, tuple)):
                        if not all(cls in tag_classes for cls in wanted):
                            return False
                    elif isinstance(wanted, re.Pattern):
                        if not any(wanted.search(cls) for cls in tag_classes):
                            return False
                    else:
                        return False
                elif wanted is True:
                    # Attribute must be present, whatever its value
                    if tag_attr is None:
                        return False
                elif wanted is False:
                    # Attribute must be absent
                    if tag_attr is not None:
                        return False
                elif isinstance(wanted, re.Pattern):
                    if tag_attr is None or not wanted.search(str(tag_attr)):
                        return False
                elif tag_attr != wanted:
                    return False

            # Check text content
            if text:
                tag_text = tag.get_text(strip=True)
                if isinstance(text, str):
                    if text not in tag_text:
                        return False
                elif isinstance(text, re.Pattern):
                    if not text.search(tag_text):
                        return False

            return True

        def _search(element, depth):
            """Depth-first walk; returns True as soon as ``limit`` is reached."""
            if _match(element):
                results.append(element)
                if limit and len(results) >= limit:
                    return True
            # Non-recursive searches still look at the direct children.
            if recursive or depth == 0:
                for child in element.contents:
                    if isinstance(child, Tag) and _search(child, depth + 1):
                        return True
            return False

        _search(self, 0)
        return results

    def select(self, selector: str) -> List["Tag"]:
        """
        Select elements using CSS selector.
        Supported syntax:
        - Tag selectors: 'p', 'div'
        - Class selectors: '.class', 'p.class'
        - ID selectors: '#id', 'div#id'
        - Attribute selectors: '[attr]', '[attr=value]'
        - Descendant selectors: 'div p'
        - Child selectors: 'div > p' (the '>' must be surrounded by spaces)
        - Multiple classes: '.class1.class2'
        - Any mix of the two combinators: 'div > ul li'

        The first compound selector may match this tag itself; every later
        one is matched strictly below the previous match.  Each element is
        returned at most once.

        Args:
            selector (str): CSS selector string

        Returns:
            List[Tag]: List of matching elements
        """
        # Turn the selector into (combinator, components) steps.
        steps = []
        combinator = " "
        for token in selector.split():
            if token == ">":
                combinator = ">"
                continue
            steps.append((combinator, self._parse_selector_components(token)))
            combinator = " "
        if not steps:
            # An empty selector carries no constraints and matches everything.
            steps.append((" ", self._parse_selector_components("")))

        first_combinator, first_components = steps[0]
        if first_combinator == ">":
            # Leading '>' means "direct children of this tag".
            current = [
                child
                for child in self.contents
                if isinstance(child, Tag)
                and self._match_selector_components(child, first_components)
            ]
        else:
            current = self._find_all_matching_in_tree(self, first_components)

        for combinator, components in steps[1:]:
            seen = set()
            matched = []
            for node in current:
                candidates = node.contents if combinator == ">" else node.descendants
                for candidate in candidates:
                    if not isinstance(candidate, Tag) or id(candidate) in seen:
                        continue
                    if self._match_selector_components(candidate, components):
                        seen.add(id(candidate))
                        matched.append(candidate)
            current = matched

        return current

    def _select_with_descendant_combinator(self, parts: List[str]) -> List["Tag"]:
        """Handle a chain of compound selectors joined by the descendant combinator."""
        if not parts:
            return []
        return self.select(" ".join(parts))

    def _select_with_child_combinator(self, parts: List[str]) -> List["Tag"]:
        """Handle a chain of compound selectors joined by the child combinator (>)."""
        if not parts:
            return []
        return self.select(" > ".join(parts))

    def _parse_selector_components(self, simple_sel: str) -> dict:
        """Parse a simple selector like 'p.class#id[attr=value]' into components."""
        components = {"tag": None, "id": None, "classes": [], "attrs": {}}

        # Pull the attribute tests out first so that '.' or '#' inside a
        # value (e.g. [href=page.html]) is not mistaken for a class or id.
        attr_exprs = re.findall(r"\[([^\]]+)\]", simple_sel)
        bare = re.sub(r"\[[^\]]*\]", "", simple_sel)

        # Extract tag name (at the start)
        tag_match = re.match(r"[a-zA-Z][\w-]*", bare)
        if tag_match:
            components["tag"] = tag_match.group(0)
            bare = bare[tag_match.end():]

        # Extract ID
        id_matches = re.findall(r"#([\w-]+)", bare)
        if id_matches:
            components["id"] = id_matches[0]

        # Extract classes
        components["classes"] = re.findall(r"\.([\w-]+)", bare)

        # Extract attributes; None means "attribute merely has to exist"
        for attr_expr in attr_exprs:
            if "=" in attr_expr:
                attr_name, attr_value = attr_expr.split("=", 1)
                components["attrs"][attr_name.strip()] = attr_value.strip().strip("'\"")
            else:
                components["attrs"][attr_expr.strip()] = None

        return components

    def _match_selector_components(self, tag: "Tag", components: dict) -> bool:
        """Check if a tag matches the parsed selector components."""
        # Check tag name
        if components["tag"] and tag.name != components["tag"]:
            return False

        # Check ID
        if components["id"] and tag.get("id") != components["id"]:
            return False

        # Check classes
        tag_classes = tag.get("class", "")
        if isinstance(tag_classes, str):
            tag_classes = tag_classes.split()
        elif not isinstance(tag_classes, list):
            tag_classes = [str(tag_classes)] if tag_classes else []

        for cls in components["classes"]:
            if cls not in tag_classes:
                return False

        # Check attributes
        for attr_name, attr_value in components["attrs"].items():
            if attr_value is None:
                # Just check attribute exists
                if attr_name not in tag.attrs:
                    return False
            elif tag.get(attr_name) != attr_value:
                return False

        return True

    def _find_all_matching_in_tree(self, element: "Tag", components: dict) -> List["Tag"]:
        """Recursively find all elements (including ``element``) matching the components."""
        matches = []

        # Check current element
        if self._match_selector_components(element, components):
            matches.append(element)

        # Check children recursively
        for child in element.contents:
            if isinstance(child, Tag):
                matches.extend(self._find_all_matching_in_tree(child, components))

        return matches

    def select_one(self, selector: str) -> Optional["Tag"]:
        """
        Select the first element matching the CSS selector.

        Args:
            selector (str): CSS selector string

        Returns:
            Tag or None: First matching element
        """
        results = self.select(selector)
        return results[0] if results else None

    # ------------------------------------------------------------------
    # Text and attributes
    # ------------------------------------------------------------------

    def get_text(self, separator=" ", strip=False, types=None) -> str:
        """
        Extract text from the tag and its descendants.

        Args:
            separator (str, optional): Text separator
            strip (bool, optional): Strip whitespace
            types (list, optional): Exact types of direct children to include;
                None includes every child

        Returns:
            str: Extracted text with runs of newlines collapsed to one
        """
        texts = []
        for content in self.contents:
            # Support filtering by content type
            if types is None or type(content) in types:
                if isinstance(content, Tag):
                    texts.append(content.get_text(separator, strip))
                elif isinstance(content, NavigableString):
                    texts.append(str(content))

        text = separator.join(texts)
        text = re.sub(r"\n\n+", "\n", text)  # Replace multiple newlines with single newlines
        return text.strip() if strip else text

    def find_text(self, pattern: Union[str, re.Pattern], **kwargs) -> Optional[str]:
        """
        Find the first text matching a pattern.

        Args:
            pattern (str or re.Pattern): Pattern to match
            **kwargs: Additional arguments for get_text()

        Returns:
            str or None: First matching text
        """
        text = self.get_text(**kwargs)

        if isinstance(pattern, str):
            return pattern if pattern in text else None
        if isinstance(pattern, re.Pattern):
            match = pattern.search(text)
            return match.group(0) if match else None
        return None

    def replace_text(self, old: Union[str, re.Pattern], new: str, **kwargs) -> str:
        """
        Return the tag's text with a pattern replaced (the tree is untouched).

        Args:
            old (str or re.Pattern): Pattern to replace
            new (str): Replacement text
            **kwargs: Additional arguments for get_text()

        Returns:
            str: Modified text (None for an unsupported ``old`` type)
        """
        text = self.get_text(**kwargs)

        if isinstance(old, str):
            return text.replace(old, new)
        if isinstance(old, re.Pattern):
            return old.sub(new, text)
        return None

    def get(self, key: str, default: Any = None) -> Any:
        """
        Get an attribute value.

        Args:
            key (str): Attribute name
            default (Any, optional): Default value if attribute not found

        Returns:
            Any: Attribute value or default
        """
        return self.attrs.get(key, default)

    @property
    def string(self) -> Optional[str]:
        """
        Get the string content of the tag.
        Returns the explicitly set _string if present or the combined text of the tag's contents.
        """
        return self._string if self._string is not None else self.get_text()

    @string.setter
    def string(self, value: Optional[str]) -> None:
        """
        Set the string content of the tag.
        Clears existing contents and sets new string value.

        Args:
            value (str | None): New string content
        """
        self._string = value
        self.clear()
        if value is not None:
            self.append(value)

    @property
    def text(self) -> str:
        """BS4 compatible text property."""
        return self.get_text()

    # ------------------------------------------------------------------
    # Tree modification
    # ------------------------------------------------------------------

    def decompose(self) -> None:
        """
        Remove the tag and its contents from the document.

        Raises:
            ValueError: If the recorded parent does not actually contain this tag
        """
        parent = self.parent
        if parent is None:
            return
        del parent.contents[self._index_in_parent()]
        # Drop the stale back-reference so the detached tag is a clean root.
        self.parent = None

    def extract(self) -> "Tag":
        """
        Remove the tag from the document and return it.

        Returns:
            Tag: Extracted tag
        """
        self.decompose()
        return self

    def clear(self) -> None:
        """Remove all contents of the tag."""
        self.contents.clear()

    def append(self, new_child: Union["Tag", "NavigableString", str]) -> None:
        """Append a new child to this tag; strings are wrapped in a new NavigableString."""
        if isinstance(new_child, str):
            new_child = NavigableString(new_child)
        if hasattr(new_child, "parent"):
            new_child.parent = self
        self.contents.append(new_child)

    def extend(self, new_children: List[Union["Tag", "NavigableString", str]]) -> None:
        """Extend the contents of this tag with a list of new children."""
        for child in new_children:
            self.append(child)

    def insert(self, index: int, new_child: Union["Tag", "NavigableString", str]) -> None:
        """Insert a new child at the given index; strings are wrapped in a new NavigableString."""
        if isinstance(new_child, str):
            new_child = NavigableString(new_child)
        if hasattr(new_child, "parent"):
            new_child.parent = self
        self.contents.insert(index, new_child)

    def replace_with(self, new_tag: "Tag") -> None:
        """
        Replace this tag with another tag.

        Silently does nothing when this tag has no parent or is no longer
        present in its parent's contents.
        """
        parent = self.parent
        if parent is None:
            return
        try:
            index = self._index_in_parent()
        except ValueError:
            return
        new_tag = self._as_node(new_tag)
        parent.contents[index] = new_tag
        if hasattr(new_tag, "parent"):
            new_tag.parent = parent

    def wrap(self, wrapper_tag: "Tag") -> "Tag":
        """
        Wrap this tag in another tag.

        Returns:
            Tag: ``wrapper_tag``, now holding this tag as its last child
        """
        parent = self.parent
        if parent is not None:
            parent.contents[self._index_in_parent()] = wrapper_tag
        wrapper_tag.parent = parent
        wrapper_tag.contents.append(self)
        self.parent = wrapper_tag
        return wrapper_tag

    def unwrap(self) -> None:
        """Remove this tag but keep its contents, in order, in the parent."""
        parent = self.parent
        if parent is None:
            return
        index = self._index_in_parent()
        children = list(self.contents)
        for child in children:
            if hasattr(child, "parent"):
                child.parent = parent
        # Splice the children into exactly this tag's slot.
        parent.contents[index:index + 1] = children
        self.parent = None
        self.contents = []

    def insert_before(self, new_element: "Tag") -> None:
        """Insert a tag or string immediately before this tag (no-op without a parent)."""
        parent = self.parent
        if parent is None:
            return
        index = self._index_in_parent()
        new_element = self._as_node(new_element)
        if hasattr(new_element, "parent"):
            new_element.parent = parent
        parent.contents.insert(index, new_element)

    def insert_after(self, new_element: "Tag") -> None:
        """Insert a tag or string immediately after this tag (no-op without a parent)."""
        parent = self.parent
        if parent is None:
            return
        index = self._index_in_parent()
        new_element = self._as_node(new_element)
        if hasattr(new_element, "parent"):
            new_element.parent = parent
        parent.contents.insert(index + 1, new_element)

    # ------------------------------------------------------------------
    # Navigation
    # ------------------------------------------------------------------

    @property
    def descendants(self):
        """Yield all descendants (tags and strings) in document order."""
        for child in self.contents:
            yield child
            if isinstance(child, Tag):
                yield from child.descendants

    @property
    def parents(self):
        """Yield all parents up the tree, nearest first."""
        current = self.parent
        while current is not None:
            yield current
            current = current.parent

    @property
    def next_element(self):
        """Return the next element in document order, or None at the end."""
        if self.contents:
            return self.contents[0]
        current = self
        while current.parent is not None:
            siblings = current.parent.contents
            index = current._index_in_parent()
            if index + 1 < len(siblings):
                return siblings[index + 1]
            current = current.parent
        return None

    @property
    def previous_element(self):
        """Return the previous element in document order, or None for a root."""
        if self.parent is None:
            return None
        index = self._index_in_parent()
        if index > 0:
            prev = self.parent.contents[index - 1]
            # The element just before us is the deepest last descendant
            # of the preceding sibling.
            while isinstance(prev, Tag) and prev.contents:
                prev = prev.contents[-1]
            return prev
        return self.parent

    # ------------------------------------------------------------------
    # Output
    # ------------------------------------------------------------------

    def decode_contents(self, eventual_encoding="utf-8") -> str:
        """
        Decode the contents of the tag to a string.

        Args:
            eventual_encoding (str, optional): Accepted for BS4 signature
                compatibility; not used

        Returns:
            str: ``str()`` of every child, concatenated
        """
        return "".join(str(content) for content in self.contents)

    def prettify(self, formatter="minimal") -> str:
        """
        Return a nicely formatted representation of the tag.

        Args:
            formatter (str, optional): Accepted for BS4 signature
                compatibility; not used

        Returns:
            str: Prettified tag representation
        """
        self_closing = self._SELF_CLOSING

        def _prettify(tag, indent=0):
            result = " " * indent + f"<{tag.name}"
            for k, v in tag.attrs.items():
                if isinstance(v, list):
                    v = " ".join(v)
                result += f' {k}="{v}"'

            # Empty void elements are written in self-closing form
            if tag.name.lower() in self_closing and not tag.contents:
                result += " />\n"
                return result

            result += ">\n"

            for content in tag.contents:
                if isinstance(content, Tag):
                    result += _prettify(content, indent + 2)
                elif str(content).strip():
                    # Whitespace-only text nodes are dropped
                    result += " " * (indent + 2) + str(content) + "\n"

            result += " " * indent + f"</{tag.name}>\n"
            return result

        return _prettify(self)