webscout 8.3.7__py3-none-any.whl → 2025.10.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of webscout might be problematic. Click here for more details.

Files changed (306) hide show
  1. webscout/AIauto.py +250 -250
  2. webscout/AIbase.py +379 -379
  3. webscout/AIutel.py +60 -60
  4. webscout/Bard.py +1012 -1012
  5. webscout/Bing_search.py +417 -417
  6. webscout/DWEBS.py +529 -529
  7. webscout/Extra/Act.md +309 -309
  8. webscout/Extra/GitToolkit/__init__.py +10 -10
  9. webscout/Extra/GitToolkit/gitapi/README.md +110 -110
  10. webscout/Extra/GitToolkit/gitapi/__init__.py +11 -11
  11. webscout/Extra/GitToolkit/gitapi/repository.py +195 -195
  12. webscout/Extra/GitToolkit/gitapi/user.py +96 -96
  13. webscout/Extra/GitToolkit/gitapi/utils.py +61 -61
  14. webscout/Extra/YTToolkit/README.md +375 -375
  15. webscout/Extra/YTToolkit/YTdownloader.py +956 -956
  16. webscout/Extra/YTToolkit/__init__.py +2 -2
  17. webscout/Extra/YTToolkit/transcriber.py +475 -475
  18. webscout/Extra/YTToolkit/ytapi/README.md +44 -44
  19. webscout/Extra/YTToolkit/ytapi/__init__.py +6 -6
  20. webscout/Extra/YTToolkit/ytapi/channel.py +307 -307
  21. webscout/Extra/YTToolkit/ytapi/errors.py +13 -13
  22. webscout/Extra/YTToolkit/ytapi/extras.py +118 -118
  23. webscout/Extra/YTToolkit/ytapi/https.py +88 -88
  24. webscout/Extra/YTToolkit/ytapi/patterns.py +61 -61
  25. webscout/Extra/YTToolkit/ytapi/playlist.py +58 -58
  26. webscout/Extra/YTToolkit/ytapi/pool.py +7 -7
  27. webscout/Extra/YTToolkit/ytapi/query.py +39 -39
  28. webscout/Extra/YTToolkit/ytapi/stream.py +62 -62
  29. webscout/Extra/YTToolkit/ytapi/utils.py +62 -62
  30. webscout/Extra/YTToolkit/ytapi/video.py +232 -232
  31. webscout/Extra/autocoder/__init__.py +9 -9
  32. webscout/Extra/autocoder/autocoder.py +1105 -1105
  33. webscout/Extra/autocoder/autocoder_utiles.py +332 -332
  34. webscout/Extra/gguf.md +429 -429
  35. webscout/Extra/gguf.py +1213 -1213
  36. webscout/Extra/tempmail/README.md +487 -487
  37. webscout/Extra/tempmail/__init__.py +27 -27
  38. webscout/Extra/tempmail/async_utils.py +140 -140
  39. webscout/Extra/tempmail/base.py +160 -160
  40. webscout/Extra/tempmail/cli.py +186 -186
  41. webscout/Extra/tempmail/emailnator.py +84 -84
  42. webscout/Extra/tempmail/mail_tm.py +360 -360
  43. webscout/Extra/tempmail/temp_mail_io.py +291 -291
  44. webscout/Extra/weather.md +281 -281
  45. webscout/Extra/weather.py +193 -193
  46. webscout/Litlogger/README.md +10 -10
  47. webscout/Litlogger/__init__.py +15 -15
  48. webscout/Litlogger/formats.py +13 -13
  49. webscout/Litlogger/handlers.py +121 -121
  50. webscout/Litlogger/levels.py +13 -13
  51. webscout/Litlogger/logger.py +134 -134
  52. webscout/Provider/AISEARCH/Perplexity.py +332 -332
  53. webscout/Provider/AISEARCH/README.md +279 -279
  54. webscout/Provider/AISEARCH/__init__.py +16 -1
  55. webscout/Provider/AISEARCH/felo_search.py +206 -206
  56. webscout/Provider/AISEARCH/genspark_search.py +323 -323
  57. webscout/Provider/AISEARCH/hika_search.py +185 -185
  58. webscout/Provider/AISEARCH/iask_search.py +410 -410
  59. webscout/Provider/AISEARCH/monica_search.py +219 -219
  60. webscout/Provider/AISEARCH/scira_search.py +316 -316
  61. webscout/Provider/AISEARCH/stellar_search.py +177 -177
  62. webscout/Provider/AISEARCH/webpilotai_search.py +255 -255
  63. webscout/Provider/Aitopia.py +314 -314
  64. webscout/Provider/Andi.py +1 -1
  65. webscout/Provider/Apriel.py +306 -0
  66. webscout/Provider/ChatGPTClone.py +237 -236
  67. webscout/Provider/ChatSandbox.py +343 -343
  68. webscout/Provider/Cloudflare.py +324 -324
  69. webscout/Provider/Cohere.py +208 -208
  70. webscout/Provider/Deepinfra.py +370 -366
  71. webscout/Provider/ExaAI.py +260 -260
  72. webscout/Provider/ExaChat.py +308 -308
  73. webscout/Provider/Flowith.py +221 -221
  74. webscout/Provider/GMI.py +293 -0
  75. webscout/Provider/Gemini.py +164 -164
  76. webscout/Provider/GeminiProxy.py +167 -167
  77. webscout/Provider/GithubChat.py +371 -372
  78. webscout/Provider/Groq.py +800 -800
  79. webscout/Provider/HeckAI.py +383 -383
  80. webscout/Provider/Jadve.py +282 -282
  81. webscout/Provider/K2Think.py +307 -307
  82. webscout/Provider/Koboldai.py +205 -205
  83. webscout/Provider/LambdaChat.py +423 -423
  84. webscout/Provider/Nemotron.py +244 -244
  85. webscout/Provider/Netwrck.py +248 -248
  86. webscout/Provider/OLLAMA.py +395 -395
  87. webscout/Provider/OPENAI/Cloudflare.py +393 -393
  88. webscout/Provider/OPENAI/FalconH1.py +451 -451
  89. webscout/Provider/OPENAI/FreeGemini.py +296 -296
  90. webscout/Provider/OPENAI/K2Think.py +431 -431
  91. webscout/Provider/OPENAI/NEMOTRON.py +240 -240
  92. webscout/Provider/OPENAI/PI.py +427 -427
  93. webscout/Provider/OPENAI/README.md +959 -959
  94. webscout/Provider/OPENAI/TogetherAI.py +345 -345
  95. webscout/Provider/OPENAI/TwoAI.py +465 -465
  96. webscout/Provider/OPENAI/__init__.py +33 -18
  97. webscout/Provider/OPENAI/base.py +248 -248
  98. webscout/Provider/OPENAI/chatglm.py +528 -0
  99. webscout/Provider/OPENAI/chatgpt.py +592 -592
  100. webscout/Provider/OPENAI/chatgptclone.py +521 -521
  101. webscout/Provider/OPENAI/chatsandbox.py +202 -202
  102. webscout/Provider/OPENAI/deepinfra.py +318 -314
  103. webscout/Provider/OPENAI/e2b.py +1665 -1665
  104. webscout/Provider/OPENAI/exaai.py +420 -420
  105. webscout/Provider/OPENAI/exachat.py +452 -452
  106. webscout/Provider/OPENAI/friendli.py +232 -232
  107. webscout/Provider/OPENAI/{refact.py → gmi.py} +324 -274
  108. webscout/Provider/OPENAI/groq.py +364 -364
  109. webscout/Provider/OPENAI/heckai.py +314 -314
  110. webscout/Provider/OPENAI/llmchatco.py +337 -337
  111. webscout/Provider/OPENAI/netwrck.py +355 -355
  112. webscout/Provider/OPENAI/oivscode.py +290 -290
  113. webscout/Provider/OPENAI/opkfc.py +518 -518
  114. webscout/Provider/OPENAI/pydantic_imports.py +1 -1
  115. webscout/Provider/OPENAI/scirachat.py +535 -535
  116. webscout/Provider/OPENAI/sonus.py +308 -308
  117. webscout/Provider/OPENAI/standardinput.py +442 -442
  118. webscout/Provider/OPENAI/textpollinations.py +340 -340
  119. webscout/Provider/OPENAI/toolbaz.py +419 -416
  120. webscout/Provider/OPENAI/typefully.py +362 -362
  121. webscout/Provider/OPENAI/utils.py +295 -295
  122. webscout/Provider/OPENAI/venice.py +436 -436
  123. webscout/Provider/OPENAI/wisecat.py +387 -387
  124. webscout/Provider/OPENAI/writecream.py +166 -166
  125. webscout/Provider/OPENAI/x0gpt.py +378 -378
  126. webscout/Provider/OPENAI/yep.py +389 -389
  127. webscout/Provider/OpenGPT.py +230 -230
  128. webscout/Provider/Openai.py +243 -243
  129. webscout/Provider/PI.py +405 -405
  130. webscout/Provider/Perplexitylabs.py +430 -430
  131. webscout/Provider/QwenLM.py +272 -272
  132. webscout/Provider/STT/__init__.py +16 -1
  133. webscout/Provider/Sambanova.py +257 -257
  134. webscout/Provider/StandardInput.py +309 -309
  135. webscout/Provider/TTI/README.md +82 -82
  136. webscout/Provider/TTI/__init__.py +33 -18
  137. webscout/Provider/TTI/aiarta.py +413 -413
  138. webscout/Provider/TTI/base.py +136 -136
  139. webscout/Provider/TTI/bing.py +243 -243
  140. webscout/Provider/TTI/gpt1image.py +149 -149
  141. webscout/Provider/TTI/imagen.py +196 -196
  142. webscout/Provider/TTI/infip.py +211 -211
  143. webscout/Provider/TTI/magicstudio.py +232 -232
  144. webscout/Provider/TTI/monochat.py +219 -219
  145. webscout/Provider/TTI/piclumen.py +214 -214
  146. webscout/Provider/TTI/pixelmuse.py +232 -232
  147. webscout/Provider/TTI/pollinations.py +232 -232
  148. webscout/Provider/TTI/together.py +288 -288
  149. webscout/Provider/TTI/utils.py +12 -12
  150. webscout/Provider/TTI/venice.py +367 -367
  151. webscout/Provider/TTS/README.md +192 -192
  152. webscout/Provider/TTS/__init__.py +33 -18
  153. webscout/Provider/TTS/parler.py +110 -110
  154. webscout/Provider/TTS/streamElements.py +333 -333
  155. webscout/Provider/TTS/utils.py +280 -280
  156. webscout/Provider/TeachAnything.py +237 -237
  157. webscout/Provider/TextPollinationsAI.py +310 -310
  158. webscout/Provider/TogetherAI.py +356 -356
  159. webscout/Provider/TwoAI.py +312 -312
  160. webscout/Provider/TypliAI.py +311 -311
  161. webscout/Provider/UNFINISHED/ChatHub.py +208 -208
  162. webscout/Provider/UNFINISHED/ChutesAI.py +313 -313
  163. webscout/Provider/UNFINISHED/GizAI.py +294 -294
  164. webscout/Provider/UNFINISHED/Marcus.py +198 -198
  165. webscout/Provider/UNFINISHED/Qodo.py +477 -477
  166. webscout/Provider/UNFINISHED/VercelAIGateway.py +338 -338
  167. webscout/Provider/UNFINISHED/XenAI.py +324 -324
  168. webscout/Provider/UNFINISHED/Youchat.py +330 -330
  169. webscout/Provider/UNFINISHED/liner.py +334 -0
  170. webscout/Provider/UNFINISHED/liner_api_request.py +262 -262
  171. webscout/Provider/UNFINISHED/puterjs.py +634 -634
  172. webscout/Provider/UNFINISHED/samurai.py +223 -223
  173. webscout/Provider/UNFINISHED/test_lmarena.py +119 -119
  174. webscout/Provider/Venice.py +250 -250
  175. webscout/Provider/VercelAI.py +256 -256
  176. webscout/Provider/WiseCat.py +231 -231
  177. webscout/Provider/WrDoChat.py +366 -366
  178. webscout/Provider/__init__.py +33 -18
  179. webscout/Provider/ai4chat.py +174 -174
  180. webscout/Provider/akashgpt.py +331 -331
  181. webscout/Provider/cerebras.py +446 -446
  182. webscout/Provider/chatglm.py +394 -301
  183. webscout/Provider/cleeai.py +211 -211
  184. webscout/Provider/elmo.py +282 -282
  185. webscout/Provider/geminiapi.py +208 -208
  186. webscout/Provider/granite.py +261 -261
  187. webscout/Provider/hermes.py +263 -263
  188. webscout/Provider/julius.py +223 -223
  189. webscout/Provider/learnfastai.py +309 -309
  190. webscout/Provider/llama3mitril.py +214 -214
  191. webscout/Provider/llmchat.py +243 -243
  192. webscout/Provider/llmchatco.py +290 -290
  193. webscout/Provider/meta.py +801 -801
  194. webscout/Provider/oivscode.py +309 -309
  195. webscout/Provider/scira_chat.py +383 -383
  196. webscout/Provider/searchchat.py +292 -292
  197. webscout/Provider/sonus.py +258 -258
  198. webscout/Provider/toolbaz.py +370 -367
  199. webscout/Provider/turboseek.py +273 -273
  200. webscout/Provider/typefully.py +207 -207
  201. webscout/Provider/yep.py +372 -372
  202. webscout/__init__.py +27 -31
  203. webscout/__main__.py +5 -5
  204. webscout/auth/api_key_manager.py +189 -189
  205. webscout/auth/config.py +175 -175
  206. webscout/auth/models.py +185 -185
  207. webscout/auth/routes.py +663 -664
  208. webscout/auth/simple_logger.py +236 -236
  209. webscout/cli.py +523 -523
  210. webscout/conversation.py +438 -438
  211. webscout/exceptions.py +361 -361
  212. webscout/litagent/Readme.md +298 -298
  213. webscout/litagent/__init__.py +28 -28
  214. webscout/litagent/agent.py +581 -581
  215. webscout/litagent/constants.py +59 -59
  216. webscout/litprinter/__init__.py +58 -58
  217. webscout/models.py +181 -181
  218. webscout/optimizers.py +419 -419
  219. webscout/prompt_manager.py +288 -288
  220. webscout/sanitize.py +1078 -1078
  221. webscout/scout/README.md +401 -401
  222. webscout/scout/__init__.py +8 -8
  223. webscout/scout/core/__init__.py +6 -6
  224. webscout/scout/core/crawler.py +297 -297
  225. webscout/scout/core/scout.py +706 -706
  226. webscout/scout/core/search_result.py +95 -95
  227. webscout/scout/core/text_analyzer.py +62 -62
  228. webscout/scout/core/text_utils.py +277 -277
  229. webscout/scout/core/web_analyzer.py +51 -51
  230. webscout/scout/element.py +599 -599
  231. webscout/scout/parsers/__init__.py +69 -69
  232. webscout/scout/parsers/html5lib_parser.py +172 -172
  233. webscout/scout/parsers/html_parser.py +236 -236
  234. webscout/scout/parsers/lxml_parser.py +178 -178
  235. webscout/scout/utils.py +37 -37
  236. webscout/search/__init__.py +51 -0
  237. webscout/search/base.py +195 -0
  238. webscout/search/duckduckgo_main.py +54 -0
  239. webscout/search/engines/__init__.py +48 -0
  240. webscout/search/engines/bing.py +84 -0
  241. webscout/search/engines/bing_news.py +52 -0
  242. webscout/search/engines/brave.py +43 -0
  243. webscout/search/engines/duckduckgo/__init__.py +25 -0
  244. webscout/search/engines/duckduckgo/answers.py +78 -0
  245. webscout/search/engines/duckduckgo/base.py +187 -0
  246. webscout/search/engines/duckduckgo/images.py +97 -0
  247. webscout/search/engines/duckduckgo/maps.py +168 -0
  248. webscout/search/engines/duckduckgo/news.py +68 -0
  249. webscout/search/engines/duckduckgo/suggestions.py +21 -0
  250. webscout/search/engines/duckduckgo/text.py +211 -0
  251. webscout/search/engines/duckduckgo/translate.py +47 -0
  252. webscout/search/engines/duckduckgo/videos.py +63 -0
  253. webscout/search/engines/duckduckgo/weather.py +74 -0
  254. webscout/search/engines/mojeek.py +37 -0
  255. webscout/search/engines/wikipedia.py +56 -0
  256. webscout/search/engines/yahoo.py +65 -0
  257. webscout/search/engines/yahoo_news.py +64 -0
  258. webscout/search/engines/yandex.py +43 -0
  259. webscout/search/engines/yep/__init__.py +13 -0
  260. webscout/search/engines/yep/base.py +32 -0
  261. webscout/search/engines/yep/images.py +99 -0
  262. webscout/search/engines/yep/suggestions.py +35 -0
  263. webscout/search/engines/yep/text.py +114 -0
  264. webscout/search/http_client.py +156 -0
  265. webscout/search/results.py +137 -0
  266. webscout/search/yep_main.py +44 -0
  267. webscout/swiftcli/Readme.md +323 -323
  268. webscout/swiftcli/__init__.py +95 -95
  269. webscout/swiftcli/core/__init__.py +7 -7
  270. webscout/swiftcli/core/cli.py +308 -308
  271. webscout/swiftcli/core/context.py +104 -104
  272. webscout/swiftcli/core/group.py +241 -241
  273. webscout/swiftcli/decorators/__init__.py +28 -28
  274. webscout/swiftcli/decorators/command.py +221 -221
  275. webscout/swiftcli/decorators/options.py +220 -220
  276. webscout/swiftcli/decorators/output.py +302 -302
  277. webscout/swiftcli/exceptions.py +21 -21
  278. webscout/swiftcli/plugins/__init__.py +9 -9
  279. webscout/swiftcli/plugins/base.py +135 -135
  280. webscout/swiftcli/plugins/manager.py +269 -269
  281. webscout/swiftcli/utils/__init__.py +59 -59
  282. webscout/swiftcli/utils/formatting.py +252 -252
  283. webscout/swiftcli/utils/parsing.py +267 -267
  284. webscout/update_checker.py +117 -117
  285. webscout/version.py +1 -1
  286. webscout/version.py.bak +2 -0
  287. webscout/zeroart/README.md +89 -89
  288. webscout/zeroart/__init__.py +134 -134
  289. webscout/zeroart/base.py +66 -66
  290. webscout/zeroart/effects.py +100 -100
  291. webscout/zeroart/fonts.py +1238 -1238
  292. {webscout-8.3.7.dist-info → webscout-2025.10.13.dist-info}/METADATA +936 -937
  293. webscout-2025.10.13.dist-info/RECORD +329 -0
  294. webscout/Provider/AISEARCH/DeepFind.py +0 -254
  295. webscout/Provider/OPENAI/Qwen3.py +0 -303
  296. webscout/Provider/OPENAI/qodo.py +0 -630
  297. webscout/Provider/OPENAI/xenai.py +0 -514
  298. webscout/tempid.py +0 -134
  299. webscout/webscout_search.py +0 -1183
  300. webscout/webscout_search_async.py +0 -649
  301. webscout/yep_search.py +0 -346
  302. webscout-8.3.7.dist-info/RECORD +0 -301
  303. {webscout-8.3.7.dist-info → webscout-2025.10.13.dist-info}/WHEEL +0 -0
  304. {webscout-8.3.7.dist-info → webscout-2025.10.13.dist-info}/entry_points.txt +0 -0
  305. {webscout-8.3.7.dist-info → webscout-2025.10.13.dist-info}/licenses/LICENSE.md +0 -0
  306. {webscout-8.3.7.dist-info → webscout-2025.10.13.dist-info}/top_level.txt +0 -0
webscout/scout/element.py CHANGED
@@ -1,599 +1,599 @@
1
- """
2
- Scout Element Module - Advanced HTML Element Representation
3
- """
4
-
5
- import re
6
- from typing import Any, Dict, List, Optional, Union
7
-
8
-
9
- class NavigableString(str):
10
- """
11
- A string that knows its place in the document tree.
12
- Mimics BeautifulSoup's NavigableString for better compatibility.
13
- """
14
- def __new__(cls, text: str):
15
- """
16
- Create a new NavigableString instance.
17
-
18
- Args:
19
- text (str): String content
20
- """
21
- return str.__new__(cls, text)
22
-
23
- def __init__(self, text: str):
24
- """
25
- Initialize a navigable string.
26
-
27
- Args:
28
- text (str): String content
29
- """
30
- self.parent = None
31
-
32
- def __repr__(self):
33
- """String representation."""
34
- return f"NavigableString({super().__repr__()})"
35
-
36
- def __add__(self, other):
37
- """
38
- Allow concatenation of NavigableString with other strings.
39
-
40
- Args:
41
- other (str): String to concatenate
42
-
43
- Returns:
44
- str: Concatenated string
45
- """
46
- return str(self) + str(other)
47
-
48
- def strip(self, chars=None):
49
- """
50
- Strip whitespace or specified characters.
51
-
52
- Args:
53
- chars (str, optional): Characters to strip
54
-
55
- Returns:
56
- str: Stripped string
57
- """
58
- return NavigableString(super().strip(chars))
59
-
60
- class Tag:
61
- """
62
- Represents an HTML tag with advanced traversal and manipulation capabilities.
63
- Enhanced to closely mimic BeautifulSoup's Tag class.
64
- """
65
- def __init__(self, name: str, attrs: Dict[str, str] = None):
66
- """
67
- Initialize a Tag with name and attributes.
68
-
69
- Args:
70
- name (str): Tag name
71
- attrs (dict, optional): Tag attributes
72
- """
73
- self.name = name
74
- self.attrs = attrs or {}
75
- self.contents = []
76
- self.parent = None
77
- self.string = None # For single string content
78
-
79
- def __str__(self):
80
- """String representation of the tag."""
81
- return self.decode_contents()
82
-
83
- def __repr__(self):
84
- """Detailed representation of the tag."""
85
- return f"<{self.name} {self.attrs}>"
86
-
87
- def __call__(self, *args, **kwargs):
88
- """
89
- Allows calling find_all directly on the tag.
90
- Mimics BeautifulSoup's behavior.
91
- """
92
- return self.find_all(*args, **kwargs)
93
-
94
- def __contains__(self, item):
95
- """
96
- Check if an item is in the tag's contents.
97
-
98
- Args:
99
- item: Item to search for
100
-
101
- Returns:
102
- bool: True if item is in contents, False otherwise
103
- """
104
- return item in self.contents
105
-
106
- def __getitem__(self, key):
107
- """
108
- Get an attribute value using dictionary-like access.
109
-
110
- Args:
111
- key (str): Attribute name
112
-
113
- Returns:
114
- Any: Attribute value
115
- """
116
- return self.attrs[key]
117
-
118
- def __iter__(self):
119
- """
120
- Iterate through tag's contents.
121
-
122
- Returns:
123
- Iterator: Contents of the tag
124
- """
125
- return iter(self.contents)
126
-
127
- def __eq__(self, other):
128
- """
129
- Compare tags based on name and attributes.
130
-
131
- Args:
132
- other (Tag): Tag to compare
133
-
134
- Returns:
135
- bool: True if tags are equivalent
136
- """
137
- if not isinstance(other, Tag):
138
- return False
139
- return (
140
- self.name == other.name and
141
- self.attrs == other.attrs and
142
- str(self) == str(other)
143
- )
144
-
145
- def __hash__(self):
146
- """
147
- Generate a hash for the tag.
148
-
149
- Returns:
150
- int: Hash value
151
- """
152
- return hash((self.name, frozenset(self.attrs.items()), str(self)))
153
-
154
- def find(self, name=None, attrs={}, recursive=True, text=None, limit=None, class_=None, **kwargs) -> Optional['Tag']:
155
- """
156
- Find the first matching child element.
157
- Enhanced with more flexible matching.
158
-
159
- Args:
160
- name (str, optional): Tag name to search for
161
- attrs (dict, optional): Attributes to match
162
- recursive (bool, optional): Search recursively
163
- text (str, optional): Text content to match
164
-
165
- Returns:
166
- Tag or None: First matching element
167
- """
168
- # Merge class_ with attrs['class'] if both are present
169
- attrs = dict(attrs) if attrs else {}
170
- if class_ is not None:
171
- if 'class' in attrs:
172
- # Merge both
173
- if isinstance(attrs['class'], list):
174
- class_list = attrs['class']
175
- else:
176
- class_list = [cls.strip() for cls in re.split(r'[ ,]+', str(attrs['class'])) if cls.strip()]
177
- if isinstance(class_, list):
178
- class_list += class_
179
- else:
180
- class_list += [cls.strip() for cls in re.split(r'[ ,]+', str(class_)) if cls.strip()]
181
- attrs['class'] = class_list
182
- else:
183
- attrs['class'] = class_
184
- results = self.find_all(name, attrs, recursive, text, limit=1, **kwargs)
185
- return results[0] if results else None
186
-
187
- def find_all(self, name=None, attrs={}, recursive=True, text=None, limit=None, class_=None, **kwargs) -> List['Tag']:
188
- """
189
- Find all matching child elements.
190
- Enhanced with more flexible matching and BeautifulSoup-like features.
191
-
192
- Args:
193
- name (str, optional): Tag name to search for
194
- attrs (dict, optional): Attributes to match
195
- recursive (bool, optional): Search recursively
196
- text (str, optional): Text content to match
197
- limit (int, optional): Maximum number of results
198
-
199
- Returns:
200
- List[Tag]: List of matching elements
201
- """
202
- results = []
203
-
204
- def _match(tag):
205
- # Check tag name with case-insensitive and regex support
206
- if name:
207
- if isinstance(name, str):
208
- if tag.name.lower() != name.lower():
209
- return False
210
- elif isinstance(name, re.Pattern):
211
- if not name.search(tag.name):
212
- return False
213
-
214
- # Check attributes with more flexible matching
215
- for k, v in attrs.items():
216
- if k == 'class':
217
- tag_classes = tag.get('class', [])
218
- # Support multiple classes separated by space or comma
219
- if isinstance(v, str):
220
- v_classes = [cls.strip() for cls in re.split(r'[ ,]+', v) if cls.strip()]
221
- if not all(cls in tag_classes for cls in v_classes):
222
- return False
223
- elif isinstance(v, list):
224
- if not all(cls in tag_classes for cls in v):
225
- return False
226
- else:
227
- return False
228
- elif k == 'id':
229
- if tag.get('id') != v:
230
- return False
231
- else:
232
- # Regex or exact match for other attributes
233
- tag_attr = tag.attrs.get(k)
234
- if v is True:
235
- if tag_attr is None:
236
- return False
237
- elif isinstance(v, re.Pattern):
238
- if tag_attr is None or not v.search(str(tag_attr)):
239
- return False
240
- elif tag_attr != v:
241
- return False
242
-
243
- # Check text content
244
- if text:
245
- tag_text = tag.get_text(strip=True)
246
- if isinstance(text, str) and text.lower() not in tag_text.lower():
247
- return False
248
- elif isinstance(text, re.Pattern) and not text.search(tag_text):
249
- return False
250
-
251
- return True
252
-
253
- def _search(element):
254
- if _match(element):
255
- results.append(element)
256
- if limit and len(results) == limit:
257
- return
258
-
259
- if recursive:
260
- for child in element.contents:
261
- if isinstance(child, Tag):
262
- _search(child)
263
-
264
- _search(self)
265
- return results
266
-
267
- def select(self, selector: str) -> List['Tag']:
268
- """
269
- Select elements using CSS selector.
270
- Enhanced to support more complex selectors.
271
-
272
- Args:
273
- selector (str): CSS selector string
274
-
275
- Returns:
276
- List[Tag]: List of matching elements
277
- """
278
- # More advanced CSS selector parsing
279
- # This is a simplified implementation and might need more robust parsing
280
- parts = re.split(r'\s+', selector.strip())
281
- results = []
282
-
283
- def _match_selector(tag, selector_part):
284
- # Support more complex selectors
285
- if selector_part.startswith('.'):
286
- # Class selector
287
- return selector_part[1:] in tag.get('class', [])
288
- elif selector_part.startswith('#'):
289
- # ID selector
290
- return tag.get('id') == selector_part[1:]
291
- elif '[' in selector_part and ']' in selector_part:
292
- # Attribute selector
293
- attr_match = re.match(r'(\w+)\[([^=]+)(?:=(.+))?\]', selector_part)
294
- if attr_match:
295
- tag_name, attr, value = attr_match.groups()
296
- if tag_name and tag.name != tag_name:
297
- return False
298
- if value:
299
- return tag.get(attr) == value.strip("'\"")
300
- return attr in tag.attrs
301
- else:
302
- # Tag selector
303
- return tag.name == selector_part
304
-
305
- def _recursive_select(element, selector_parts):
306
- if not selector_parts:
307
- results.append(element)
308
- return
309
-
310
- current_selector = selector_parts[0]
311
- remaining_selectors = selector_parts[1:]
312
-
313
- if _match_selector(element, current_selector):
314
- if not remaining_selectors:
315
- results.append(element)
316
- else:
317
- for child in element.contents:
318
- if isinstance(child, Tag):
319
- _recursive_select(child, remaining_selectors)
320
-
321
- for child in self.contents:
322
- if isinstance(child, Tag):
323
- _recursive_select(child, parts)
324
-
325
- return results
326
-
327
- def select_one(self, selector: str) -> Optional['Tag']:
328
- """
329
- Select the first element matching the CSS selector.
330
-
331
- Args:
332
- selector (str): CSS selector string
333
-
334
- Returns:
335
- Tag or None: First matching element
336
- """
337
- results = self.select(selector)
338
- return results[0] if results else None
339
-
340
- def get_text(self, separator=' ', strip=False, types=None) -> str:
341
- """
342
- Extract text from the tag and its descendants.
343
- Enhanced to support more flexible text extraction.
344
-
345
- Args:
346
- separator (str, optional): Text separator
347
- strip (bool, optional): Strip whitespace
348
- types (list, optional): Types of content to extract
349
-
350
- Returns:
351
- str: Extracted text
352
- """
353
- texts = []
354
- for content in self.contents:
355
- # Support filtering by content type
356
- if types is None or type(content) in types:
357
- if isinstance(content, NavigableString):
358
- texts.append(str(content))
359
- elif isinstance(content, Tag):
360
- texts.append(content.get_text(separator, strip))
361
-
362
- text = separator.join(texts)
363
- text = re.sub(r'\n\n+', '\n', text) # Replace multiple newlines with single newlines
364
- return text.strip() if strip else text
365
-
366
- def find_text(self, pattern: Union[str, re.Pattern], **kwargs) -> Optional[str]:
367
- """
368
- Find the first text matching a pattern.
369
-
370
- Args:
371
- pattern (str or re.Pattern): Pattern to match
372
- **kwargs: Additional arguments for get_text()
373
-
374
- Returns:
375
- str or None: First matching text
376
- """
377
- text = self.get_text(**kwargs)
378
-
379
- if isinstance(pattern, str):
380
- return pattern if pattern in text else None
381
- elif isinstance(pattern, re.Pattern):
382
- match = pattern.search(text)
383
- return match.group(0) if match else None
384
-
385
- def replace_text(self, old: Union[str, re.Pattern], new: str, **kwargs) -> str:
386
- """
387
- Replace text matching a pattern.
388
-
389
- Args:
390
- old (str or re.Pattern): Pattern to replace
391
- new (str): Replacement text
392
- **kwargs: Additional arguments for get_text()
393
-
394
- Returns:
395
- str: Modified text
396
- """
397
- text = self.get_text(**kwargs)
398
-
399
- if isinstance(old, str):
400
- return text.replace(old, new)
401
- elif isinstance(old, re.Pattern):
402
- return old.sub(new, text)
403
-
404
- def get(self, key: str, default: Any = None) -> Any:
405
- """
406
- Get an attribute value.
407
-
408
- Args:
409
- key (str): Attribute name
410
- default (Any, optional): Default value if attribute not found
411
-
412
- Returns:
413
- Any: Attribute value or default
414
- """
415
- return self.attrs.get(key, default)
416
-
417
- def decompose(self) -> None:
418
- """Remove the tag and its contents from the document."""
419
- if self.parent:
420
- self.parent.contents.remove(self)
421
-
422
- def extract(self) -> 'Tag':
423
- """
424
- Remove the tag from the document and return it.
425
-
426
- Returns:
427
- Tag: Extracted tag
428
- """
429
- self.decompose()
430
- return self
431
-
432
- def clear(self) -> None:
433
- """Remove all contents of the tag."""
434
- self.contents.clear()
435
-
436
- @property
437
- def string(self):
438
- """
439
- Get the string content of the tag.
440
- Returns the combined text of the tag's contents.
441
- """
442
- return self.get_text()
443
-
444
- @string.setter
445
- def string(self, value):
446
- """
447
- Set the string content of the tag.
448
- Clears existing contents and sets new string value.
449
-
450
- Args:
451
- value (str): New string content
452
- """
453
- self.clear()
454
- if value is not None:
455
- self.append(value)
456
-
457
- def append(self, new_child: Union['Tag', NavigableString, str]) -> None:
458
- """Append a new child to this tag with error handling."""
459
- if isinstance(new_child, str):
460
- new_child = NavigableString(new_child)
461
- if hasattr(new_child, 'parent'):
462
- new_child.parent = self
463
- self.contents.append(new_child)
464
-
465
- def insert(self, index: int, new_child: Union['Tag', NavigableString, str]) -> None:
466
- """Insert a new child at the given index with error handling."""
467
- if isinstance(new_child, str):
468
- new_child = NavigableString(new_child)
469
- if hasattr(new_child, 'parent'):
470
- new_child.parent = self
471
- self.contents.insert(index, new_child)
472
-
473
- def replace_with(self, new_tag: 'Tag') -> None:
474
- """Replace this tag with another tag with error handling."""
475
- if self.parent:
476
- try:
477
- index = self.parent.contents.index(self)
478
- self.parent.contents[index] = new_tag
479
- new_tag.parent = self.parent
480
- except ValueError:
481
- pass
482
-
483
- def wrap(self, wrapper_tag: 'Tag') -> 'Tag':
484
- """Wrap this tag in another tag."""
485
- if self.parent:
486
- idx = self.parent.contents.index(self)
487
- self.parent.contents[idx] = wrapper_tag
488
- wrapper_tag.parent = self.parent
489
- else:
490
- wrapper_tag.parent = None
491
- wrapper_tag.contents.append(self)
492
- self.parent = wrapper_tag
493
- return wrapper_tag
494
-
495
- def unwrap(self) -> None:
496
- """Remove this tag but keep its contents in the parent."""
497
- if self.parent:
498
- idx = self.parent.contents.index(self)
499
- for child in reversed(self.contents):
500
- child.parent = self.parent
501
- self.parent.contents.insert(idx, child)
502
- self.parent.contents.remove(self)
503
- self.parent = None
504
- self.contents = []
505
-
506
- def insert_before(self, new_element: 'Tag') -> None:
507
- """Insert a tag or string immediately before this tag."""
508
- if self.parent:
509
- idx = self.parent.contents.index(self)
510
- new_element.parent = self.parent
511
- self.parent.contents.insert(idx, new_element)
512
-
513
- def insert_after(self, new_element: 'Tag') -> None:
514
- """Insert a tag or string immediately after this tag."""
515
- if self.parent:
516
- idx = self.parent.contents.index(self)
517
- new_element.parent = self.parent
518
- self.parent.contents.insert(idx + 1, new_element)
519
-
520
- @property
521
- def descendants(self):
522
- """Yield all descendants in document order."""
523
- for child in self.contents:
524
- yield child
525
- if isinstance(child, Tag):
526
- yield from child.descendants
527
-
528
- @property
529
- def parents(self):
530
- """Yield all parents up the tree."""
531
- current = self.parent
532
- while current:
533
- yield current
534
- current = current.parent
535
-
536
- @property
537
- def next_element(self):
538
- """Return the next element in document order."""
539
- if self.contents:
540
- return self.contents[0]
541
- current = self
542
- while current.parent:
543
- idx = current.parent.contents.index(current)
544
- if idx + 1 < len(current.parent.contents):
545
- return current.parent.contents[idx + 1]
546
- current = current.parent
547
- return None
548
-
549
- @property
550
- def previous_element(self):
551
- """Return the previous element in document order."""
552
- if not self.parent:
553
- return None
554
- idx = self.parent.contents.index(self)
555
- if idx > 0:
556
- prev = self.parent.contents[idx - 1]
557
- while isinstance(prev, Tag) and prev.contents:
558
- prev = prev.contents[-1]
559
- return prev
560
- return self.parent
561
-
562
- def decode_contents(self, eventual_encoding='utf-8') -> str:
563
- """
564
- Decode the contents of the tag to a string.
565
-
566
- Args:
567
- eventual_encoding (str, optional): Encoding to use
568
-
569
- Returns:
570
- str: Decoded contents
571
- """
572
- return ''.join(str(content) for content in self.contents)
573
-
574
- def prettify(self, formatter='minimal') -> str:
575
- """
576
- Return a nicely formatted representation of the tag.
577
-
578
- Args:
579
- formatter (str, optional): Formatting style
580
-
581
- Returns:
582
- str: Prettified tag representation
583
- """
584
- def _prettify(tag, indent=0):
585
- result = ' ' * indent + f'<{tag.name}'
586
- for k, v in tag.attrs.items():
587
- result += f' {k}="{v}"'
588
- result += '>\n'
589
-
590
- for content in tag.contents:
591
- if isinstance(content, Tag):
592
- result += _prettify(content, indent + 2)
593
- else:
594
- result += ' ' * (indent + 2) + str(content) + '\n'
595
-
596
- result += ' ' * indent + f'</{tag.name}>\n'
597
- return result
598
-
599
- return _prettify(self)
1
+ """
2
+ Scout Element Module - Advanced HTML Element Representation
3
+ """
4
+
5
+ import re
6
+ from typing import Any, Dict, List, Optional, Union
7
+
8
+
9
class NavigableString(str):
    """
    Text node that remembers which element contains it.

    Behaves like a regular ``str`` and additionally carries a ``parent``
    attribute, in the spirit of BeautifulSoup's NavigableString.
    """

    def __new__(cls, text: str):
        """
        Build the underlying string value.

        Args:
            text (str): String content
        """
        return super().__new__(cls, text)

    def __init__(self, text: str):
        """
        Start out detached from any tree.

        Args:
            text (str): String content (already consumed by ``__new__``)
        """
        self.parent = None

    def __repr__(self):
        """Return ``NavigableString(<plain str repr>)``."""
        plain = str.__repr__(self)
        return "NavigableString(" + plain + ")"

    def __add__(self, other):
        """
        Concatenate with another value, producing a plain string.

        Args:
            other: Value to append; converted with ``str()``

        Returns:
            str: Concatenated plain string (not a NavigableString)
        """
        return "".join((str(self), str(other)))

    def strip(self, chars=None):
        """
        Strip whitespace or the given characters from both ends.

        Args:
            chars (str, optional): Characters to strip

        Returns:
            NavigableString: New, parentless string with the stripped text
        """
        trimmed = str.strip(self, chars)
        return NavigableString(trimmed)
59
+
60
class Tag:
    """
    Represents an HTML tag with traversal, search and editing capabilities.

    The API is modelled on BeautifulSoup's Tag class. Tags compare equal by
    value (name, attributes and rendered contents), so every operation that
    needs a tag's position among its siblings looks it up by identity.
    """

    def __init__(self, name: str, attrs: Optional[Dict[str, Any]] = None):
        """
        Initialize a Tag with name and attributes.

        Args:
            name (str): Tag name
            attrs (dict, optional): Tag attributes
        """
        self.name = name
        self.attrs = attrs or {}
        self.contents = []
        self.parent = None

    def __str__(self):
        """String representation of the tag (its rendered contents)."""
        return self.decode_contents()

    def __repr__(self):
        """Detailed representation of the tag."""
        return f"<{self.name} {self.attrs}>"

    def __call__(self, *args, **kwargs):
        """
        Allows calling find_all directly on the tag.
        Mimics BeautifulSoup's behavior.
        """
        return self.find_all(*args, **kwargs)

    def __contains__(self, item):
        """
        Check if an item is in the tag's contents.

        Args:
            item: Item to search for

        Returns:
            bool: True if item is in contents, False otherwise
        """
        return item in self.contents

    def __getitem__(self, key):
        """
        Get an attribute value using dictionary-like access.

        Args:
            key (str): Attribute name

        Returns:
            Any: Attribute value

        Raises:
            KeyError: If the attribute does not exist
        """
        return self.attrs[key]

    def __iter__(self):
        """
        Iterate through tag's contents.

        Returns:
            Iterator: Contents of the tag
        """
        return iter(self.contents)

    def __eq__(self, other):
        """
        Compare tags based on name, attributes and rendered contents.

        Args:
            other (Tag): Tag to compare

        Returns:
            bool: True if tags are equivalent
        """
        if not isinstance(other, Tag):
            return False
        return (
            self.name == other.name and
            self.attrs == other.attrs and
            str(self) == str(other)
        )

    def __hash__(self):
        """
        Generate a hash consistent with ``__eq__``.

        Attribute values are frozen first so that list-valued attributes
        (for example a list of classes) do not make the tag unhashable.

        Returns:
            int: Hash value
        """
        frozen = frozenset((key, self._hashable(value)) for key, value in self.attrs.items())
        return hash((self.name, frozen, str(self)))

    @staticmethod
    def _hashable(value):
        """Return an immutable, hashable stand-in for a container value."""
        if isinstance(value, (list, tuple)):
            return tuple(Tag._hashable(item) for item in value)
        if isinstance(value, (set, frozenset)):
            return frozenset(Tag._hashable(item) for item in value)
        if isinstance(value, dict):
            return frozenset((key, Tag._hashable(item)) for key, item in value.items())
        return value

    @staticmethod
    def _position(siblings, node) -> int:
        """Return the index of ``node`` itself in ``siblings`` (identity match), or -1."""
        for index, candidate in enumerate(siblings):
            if candidate is node:
                return index
        return -1

    def _index_in_parent(self) -> int:
        """Return this tag's index among its parent's contents, or -1 when detached."""
        if self.parent is None:
            return -1
        return self._position(self.parent.contents, self)

    def _adopt(self, child):
        """Wrap strings in NavigableString, mark this tag as the parent and return the node."""
        if isinstance(child, str):
            child = NavigableString(child)
        if hasattr(child, 'parent'):
            child.parent = self
        return child

    @staticmethod
    def _class_tokens(value) -> list:
        """Split a class filter (string or collection) into individual class names."""
        if value is None:
            return []
        if isinstance(value, (list, tuple, set, frozenset)):
            return list(value)
        return [part.strip() for part in re.split(r'[ ,]+', str(value)) if part.strip()]

    def _own_classes(self) -> list:
        """Return this tag's classes as a list, whether stored as a list or a string."""
        value = self.attrs.get('class')
        if value is None:
            return []
        if isinstance(value, str):
            # Split so that membership tests match whole names, not substrings.
            return value.split()
        return list(value)

    def find(self, name=None, attrs=None, recursive=True, text=None, limit=None, class_=None, **kwargs) -> Optional['Tag']:
        """
        Find the first matching element.

        Args:
            name (str or re.Pattern, optional): Tag name to search for
            attrs (dict, optional): Attributes to match
            recursive (bool, optional): Search recursively
            text (str or re.Pattern, optional): Text content to match
            limit (int, optional): Ignored; find always stops at one result
            class_ (str or list, optional): Class name(s) the tag must have
            **kwargs: Additional attribute filters (e.g. ``id="main"``)

        Returns:
            Tag or None: First matching element
        """
        results = self.find_all(name, attrs, recursive, text, limit=1, class_=class_, **kwargs)
        return results[0] if results else None

    def find_all(self, name=None, attrs=None, recursive=True, text=None, limit=None, class_=None, **kwargs) -> List['Tag']:
        """
        Find all matching elements in document order.

        The tag itself is tested first, then its descendants (or only its
        direct children when ``recursive`` is False).

        Args:
            name (str or re.Pattern, optional): Tag name (strings match case-insensitively)
            attrs (dict, optional): Attributes to match; a value may be an
                exact value, True (attribute present) or a compiled regex
            recursive (bool, optional): Search all descendants instead of direct children
            text (str or re.Pattern, optional): Text the tag must contain
            limit (int, optional): Maximum number of results
            class_ (str or list, optional): Class name(s) the tag must have
            **kwargs: Additional attribute filters, merged into ``attrs``

        Returns:
            List[Tag]: List of matching elements
        """
        # Work on a private copy so caller-supplied dicts/lists are never mutated.
        criteria = dict(attrs) if attrs else {}
        criteria.update(kwargs)
        if class_ is not None:
            if 'class' in criteria:
                criteria['class'] = self._class_tokens(criteria['class']) + self._class_tokens(class_)
            else:
                criteria['class'] = class_

        def _match(tag):
            # Tag name: case-insensitive string or regex.
            if name:
                if isinstance(name, str):
                    if tag.name.lower() != name.lower():
                        return False
                elif isinstance(name, re.Pattern):
                    if not name.search(tag.name):
                        return False

            for key, wanted in criteria.items():
                if key == 'class':
                    # Every requested class must be present on the tag.
                    if isinstance(wanted, str):
                        needed = self._class_tokens(wanted)
                    elif isinstance(wanted, list):
                        needed = wanted
                    else:
                        return False
                    present = tag._own_classes()
                    if not all(cls in present for cls in needed):
                        return False
                    continue

                actual = tag.attrs.get(key)
                if wanted is True:
                    if actual is None:
                        return False
                elif isinstance(wanted, re.Pattern):
                    if actual is None or not wanted.search(str(actual)):
                        return False
                elif actual != wanted:
                    return False

            if text:
                tag_text = tag.get_text(strip=True)
                if isinstance(text, str) and text.lower() not in tag_text.lower():
                    return False
                elif isinstance(text, re.Pattern) and not text.search(tag_text):
                    return False

            return True

        def _candidates():
            yield self
            pool = self.descendants if recursive else self.contents
            for node in pool:
                if isinstance(node, Tag):
                    yield node

        results = []
        for candidate in _candidates():
            if _match(candidate):
                results.append(candidate)
                # Stop the whole walk, not just the current branch.
                if limit and len(results) >= limit:
                    break
        return results

    def select(self, selector: str) -> List['Tag']:
        """
        Select descendant elements using a simplified CSS selector.

        Supported simple selectors are ``tag``, ``.class``, ``#id``,
        ``[attr]``, ``[attr=value]`` and ``tag[attr=value]``. Whitespace
        separates selectors and means "descendant of"; the whole chain must
        lie inside this tag's subtree.

        Args:
            selector (str): CSS selector string

        Returns:
            List[Tag]: Matching elements in document order
        """
        parts = re.split(r'\s+', selector.strip())

        def _match_selector(tag, selector_part):
            if selector_part.startswith('.'):
                return selector_part[1:] in tag._own_classes()
            if selector_part.startswith('#'):
                return tag.get('id') == selector_part[1:]
            if '[' in selector_part and ']' in selector_part:
                # The tag name in front of the bracket is optional.
                attr_match = re.match(r'(\w*)\[([^=]+)(?:=(.+))?\]', selector_part)
                if not attr_match:
                    return False
                tag_name, attr, value = attr_match.groups()
                if tag_name and tag.name != tag_name:
                    return False
                if value:
                    return tag.get(attr) == value.strip("'\"")
                return attr in tag.attrs
            return tag.name == selector_part

        def _chain_matches(tag):
            if not _match_selector(tag, parts[-1]):
                return False
            # Walk upwards, pairing each remaining selector with the nearest
            # matching ancestor below this tag.
            ancestor = tag.parent
            for part in reversed(parts[:-1]):
                while ancestor is not None and ancestor is not self and not _match_selector(ancestor, part):
                    ancestor = ancestor.parent
                if ancestor is None or ancestor is self:
                    return False
                ancestor = ancestor.parent
            return True

        return [
            node for node in self.descendants
            if isinstance(node, Tag) and _chain_matches(node)
        ]

    def select_one(self, selector: str) -> Optional['Tag']:
        """
        Select the first element matching the CSS selector.

        Args:
            selector (str): CSS selector string

        Returns:
            Tag or None: First matching element
        """
        results = self.select(selector)
        return results[0] if results else None

    def get_text(self, separator=' ', strip=False, types=None) -> str:
        """
        Extract text from the tag and its descendants.

        Args:
            separator (str, optional): Text separator
            strip (bool, optional): Strip whitespace from the result
            types (list, optional): Exact content types to include at this level

        Returns:
            str: Extracted text
        """
        texts = []
        for content in self.contents:
            if types is None or type(content) in types:
                if isinstance(content, NavigableString):
                    texts.append(str(content))
                elif isinstance(content, Tag):
                    texts.append(content.get_text(separator, strip))

        text = separator.join(texts)
        text = re.sub(r'\n\n+', '\n', text)  # Collapse runs of blank lines
        return text.strip() if strip else text

    def find_text(self, pattern: Union[str, re.Pattern], **kwargs) -> Optional[str]:
        """
        Find the first text matching a pattern.

        Args:
            pattern (str or re.Pattern): Pattern to match
            **kwargs: Additional arguments for get_text()

        Returns:
            str or None: The pattern itself (string) or the matched text
            (regex); None when nothing matches
        """
        text = self.get_text(**kwargs)

        if isinstance(pattern, str):
            return pattern if pattern in text else None
        if isinstance(pattern, re.Pattern):
            match = pattern.search(text)
            return match.group(0) if match else None
        return None

    def replace_text(self, old: Union[str, re.Pattern], new: str, **kwargs) -> str:
        """
        Return the tag's text with matches replaced (the tree is not modified).

        Args:
            old (str or re.Pattern): Pattern to replace
            new (str): Replacement text
            **kwargs: Additional arguments for get_text()

        Returns:
            str: Modified text; None when ``old`` is neither a string nor a regex
        """
        text = self.get_text(**kwargs)

        if isinstance(old, str):
            return text.replace(old, new)
        if isinstance(old, re.Pattern):
            return old.sub(new, text)
        return None

    def get(self, key: str, default: Any = None) -> Any:
        """
        Get an attribute value.

        Args:
            key (str): Attribute name
            default (Any, optional): Default value if attribute not found

        Returns:
            Any: Attribute value or default
        """
        return self.attrs.get(key, default)

    def decompose(self) -> None:
        """Remove the tag and its contents from the document."""
        index = self._index_in_parent()
        if index >= 0:
            del self.parent.contents[index]
        # Detach so a stale parent pointer cannot be used for a second removal.
        self.parent = None

    def extract(self) -> 'Tag':
        """
        Remove the tag from the document and return it.

        Returns:
            Tag: Extracted tag
        """
        self.decompose()
        return self

    def clear(self) -> None:
        """Remove all contents of the tag."""
        self.contents.clear()

    @property
    def string(self):
        """
        Get the string content of the tag.
        Returns the combined text of the tag's contents.
        """
        return self.get_text()

    @string.setter
    def string(self, value):
        """
        Set the string content of the tag.
        Clears existing contents and sets new string value.

        Args:
            value (str): New string content
        """
        self.clear()
        if value is not None:
            self.append(value)

    def append(self, new_child: Union['Tag', 'NavigableString', str]) -> None:
        """Append a new child (strings are wrapped in NavigableString)."""
        self.contents.append(self._adopt(new_child))

    def insert(self, index: int, new_child: Union['Tag', 'NavigableString', str]) -> None:
        """Insert a new child at the given index (strings are wrapped in NavigableString)."""
        self.contents.insert(index, self._adopt(new_child))

    def replace_with(self, new_tag: 'Tag') -> None:
        """
        Replace this tag with another node; no-op when this tag is detached.

        Afterwards this tag no longer has a parent.
        """
        index = self._index_in_parent()
        if index < 0:
            return
        parent = self.parent
        parent.contents[index] = parent._adopt(new_tag)
        self.parent = None

    def wrap(self, wrapper_tag: 'Tag') -> 'Tag':
        """
        Wrap this tag in another tag.

        Args:
            wrapper_tag (Tag): Tag that takes this tag's place and receives
                it as its last child

        Returns:
            Tag: The wrapper tag
        """
        index = self._index_in_parent()
        if index >= 0:
            self.parent.contents[index] = wrapper_tag
            wrapper_tag.parent = self.parent
        else:
            wrapper_tag.parent = None
        wrapper_tag.contents.append(self)
        self.parent = wrapper_tag
        return wrapper_tag

    def unwrap(self) -> None:
        """Remove this tag but keep its contents in the parent."""
        index = self._index_in_parent()
        if index < 0:
            return
        parent = self.parent
        children = list(self.contents)
        for child in children:
            if hasattr(child, 'parent'):
                child.parent = parent
        # Swap this tag for its children in one step, preserving their order.
        parent.contents[index:index + 1] = children
        self.parent = None
        self.contents = []

    def insert_before(self, new_element: 'Tag') -> None:
        """Insert a tag or string immediately before this tag."""
        index = self._index_in_parent()
        if index < 0:
            return
        self.parent.contents.insert(index, self.parent._adopt(new_element))

    def insert_after(self, new_element: 'Tag') -> None:
        """Insert a tag or string immediately after this tag."""
        index = self._index_in_parent()
        if index < 0:
            return
        self.parent.contents.insert(index + 1, self.parent._adopt(new_element))

    @property
    def descendants(self):
        """Yield all descendants in document order."""
        for child in self.contents:
            yield child
            if isinstance(child, Tag):
                yield from child.descendants

    @property
    def parents(self):
        """Yield all parents up the tree."""
        current = self.parent
        while current is not None:
            yield current
            current = current.parent

    @property
    def next_element(self):
        """Return the next element in document order, or None at the end."""
        if self.contents:
            return self.contents[0]
        current = self
        while current.parent is not None:
            siblings = current.parent.contents
            index = self._position(siblings, current)
            if 0 <= index < len(siblings) - 1:
                return siblings[index + 1]
            current = current.parent
        return None

    @property
    def previous_element(self):
        """Return the previous element in document order, or None for a detached tag."""
        if self.parent is None:
            return None
        index = self._index_in_parent()
        if index > 0:
            prev = self.parent.contents[index - 1]
            # Descend to the last node of the preceding sibling's subtree.
            while isinstance(prev, Tag) and prev.contents:
                prev = prev.contents[-1]
            return prev
        return self.parent

    def decode_contents(self, eventual_encoding='utf-8') -> str:
        """
        Decode the contents of the tag to a string.

        Args:
            eventual_encoding (str, optional): Accepted for API
                compatibility; not used by this implementation

        Returns:
            str: Decoded contents
        """
        return ''.join(str(content) for content in self.contents)

    def prettify(self, formatter='minimal') -> str:
        """
        Return a nicely formatted representation of the tag.

        Args:
            formatter (str, optional): Accepted for API compatibility; not
                used by this implementation

        Returns:
            str: Prettified tag representation
        """
        def _render(tag, indent, pieces):
            pad = ' ' * indent
            attributes = ''.join(f' {k}="{v}"' for k, v in tag.attrs.items())
            pieces.append(f'{pad}<{tag.name}{attributes}>\n')
            for content in tag.contents:
                if isinstance(content, Tag):
                    _render(content, indent + 2, pieces)
                else:
                    pieces.append(' ' * (indent + 2) + str(content) + '\n')
            pieces.append(f'{pad}</{tag.name}>\n')

        lines = []
        _render(self, 0, lines)
        return ''.join(lines)