webscout 8.3.6__py3-none-any.whl → 2025.10.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of webscout might be problematic. Click here for more details.

Files changed (304)
  1. webscout/AIauto.py +250 -250
  2. webscout/AIbase.py +379 -379
  3. webscout/AIutel.py +60 -58
  4. webscout/Bard.py +1012 -1012
  5. webscout/Bing_search.py +417 -417
  6. webscout/DWEBS.py +529 -529
  7. webscout/Extra/Act.md +309 -309
  8. webscout/Extra/GitToolkit/__init__.py +10 -10
  9. webscout/Extra/GitToolkit/gitapi/README.md +110 -110
  10. webscout/Extra/GitToolkit/gitapi/__init__.py +11 -11
  11. webscout/Extra/GitToolkit/gitapi/repository.py +195 -195
  12. webscout/Extra/GitToolkit/gitapi/user.py +96 -96
  13. webscout/Extra/GitToolkit/gitapi/utils.py +61 -61
  14. webscout/Extra/YTToolkit/README.md +375 -375
  15. webscout/Extra/YTToolkit/YTdownloader.py +956 -956
  16. webscout/Extra/YTToolkit/__init__.py +2 -2
  17. webscout/Extra/YTToolkit/transcriber.py +475 -475
  18. webscout/Extra/YTToolkit/ytapi/README.md +44 -44
  19. webscout/Extra/YTToolkit/ytapi/__init__.py +6 -6
  20. webscout/Extra/YTToolkit/ytapi/channel.py +307 -307
  21. webscout/Extra/YTToolkit/ytapi/errors.py +13 -13
  22. webscout/Extra/YTToolkit/ytapi/extras.py +118 -118
  23. webscout/Extra/YTToolkit/ytapi/https.py +88 -88
  24. webscout/Extra/YTToolkit/ytapi/patterns.py +61 -61
  25. webscout/Extra/YTToolkit/ytapi/playlist.py +58 -58
  26. webscout/Extra/YTToolkit/ytapi/pool.py +7 -7
  27. webscout/Extra/YTToolkit/ytapi/query.py +39 -39
  28. webscout/Extra/YTToolkit/ytapi/stream.py +62 -62
  29. webscout/Extra/YTToolkit/ytapi/utils.py +62 -62
  30. webscout/Extra/YTToolkit/ytapi/video.py +232 -232
  31. webscout/Extra/autocoder/__init__.py +9 -9
  32. webscout/Extra/autocoder/autocoder.py +1105 -1105
  33. webscout/Extra/autocoder/autocoder_utiles.py +332 -332
  34. webscout/Extra/gguf.md +429 -429
  35. webscout/Extra/gguf.py +1213 -1213
  36. webscout/Extra/tempmail/README.md +487 -487
  37. webscout/Extra/tempmail/__init__.py +27 -27
  38. webscout/Extra/tempmail/async_utils.py +140 -140
  39. webscout/Extra/tempmail/base.py +160 -160
  40. webscout/Extra/tempmail/cli.py +186 -186
  41. webscout/Extra/tempmail/emailnator.py +84 -84
  42. webscout/Extra/tempmail/mail_tm.py +360 -360
  43. webscout/Extra/tempmail/temp_mail_io.py +291 -291
  44. webscout/Extra/weather.md +281 -281
  45. webscout/Extra/weather.py +193 -193
  46. webscout/Litlogger/README.md +10 -10
  47. webscout/Litlogger/__init__.py +15 -15
  48. webscout/Litlogger/formats.py +13 -13
  49. webscout/Litlogger/handlers.py +121 -121
  50. webscout/Litlogger/levels.py +13 -13
  51. webscout/Litlogger/logger.py +134 -134
  52. webscout/Provider/AISEARCH/Perplexity.py +332 -332
  53. webscout/Provider/AISEARCH/README.md +279 -279
  54. webscout/Provider/AISEARCH/__init__.py +33 -11
  55. webscout/Provider/AISEARCH/felo_search.py +206 -206
  56. webscout/Provider/AISEARCH/genspark_search.py +323 -323
  57. webscout/Provider/AISEARCH/hika_search.py +185 -185
  58. webscout/Provider/AISEARCH/iask_search.py +410 -410
  59. webscout/Provider/AISEARCH/monica_search.py +219 -219
  60. webscout/Provider/AISEARCH/scira_search.py +316 -314
  61. webscout/Provider/AISEARCH/stellar_search.py +177 -177
  62. webscout/Provider/AISEARCH/webpilotai_search.py +255 -255
  63. webscout/Provider/Aitopia.py +314 -315
  64. webscout/Provider/Andi.py +3 -3
  65. webscout/Provider/Apriel.py +306 -0
  66. webscout/Provider/ChatGPTClone.py +236 -236
  67. webscout/Provider/ChatSandbox.py +343 -342
  68. webscout/Provider/Cloudflare.py +324 -324
  69. webscout/Provider/Cohere.py +208 -207
  70. webscout/Provider/Deepinfra.py +370 -369
  71. webscout/Provider/ExaAI.py +260 -260
  72. webscout/Provider/ExaChat.py +308 -387
  73. webscout/Provider/Flowith.py +221 -221
  74. webscout/Provider/GMI.py +293 -0
  75. webscout/Provider/Gemini.py +164 -162
  76. webscout/Provider/GeminiProxy.py +167 -166
  77. webscout/Provider/GithubChat.py +371 -370
  78. webscout/Provider/Groq.py +800 -800
  79. webscout/Provider/HeckAI.py +383 -379
  80. webscout/Provider/Jadve.py +282 -297
  81. webscout/Provider/K2Think.py +308 -0
  82. webscout/Provider/Koboldai.py +206 -384
  83. webscout/Provider/LambdaChat.py +423 -425
  84. webscout/Provider/Nemotron.py +244 -245
  85. webscout/Provider/Netwrck.py +248 -247
  86. webscout/Provider/OLLAMA.py +395 -394
  87. webscout/Provider/OPENAI/Cloudflare.py +394 -395
  88. webscout/Provider/OPENAI/FalconH1.py +452 -457
  89. webscout/Provider/OPENAI/FreeGemini.py +297 -299
  90. webscout/Provider/OPENAI/{monochat.py → K2Think.py} +432 -329
  91. webscout/Provider/OPENAI/NEMOTRON.py +241 -244
  92. webscout/Provider/OPENAI/PI.py +428 -427
  93. webscout/Provider/OPENAI/README.md +959 -959
  94. webscout/Provider/OPENAI/TogetherAI.py +345 -345
  95. webscout/Provider/OPENAI/TwoAI.py +466 -467
  96. webscout/Provider/OPENAI/__init__.py +33 -59
  97. webscout/Provider/OPENAI/ai4chat.py +313 -303
  98. webscout/Provider/OPENAI/base.py +249 -269
  99. webscout/Provider/OPENAI/chatglm.py +528 -0
  100. webscout/Provider/OPENAI/chatgpt.py +593 -588
  101. webscout/Provider/OPENAI/chatgptclone.py +521 -524
  102. webscout/Provider/OPENAI/chatsandbox.py +202 -177
  103. webscout/Provider/OPENAI/deepinfra.py +319 -315
  104. webscout/Provider/OPENAI/e2b.py +1665 -1665
  105. webscout/Provider/OPENAI/exaai.py +420 -420
  106. webscout/Provider/OPENAI/exachat.py +452 -452
  107. webscout/Provider/OPENAI/friendli.py +232 -232
  108. webscout/Provider/OPENAI/{refact.py → gmi.py} +324 -274
  109. webscout/Provider/OPENAI/groq.py +364 -364
  110. webscout/Provider/OPENAI/heckai.py +314 -311
  111. webscout/Provider/OPENAI/llmchatco.py +337 -337
  112. webscout/Provider/OPENAI/netwrck.py +355 -354
  113. webscout/Provider/OPENAI/oivscode.py +290 -290
  114. webscout/Provider/OPENAI/opkfc.py +518 -518
  115. webscout/Provider/OPENAI/pydantic_imports.py +1 -1
  116. webscout/Provider/OPENAI/scirachat.py +535 -529
  117. webscout/Provider/OPENAI/sonus.py +308 -308
  118. webscout/Provider/OPENAI/standardinput.py +442 -442
  119. webscout/Provider/OPENAI/textpollinations.py +340 -348
  120. webscout/Provider/OPENAI/toolbaz.py +419 -413
  121. webscout/Provider/OPENAI/typefully.py +362 -362
  122. webscout/Provider/OPENAI/utils.py +295 -295
  123. webscout/Provider/OPENAI/venice.py +436 -436
  124. webscout/Provider/OPENAI/wisecat.py +387 -387
  125. webscout/Provider/OPENAI/writecream.py +166 -166
  126. webscout/Provider/OPENAI/x0gpt.py +378 -378
  127. webscout/Provider/OPENAI/yep.py +389 -389
  128. webscout/Provider/OpenGPT.py +230 -230
  129. webscout/Provider/Openai.py +244 -496
  130. webscout/Provider/PI.py +405 -404
  131. webscout/Provider/Perplexitylabs.py +430 -431
  132. webscout/Provider/QwenLM.py +272 -254
  133. webscout/Provider/STT/__init__.py +32 -2
  134. webscout/Provider/{Llama3.py → Sambanova.py} +257 -258
  135. webscout/Provider/StandardInput.py +309 -309
  136. webscout/Provider/TTI/README.md +82 -82
  137. webscout/Provider/TTI/__init__.py +33 -12
  138. webscout/Provider/TTI/aiarta.py +413 -413
  139. webscout/Provider/TTI/base.py +136 -136
  140. webscout/Provider/TTI/bing.py +243 -243
  141. webscout/Provider/TTI/gpt1image.py +149 -149
  142. webscout/Provider/TTI/imagen.py +196 -196
  143. webscout/Provider/TTI/infip.py +211 -211
  144. webscout/Provider/TTI/magicstudio.py +232 -232
  145. webscout/Provider/TTI/monochat.py +219 -219
  146. webscout/Provider/TTI/piclumen.py +214 -214
  147. webscout/Provider/TTI/pixelmuse.py +232 -232
  148. webscout/Provider/TTI/pollinations.py +232 -232
  149. webscout/Provider/TTI/together.py +288 -288
  150. webscout/Provider/TTI/utils.py +12 -12
  151. webscout/Provider/TTI/venice.py +367 -367
  152. webscout/Provider/TTS/README.md +192 -192
  153. webscout/Provider/TTS/__init__.py +33 -10
  154. webscout/Provider/TTS/parler.py +110 -110
  155. webscout/Provider/TTS/streamElements.py +333 -333
  156. webscout/Provider/TTS/utils.py +280 -280
  157. webscout/Provider/TeachAnything.py +237 -236
  158. webscout/Provider/TextPollinationsAI.py +311 -318
  159. webscout/Provider/TogetherAI.py +356 -357
  160. webscout/Provider/TwoAI.py +313 -569
  161. webscout/Provider/TypliAI.py +312 -311
  162. webscout/Provider/UNFINISHED/ChatHub.py +208 -208
  163. webscout/Provider/UNFINISHED/ChutesAI.py +313 -313
  164. webscout/Provider/{GizAI.py → UNFINISHED/GizAI.py} +294 -294
  165. webscout/Provider/{Marcus.py → UNFINISHED/Marcus.py} +198 -198
  166. webscout/Provider/{Qodo.py → UNFINISHED/Qodo.py} +477 -477
  167. webscout/Provider/UNFINISHED/VercelAIGateway.py +338 -338
  168. webscout/Provider/{XenAI.py → UNFINISHED/XenAI.py} +324 -324
  169. webscout/Provider/UNFINISHED/Youchat.py +330 -330
  170. webscout/Provider/UNFINISHED/liner.py +334 -0
  171. webscout/Provider/UNFINISHED/liner_api_request.py +262 -262
  172. webscout/Provider/UNFINISHED/puterjs.py +634 -634
  173. webscout/Provider/UNFINISHED/samurai.py +223 -223
  174. webscout/Provider/UNFINISHED/test_lmarena.py +119 -119
  175. webscout/Provider/Venice.py +251 -250
  176. webscout/Provider/VercelAI.py +256 -255
  177. webscout/Provider/WiseCat.py +232 -231
  178. webscout/Provider/WrDoChat.py +367 -366
  179. webscout/Provider/__init__.py +33 -86
  180. webscout/Provider/ai4chat.py +174 -174
  181. webscout/Provider/akashgpt.py +331 -334
  182. webscout/Provider/cerebras.py +446 -340
  183. webscout/Provider/chatglm.py +394 -214
  184. webscout/Provider/cleeai.py +211 -212
  185. webscout/Provider/deepseek_assistant.py +1 -1
  186. webscout/Provider/elmo.py +282 -282
  187. webscout/Provider/geminiapi.py +208 -208
  188. webscout/Provider/granite.py +261 -261
  189. webscout/Provider/hermes.py +263 -265
  190. webscout/Provider/julius.py +223 -222
  191. webscout/Provider/learnfastai.py +309 -309
  192. webscout/Provider/llama3mitril.py +214 -214
  193. webscout/Provider/llmchat.py +243 -243
  194. webscout/Provider/llmchatco.py +290 -290
  195. webscout/Provider/meta.py +801 -801
  196. webscout/Provider/oivscode.py +309 -309
  197. webscout/Provider/scira_chat.py +384 -457
  198. webscout/Provider/searchchat.py +292 -291
  199. webscout/Provider/sonus.py +258 -258
  200. webscout/Provider/toolbaz.py +370 -364
  201. webscout/Provider/turboseek.py +274 -265
  202. webscout/Provider/typefully.py +208 -207
  203. webscout/Provider/x0gpt.py +1 -0
  204. webscout/Provider/yep.py +372 -371
  205. webscout/__init__.py +30 -31
  206. webscout/__main__.py +5 -5
  207. webscout/auth/api_key_manager.py +189 -189
  208. webscout/auth/config.py +175 -175
  209. webscout/auth/models.py +185 -185
  210. webscout/auth/routes.py +664 -664
  211. webscout/auth/simple_logger.py +236 -236
  212. webscout/cli.py +523 -523
  213. webscout/conversation.py +438 -438
  214. webscout/exceptions.py +361 -361
  215. webscout/litagent/Readme.md +298 -298
  216. webscout/litagent/__init__.py +28 -28
  217. webscout/litagent/agent.py +581 -581
  218. webscout/litagent/constants.py +59 -59
  219. webscout/litprinter/__init__.py +58 -58
  220. webscout/models.py +181 -181
  221. webscout/optimizers.py +419 -419
  222. webscout/prompt_manager.py +288 -288
  223. webscout/sanitize.py +1078 -1078
  224. webscout/scout/README.md +401 -401
  225. webscout/scout/__init__.py +8 -8
  226. webscout/scout/core/__init__.py +6 -6
  227. webscout/scout/core/crawler.py +297 -297
  228. webscout/scout/core/scout.py +706 -706
  229. webscout/scout/core/search_result.py +95 -95
  230. webscout/scout/core/text_analyzer.py +62 -62
  231. webscout/scout/core/text_utils.py +277 -277
  232. webscout/scout/core/web_analyzer.py +51 -51
  233. webscout/scout/element.py +599 -599
  234. webscout/scout/parsers/__init__.py +69 -69
  235. webscout/scout/parsers/html5lib_parser.py +172 -172
  236. webscout/scout/parsers/html_parser.py +236 -236
  237. webscout/scout/parsers/lxml_parser.py +178 -178
  238. webscout/scout/utils.py +37 -37
  239. webscout/swiftcli/Readme.md +323 -323
  240. webscout/swiftcli/__init__.py +95 -95
  241. webscout/swiftcli/core/__init__.py +7 -7
  242. webscout/swiftcli/core/cli.py +308 -308
  243. webscout/swiftcli/core/context.py +104 -104
  244. webscout/swiftcli/core/group.py +241 -241
  245. webscout/swiftcli/decorators/__init__.py +28 -28
  246. webscout/swiftcli/decorators/command.py +221 -221
  247. webscout/swiftcli/decorators/options.py +220 -220
  248. webscout/swiftcli/decorators/output.py +302 -302
  249. webscout/swiftcli/exceptions.py +21 -21
  250. webscout/swiftcli/plugins/__init__.py +9 -9
  251. webscout/swiftcli/plugins/base.py +135 -135
  252. webscout/swiftcli/plugins/manager.py +269 -269
  253. webscout/swiftcli/utils/__init__.py +59 -59
  254. webscout/swiftcli/utils/formatting.py +252 -252
  255. webscout/swiftcli/utils/parsing.py +267 -267
  256. webscout/update_checker.py +117 -117
  257. webscout/version.py +1 -1
  258. webscout/webscout_search.py +1183 -1183
  259. webscout/webscout_search_async.py +649 -649
  260. webscout/yep_search.py +346 -346
  261. webscout/zeroart/README.md +89 -89
  262. webscout/zeroart/__init__.py +134 -134
  263. webscout/zeroart/base.py +66 -66
  264. webscout/zeroart/effects.py +100 -100
  265. webscout/zeroart/fonts.py +1238 -1238
  266. {webscout-8.3.6.dist-info → webscout-2025.10.11.dist-info}/METADATA +937 -936
  267. webscout-2025.10.11.dist-info/RECORD +300 -0
  268. webscout/Provider/AISEARCH/DeepFind.py +0 -254
  269. webscout/Provider/AllenAI.py +0 -440
  270. webscout/Provider/Blackboxai.py +0 -793
  271. webscout/Provider/FreeGemini.py +0 -250
  272. webscout/Provider/GptOss.py +0 -207
  273. webscout/Provider/Hunyuan.py +0 -283
  274. webscout/Provider/Kimi.py +0 -445
  275. webscout/Provider/MCPCore.py +0 -322
  276. webscout/Provider/MiniMax.py +0 -207
  277. webscout/Provider/OPENAI/BLACKBOXAI.py +0 -1045
  278. webscout/Provider/OPENAI/MiniMax.py +0 -298
  279. webscout/Provider/OPENAI/Qwen3.py +0 -304
  280. webscout/Provider/OPENAI/autoproxy.py +0 -1067
  281. webscout/Provider/OPENAI/copilot.py +0 -321
  282. webscout/Provider/OPENAI/gptoss.py +0 -288
  283. webscout/Provider/OPENAI/kimi.py +0 -469
  284. webscout/Provider/OPENAI/mcpcore.py +0 -431
  285. webscout/Provider/OPENAI/multichat.py +0 -378
  286. webscout/Provider/OPENAI/qodo.py +0 -630
  287. webscout/Provider/OPENAI/xenai.py +0 -514
  288. webscout/Provider/Reka.py +0 -214
  289. webscout/Provider/UNFINISHED/fetch_together_models.py +0 -90
  290. webscout/Provider/asksteve.py +0 -220
  291. webscout/Provider/copilot.py +0 -441
  292. webscout/Provider/freeaichat.py +0 -294
  293. webscout/Provider/koala.py +0 -182
  294. webscout/Provider/lmarena.py +0 -198
  295. webscout/Provider/monochat.py +0 -275
  296. webscout/Provider/multichat.py +0 -375
  297. webscout/Provider/scnet.py +0 -244
  298. webscout/Provider/talkai.py +0 -194
  299. webscout/tempid.py +0 -128
  300. webscout-8.3.6.dist-info/RECORD +0 -327
  301. {webscout-8.3.6.dist-info → webscout-2025.10.11.dist-info}/WHEEL +0 -0
  302. {webscout-8.3.6.dist-info → webscout-2025.10.11.dist-info}/entry_points.txt +0 -0
  303. {webscout-8.3.6.dist-info → webscout-2025.10.11.dist-info}/licenses/LICENSE.md +0 -0
  304. {webscout-8.3.6.dist-info → webscout-2025.10.11.dist-info}/top_level.txt +0 -0
webscout/scout/element.py CHANGED
@@ -1,599 +1,599 @@
1
- """
2
- Scout Element Module - Advanced HTML Element Representation
3
- """
4
-
5
- import re
6
- from typing import Any, Dict, List, Optional, Union
7
-
8
-
9
- class NavigableString(str):
10
- """
11
- A string that knows its place in the document tree.
12
- Mimics BeautifulSoup's NavigableString for better compatibility.
13
- """
14
- def __new__(cls, text: str):
15
- """
16
- Create a new NavigableString instance.
17
-
18
- Args:
19
- text (str): String content
20
- """
21
- return str.__new__(cls, text)
22
-
23
- def __init__(self, text: str):
24
- """
25
- Initialize a navigable string.
26
-
27
- Args:
28
- text (str): String content
29
- """
30
- self.parent = None
31
-
32
- def __repr__(self):
33
- """String representation."""
34
- return f"NavigableString({super().__repr__()})"
35
-
36
- def __add__(self, other):
37
- """
38
- Allow concatenation of NavigableString with other strings.
39
-
40
- Args:
41
- other (str): String to concatenate
42
-
43
- Returns:
44
- str: Concatenated string
45
- """
46
- return str(self) + str(other)
47
-
48
- def strip(self, chars=None):
49
- """
50
- Strip whitespace or specified characters.
51
-
52
- Args:
53
- chars (str, optional): Characters to strip
54
-
55
- Returns:
56
- str: Stripped string
57
- """
58
- return NavigableString(super().strip(chars))
59
-
60
- class Tag:
61
- """
62
- Represents an HTML tag with advanced traversal and manipulation capabilities.
63
- Enhanced to closely mimic BeautifulSoup's Tag class.
64
- """
65
- def __init__(self, name: str, attrs: Dict[str, str] = None):
66
- """
67
- Initialize a Tag with name and attributes.
68
-
69
- Args:
70
- name (str): Tag name
71
- attrs (dict, optional): Tag attributes
72
- """
73
- self.name = name
74
- self.attrs = attrs or {}
75
- self.contents = []
76
- self.parent = None
77
- self.string = None # For single string content
78
-
79
- def __str__(self):
80
- """String representation of the tag."""
81
- return self.decode_contents()
82
-
83
- def __repr__(self):
84
- """Detailed representation of the tag."""
85
- return f"<{self.name} {self.attrs}>"
86
-
87
- def __call__(self, *args, **kwargs):
88
- """
89
- Allows calling find_all directly on the tag.
90
- Mimics BeautifulSoup's behavior.
91
- """
92
- return self.find_all(*args, **kwargs)
93
-
94
- def __contains__(self, item):
95
- """
96
- Check if an item is in the tag's contents.
97
-
98
- Args:
99
- item: Item to search for
100
-
101
- Returns:
102
- bool: True if item is in contents, False otherwise
103
- """
104
- return item in self.contents
105
-
106
- def __getitem__(self, key):
107
- """
108
- Get an attribute value using dictionary-like access.
109
-
110
- Args:
111
- key (str): Attribute name
112
-
113
- Returns:
114
- Any: Attribute value
115
- """
116
- return self.attrs[key]
117
-
118
- def __iter__(self):
119
- """
120
- Iterate through tag's contents.
121
-
122
- Returns:
123
- Iterator: Contents of the tag
124
- """
125
- return iter(self.contents)
126
-
127
- def __eq__(self, other):
128
- """
129
- Compare tags based on name and attributes.
130
-
131
- Args:
132
- other (Tag): Tag to compare
133
-
134
- Returns:
135
- bool: True if tags are equivalent
136
- """
137
- if not isinstance(other, Tag):
138
- return False
139
- return (
140
- self.name == other.name and
141
- self.attrs == other.attrs and
142
- str(self) == str(other)
143
- )
144
-
145
- def __hash__(self):
146
- """
147
- Generate a hash for the tag.
148
-
149
- Returns:
150
- int: Hash value
151
- """
152
- return hash((self.name, frozenset(self.attrs.items()), str(self)))
153
-
154
- def find(self, name=None, attrs={}, recursive=True, text=None, limit=None, class_=None, **kwargs) -> Optional['Tag']:
155
- """
156
- Find the first matching child element.
157
- Enhanced with more flexible matching.
158
-
159
- Args:
160
- name (str, optional): Tag name to search for
161
- attrs (dict, optional): Attributes to match
162
- recursive (bool, optional): Search recursively
163
- text (str, optional): Text content to match
164
-
165
- Returns:
166
- Tag or None: First matching element
167
- """
168
- # Merge class_ with attrs['class'] if both are present
169
- attrs = dict(attrs) if attrs else {}
170
- if class_ is not None:
171
- if 'class' in attrs:
172
- # Merge both
173
- if isinstance(attrs['class'], list):
174
- class_list = attrs['class']
175
- else:
176
- class_list = [cls.strip() for cls in re.split(r'[ ,]+', str(attrs['class'])) if cls.strip()]
177
- if isinstance(class_, list):
178
- class_list += class_
179
- else:
180
- class_list += [cls.strip() for cls in re.split(r'[ ,]+', str(class_)) if cls.strip()]
181
- attrs['class'] = class_list
182
- else:
183
- attrs['class'] = class_
184
- results = self.find_all(name, attrs, recursive, text, limit=1, **kwargs)
185
- return results[0] if results else None
186
-
187
- def find_all(self, name=None, attrs={}, recursive=True, text=None, limit=None, class_=None, **kwargs) -> List['Tag']:
188
- """
189
- Find all matching child elements.
190
- Enhanced with more flexible matching and BeautifulSoup-like features.
191
-
192
- Args:
193
- name (str, optional): Tag name to search for
194
- attrs (dict, optional): Attributes to match
195
- recursive (bool, optional): Search recursively
196
- text (str, optional): Text content to match
197
- limit (int, optional): Maximum number of results
198
-
199
- Returns:
200
- List[Tag]: List of matching elements
201
- """
202
- results = []
203
-
204
- def _match(tag):
205
- # Check tag name with case-insensitive and regex support
206
- if name:
207
- if isinstance(name, str):
208
- if tag.name.lower() != name.lower():
209
- return False
210
- elif isinstance(name, re.Pattern):
211
- if not name.search(tag.name):
212
- return False
213
-
214
- # Check attributes with more flexible matching
215
- for k, v in attrs.items():
216
- if k == 'class':
217
- tag_classes = tag.get('class', [])
218
- # Support multiple classes separated by space or comma
219
- if isinstance(v, str):
220
- v_classes = [cls.strip() for cls in re.split(r'[ ,]+', v) if cls.strip()]
221
- if not all(cls in tag_classes for cls in v_classes):
222
- return False
223
- elif isinstance(v, list):
224
- if not all(cls in tag_classes for cls in v):
225
- return False
226
- else:
227
- return False
228
- elif k == 'id':
229
- if tag.get('id') != v:
230
- return False
231
- else:
232
- # Regex or exact match for other attributes
233
- tag_attr = tag.attrs.get(k)
234
- if v is True:
235
- if tag_attr is None:
236
- return False
237
- elif isinstance(v, re.Pattern):
238
- if tag_attr is None or not v.search(str(tag_attr)):
239
- return False
240
- elif tag_attr != v:
241
- return False
242
-
243
- # Check text content
244
- if text:
245
- tag_text = tag.get_text(strip=True)
246
- if isinstance(text, str) and text.lower() not in tag_text.lower():
247
- return False
248
- elif isinstance(text, re.Pattern) and not text.search(tag_text):
249
- return False
250
-
251
- return True
252
-
253
- def _search(element):
254
- if _match(element):
255
- results.append(element)
256
- if limit and len(results) == limit:
257
- return
258
-
259
- if recursive:
260
- for child in element.contents:
261
- if isinstance(child, Tag):
262
- _search(child)
263
-
264
- _search(self)
265
- return results
266
-
267
- def select(self, selector: str) -> List['Tag']:
268
- """
269
- Select elements using CSS selector.
270
- Enhanced to support more complex selectors.
271
-
272
- Args:
273
- selector (str): CSS selector string
274
-
275
- Returns:
276
- List[Tag]: List of matching elements
277
- """
278
- # More advanced CSS selector parsing
279
- # This is a simplified implementation and might need more robust parsing
280
- parts = re.split(r'\s+', selector.strip())
281
- results = []
282
-
283
- def _match_selector(tag, selector_part):
284
- # Support more complex selectors
285
- if selector_part.startswith('.'):
286
- # Class selector
287
- return selector_part[1:] in tag.get('class', [])
288
- elif selector_part.startswith('#'):
289
- # ID selector
290
- return tag.get('id') == selector_part[1:]
291
- elif '[' in selector_part and ']' in selector_part:
292
- # Attribute selector
293
- attr_match = re.match(r'(\w+)\[([^=]+)(?:=(.+))?\]', selector_part)
294
- if attr_match:
295
- tag_name, attr, value = attr_match.groups()
296
- if tag_name and tag.name != tag_name:
297
- return False
298
- if value:
299
- return tag.get(attr) == value.strip("'\"")
300
- return attr in tag.attrs
301
- else:
302
- # Tag selector
303
- return tag.name == selector_part
304
-
305
- def _recursive_select(element, selector_parts):
306
- if not selector_parts:
307
- results.append(element)
308
- return
309
-
310
- current_selector = selector_parts[0]
311
- remaining_selectors = selector_parts[1:]
312
-
313
- if _match_selector(element, current_selector):
314
- if not remaining_selectors:
315
- results.append(element)
316
- else:
317
- for child in element.contents:
318
- if isinstance(child, Tag):
319
- _recursive_select(child, remaining_selectors)
320
-
321
- for child in self.contents:
322
- if isinstance(child, Tag):
323
- _recursive_select(child, parts)
324
-
325
- return results
326
-
327
- def select_one(self, selector: str) -> Optional['Tag']:
328
- """
329
- Select the first element matching the CSS selector.
330
-
331
- Args:
332
- selector (str): CSS selector string
333
-
334
- Returns:
335
- Tag or None: First matching element
336
- """
337
- results = self.select(selector)
338
- return results[0] if results else None
339
-
340
- def get_text(self, separator=' ', strip=False, types=None) -> str:
341
- """
342
- Extract text from the tag and its descendants.
343
- Enhanced to support more flexible text extraction.
344
-
345
- Args:
346
- separator (str, optional): Text separator
347
- strip (bool, optional): Strip whitespace
348
- types (list, optional): Types of content to extract
349
-
350
- Returns:
351
- str: Extracted text
352
- """
353
- texts = []
354
- for content in self.contents:
355
- # Support filtering by content type
356
- if types is None or type(content) in types:
357
- if isinstance(content, NavigableString):
358
- texts.append(str(content))
359
- elif isinstance(content, Tag):
360
- texts.append(content.get_text(separator, strip))
361
-
362
- text = separator.join(texts)
363
- text = re.sub(r'\n\n+', '\n', text) # Replace multiple newlines with single newlines
364
- return text.strip() if strip else text
365
-
366
- def find_text(self, pattern: Union[str, re.Pattern], **kwargs) -> Optional[str]:
367
- """
368
- Find the first text matching a pattern.
369
-
370
- Args:
371
- pattern (str or re.Pattern): Pattern to match
372
- **kwargs: Additional arguments for get_text()
373
-
374
- Returns:
375
- str or None: First matching text
376
- """
377
- text = self.get_text(**kwargs)
378
-
379
- if isinstance(pattern, str):
380
- return pattern if pattern in text else None
381
- elif isinstance(pattern, re.Pattern):
382
- match = pattern.search(text)
383
- return match.group(0) if match else None
384
-
385
- def replace_text(self, old: Union[str, re.Pattern], new: str, **kwargs) -> str:
386
- """
387
- Replace text matching a pattern.
388
-
389
- Args:
390
- old (str or re.Pattern): Pattern to replace
391
- new (str): Replacement text
392
- **kwargs: Additional arguments for get_text()
393
-
394
- Returns:
395
- str: Modified text
396
- """
397
- text = self.get_text(**kwargs)
398
-
399
- if isinstance(old, str):
400
- return text.replace(old, new)
401
- elif isinstance(old, re.Pattern):
402
- return old.sub(new, text)
403
-
404
- def get(self, key: str, default: Any = None) -> Any:
405
- """
406
- Get an attribute value.
407
-
408
- Args:
409
- key (str): Attribute name
410
- default (Any, optional): Default value if attribute not found
411
-
412
- Returns:
413
- Any: Attribute value or default
414
- """
415
- return self.attrs.get(key, default)
416
-
417
- def decompose(self) -> None:
418
- """Remove the tag and its contents from the document."""
419
- if self.parent:
420
- self.parent.contents.remove(self)
421
-
422
- def extract(self) -> 'Tag':
423
- """
424
- Remove the tag from the document and return it.
425
-
426
- Returns:
427
- Tag: Extracted tag
428
- """
429
- self.decompose()
430
- return self
431
-
432
- def clear(self) -> None:
433
- """Remove all contents of the tag."""
434
- self.contents.clear()
435
-
436
- @property
437
- def string(self):
438
- """
439
- Get the string content of the tag.
440
- Returns the combined text of the tag's contents.
441
- """
442
- return self.get_text()
443
-
444
- @string.setter
445
- def string(self, value):
446
- """
447
- Set the string content of the tag.
448
- Clears existing contents and sets new string value.
449
-
450
- Args:
451
- value (str): New string content
452
- """
453
- self.clear()
454
- if value is not None:
455
- self.append(value)
456
-
457
- def append(self, new_child: Union['Tag', NavigableString, str]) -> None:
458
- """Append a new child to this tag with error handling."""
459
- if isinstance(new_child, str):
460
- new_child = NavigableString(new_child)
461
- if hasattr(new_child, 'parent'):
462
- new_child.parent = self
463
- self.contents.append(new_child)
464
-
465
- def insert(self, index: int, new_child: Union['Tag', NavigableString, str]) -> None:
466
- """Insert a new child at the given index with error handling."""
467
- if isinstance(new_child, str):
468
- new_child = NavigableString(new_child)
469
- if hasattr(new_child, 'parent'):
470
- new_child.parent = self
471
- self.contents.insert(index, new_child)
472
-
473
- def replace_with(self, new_tag: 'Tag') -> None:
474
- """Replace this tag with another tag with error handling."""
475
- if self.parent:
476
- try:
477
- index = self.parent.contents.index(self)
478
- self.parent.contents[index] = new_tag
479
- new_tag.parent = self.parent
480
- except ValueError:
481
- pass
482
-
483
- def wrap(self, wrapper_tag: 'Tag') -> 'Tag':
484
- """Wrap this tag in another tag."""
485
- if self.parent:
486
- idx = self.parent.contents.index(self)
487
- self.parent.contents[idx] = wrapper_tag
488
- wrapper_tag.parent = self.parent
489
- else:
490
- wrapper_tag.parent = None
491
- wrapper_tag.contents.append(self)
492
- self.parent = wrapper_tag
493
- return wrapper_tag
494
-
495
- def unwrap(self) -> None:
496
- """Remove this tag but keep its contents in the parent."""
497
- if self.parent:
498
- idx = self.parent.contents.index(self)
499
- for child in reversed(self.contents):
500
- child.parent = self.parent
501
- self.parent.contents.insert(idx, child)
502
- self.parent.contents.remove(self)
503
- self.parent = None
504
- self.contents = []
505
-
506
- def insert_before(self, new_element: 'Tag') -> None:
507
- """Insert a tag or string immediately before this tag."""
508
- if self.parent:
509
- idx = self.parent.contents.index(self)
510
- new_element.parent = self.parent
511
- self.parent.contents.insert(idx, new_element)
512
-
513
- def insert_after(self, new_element: 'Tag') -> None:
514
- """Insert a tag or string immediately after this tag."""
515
- if self.parent:
516
- idx = self.parent.contents.index(self)
517
- new_element.parent = self.parent
518
- self.parent.contents.insert(idx + 1, new_element)
519
-
520
- @property
521
- def descendants(self):
522
- """Yield all descendants in document order."""
523
- for child in self.contents:
524
- yield child
525
- if isinstance(child, Tag):
526
- yield from child.descendants
527
-
528
- @property
529
- def parents(self):
530
- """Yield all parents up the tree."""
531
- current = self.parent
532
- while current:
533
- yield current
534
- current = current.parent
535
-
536
- @property
537
- def next_element(self):
538
- """Return the next element in document order."""
539
- if self.contents:
540
- return self.contents[0]
541
- current = self
542
- while current.parent:
543
- idx = current.parent.contents.index(current)
544
- if idx + 1 < len(current.parent.contents):
545
- return current.parent.contents[idx + 1]
546
- current = current.parent
547
- return None
548
-
549
@property
def previous_element(self):
    """
    Return the previous element in document order.

    That is the deepest last descendant of the preceding sibling, or the
    parent when this tag is the first child.  Returns None for a tag
    without a parent.

    Raises:
        ValueError: If the tag has a parent but is not among its contents.
    """
    parent = self.parent
    if not parent:
        return None

    siblings = parent.contents
    # Identity lookup: list.index() compares with ==, which would stop at
    # the first sibling that merely looks like this tag.
    position = next(
        (i for i, node in enumerate(siblings) if node is self), None
    )
    if position is None:
        raise ValueError("tag is not in its parent's contents")
    if position == 0:
        return parent

    # Descend through last children to reach the node that immediately
    # precedes this tag in document order.
    previous = siblings[position - 1]
    while isinstance(previous, Tag) and previous.contents:
        previous = previous.contents[-1]
    return previous
561
-
562
def decode_contents(self, eventual_encoding='utf-8') -> str:
    """
    Render the tag's children as one string.

    Args:
        eventual_encoding (str, optional): Accepted but not used by this
            implementation.

    Returns:
        str: ``str()`` of every child, concatenated in order
    """
    rendered = map(str, self.contents)
    return ''.join(rendered)
573
-
574
def prettify(self, formatter='minimal') -> str:
    """
    Return a nicely formatted representation of the tag.

    Each tag and each non-tag child is written on its own line, indented
    two spaces per nesting level.  List/tuple attribute values (such as a
    class list) are written space-separated rather than as a Python repr.

    Args:
        formatter (str, optional): Accepted but not used by this
            implementation.

    Returns:
        str: Prettified tag representation, ending with a newline
    """
    lines = []

    def _attr_text(value):
        # Multi-valued attributes are space-separated in HTML.
        if isinstance(value, (list, tuple)):
            return ' '.join(str(item) for item in value)
        return value

    def _emit(tag, indent):
        pad = ' ' * indent
        attributes = ''.join(
            f' {key}="{_attr_text(value)}"' for key, value in tag.attrs.items()
        )
        lines.append(f'{pad}<{tag.name}{attributes}>')
        for content in tag.contents:
            if isinstance(content, Tag):
                _emit(content, indent + 2)
            else:
                lines.append(' ' * (indent + 2) + str(content))
        lines.append(f'{pad}</{tag.name}>')

    _emit(self, 0)
    # Collect lines and join once instead of growing a string with +=.
    return '\n'.join(lines) + '\n'
1
+ """
2
+ Scout Element Module - Advanced HTML Element Representation
3
+ """
4
+
5
+ import re
6
+ from typing import Any, Dict, List, Optional, Union
7
+
8
+
9
class NavigableString(str):
    """
    Text node that remembers which element contains it.

    Behaves like a regular ``str`` and additionally carries a ``parent``
    reference, in the spirit of BeautifulSoup's NavigableString.
    """

    def __new__(cls, text: str):
        """
        Build the underlying immutable string value.

        Args:
            text (str): String content
        """
        return super().__new__(cls, text)

    def __init__(self, text: str):
        """
        Start out detached from any tree.

        Args:
            text (str): String content (already consumed by ``__new__``)
        """
        self.parent = None

    def __repr__(self):
        """Debug representation wrapping the plain string repr."""
        return "NavigableString(" + str.__repr__(self) + ")"

    def __add__(self, other):
        """
        Concatenate with another value, coercing it with ``str()``.

        Args:
            other: Value appended after this string

        Returns:
            str: Plain ``str`` holding both pieces
        """
        return "".join((str(self), str(other)))

    def strip(self, chars=None):
        """
        Strip whitespace, or the given characters, from both ends.

        Args:
            chars (str, optional): Characters to strip

        Returns:
            NavigableString: New, detached string with the ends trimmed
        """
        trimmed = str.strip(self, chars)
        return NavigableString(trimmed)
59
+
60
class Tag:
    """
    Represents an HTML tag with advanced traversal and manipulation capabilities.
    Enhanced to closely mimic BeautifulSoup's Tag class.

    Attributes:
        name: Tag name.
        attrs: Attribute mapping (values may be strings or lists).
        contents: Ordered child nodes (tags and strings).
        parent: Containing tag, or None when detached.

    Note:
        ``==`` compares tags structurally (name, attributes, rendered
        contents).  Every tree operation below therefore locates nodes by
        identity, so that look-alike siblings are never confused.
    """

    # Building blocks of one compound CSS selector such as ``a.btn[href]``.
    _SELECTOR_NAME = re.compile(r'[\w-]+|\*')
    _SELECTOR_TOKEN = re.compile(
        r'#(?P<id>[\w-]+)'
        r'|\.(?P<cls>[\w-]+)'
        r'|\[(?P<attr>[^\]=]+)(?:=(?P<val>[^\]]*))?\]'
    )
    # Class filters may be separated by spaces or commas.
    _CLASS_SPLIT = re.compile(r'[ ,]+')

    def __init__(self, name: str, attrs: Optional[Dict[str, Any]] = None):
        """
        Initialize a Tag with name and attributes.

        Args:
            name (str): Tag name
            attrs (dict, optional): Tag attributes
        """
        self.name = name
        self.attrs = attrs or {}
        self.contents = []
        self.parent = None
        # ``string`` is a property computed from ``contents``; there is no
        # separate attribute to initialise.

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _position(node) -> int:
        """Return the index of *node* in its parent's contents, by identity.

        Raises:
            ValueError: If the node is not among its parent's contents.
        """
        for index, sibling in enumerate(node.parent.contents):
            if sibling is node:
                return index
        raise ValueError(f"{node!r} is not in its parent's contents")

    @staticmethod
    def _attach(child, parent):
        """Wrap plain strings as NavigableString and point *child* at *parent*."""
        if isinstance(child, str) and not isinstance(child, NavigableString):
            child = NavigableString(child)
        if hasattr(child, 'parent'):
            child.parent = parent
        return child

    @staticmethod
    def _split_class_filter(value) -> list:
        """Turn a class filter (list, or space/comma separated text) into a new list."""
        if isinstance(value, list):
            return list(value)
        return [c.strip() for c in Tag._CLASS_SPLIT.split(str(value)) if c.strip()]

    @staticmethod
    def _class_list(tag) -> list:
        """Return the classes of *tag*, accepting a list or a raw string value."""
        value = tag.get('class', [])
        if value is None:
            return []
        if isinstance(value, str):
            return value.split()
        return value

    @staticmethod
    def _freeze(value):
        """Return a hashable stand-in for an attribute value."""
        if isinstance(value, (list, tuple)):
            return tuple(Tag._freeze(item) for item in value)
        if isinstance(value, (set, frozenset)):
            return frozenset(Tag._freeze(item) for item in value)
        if isinstance(value, dict):
            return frozenset((k, Tag._freeze(v)) for k, v in value.items())
        return value

    @classmethod
    def _parse_compound(cls, part: str):
        """Parse one whitespace-free selector into (name, ids, classes, attrs).

        Returns None when the text is not a supported compound selector.
        """
        position = 0
        name = None
        head = cls._SELECTOR_NAME.match(part)
        if head:
            name = head.group(0)
            position = head.end()

        ids, classes, attr_tests = [], [], []
        while position < len(part):
            token = cls._SELECTOR_TOKEN.match(part, position)
            if token is None:
                return None
            if token.group('id') is not None:
                ids.append(token.group('id'))
            elif token.group('cls') is not None:
                classes.append(token.group('cls'))
            else:
                raw = token.group('val')
                # An absent or empty value only tests for attribute presence.
                expected = raw.strip("'\"") if raw else None
                attr_tests.append((token.group('attr'), expected))
            position = token.end()

        if name is None and not (ids or classes or attr_tests):
            return None
        return name, ids, classes, attr_tests

    @staticmethod
    def _matches_compound(tag, spec) -> bool:
        """Check *tag* against a selector parsed by ``_parse_compound``."""
        name, ids, classes, attr_tests = spec
        if name is not None and name != '*' and tag.name != name:
            return False
        if any(tag.get('id') != wanted for wanted in ids):
            return False
        if classes:
            present = Tag._class_list(tag)
            if not all(wanted in present for wanted in classes):
                return False
        for attr, expected in attr_tests:
            if expected is None:
                if attr not in tag.attrs:
                    return False
            elif tag.get(attr) != expected:
                return False
        return True

    # ------------------------------------------------------------------
    # Dunder protocol
    # ------------------------------------------------------------------
    def __str__(self):
        """String representation of the tag (its rendered contents)."""
        return self.decode_contents()

    def __repr__(self):
        """Detailed representation of the tag."""
        return f"<{self.name} {self.attrs}>"

    def __call__(self, *args, **kwargs):
        """
        Allows calling find_all directly on the tag.
        Mimics BeautifulSoup's behavior.
        """
        return self.find_all(*args, **kwargs)

    def __contains__(self, item):
        """
        Check if an item is in the tag's contents.

        Args:
            item: Item to search for

        Returns:
            bool: True if item is in contents, False otherwise
        """
        return item in self.contents

    def __getitem__(self, key):
        """
        Get an attribute value using dictionary-like access.

        Args:
            key (str): Attribute name

        Returns:
            Any: Attribute value

        Raises:
            KeyError: If the attribute is not set
        """
        return self.attrs[key]

    def __iter__(self):
        """
        Iterate through tag's contents.

        Returns:
            Iterator: Contents of the tag
        """
        return iter(self.contents)

    def __eq__(self, other):
        """
        Compare tags based on name, attributes and rendered contents.

        Args:
            other (Tag): Tag to compare

        Returns:
            bool: True if tags are equivalent
        """
        if not isinstance(other, Tag):
            return False
        return (
            self.name == other.name and
            self.attrs == other.attrs and
            str(self) == str(other)
        )

    def __hash__(self):
        """
        Generate a hash consistent with ``__eq__``.

        List-valued attributes (e.g. a class list) are frozen first so that
        hashing does not raise TypeError.

        Returns:
            int: Hash value
        """
        frozen_attrs = frozenset(
            (key, self._freeze(value)) for key, value in self.attrs.items()
        )
        return hash((self.name, frozen_attrs, str(self)))

    # ------------------------------------------------------------------
    # Searching
    # ------------------------------------------------------------------
    def find(self, name=None, attrs=None, recursive=True, text=None, limit=None, class_=None, **kwargs) -> Optional['Tag']:
        """
        Find the first matching element.

        Args:
            name (str or re.Pattern, optional): Tag name to search for
            attrs (dict, optional): Attributes to match
            recursive (bool, optional): Search recursively
            text (str or re.Pattern, optional): Text content to match
            limit: Ignored; the search always stops at the first match
            class_ (str or list, optional): Class filter, merged with ``attrs['class']``
            **kwargs: Additional attribute filters

        Returns:
            Tag or None: First matching element
        """
        results = self.find_all(
            name, attrs, recursive, text, limit=1, class_=class_, **kwargs
        )
        return results[0] if results else None

    def find_all(self, name=None, attrs=None, recursive=True, text=None, limit=None, class_=None, **kwargs) -> List['Tag']:
        """
        Find all matching elements, in document order.

        This tag itself is tested first, followed by its descendants (or
        only its direct children when ``recursive`` is false).

        Args:
            name (str or re.Pattern, optional): Tag name (case-insensitive) or pattern
            attrs (dict, optional): Attributes to match; values may be exact
                values, ``True`` (attribute present) or compiled patterns
            recursive (bool, optional): Search all descendants rather than
                only direct children
            text (str or re.Pattern, optional): Text content to match
            limit (int, optional): Maximum number of results
            class_ (str or list, optional): Required class(es), merged with ``attrs['class']``
            **kwargs: Additional attribute filters; ``string`` is accepted
                as an alias for ``text``

        Returns:
            List[Tag]: List of matching elements
        """
        # Work on a copy so neither the caller's dict nor its lists change.
        filters = dict(attrs) if attrs else {}
        if text is None and 'string' in kwargs:
            text = kwargs.pop('string')
        filters.update(kwargs)
        if class_ is not None:
            if 'class' in filters:
                filters['class'] = (
                    self._split_class_filter(filters['class'])
                    + self._split_class_filter(class_)
                )
            else:
                filters['class'] = class_

        def _matches(tag) -> bool:
            # Tag name: case-insensitive text or a compiled pattern.
            if name:
                if isinstance(name, str):
                    if tag.name.lower() != name.lower():
                        return False
                elif isinstance(name, re.Pattern):
                    if not name.search(tag.name):
                        return False

            for key, wanted in filters.items():
                if key == 'class':
                    if isinstance(wanted, str):
                        needed = self._split_class_filter(wanted)
                    elif isinstance(wanted, list):
                        needed = wanted
                    else:
                        return False
                    present = self._class_list(tag)
                    if not all(cls in present for cls in needed):
                        return False
                elif key == 'id':
                    if tag.get('id') != wanted:
                        return False
                else:
                    actual = tag.attrs.get(key)
                    if wanted is True:
                        if actual is None:
                            return False
                    elif isinstance(wanted, re.Pattern):
                        if actual is None or not wanted.search(str(actual)):
                            return False
                    elif actual != wanted:
                        return False

            if text:
                content = tag.get_text(strip=True)
                if isinstance(text, str):
                    if text.lower() not in content.lower():
                        return False
                elif isinstance(text, re.Pattern):
                    if not text.search(content):
                        return False

            return True

        def _candidates():
            yield self
            yield from (self.descendants if recursive else self.contents)

        results = []
        for node in _candidates():
            if isinstance(node, Tag) and _matches(node):
                results.append(node)
                # Stop the whole walk, not just one branch, at the limit.
                if limit and len(results) >= limit:
                    break
        return results

    def select(self, selector: str) -> List['Tag']:
        """
        Select descendant elements using a CSS selector.

        Supported syntax: compound selectors built from a tag name (or
        ``*``), ``#id``, ``.class`` and ``[attr]`` / ``[attr=value]``,
        chained with whitespace as the descendant combinator.  Other
        combinators (``>``, ``+``, ``~``) and selector lists are not
        supported and yield no matches.

        Args:
            selector (str): CSS selector string

        Returns:
            List[Tag]: Matching descendants in document order
        """
        compiled = [self._parse_compound(part) for part in selector.split()]
        if not compiled or any(spec is None for spec in compiled):
            return []

        last = len(compiled) - 1
        results = []

        def _walk(node, matched):
            # ``matched`` counts the leading selectors already satisfied by
            # ancestors; taking the earliest ancestor match is always safe
            # for descendant combinators.
            for child in node.contents:
                if not isinstance(child, Tag):
                    continue
                if matched == last and self._matches_compound(child, compiled[last]):
                    results.append(child)
                advances = (
                    matched < last
                    and self._matches_compound(child, compiled[matched])
                )
                _walk(child, matched + 1 if advances else matched)

        _walk(self, 0)
        return results

    def select_one(self, selector: str) -> Optional['Tag']:
        """
        Select the first element matching the CSS selector.

        Args:
            selector (str): CSS selector string

        Returns:
            Tag or None: First matching element
        """
        results = self.select(selector)
        return results[0] if results else None

    # ------------------------------------------------------------------
    # Text access
    # ------------------------------------------------------------------
    def get_text(self, separator=' ', strip=False, types=None) -> str:
        """
        Extract text from the tag and its descendants.

        Args:
            separator (str, optional): Text placed between child texts
            strip (bool, optional): Strip surrounding whitespace
            types (list, optional): When given, only direct children whose
                exact type is listed are considered

        Returns:
            str: Extracted text, with runs of newlines collapsed to one
        """
        pieces = []
        for content in self.contents:
            if types is not None and type(content) not in types:
                continue
            if isinstance(content, Tag):
                pieces.append(content.get_text(separator, strip))
            elif isinstance(content, str):
                # Covers NavigableString and any plain str child.
                pieces.append(str(content))

        text = separator.join(pieces)
        text = re.sub(r'\n\n+', '\n', text)  # Replace multiple newlines with single newlines
        return text.strip() if strip else text

    def find_text(self, pattern: Union[str, re.Pattern], **kwargs) -> Optional[str]:
        """
        Find the first text matching a pattern.

        Args:
            pattern (str or re.Pattern): Pattern to match
            **kwargs: Additional arguments for get_text()

        Returns:
            str or None: The substring itself, the regex match, or None
        """
        text = self.get_text(**kwargs)

        if isinstance(pattern, str):
            return pattern if pattern in text else None
        if isinstance(pattern, re.Pattern):
            match = pattern.search(text)
            return match.group(0) if match else None
        return None

    def replace_text(self, old: Union[str, re.Pattern], new: str, **kwargs) -> str:
        """
        Return the tag's text with matches of a pattern replaced.

        The tree itself is not modified.

        Args:
            old (str or re.Pattern): Pattern to replace
            new (str): Replacement text
            **kwargs: Additional arguments for get_text()

        Returns:
            str: Modified text (None for an unsupported ``old`` type)
        """
        text = self.get_text(**kwargs)

        if isinstance(old, str):
            return text.replace(old, new)
        if isinstance(old, re.Pattern):
            return old.sub(new, text)
        return None

    def get(self, key: str, default: Any = None) -> Any:
        """
        Get an attribute value.

        Args:
            key (str): Attribute name
            default (Any, optional): Default value if attribute not found

        Returns:
            Any: Attribute value or default
        """
        return self.attrs.get(key, default)

    # ------------------------------------------------------------------
    # Tree modification
    # ------------------------------------------------------------------
    def decompose(self) -> None:
        """Remove the tag (with its contents) from its parent.

        Raises:
            ValueError: If the tag has a parent but is not among its contents.
        """
        if self.parent:
            del self.parent.contents[self._position(self)]
            # Drop the stale back-reference so the tag is truly detached.
            self.parent = None

    def extract(self) -> 'Tag':
        """
        Remove the tag from the document and return it.

        Returns:
            Tag: Extracted tag, now without a parent
        """
        self.decompose()
        return self

    def clear(self) -> None:
        """Remove all contents of the tag and detach them."""
        for child in self.contents:
            if getattr(child, 'parent', None) is self:
                child.parent = None
        self.contents.clear()

    @property
    def string(self):
        """
        Get the string content of the tag.
        Returns the combined text of the tag's contents.
        """
        return self.get_text()

    @string.setter
    def string(self, value):
        """
        Set the string content of the tag.
        Clears existing contents and sets new string value.

        Args:
            value (str): New string content; None just clears the tag
        """
        self.clear()
        if value is not None:
            self.append(value)

    def append(self, new_child: Union['Tag', 'NavigableString', str]) -> None:
        """Append a new child to this tag; plain strings become NavigableString."""
        self.contents.append(self._attach(new_child, self))

    def insert(self, index: int, new_child: Union['Tag', 'NavigableString', str]) -> None:
        """Insert a new child at the given index; plain strings become NavigableString."""
        self.contents.insert(index, self._attach(new_child, self))

    def replace_with(self, new_tag: 'Tag') -> None:
        """Replace this tag with another tag or string.

        Best effort: does nothing when the tag has no parent or is not
        found among its parent's contents.
        """
        if not self.parent:
            return
        try:
            index = self._position(self)
        except ValueError:
            # Tree is inconsistent; keep the original best-effort behaviour.
            return
        parent = self.parent
        parent.contents[index] = self._attach(new_tag, parent)
        self.parent = None

    def wrap(self, wrapper_tag: 'Tag') -> 'Tag':
        """Wrap this tag in another tag and return the wrapper.

        Raises:
            ValueError: If the tag has a parent but is not among its contents.
        """
        if self.parent:
            idx = self._position(self)
            self.parent.contents[idx] = wrapper_tag
            wrapper_tag.parent = self.parent
        else:
            wrapper_tag.parent = None
        wrapper_tag.contents.append(self)
        self.parent = wrapper_tag
        return wrapper_tag

    def unwrap(self) -> None:
        """Remove this tag but keep its contents in the parent.

        A tag without a parent is left untouched.

        Raises:
            ValueError: If the tag has a parent but is not among its contents.
        """
        if self.parent:
            parent = self.parent
            idx = self._position(self)
            moved = [self._attach(child, parent) for child in self.contents]
            # One slice assignment swaps this tag for its children in place.
            parent.contents[idx:idx + 1] = moved
            self.parent = None
            self.contents = []

    def insert_before(self, new_element: 'Tag') -> None:
        """Insert a tag or string immediately before this tag.

        Raises:
            ValueError: If the tag has a parent but is not among its contents.
        """
        if self.parent:
            idx = self._position(self)
            self.parent.contents.insert(idx, self._attach(new_element, self.parent))

    def insert_after(self, new_element: 'Tag') -> None:
        """Insert a tag or string immediately after this tag.

        Raises:
            ValueError: If the tag has a parent but is not among its contents.
        """
        if self.parent:
            idx = self._position(self)
            self.parent.contents.insert(idx + 1, self._attach(new_element, self.parent))

    # ------------------------------------------------------------------
    # Navigation
    # ------------------------------------------------------------------
    @property
    def descendants(self):
        """Yield all descendants in document order."""
        for child in self.contents:
            yield child
            if isinstance(child, Tag):
                yield from child.descendants

    @property
    def parents(self):
        """Yield all parents up the tree."""
        current = self.parent
        while current:
            yield current
            current = current.parent

    @property
    def next_element(self):
        """Return the next element in document order, or None at the end."""
        if self.contents:
            return self.contents[0]
        current = self
        while current.parent:
            siblings = current.parent.contents
            idx = self._position(current)
            if idx + 1 < len(siblings):
                return siblings[idx + 1]
            current = current.parent
        return None

    @property
    def previous_element(self):
        """Return the previous element in document order, or None for a detached tag."""
        if not self.parent:
            return None
        idx = self._position(self)
        if idx == 0:
            return self.parent
        # Deepest last descendant of the preceding sibling.
        prev = self.parent.contents[idx - 1]
        while isinstance(prev, Tag) and prev.contents:
            prev = prev.contents[-1]
        return prev

    # ------------------------------------------------------------------
    # Output
    # ------------------------------------------------------------------
    def decode_contents(self, eventual_encoding='utf-8') -> str:
        """
        Decode the contents of the tag to a string.

        Args:
            eventual_encoding (str, optional): Accepted but not used by this
                implementation

        Returns:
            str: Decoded contents
        """
        return ''.join(str(content) for content in self.contents)

    def prettify(self, formatter='minimal') -> str:
        """
        Return a nicely formatted representation of the tag.

        Each tag and each non-tag child is written on its own line, indented
        two spaces per nesting level.  List/tuple attribute values are
        written space-separated rather than as a Python repr.

        Args:
            formatter (str, optional): Accepted but not used by this
                implementation

        Returns:
            str: Prettified tag representation
        """
        lines = []

        def _attr_text(value):
            if isinstance(value, (list, tuple)):
                return ' '.join(str(item) for item in value)
            return value

        def _emit(tag, indent):
            pad = ' ' * indent
            attributes = ''.join(
                f' {k}="{_attr_text(v)}"' for k, v in tag.attrs.items()
            )
            lines.append(f'{pad}<{tag.name}{attributes}>')
            for content in tag.contents:
                if isinstance(content, Tag):
                    _emit(content, indent + 2)
                else:
                    lines.append(' ' * (indent + 2) + str(content))
            lines.append(f'{pad}</{tag.name}>')

        _emit(self, 0)
        return '\n'.join(lines) + '\n'