webscout 8.3.7__py3-none-any.whl → 2025.10.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of webscout might be problematic. Click here for more details.

Files changed (273) hide show
  1. webscout/AIauto.py +250 -250
  2. webscout/AIbase.py +379 -379
  3. webscout/AIutel.py +60 -60
  4. webscout/Bard.py +1012 -1012
  5. webscout/Bing_search.py +417 -417
  6. webscout/DWEBS.py +529 -529
  7. webscout/Extra/Act.md +309 -309
  8. webscout/Extra/GitToolkit/__init__.py +10 -10
  9. webscout/Extra/GitToolkit/gitapi/README.md +110 -110
  10. webscout/Extra/GitToolkit/gitapi/__init__.py +11 -11
  11. webscout/Extra/GitToolkit/gitapi/repository.py +195 -195
  12. webscout/Extra/GitToolkit/gitapi/user.py +96 -96
  13. webscout/Extra/GitToolkit/gitapi/utils.py +61 -61
  14. webscout/Extra/YTToolkit/README.md +375 -375
  15. webscout/Extra/YTToolkit/YTdownloader.py +956 -956
  16. webscout/Extra/YTToolkit/__init__.py +2 -2
  17. webscout/Extra/YTToolkit/transcriber.py +475 -475
  18. webscout/Extra/YTToolkit/ytapi/README.md +44 -44
  19. webscout/Extra/YTToolkit/ytapi/__init__.py +6 -6
  20. webscout/Extra/YTToolkit/ytapi/channel.py +307 -307
  21. webscout/Extra/YTToolkit/ytapi/errors.py +13 -13
  22. webscout/Extra/YTToolkit/ytapi/extras.py +118 -118
  23. webscout/Extra/YTToolkit/ytapi/https.py +88 -88
  24. webscout/Extra/YTToolkit/ytapi/patterns.py +61 -61
  25. webscout/Extra/YTToolkit/ytapi/playlist.py +58 -58
  26. webscout/Extra/YTToolkit/ytapi/pool.py +7 -7
  27. webscout/Extra/YTToolkit/ytapi/query.py +39 -39
  28. webscout/Extra/YTToolkit/ytapi/stream.py +62 -62
  29. webscout/Extra/YTToolkit/ytapi/utils.py +62 -62
  30. webscout/Extra/YTToolkit/ytapi/video.py +232 -232
  31. webscout/Extra/autocoder/__init__.py +9 -9
  32. webscout/Extra/autocoder/autocoder.py +1105 -1105
  33. webscout/Extra/autocoder/autocoder_utiles.py +332 -332
  34. webscout/Extra/gguf.md +429 -429
  35. webscout/Extra/gguf.py +1213 -1213
  36. webscout/Extra/tempmail/README.md +487 -487
  37. webscout/Extra/tempmail/__init__.py +27 -27
  38. webscout/Extra/tempmail/async_utils.py +140 -140
  39. webscout/Extra/tempmail/base.py +160 -160
  40. webscout/Extra/tempmail/cli.py +186 -186
  41. webscout/Extra/tempmail/emailnator.py +84 -84
  42. webscout/Extra/tempmail/mail_tm.py +360 -360
  43. webscout/Extra/tempmail/temp_mail_io.py +291 -291
  44. webscout/Extra/weather.md +281 -281
  45. webscout/Extra/weather.py +193 -193
  46. webscout/Litlogger/README.md +10 -10
  47. webscout/Litlogger/__init__.py +15 -15
  48. webscout/Litlogger/formats.py +13 -13
  49. webscout/Litlogger/handlers.py +121 -121
  50. webscout/Litlogger/levels.py +13 -13
  51. webscout/Litlogger/logger.py +134 -134
  52. webscout/Provider/AISEARCH/Perplexity.py +332 -332
  53. webscout/Provider/AISEARCH/README.md +279 -279
  54. webscout/Provider/AISEARCH/__init__.py +16 -1
  55. webscout/Provider/AISEARCH/felo_search.py +206 -206
  56. webscout/Provider/AISEARCH/genspark_search.py +323 -323
  57. webscout/Provider/AISEARCH/hika_search.py +185 -185
  58. webscout/Provider/AISEARCH/iask_search.py +410 -410
  59. webscout/Provider/AISEARCH/monica_search.py +219 -219
  60. webscout/Provider/AISEARCH/scira_search.py +316 -316
  61. webscout/Provider/AISEARCH/stellar_search.py +177 -177
  62. webscout/Provider/AISEARCH/webpilotai_search.py +255 -255
  63. webscout/Provider/Aitopia.py +314 -314
  64. webscout/Provider/Apriel.py +306 -0
  65. webscout/Provider/ChatGPTClone.py +236 -236
  66. webscout/Provider/ChatSandbox.py +343 -343
  67. webscout/Provider/Cloudflare.py +324 -324
  68. webscout/Provider/Cohere.py +208 -208
  69. webscout/Provider/Deepinfra.py +370 -366
  70. webscout/Provider/ExaAI.py +260 -260
  71. webscout/Provider/ExaChat.py +308 -308
  72. webscout/Provider/Flowith.py +221 -221
  73. webscout/Provider/GMI.py +293 -0
  74. webscout/Provider/Gemini.py +164 -164
  75. webscout/Provider/GeminiProxy.py +167 -167
  76. webscout/Provider/GithubChat.py +371 -372
  77. webscout/Provider/Groq.py +800 -800
  78. webscout/Provider/HeckAI.py +383 -383
  79. webscout/Provider/Jadve.py +282 -282
  80. webscout/Provider/K2Think.py +307 -307
  81. webscout/Provider/Koboldai.py +205 -205
  82. webscout/Provider/LambdaChat.py +423 -423
  83. webscout/Provider/Nemotron.py +244 -244
  84. webscout/Provider/Netwrck.py +248 -248
  85. webscout/Provider/OLLAMA.py +395 -395
  86. webscout/Provider/OPENAI/Cloudflare.py +393 -393
  87. webscout/Provider/OPENAI/FalconH1.py +451 -451
  88. webscout/Provider/OPENAI/FreeGemini.py +296 -296
  89. webscout/Provider/OPENAI/K2Think.py +431 -431
  90. webscout/Provider/OPENAI/NEMOTRON.py +240 -240
  91. webscout/Provider/OPENAI/PI.py +427 -427
  92. webscout/Provider/OPENAI/README.md +959 -959
  93. webscout/Provider/OPENAI/TogetherAI.py +345 -345
  94. webscout/Provider/OPENAI/TwoAI.py +465 -465
  95. webscout/Provider/OPENAI/__init__.py +33 -18
  96. webscout/Provider/OPENAI/base.py +248 -248
  97. webscout/Provider/OPENAI/chatglm.py +528 -0
  98. webscout/Provider/OPENAI/chatgpt.py +592 -592
  99. webscout/Provider/OPENAI/chatgptclone.py +521 -521
  100. webscout/Provider/OPENAI/chatsandbox.py +202 -202
  101. webscout/Provider/OPENAI/deepinfra.py +318 -314
  102. webscout/Provider/OPENAI/e2b.py +1665 -1665
  103. webscout/Provider/OPENAI/exaai.py +420 -420
  104. webscout/Provider/OPENAI/exachat.py +452 -452
  105. webscout/Provider/OPENAI/friendli.py +232 -232
  106. webscout/Provider/OPENAI/{refact.py → gmi.py} +324 -274
  107. webscout/Provider/OPENAI/groq.py +364 -364
  108. webscout/Provider/OPENAI/heckai.py +314 -314
  109. webscout/Provider/OPENAI/llmchatco.py +337 -337
  110. webscout/Provider/OPENAI/netwrck.py +355 -355
  111. webscout/Provider/OPENAI/oivscode.py +290 -290
  112. webscout/Provider/OPENAI/opkfc.py +518 -518
  113. webscout/Provider/OPENAI/pydantic_imports.py +1 -1
  114. webscout/Provider/OPENAI/scirachat.py +535 -535
  115. webscout/Provider/OPENAI/sonus.py +308 -308
  116. webscout/Provider/OPENAI/standardinput.py +442 -442
  117. webscout/Provider/OPENAI/textpollinations.py +340 -340
  118. webscout/Provider/OPENAI/toolbaz.py +419 -416
  119. webscout/Provider/OPENAI/typefully.py +362 -362
  120. webscout/Provider/OPENAI/utils.py +295 -295
  121. webscout/Provider/OPENAI/venice.py +436 -436
  122. webscout/Provider/OPENAI/wisecat.py +387 -387
  123. webscout/Provider/OPENAI/writecream.py +166 -166
  124. webscout/Provider/OPENAI/x0gpt.py +378 -378
  125. webscout/Provider/OPENAI/yep.py +389 -389
  126. webscout/Provider/OpenGPT.py +230 -230
  127. webscout/Provider/Openai.py +243 -243
  128. webscout/Provider/PI.py +405 -405
  129. webscout/Provider/Perplexitylabs.py +430 -430
  130. webscout/Provider/QwenLM.py +272 -272
  131. webscout/Provider/STT/__init__.py +16 -1
  132. webscout/Provider/Sambanova.py +257 -257
  133. webscout/Provider/StandardInput.py +309 -309
  134. webscout/Provider/TTI/README.md +82 -82
  135. webscout/Provider/TTI/__init__.py +33 -18
  136. webscout/Provider/TTI/aiarta.py +413 -413
  137. webscout/Provider/TTI/base.py +136 -136
  138. webscout/Provider/TTI/bing.py +243 -243
  139. webscout/Provider/TTI/gpt1image.py +149 -149
  140. webscout/Provider/TTI/imagen.py +196 -196
  141. webscout/Provider/TTI/infip.py +211 -211
  142. webscout/Provider/TTI/magicstudio.py +232 -232
  143. webscout/Provider/TTI/monochat.py +219 -219
  144. webscout/Provider/TTI/piclumen.py +214 -214
  145. webscout/Provider/TTI/pixelmuse.py +232 -232
  146. webscout/Provider/TTI/pollinations.py +232 -232
  147. webscout/Provider/TTI/together.py +288 -288
  148. webscout/Provider/TTI/utils.py +12 -12
  149. webscout/Provider/TTI/venice.py +367 -367
  150. webscout/Provider/TTS/README.md +192 -192
  151. webscout/Provider/TTS/__init__.py +33 -18
  152. webscout/Provider/TTS/parler.py +110 -110
  153. webscout/Provider/TTS/streamElements.py +333 -333
  154. webscout/Provider/TTS/utils.py +280 -280
  155. webscout/Provider/TeachAnything.py +237 -237
  156. webscout/Provider/TextPollinationsAI.py +310 -310
  157. webscout/Provider/TogetherAI.py +356 -356
  158. webscout/Provider/TwoAI.py +312 -312
  159. webscout/Provider/TypliAI.py +311 -311
  160. webscout/Provider/UNFINISHED/ChatHub.py +208 -208
  161. webscout/Provider/UNFINISHED/ChutesAI.py +313 -313
  162. webscout/Provider/UNFINISHED/GizAI.py +294 -294
  163. webscout/Provider/UNFINISHED/Marcus.py +198 -198
  164. webscout/Provider/UNFINISHED/Qodo.py +477 -477
  165. webscout/Provider/UNFINISHED/VercelAIGateway.py +338 -338
  166. webscout/Provider/UNFINISHED/XenAI.py +324 -324
  167. webscout/Provider/UNFINISHED/Youchat.py +330 -330
  168. webscout/Provider/UNFINISHED/liner.py +334 -0
  169. webscout/Provider/UNFINISHED/liner_api_request.py +262 -262
  170. webscout/Provider/UNFINISHED/puterjs.py +634 -634
  171. webscout/Provider/UNFINISHED/samurai.py +223 -223
  172. webscout/Provider/UNFINISHED/test_lmarena.py +119 -119
  173. webscout/Provider/Venice.py +250 -250
  174. webscout/Provider/VercelAI.py +256 -256
  175. webscout/Provider/WiseCat.py +231 -231
  176. webscout/Provider/WrDoChat.py +366 -366
  177. webscout/Provider/__init__.py +33 -18
  178. webscout/Provider/ai4chat.py +174 -174
  179. webscout/Provider/akashgpt.py +331 -331
  180. webscout/Provider/cerebras.py +446 -446
  181. webscout/Provider/chatglm.py +394 -301
  182. webscout/Provider/cleeai.py +211 -211
  183. webscout/Provider/elmo.py +282 -282
  184. webscout/Provider/geminiapi.py +208 -208
  185. webscout/Provider/granite.py +261 -261
  186. webscout/Provider/hermes.py +263 -263
  187. webscout/Provider/julius.py +223 -223
  188. webscout/Provider/learnfastai.py +309 -309
  189. webscout/Provider/llama3mitril.py +214 -214
  190. webscout/Provider/llmchat.py +243 -243
  191. webscout/Provider/llmchatco.py +290 -290
  192. webscout/Provider/meta.py +801 -801
  193. webscout/Provider/oivscode.py +309 -309
  194. webscout/Provider/scira_chat.py +383 -383
  195. webscout/Provider/searchchat.py +292 -292
  196. webscout/Provider/sonus.py +258 -258
  197. webscout/Provider/toolbaz.py +370 -367
  198. webscout/Provider/turboseek.py +273 -273
  199. webscout/Provider/typefully.py +207 -207
  200. webscout/Provider/yep.py +372 -372
  201. webscout/__init__.py +30 -31
  202. webscout/__main__.py +5 -5
  203. webscout/auth/api_key_manager.py +189 -189
  204. webscout/auth/config.py +175 -175
  205. webscout/auth/models.py +185 -185
  206. webscout/auth/routes.py +664 -664
  207. webscout/auth/simple_logger.py +236 -236
  208. webscout/cli.py +523 -523
  209. webscout/conversation.py +438 -438
  210. webscout/exceptions.py +361 -361
  211. webscout/litagent/Readme.md +298 -298
  212. webscout/litagent/__init__.py +28 -28
  213. webscout/litagent/agent.py +581 -581
  214. webscout/litagent/constants.py +59 -59
  215. webscout/litprinter/__init__.py +58 -58
  216. webscout/models.py +181 -181
  217. webscout/optimizers.py +419 -419
  218. webscout/prompt_manager.py +288 -288
  219. webscout/sanitize.py +1078 -1078
  220. webscout/scout/README.md +401 -401
  221. webscout/scout/__init__.py +8 -8
  222. webscout/scout/core/__init__.py +6 -6
  223. webscout/scout/core/crawler.py +297 -297
  224. webscout/scout/core/scout.py +706 -706
  225. webscout/scout/core/search_result.py +95 -95
  226. webscout/scout/core/text_analyzer.py +62 -62
  227. webscout/scout/core/text_utils.py +277 -277
  228. webscout/scout/core/web_analyzer.py +51 -51
  229. webscout/scout/element.py +599 -599
  230. webscout/scout/parsers/__init__.py +69 -69
  231. webscout/scout/parsers/html5lib_parser.py +172 -172
  232. webscout/scout/parsers/html_parser.py +236 -236
  233. webscout/scout/parsers/lxml_parser.py +178 -178
  234. webscout/scout/utils.py +37 -37
  235. webscout/swiftcli/Readme.md +323 -323
  236. webscout/swiftcli/__init__.py +95 -95
  237. webscout/swiftcli/core/__init__.py +7 -7
  238. webscout/swiftcli/core/cli.py +308 -308
  239. webscout/swiftcli/core/context.py +104 -104
  240. webscout/swiftcli/core/group.py +241 -241
  241. webscout/swiftcli/decorators/__init__.py +28 -28
  242. webscout/swiftcli/decorators/command.py +221 -221
  243. webscout/swiftcli/decorators/options.py +220 -220
  244. webscout/swiftcli/decorators/output.py +302 -302
  245. webscout/swiftcli/exceptions.py +21 -21
  246. webscout/swiftcli/plugins/__init__.py +9 -9
  247. webscout/swiftcli/plugins/base.py +135 -135
  248. webscout/swiftcli/plugins/manager.py +269 -269
  249. webscout/swiftcli/utils/__init__.py +59 -59
  250. webscout/swiftcli/utils/formatting.py +252 -252
  251. webscout/swiftcli/utils/parsing.py +267 -267
  252. webscout/update_checker.py +117 -117
  253. webscout/version.py +1 -1
  254. webscout/webscout_search.py +1183 -1183
  255. webscout/webscout_search_async.py +649 -649
  256. webscout/yep_search.py +346 -346
  257. webscout/zeroart/README.md +89 -89
  258. webscout/zeroart/__init__.py +134 -134
  259. webscout/zeroart/base.py +66 -66
  260. webscout/zeroart/effects.py +100 -100
  261. webscout/zeroart/fonts.py +1238 -1238
  262. {webscout-8.3.7.dist-info → webscout-2025.10.11.dist-info}/METADATA +937 -937
  263. webscout-2025.10.11.dist-info/RECORD +300 -0
  264. webscout/Provider/AISEARCH/DeepFind.py +0 -254
  265. webscout/Provider/OPENAI/Qwen3.py +0 -303
  266. webscout/Provider/OPENAI/qodo.py +0 -630
  267. webscout/Provider/OPENAI/xenai.py +0 -514
  268. webscout/tempid.py +0 -134
  269. webscout-8.3.7.dist-info/RECORD +0 -301
  270. {webscout-8.3.7.dist-info → webscout-2025.10.11.dist-info}/WHEEL +0 -0
  271. {webscout-8.3.7.dist-info → webscout-2025.10.11.dist-info}/entry_points.txt +0 -0
  272. {webscout-8.3.7.dist-info → webscout-2025.10.11.dist-info}/licenses/LICENSE.md +0 -0
  273. {webscout-8.3.7.dist-info → webscout-2025.10.11.dist-info}/top_level.txt +0 -0
webscout/scout/element.py CHANGED
@@ -1,599 +1,599 @@
1
- """
2
- Scout Element Module - Advanced HTML Element Representation
3
- """
4
-
5
- import re
6
- from typing import Any, Dict, List, Optional, Union
7
-
8
-
9
- class NavigableString(str):
10
- """
11
- A string that knows its place in the document tree.
12
- Mimics BeautifulSoup's NavigableString for better compatibility.
13
- """
14
- def __new__(cls, text: str):
15
- """
16
- Create a new NavigableString instance.
17
-
18
- Args:
19
- text (str): String content
20
- """
21
- return str.__new__(cls, text)
22
-
23
- def __init__(self, text: str):
24
- """
25
- Initialize a navigable string.
26
-
27
- Args:
28
- text (str): String content
29
- """
30
- self.parent = None
31
-
32
- def __repr__(self):
33
- """String representation."""
34
- return f"NavigableString({super().__repr__()})"
35
-
36
- def __add__(self, other):
37
- """
38
- Allow concatenation of NavigableString with other strings.
39
-
40
- Args:
41
- other (str): String to concatenate
42
-
43
- Returns:
44
- str: Concatenated string
45
- """
46
- return str(self) + str(other)
47
-
48
- def strip(self, chars=None):
49
- """
50
- Strip whitespace or specified characters.
51
-
52
- Args:
53
- chars (str, optional): Characters to strip
54
-
55
- Returns:
56
- str: Stripped string
57
- """
58
- return NavigableString(super().strip(chars))
59
-
60
- class Tag:
61
- """
62
- Represents an HTML tag with advanced traversal and manipulation capabilities.
63
- Enhanced to closely mimic BeautifulSoup's Tag class.
64
- """
65
- def __init__(self, name: str, attrs: Dict[str, str] = None):
66
- """
67
- Initialize a Tag with name and attributes.
68
-
69
- Args:
70
- name (str): Tag name
71
- attrs (dict, optional): Tag attributes
72
- """
73
- self.name = name
74
- self.attrs = attrs or {}
75
- self.contents = []
76
- self.parent = None
77
- self.string = None # For single string content
78
-
79
- def __str__(self):
80
- """String representation of the tag."""
81
- return self.decode_contents()
82
-
83
- def __repr__(self):
84
- """Detailed representation of the tag."""
85
- return f"<{self.name} {self.attrs}>"
86
-
87
- def __call__(self, *args, **kwargs):
88
- """
89
- Allows calling find_all directly on the tag.
90
- Mimics BeautifulSoup's behavior.
91
- """
92
- return self.find_all(*args, **kwargs)
93
-
94
- def __contains__(self, item):
95
- """
96
- Check if an item is in the tag's contents.
97
-
98
- Args:
99
- item: Item to search for
100
-
101
- Returns:
102
- bool: True if item is in contents, False otherwise
103
- """
104
- return item in self.contents
105
-
106
- def __getitem__(self, key):
107
- """
108
- Get an attribute value using dictionary-like access.
109
-
110
- Args:
111
- key (str): Attribute name
112
-
113
- Returns:
114
- Any: Attribute value
115
- """
116
- return self.attrs[key]
117
-
118
- def __iter__(self):
119
- """
120
- Iterate through tag's contents.
121
-
122
- Returns:
123
- Iterator: Contents of the tag
124
- """
125
- return iter(self.contents)
126
-
127
- def __eq__(self, other):
128
- """
129
- Compare tags based on name and attributes.
130
-
131
- Args:
132
- other (Tag): Tag to compare
133
-
134
- Returns:
135
- bool: True if tags are equivalent
136
- """
137
- if not isinstance(other, Tag):
138
- return False
139
- return (
140
- self.name == other.name and
141
- self.attrs == other.attrs and
142
- str(self) == str(other)
143
- )
144
-
145
- def __hash__(self):
146
- """
147
- Generate a hash for the tag.
148
-
149
- Returns:
150
- int: Hash value
151
- """
152
- return hash((self.name, frozenset(self.attrs.items()), str(self)))
153
-
154
- def find(self, name=None, attrs={}, recursive=True, text=None, limit=None, class_=None, **kwargs) -> Optional['Tag']:
155
- """
156
- Find the first matching child element.
157
- Enhanced with more flexible matching.
158
-
159
- Args:
160
- name (str, optional): Tag name to search for
161
- attrs (dict, optional): Attributes to match
162
- recursive (bool, optional): Search recursively
163
- text (str, optional): Text content to match
164
-
165
- Returns:
166
- Tag or None: First matching element
167
- """
168
- # Merge class_ with attrs['class'] if both are present
169
- attrs = dict(attrs) if attrs else {}
170
- if class_ is not None:
171
- if 'class' in attrs:
172
- # Merge both
173
- if isinstance(attrs['class'], list):
174
- class_list = attrs['class']
175
- else:
176
- class_list = [cls.strip() for cls in re.split(r'[ ,]+', str(attrs['class'])) if cls.strip()]
177
- if isinstance(class_, list):
178
- class_list += class_
179
- else:
180
- class_list += [cls.strip() for cls in re.split(r'[ ,]+', str(class_)) if cls.strip()]
181
- attrs['class'] = class_list
182
- else:
183
- attrs['class'] = class_
184
- results = self.find_all(name, attrs, recursive, text, limit=1, **kwargs)
185
- return results[0] if results else None
186
-
187
- def find_all(self, name=None, attrs={}, recursive=True, text=None, limit=None, class_=None, **kwargs) -> List['Tag']:
188
- """
189
- Find all matching child elements.
190
- Enhanced with more flexible matching and BeautifulSoup-like features.
191
-
192
- Args:
193
- name (str, optional): Tag name to search for
194
- attrs (dict, optional): Attributes to match
195
- recursive (bool, optional): Search recursively
196
- text (str, optional): Text content to match
197
- limit (int, optional): Maximum number of results
198
-
199
- Returns:
200
- List[Tag]: List of matching elements
201
- """
202
- results = []
203
-
204
- def _match(tag):
205
- # Check tag name with case-insensitive and regex support
206
- if name:
207
- if isinstance(name, str):
208
- if tag.name.lower() != name.lower():
209
- return False
210
- elif isinstance(name, re.Pattern):
211
- if not name.search(tag.name):
212
- return False
213
-
214
- # Check attributes with more flexible matching
215
- for k, v in attrs.items():
216
- if k == 'class':
217
- tag_classes = tag.get('class', [])
218
- # Support multiple classes separated by space or comma
219
- if isinstance(v, str):
220
- v_classes = [cls.strip() for cls in re.split(r'[ ,]+', v) if cls.strip()]
221
- if not all(cls in tag_classes for cls in v_classes):
222
- return False
223
- elif isinstance(v, list):
224
- if not all(cls in tag_classes for cls in v):
225
- return False
226
- else:
227
- return False
228
- elif k == 'id':
229
- if tag.get('id') != v:
230
- return False
231
- else:
232
- # Regex or exact match for other attributes
233
- tag_attr = tag.attrs.get(k)
234
- if v is True:
235
- if tag_attr is None:
236
- return False
237
- elif isinstance(v, re.Pattern):
238
- if tag_attr is None or not v.search(str(tag_attr)):
239
- return False
240
- elif tag_attr != v:
241
- return False
242
-
243
- # Check text content
244
- if text:
245
- tag_text = tag.get_text(strip=True)
246
- if isinstance(text, str) and text.lower() not in tag_text.lower():
247
- return False
248
- elif isinstance(text, re.Pattern) and not text.search(tag_text):
249
- return False
250
-
251
- return True
252
-
253
- def _search(element):
254
- if _match(element):
255
- results.append(element)
256
- if limit and len(results) == limit:
257
- return
258
-
259
- if recursive:
260
- for child in element.contents:
261
- if isinstance(child, Tag):
262
- _search(child)
263
-
264
- _search(self)
265
- return results
266
-
267
- def select(self, selector: str) -> List['Tag']:
268
- """
269
- Select elements using CSS selector.
270
- Enhanced to support more complex selectors.
271
-
272
- Args:
273
- selector (str): CSS selector string
274
-
275
- Returns:
276
- List[Tag]: List of matching elements
277
- """
278
- # More advanced CSS selector parsing
279
- # This is a simplified implementation and might need more robust parsing
280
- parts = re.split(r'\s+', selector.strip())
281
- results = []
282
-
283
- def _match_selector(tag, selector_part):
284
- # Support more complex selectors
285
- if selector_part.startswith('.'):
286
- # Class selector
287
- return selector_part[1:] in tag.get('class', [])
288
- elif selector_part.startswith('#'):
289
- # ID selector
290
- return tag.get('id') == selector_part[1:]
291
- elif '[' in selector_part and ']' in selector_part:
292
- # Attribute selector
293
- attr_match = re.match(r'(\w+)\[([^=]+)(?:=(.+))?\]', selector_part)
294
- if attr_match:
295
- tag_name, attr, value = attr_match.groups()
296
- if tag_name and tag.name != tag_name:
297
- return False
298
- if value:
299
- return tag.get(attr) == value.strip("'\"")
300
- return attr in tag.attrs
301
- else:
302
- # Tag selector
303
- return tag.name == selector_part
304
-
305
- def _recursive_select(element, selector_parts):
306
- if not selector_parts:
307
- results.append(element)
308
- return
309
-
310
- current_selector = selector_parts[0]
311
- remaining_selectors = selector_parts[1:]
312
-
313
- if _match_selector(element, current_selector):
314
- if not remaining_selectors:
315
- results.append(element)
316
- else:
317
- for child in element.contents:
318
- if isinstance(child, Tag):
319
- _recursive_select(child, remaining_selectors)
320
-
321
- for child in self.contents:
322
- if isinstance(child, Tag):
323
- _recursive_select(child, parts)
324
-
325
- return results
326
-
327
- def select_one(self, selector: str) -> Optional['Tag']:
328
- """
329
- Select the first element matching the CSS selector.
330
-
331
- Args:
332
- selector (str): CSS selector string
333
-
334
- Returns:
335
- Tag or None: First matching element
336
- """
337
- results = self.select(selector)
338
- return results[0] if results else None
339
-
340
- def get_text(self, separator=' ', strip=False, types=None) -> str:
341
- """
342
- Extract text from the tag and its descendants.
343
- Enhanced to support more flexible text extraction.
344
-
345
- Args:
346
- separator (str, optional): Text separator
347
- strip (bool, optional): Strip whitespace
348
- types (list, optional): Types of content to extract
349
-
350
- Returns:
351
- str: Extracted text
352
- """
353
- texts = []
354
- for content in self.contents:
355
- # Support filtering by content type
356
- if types is None or type(content) in types:
357
- if isinstance(content, NavigableString):
358
- texts.append(str(content))
359
- elif isinstance(content, Tag):
360
- texts.append(content.get_text(separator, strip))
361
-
362
- text = separator.join(texts)
363
- text = re.sub(r'\n\n+', '\n', text) # Replace multiple newlines with single newlines
364
- return text.strip() if strip else text
365
-
366
- def find_text(self, pattern: Union[str, re.Pattern], **kwargs) -> Optional[str]:
367
- """
368
- Find the first text matching a pattern.
369
-
370
- Args:
371
- pattern (str or re.Pattern): Pattern to match
372
- **kwargs: Additional arguments for get_text()
373
-
374
- Returns:
375
- str or None: First matching text
376
- """
377
- text = self.get_text(**kwargs)
378
-
379
- if isinstance(pattern, str):
380
- return pattern if pattern in text else None
381
- elif isinstance(pattern, re.Pattern):
382
- match = pattern.search(text)
383
- return match.group(0) if match else None
384
-
385
- def replace_text(self, old: Union[str, re.Pattern], new: str, **kwargs) -> str:
386
- """
387
- Replace text matching a pattern.
388
-
389
- Args:
390
- old (str or re.Pattern): Pattern to replace
391
- new (str): Replacement text
392
- **kwargs: Additional arguments for get_text()
393
-
394
- Returns:
395
- str: Modified text
396
- """
397
- text = self.get_text(**kwargs)
398
-
399
- if isinstance(old, str):
400
- return text.replace(old, new)
401
- elif isinstance(old, re.Pattern):
402
- return old.sub(new, text)
403
-
404
- def get(self, key: str, default: Any = None) -> Any:
405
- """
406
- Get an attribute value.
407
-
408
- Args:
409
- key (str): Attribute name
410
- default (Any, optional): Default value if attribute not found
411
-
412
- Returns:
413
- Any: Attribute value or default
414
- """
415
- return self.attrs.get(key, default)
416
-
417
- def decompose(self) -> None:
418
- """Remove the tag and its contents from the document."""
419
- if self.parent:
420
- self.parent.contents.remove(self)
421
-
422
- def extract(self) -> 'Tag':
423
- """
424
- Remove the tag from the document and return it.
425
-
426
- Returns:
427
- Tag: Extracted tag
428
- """
429
- self.decompose()
430
- return self
431
-
432
- def clear(self) -> None:
433
- """Remove all contents of the tag."""
434
- self.contents.clear()
435
-
436
- @property
437
- def string(self):
438
- """
439
- Get the string content of the tag.
440
- Returns the combined text of the tag's contents.
441
- """
442
- return self.get_text()
443
-
444
- @string.setter
445
- def string(self, value):
446
- """
447
- Set the string content of the tag.
448
- Clears existing contents and sets new string value.
449
-
450
- Args:
451
- value (str): New string content
452
- """
453
- self.clear()
454
- if value is not None:
455
- self.append(value)
456
-
457
- def append(self, new_child: Union['Tag', NavigableString, str]) -> None:
458
- """Append a new child to this tag with error handling."""
459
- if isinstance(new_child, str):
460
- new_child = NavigableString(new_child)
461
- if hasattr(new_child, 'parent'):
462
- new_child.parent = self
463
- self.contents.append(new_child)
464
-
465
- def insert(self, index: int, new_child: Union['Tag', NavigableString, str]) -> None:
466
- """Insert a new child at the given index with error handling."""
467
- if isinstance(new_child, str):
468
- new_child = NavigableString(new_child)
469
- if hasattr(new_child, 'parent'):
470
- new_child.parent = self
471
- self.contents.insert(index, new_child)
472
-
473
- def replace_with(self, new_tag: 'Tag') -> None:
474
- """Replace this tag with another tag with error handling."""
475
- if self.parent:
476
- try:
477
- index = self.parent.contents.index(self)
478
- self.parent.contents[index] = new_tag
479
- new_tag.parent = self.parent
480
- except ValueError:
481
- pass
482
-
483
- def wrap(self, wrapper_tag: 'Tag') -> 'Tag':
484
- """Wrap this tag in another tag."""
485
- if self.parent:
486
- idx = self.parent.contents.index(self)
487
- self.parent.contents[idx] = wrapper_tag
488
- wrapper_tag.parent = self.parent
489
- else:
490
- wrapper_tag.parent = None
491
- wrapper_tag.contents.append(self)
492
- self.parent = wrapper_tag
493
- return wrapper_tag
494
-
495
- def unwrap(self) -> None:
496
- """Remove this tag but keep its contents in the parent."""
497
- if self.parent:
498
- idx = self.parent.contents.index(self)
499
- for child in reversed(self.contents):
500
- child.parent = self.parent
501
- self.parent.contents.insert(idx, child)
502
- self.parent.contents.remove(self)
503
- self.parent = None
504
- self.contents = []
505
-
506
- def insert_before(self, new_element: 'Tag') -> None:
507
- """Insert a tag or string immediately before this tag."""
508
- if self.parent:
509
- idx = self.parent.contents.index(self)
510
- new_element.parent = self.parent
511
- self.parent.contents.insert(idx, new_element)
512
-
513
- def insert_after(self, new_element: 'Tag') -> None:
514
- """Insert a tag or string immediately after this tag."""
515
- if self.parent:
516
- idx = self.parent.contents.index(self)
517
- new_element.parent = self.parent
518
- self.parent.contents.insert(idx + 1, new_element)
519
-
520
- @property
521
- def descendants(self):
522
- """Yield all descendants in document order."""
523
- for child in self.contents:
524
- yield child
525
- if isinstance(child, Tag):
526
- yield from child.descendants
527
-
528
- @property
529
- def parents(self):
530
- """Yield all parents up the tree."""
531
- current = self.parent
532
- while current:
533
- yield current
534
- current = current.parent
535
-
536
- @property
537
- def next_element(self):
538
- """Return the next element in document order."""
539
- if self.contents:
540
- return self.contents[0]
541
- current = self
542
- while current.parent:
543
- idx = current.parent.contents.index(current)
544
- if idx + 1 < len(current.parent.contents):
545
- return current.parent.contents[idx + 1]
546
- current = current.parent
547
- return None
548
-
549
- @property
550
- def previous_element(self):
551
- """Return the previous element in document order."""
552
- if not self.parent:
553
- return None
554
- idx = self.parent.contents.index(self)
555
- if idx > 0:
556
- prev = self.parent.contents[idx - 1]
557
- while isinstance(prev, Tag) and prev.contents:
558
- prev = prev.contents[-1]
559
- return prev
560
- return self.parent
561
-
562
- def decode_contents(self, eventual_encoding='utf-8') -> str:
563
- """
564
- Decode the contents of the tag to a string.
565
-
566
- Args:
567
- eventual_encoding (str, optional): Encoding to use
568
-
569
- Returns:
570
- str: Decoded contents
571
- """
572
- return ''.join(str(content) for content in self.contents)
573
-
574
- def prettify(self, formatter='minimal') -> str:
575
- """
576
- Return a nicely formatted representation of the tag.
577
-
578
- Args:
579
- formatter (str, optional): Formatting style
580
-
581
- Returns:
582
- str: Prettified tag representation
583
- """
584
- def _prettify(tag, indent=0):
585
- result = ' ' * indent + f'<{tag.name}'
586
- for k, v in tag.attrs.items():
587
- result += f' {k}="{v}"'
588
- result += '>\n'
589
-
590
- for content in tag.contents:
591
- if isinstance(content, Tag):
592
- result += _prettify(content, indent + 2)
593
- else:
594
- result += ' ' * (indent + 2) + str(content) + '\n'
595
-
596
- result += ' ' * indent + f'</{tag.name}>\n'
597
- return result
598
-
599
- return _prettify(self)
1
+ """
2
+ Scout Element Module - Advanced HTML Element Representation
3
+ """
4
+
5
+ import re
6
+ from typing import Any, Dict, List, Optional, Union
7
+
8
+
9
class NavigableString(str):
    """
    Text node that remembers which element contains it.

    Behaves like a regular ``str`` and additionally carries a ``parent``
    attribute, mirroring BeautifulSoup's NavigableString.
    """

    def __new__(cls, text: str):
        """
        Build the immutable string value.

        Args:
            text (str): String content
        """
        instance = super().__new__(cls, text)
        return instance

    def __init__(self, text: str):
        """
        Set up tree bookkeeping for the new string.

        Args:
            text (str): String content (already consumed by ``__new__``)
        """
        # A freshly created string is not attached to any tag yet.
        self.parent = None

    def __repr__(self):
        """Debug representation wrapping the plain string repr."""
        plain_repr = str.__repr__(self)
        return "NavigableString(" + plain_repr + ")"

    def __add__(self, other):
        """
        Concatenate with another value, yielding a plain string.

        Args:
            other (str): Value to append; it is converted with ``str()``

        Returns:
            str: Concatenated string
        """
        left, right = str(self), str(other)
        return left + right

    def strip(self, chars=None):
        """
        Strip whitespace or the given characters from both ends.

        Args:
            chars (str, optional): Characters to strip

        Returns:
            NavigableString: Stripped copy (not attached to any parent)
        """
        trimmed = str.strip(self, chars)
        return NavigableString(trimmed)
59
+
60
class Tag:
    """
    Represents an HTML tag with advanced traversal and manipulation capabilities.
    Enhanced to closely mimic BeautifulSoup's Tag class.

    Attributes are kept in ``attrs``, children (tags and strings) in
    ``contents`` and the enclosing tag, if any, in ``parent``.

    Note:
        ``__eq__`` compares tags by value (name, attributes and rendered
        contents), so two distinct siblings can compare equal. Every
        operation that needs the position of a node among its siblings
        therefore searches by identity rather than with ``list.index`` or
        ``list.remove``, which would pick the first equal-looking sibling.
    """

    # One simple selector inside a compound CSS selector such as
    # ``div.card#main[data-id=3]``. The tag alternative is anchored with
    # ``^`` so a tag name is only accepted at the start of the compound.
    _SELECTOR_TOKEN = re.compile(
        r"""
          (?P<tag>^(?:\*|[\w-]+))
        | \#(?P<id>[\w-]+)
        | \.(?P<cls>[\w-]+)
        | \[(?P<attr>[^\]=]+)(?:=(?P<val>[^\]]*))?\]
        """,
        re.VERBOSE,
    )

    def __init__(self, name: str, attrs: Optional[Dict[str, Any]] = None):
        """
        Initialize a Tag with name and attributes.

        Args:
            name (str): Tag name
            attrs (dict, optional): Tag attributes
        """
        self.name = name
        self.attrs = attrs or {}
        self.contents = []
        self.parent = None
        # ``string`` is a property computed from ``contents``; nothing to
        # initialise for it here.

    def __str__(self):
        """String representation of the tag (its rendered contents)."""
        return self.decode_contents()

    def __repr__(self):
        """Detailed representation of the tag."""
        return f"<{self.name} {self.attrs}>"

    def __call__(self, *args, **kwargs):
        """
        Allows calling find_all directly on the tag.
        Mimics BeautifulSoup's behavior.
        """
        return self.find_all(*args, **kwargs)

    def __contains__(self, item):
        """
        Check if an item is in the tag's contents.

        Args:
            item: Item to search for

        Returns:
            bool: True if item is in contents, False otherwise
        """
        return item in self.contents

    def __getitem__(self, key):
        """
        Get an attribute value using dictionary-like access.

        Args:
            key (str): Attribute name

        Returns:
            Any: Attribute value

        Raises:
            KeyError: If the attribute is not set
        """
        return self.attrs[key]

    def __iter__(self):
        """
        Iterate through tag's contents.

        Returns:
            Iterator: Contents of the tag
        """
        return iter(self.contents)

    def __eq__(self, other):
        """
        Compare tags based on name, attributes and rendered contents.

        Args:
            other (Tag): Tag to compare

        Returns:
            bool: True if tags are equivalent
        """
        if not isinstance(other, Tag):
            return NotImplemented
        return (
            self.name == other.name and
            self.attrs == other.attrs and
            str(self) == str(other)
        )

    def __hash__(self):
        """
        Generate a hash consistent with ``__eq__``.

        Returns:
            int: Hash value
        """
        # Attribute values may be lists (e.g. class lists), which are not
        # hashable; freeze them into tuples first.
        frozen_attrs = frozenset(
            (key, tuple(value) if isinstance(value, list) else value)
            for key, value in self.attrs.items()
        )
        return hash((self.name, frozen_attrs, str(self)))

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _identity_index(items, target) -> int:
        """Return the position of ``target`` in ``items``, compared by identity."""
        for position, item in enumerate(items):
            if item is target:
                return position
        raise ValueError("element is not in its parent's contents")

    @staticmethod
    def _class_list(value) -> list:
        """Normalise a class value (None, string or collection) to a list."""
        if value is None:
            return []
        if isinstance(value, (list, tuple, set, frozenset)):
            return list(value)
        return [cls for cls in re.split(r'[\s,]+', str(value)) if cls]

    @classmethod
    def _build_filters(cls, attrs, class_, extra) -> dict:
        """Merge ``attrs``, keyword filters and ``class_`` into one new dict."""
        filters = dict(attrs) if attrs else {}
        filters.update(extra)
        if class_ is not None:
            if 'class' in filters:
                # Build a fresh list so the caller's objects are never mutated.
                filters['class'] = cls._class_list(filters['class']) + cls._class_list(class_)
            else:
                filters['class'] = class_
        return filters

    @classmethod
    def _matches_filters(cls, tag, name, filters, text) -> bool:
        """Tell whether ``tag`` satisfies the name, attribute and text filters."""
        if name:
            if isinstance(name, str):
                if tag.name.lower() != name.lower():
                    return False
            elif isinstance(name, re.Pattern):
                if not name.search(tag.name):
                    return False

        for key, wanted in filters.items():
            if key == 'class':
                if not isinstance(wanted, (str, list, tuple, set, frozenset)):
                    return False
                present = cls._class_list(tag.get('class'))
                if not all(item in present for item in cls._class_list(wanted)):
                    return False
                continue
            actual = tag.attrs.get(key)
            if wanted is True:
                # True means "the attribute merely has to exist".
                if actual is None:
                    return False
            elif isinstance(wanted, re.Pattern):
                if actual is None or not wanted.search(str(actual)):
                    return False
            elif actual != wanted:
                return False

        if text:
            tag_text = tag.get_text(strip=True)
            if isinstance(text, str) and text.lower() not in tag_text.lower():
                return False
            elif isinstance(text, re.Pattern) and not text.search(tag_text):
                return False

        return True

    @staticmethod
    def _parse_selector(selector: str) -> list:
        """
        Split a selector into comma-separated groups of (combinator, compound) steps.

        Combinators are ``' '`` (descendant) and ``'>'`` (child). Values
        containing whitespace, ``,`` or ``>`` inside ``[...]`` are not supported.
        """
        groups = []
        for group in selector.split(','):
            steps = []
            combinator = ' '
            for token in group.replace('>', ' > ').split():
                if token == '>':
                    combinator = '>'
                    continue
                steps.append((combinator, token))
                combinator = ' '
            if steps:
                groups.append(steps)
        return groups

    @classmethod
    def _matches_compound(cls, tag, compound: str) -> bool:
        """Tell whether ``tag`` satisfies every simple selector in ``compound``."""
        end = len(compound)
        if not end:
            return False
        pos = 0
        while pos < end:
            token = cls._SELECTOR_TOKEN.match(compound, pos)
            if token is None:
                # Unsupported syntax: treat as "does not match".
                return False
            if token.group('tag') is not None:
                wanted = token.group('tag')
                if wanted != '*' and tag.name.lower() != wanted.lower():
                    return False
            elif token.group('id') is not None:
                if tag.get('id') != token.group('id'):
                    return False
            elif token.group('cls') is not None:
                if token.group('cls') not in cls._class_list(tag.get('class')):
                    return False
            else:
                attr = token.group('attr').strip()
                value = token.group('val')
                if value:
                    if tag.get(attr) != value.strip("'\""):
                        return False
                elif attr not in tag.attrs:
                    return False
            pos = token.end()
        return True

    @classmethod
    def _ancestors_satisfy(cls, ancestors, upto, steps, index) -> bool:
        """
        Check the steps before ``index`` against ``ancestors[:upto]``.

        ``steps[index]`` is assumed to be matched by an element whose
        ancestor chain (outermost first) is ``ancestors[:upto]``.
        """
        if index == 0:
            return True
        combinator = steps[index][0]
        compound = steps[index - 1][1]
        if combinator == '>':
            candidates = [upto - 1] if upto > 0 else []
        else:
            candidates = range(upto - 1, -1, -1)
        return any(
            cls._matches_compound(ancestors[pos], compound)
            and cls._ancestors_satisfy(ancestors, pos, steps, index - 1)
            for pos in candidates
        )

    @classmethod
    def _matches_chain(cls, element, ancestors, steps) -> bool:
        """Tell whether ``element`` (with its ancestor chain) matches a selector group."""
        if not cls._matches_compound(element, steps[-1][1]):
            return False
        return cls._ancestors_satisfy(ancestors, len(ancestors), steps, len(steps) - 1)

    # ------------------------------------------------------------------
    # Searching
    # ------------------------------------------------------------------

    def find(self, name=None, attrs=None, recursive=True, text=None, limit=None, class_=None, **kwargs) -> Optional['Tag']:
        """
        Find the first matching element.

        Args:
            name (str or re.Pattern, optional): Tag name to search for
            attrs (dict, optional): Attributes to match
            recursive (bool, optional): Search recursively
            text (str or re.Pattern, optional): Text content to match
            limit (int, optional): Ignored; the search always stops at the first match
            class_ (str or list, optional): Class name(s) the element must carry
            **kwargs: Additional attribute filters

        Returns:
            Tag or None: First matching element
        """
        results = self.find_all(name, attrs, recursive, text, limit=1, class_=class_, **kwargs)
        return results[0] if results else None

    def find_all(self, name=None, attrs=None, recursive=True, text=None, limit=None, class_=None, **kwargs) -> List['Tag']:
        """
        Find all matching elements in document order.

        The tag itself is tested first, followed by its descendants (or only
        its direct children when ``recursive`` is False).

        Args:
            name (str or re.Pattern, optional): Tag name to search for
                (strings are compared case-insensitively)
            attrs (dict, optional): Attributes to match; a value of True
                requires the attribute to exist, a compiled pattern is searched
            recursive (bool, optional): Search all descendants instead of
                only direct children
            text (str or re.Pattern, optional): Text content to match
            limit (int, optional): Maximum number of results
            class_ (str or list, optional): Class name(s) the element must
                carry; merged with ``attrs['class']`` when both are given
            **kwargs: Additional attribute filters; ``string`` is accepted
                as an alias for ``text``

        Returns:
            List[Tag]: List of matching elements
        """
        extra = dict(kwargs)
        if text is None and 'string' in extra:
            text = extra.pop('string')
        filters = self._build_filters(attrs, class_, extra)

        def _candidates():
            yield self
            pool = self.descendants if recursive else self.contents
            for node in pool:
                if isinstance(node, Tag):
                    yield node

        results = []
        for candidate in _candidates():
            if self._matches_filters(candidate, name, filters, text):
                results.append(candidate)
                # Stop the whole traversal once the limit is reached.
                if limit and len(results) >= limit:
                    break
        return results

    def select(self, selector: str) -> List['Tag']:
        """
        Select descendant elements using a CSS selector.

        Supported syntax: tag names, ``*``, ``.class``, ``#id``, ``[attr]``,
        ``[attr=value]``, compounds of these (``div.card#main``), the
        descendant (space) and child (``>``) combinators, and comma-separated
        selector groups.

        Args:
            selector (str): CSS selector string

        Returns:
            List[Tag]: Matching descendants in document order
        """
        groups = self._parse_selector(selector)
        matches: List['Tag'] = []
        if not groups:
            return matches

        def _visit(element, ancestors):
            for child in element.contents:
                if not isinstance(child, Tag):
                    continue
                if any(self._matches_chain(child, ancestors, steps) for steps in groups):
                    matches.append(child)
                _visit(child, ancestors + [child])

        # The tag itself can act as an ancestor in a chain but is never returned.
        _visit(self, [self])
        return matches

    def select_one(self, selector: str) -> Optional['Tag']:
        """
        Select the first element matching the CSS selector.

        Args:
            selector (str): CSS selector string

        Returns:
            Tag or None: First matching element
        """
        results = self.select(selector)
        return results[0] if results else None

    # ------------------------------------------------------------------
    # Text handling
    # ------------------------------------------------------------------

    def get_text(self, separator=' ', strip=False, types=None) -> str:
        """
        Extract text from the tag and its descendants.

        Args:
            separator (str, optional): Text separator
            strip (bool, optional): Strip whitespace
            types (list, optional): Exact types of direct children to include

        Returns:
            str: Extracted text
        """
        texts = []
        for content in self.contents:
            if types is not None and type(content) not in types:
                continue
            if isinstance(content, Tag):
                texts.append(content.get_text(separator, strip))
            elif isinstance(content, str):
                texts.append(str(content))

        text = separator.join(texts)
        text = re.sub(r'\n\n+', '\n', text)  # Replace multiple newlines with single newlines
        return text.strip() if strip else text

    def find_text(self, pattern: Union[str, re.Pattern], **kwargs) -> Optional[str]:
        """
        Find the first text matching a pattern.

        Args:
            pattern (str or re.Pattern): Pattern to match
            **kwargs: Additional arguments for get_text()

        Returns:
            str or None: First matching text
        """
        text = self.get_text(**kwargs)

        if isinstance(pattern, str):
            return pattern if pattern in text else None
        elif isinstance(pattern, re.Pattern):
            match = pattern.search(text)
            return match.group(0) if match else None
        return None

    def replace_text(self, old: Union[str, re.Pattern], new: str, **kwargs) -> str:
        """
        Return the tag's text with matches of a pattern replaced.

        The tree itself is not modified.

        Args:
            old (str or re.Pattern): Pattern to replace
            new (str): Replacement text
            **kwargs: Additional arguments for get_text()

        Returns:
            str: Modified text
        """
        text = self.get_text(**kwargs)

        if isinstance(old, str):
            return text.replace(old, new)
        elif isinstance(old, re.Pattern):
            return old.sub(new, text)
        return None

    def get(self, key: str, default: Any = None) -> Any:
        """
        Get an attribute value.

        Args:
            key (str): Attribute name
            default (Any, optional): Default value if attribute not found

        Returns:
            Any: Attribute value or default
        """
        return self.attrs.get(key, default)

    # ------------------------------------------------------------------
    # Tree manipulation
    # ------------------------------------------------------------------

    def decompose(self) -> None:
        """
        Remove the tag and its contents from the document.

        Raises:
            ValueError: If the tag is not present in its parent's contents
        """
        if self.parent is not None:
            siblings = self.parent.contents
            del siblings[self._identity_index(siblings, self)]
            self.parent = None

    def extract(self) -> 'Tag':
        """
        Remove the tag from the document and return it.

        Returns:
            Tag: Extracted tag
        """
        self.decompose()
        return self

    def clear(self) -> None:
        """Remove all contents of the tag."""
        for child in self.contents:
            if getattr(child, 'parent', None) is self:
                child.parent = None
        self.contents.clear()

    @property
    def string(self):
        """
        Get the string content of the tag.
        Returns the combined text of the tag's contents.
        """
        return self.get_text()

    @string.setter
    def string(self, value):
        """
        Set the string content of the tag.
        Clears existing contents and sets new string value.

        Args:
            value (str): New string content
        """
        self.clear()
        if value is not None:
            self.append(value)

    def append(self, new_child: Union['Tag', 'NavigableString', str]) -> None:
        """Append a new child to this tag; plain strings become NavigableString."""
        if isinstance(new_child, str):
            new_child = NavigableString(new_child)
        if hasattr(new_child, 'parent'):
            new_child.parent = self
        self.contents.append(new_child)

    def insert(self, index: int, new_child: Union['Tag', 'NavigableString', str]) -> None:
        """Insert a new child at the given index; plain strings become NavigableString."""
        if isinstance(new_child, str):
            new_child = NavigableString(new_child)
        if hasattr(new_child, 'parent'):
            new_child.parent = self
        self.contents.insert(index, new_child)

    def replace_with(self, new_tag: 'Tag') -> None:
        """
        Replace this tag with another tag or string.

        Does nothing when the tag has no parent or is not found in it.
        """
        if self.parent is None:
            return
        siblings = self.parent.contents
        try:
            index = self._identity_index(siblings, self)
        except ValueError:
            # Best effort: a stale parent pointer leaves the tree untouched.
            return
        if isinstance(new_tag, str):
            new_tag = NavigableString(new_tag)
        siblings[index] = new_tag
        new_tag.parent = self.parent
        self.parent = None

    def wrap(self, wrapper_tag: 'Tag') -> 'Tag':
        """
        Wrap this tag in another tag.

        Returns:
            Tag: The wrapper, now holding this tag as its last child
        """
        if self.parent is not None:
            siblings = self.parent.contents
            siblings[self._identity_index(siblings, self)] = wrapper_tag
            wrapper_tag.parent = self.parent
        else:
            wrapper_tag.parent = None
        wrapper_tag.contents.append(self)
        self.parent = wrapper_tag
        return wrapper_tag

    def unwrap(self) -> None:
        """Remove this tag but keep its contents in the parent."""
        if self.parent is None:
            return
        siblings = self.parent.contents
        idx = self._identity_index(siblings, self)
        for child in self.contents:
            if hasattr(child, 'parent'):
                child.parent = self.parent
        # Swap this tag for its children in a single slice assignment so the
        # children keep their order and no equal-looking node is removed.
        siblings[idx:idx + 1] = self.contents
        self.parent = None
        self.contents = []

    def insert_before(self, new_element: 'Tag') -> None:
        """Insert a tag or string immediately before this tag."""
        if self.parent is not None:
            if isinstance(new_element, str):
                new_element = NavigableString(new_element)
            siblings = self.parent.contents
            idx = self._identity_index(siblings, self)
            new_element.parent = self.parent
            siblings.insert(idx, new_element)

    def insert_after(self, new_element: 'Tag') -> None:
        """Insert a tag or string immediately after this tag."""
        if self.parent is not None:
            if isinstance(new_element, str):
                new_element = NavigableString(new_element)
            siblings = self.parent.contents
            idx = self._identity_index(siblings, self)
            new_element.parent = self.parent
            siblings.insert(idx + 1, new_element)

    # ------------------------------------------------------------------
    # Navigation
    # ------------------------------------------------------------------

    @property
    def descendants(self):
        """Yield all descendants in document order."""
        for child in self.contents:
            yield child
            if isinstance(child, Tag):
                yield from child.descendants

    @property
    def parents(self):
        """Yield all parents up the tree."""
        current = self.parent
        while current is not None:
            yield current
            current = current.parent

    @property
    def next_element(self):
        """Return the next element in document order."""
        if self.contents:
            return self.contents[0]
        current = self
        # No children: take the next sibling of the nearest ancestor-or-self
        # that has one.
        while current.parent is not None:
            siblings = current.parent.contents
            idx = self._identity_index(siblings, current)
            if idx + 1 < len(siblings):
                return siblings[idx + 1]
            current = current.parent
        return None

    @property
    def previous_element(self):
        """Return the previous element in document order."""
        if self.parent is None:
            return None
        siblings = self.parent.contents
        idx = self._identity_index(siblings, self)
        if idx == 0:
            return self.parent
        prev = siblings[idx - 1]
        # Descend to the last, deepest node of the preceding sibling's subtree.
        while isinstance(prev, Tag) and prev.contents:
            prev = prev.contents[-1]
        return prev

    # ------------------------------------------------------------------
    # Output
    # ------------------------------------------------------------------

    def decode_contents(self, eventual_encoding='utf-8') -> str:
        """
        Decode the contents of the tag to a string.

        Args:
            eventual_encoding (str, optional): Accepted for API
                compatibility; not used when building the result

        Returns:
            str: Decoded contents
        """
        return ''.join(str(content) for content in self.contents)

    def prettify(self, formatter='minimal') -> str:
        """
        Return a nicely formatted representation of the tag.

        Every tag and every text node is placed on its own line, indented by
        two spaces per nesting level.

        Args:
            formatter (str, optional): Accepted for API compatibility; not
                used when building the result

        Returns:
            str: Prettified tag representation
        """
        lines = []

        def _attr_text(value):
            # Multi-valued attributes (e.g. class lists) are written the way
            # HTML spells them, not as a Python list repr.
            if isinstance(value, (list, tuple)):
                return ' '.join(str(item) for item in value)
            return value

        def _render(tag, indent):
            pad = ' ' * indent
            attrs = ''.join(f' {k}="{_attr_text(v)}"' for k, v in tag.attrs.items())
            lines.append(f'{pad}<{tag.name}{attrs}>')
            for content in tag.contents:
                if isinstance(content, Tag):
                    _render(content, indent + 2)
                else:
                    lines.append(' ' * (indent + 2) + str(content))
            lines.append(f'{pad}</{tag.name}>')

        _render(self, 0)
        # Each emitted line is newline-terminated, including the last one.
        return ''.join(line + '\n' for line in lines)