webscout 8.2.7__py3-none-any.whl → 8.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (281) hide show
  1. webscout/AIauto.py +33 -15
  2. webscout/AIbase.py +96 -37
  3. webscout/AIutel.py +703 -250
  4. webscout/Bard.py +441 -323
  5. webscout/Extra/Act.md +309 -0
  6. webscout/Extra/GitToolkit/__init__.py +10 -0
  7. webscout/Extra/GitToolkit/gitapi/README.md +110 -0
  8. webscout/Extra/GitToolkit/gitapi/__init__.py +12 -0
  9. webscout/Extra/GitToolkit/gitapi/repository.py +195 -0
  10. webscout/Extra/GitToolkit/gitapi/user.py +96 -0
  11. webscout/Extra/GitToolkit/gitapi/utils.py +62 -0
  12. webscout/Extra/YTToolkit/README.md +375 -0
  13. webscout/Extra/YTToolkit/YTdownloader.py +957 -0
  14. webscout/Extra/YTToolkit/__init__.py +3 -0
  15. webscout/Extra/YTToolkit/transcriber.py +476 -0
  16. webscout/Extra/YTToolkit/ytapi/README.md +44 -0
  17. webscout/Extra/YTToolkit/ytapi/__init__.py +6 -0
  18. webscout/Extra/YTToolkit/ytapi/channel.py +307 -0
  19. webscout/Extra/YTToolkit/ytapi/errors.py +13 -0
  20. webscout/Extra/YTToolkit/ytapi/extras.py +118 -0
  21. webscout/Extra/YTToolkit/ytapi/https.py +88 -0
  22. webscout/Extra/YTToolkit/ytapi/patterns.py +61 -0
  23. webscout/Extra/YTToolkit/ytapi/playlist.py +59 -0
  24. webscout/Extra/YTToolkit/ytapi/pool.py +8 -0
  25. webscout/Extra/YTToolkit/ytapi/query.py +40 -0
  26. webscout/Extra/YTToolkit/ytapi/stream.py +63 -0
  27. webscout/Extra/YTToolkit/ytapi/utils.py +62 -0
  28. webscout/Extra/YTToolkit/ytapi/video.py +232 -0
  29. webscout/Extra/__init__.py +7 -0
  30. webscout/Extra/autocoder/__init__.py +9 -0
  31. webscout/Extra/autocoder/autocoder.py +1105 -0
  32. webscout/Extra/autocoder/autocoder_utiles.py +332 -0
  33. webscout/Extra/gguf.md +430 -0
  34. webscout/Extra/gguf.py +684 -0
  35. webscout/Extra/tempmail/README.md +488 -0
  36. webscout/Extra/tempmail/__init__.py +28 -0
  37. webscout/Extra/tempmail/async_utils.py +141 -0
  38. webscout/Extra/tempmail/base.py +161 -0
  39. webscout/Extra/tempmail/cli.py +187 -0
  40. webscout/Extra/tempmail/emailnator.py +84 -0
  41. webscout/Extra/tempmail/mail_tm.py +361 -0
  42. webscout/Extra/tempmail/temp_mail_io.py +292 -0
  43. webscout/Extra/weather.md +281 -0
  44. webscout/Extra/weather.py +194 -0
  45. webscout/Extra/weather_ascii.py +76 -0
  46. webscout/Litlogger/README.md +10 -0
  47. webscout/Litlogger/__init__.py +15 -0
  48. webscout/Litlogger/formats.py +4 -0
  49. webscout/Litlogger/handlers.py +103 -0
  50. webscout/Litlogger/levels.py +13 -0
  51. webscout/Litlogger/logger.py +92 -0
  52. webscout/Provider/AI21.py +177 -0
  53. webscout/Provider/AISEARCH/DeepFind.py +254 -0
  54. webscout/Provider/AISEARCH/Perplexity.py +333 -0
  55. webscout/Provider/AISEARCH/README.md +279 -0
  56. webscout/Provider/AISEARCH/__init__.py +9 -0
  57. webscout/Provider/AISEARCH/felo_search.py +202 -0
  58. webscout/Provider/AISEARCH/genspark_search.py +324 -0
  59. webscout/Provider/AISEARCH/hika_search.py +186 -0
  60. webscout/Provider/AISEARCH/iask_search.py +410 -0
  61. webscout/Provider/AISEARCH/monica_search.py +220 -0
  62. webscout/Provider/AISEARCH/scira_search.py +298 -0
  63. webscout/Provider/AISEARCH/webpilotai_search.py +255 -0
  64. webscout/Provider/Aitopia.py +316 -0
  65. webscout/Provider/AllenAI.py +440 -0
  66. webscout/Provider/Andi.py +228 -0
  67. webscout/Provider/Blackboxai.py +791 -0
  68. webscout/Provider/ChatGPTClone.py +237 -0
  69. webscout/Provider/ChatGPTGratis.py +194 -0
  70. webscout/Provider/ChatSandbox.py +342 -0
  71. webscout/Provider/Cloudflare.py +324 -0
  72. webscout/Provider/Cohere.py +208 -0
  73. webscout/Provider/Deepinfra.py +340 -0
  74. webscout/Provider/ExaAI.py +261 -0
  75. webscout/Provider/ExaChat.py +358 -0
  76. webscout/Provider/Flowith.py +217 -0
  77. webscout/Provider/FreeGemini.py +250 -0
  78. webscout/Provider/Gemini.py +169 -0
  79. webscout/Provider/GithubChat.py +369 -0
  80. webscout/Provider/GizAI.py +295 -0
  81. webscout/Provider/Glider.py +225 -0
  82. webscout/Provider/Groq.py +801 -0
  83. webscout/Provider/HF_space/__init__.py +0 -0
  84. webscout/Provider/HF_space/qwen_qwen2.py +206 -0
  85. webscout/Provider/HeckAI.py +375 -0
  86. webscout/Provider/HuggingFaceChat.py +469 -0
  87. webscout/Provider/Hunyuan.py +283 -0
  88. webscout/Provider/Jadve.py +291 -0
  89. webscout/Provider/Koboldai.py +384 -0
  90. webscout/Provider/LambdaChat.py +411 -0
  91. webscout/Provider/Llama3.py +259 -0
  92. webscout/Provider/MCPCore.py +315 -0
  93. webscout/Provider/Marcus.py +198 -0
  94. webscout/Provider/Nemotron.py +218 -0
  95. webscout/Provider/Netwrck.py +270 -0
  96. webscout/Provider/OLLAMA.py +396 -0
  97. webscout/Provider/OPENAI/BLACKBOXAI.py +766 -0
  98. webscout/Provider/OPENAI/Cloudflare.py +378 -0
  99. webscout/Provider/OPENAI/FreeGemini.py +283 -0
  100. webscout/Provider/OPENAI/NEMOTRON.py +232 -0
  101. webscout/Provider/OPENAI/Qwen3.py +283 -0
  102. webscout/Provider/OPENAI/README.md +952 -0
  103. webscout/Provider/OPENAI/TwoAI.py +357 -0
  104. webscout/Provider/OPENAI/__init__.py +40 -0
  105. webscout/Provider/OPENAI/ai4chat.py +293 -0
  106. webscout/Provider/OPENAI/api.py +969 -0
  107. webscout/Provider/OPENAI/base.py +249 -0
  108. webscout/Provider/OPENAI/c4ai.py +373 -0
  109. webscout/Provider/OPENAI/chatgpt.py +556 -0
  110. webscout/Provider/OPENAI/chatgptclone.py +494 -0
  111. webscout/Provider/OPENAI/chatsandbox.py +173 -0
  112. webscout/Provider/OPENAI/copilot.py +242 -0
  113. webscout/Provider/OPENAI/deepinfra.py +322 -0
  114. webscout/Provider/OPENAI/e2b.py +1414 -0
  115. webscout/Provider/OPENAI/exaai.py +417 -0
  116. webscout/Provider/OPENAI/exachat.py +444 -0
  117. webscout/Provider/OPENAI/flowith.py +162 -0
  118. webscout/Provider/OPENAI/freeaichat.py +359 -0
  119. webscout/Provider/OPENAI/glider.py +326 -0
  120. webscout/Provider/OPENAI/groq.py +364 -0
  121. webscout/Provider/OPENAI/heckai.py +308 -0
  122. webscout/Provider/OPENAI/llmchatco.py +335 -0
  123. webscout/Provider/OPENAI/mcpcore.py +389 -0
  124. webscout/Provider/OPENAI/multichat.py +376 -0
  125. webscout/Provider/OPENAI/netwrck.py +357 -0
  126. webscout/Provider/OPENAI/oivscode.py +287 -0
  127. webscout/Provider/OPENAI/opkfc.py +496 -0
  128. webscout/Provider/OPENAI/pydantic_imports.py +172 -0
  129. webscout/Provider/OPENAI/scirachat.py +477 -0
  130. webscout/Provider/OPENAI/sonus.py +304 -0
  131. webscout/Provider/OPENAI/standardinput.py +433 -0
  132. webscout/Provider/OPENAI/textpollinations.py +339 -0
  133. webscout/Provider/OPENAI/toolbaz.py +413 -0
  134. webscout/Provider/OPENAI/typefully.py +355 -0
  135. webscout/Provider/OPENAI/typegpt.py +364 -0
  136. webscout/Provider/OPENAI/uncovrAI.py +463 -0
  137. webscout/Provider/OPENAI/utils.py +318 -0
  138. webscout/Provider/OPENAI/venice.py +431 -0
  139. webscout/Provider/OPENAI/wisecat.py +387 -0
  140. webscout/Provider/OPENAI/writecream.py +163 -0
  141. webscout/Provider/OPENAI/x0gpt.py +365 -0
  142. webscout/Provider/OPENAI/yep.py +382 -0
  143. webscout/Provider/OpenGPT.py +209 -0
  144. webscout/Provider/Openai.py +496 -0
  145. webscout/Provider/PI.py +429 -0
  146. webscout/Provider/Perplexitylabs.py +415 -0
  147. webscout/Provider/QwenLM.py +254 -0
  148. webscout/Provider/Reka.py +214 -0
  149. webscout/Provider/StandardInput.py +290 -0
  150. webscout/Provider/TTI/README.md +82 -0
  151. webscout/Provider/TTI/__init__.py +7 -0
  152. webscout/Provider/TTI/aiarta.py +365 -0
  153. webscout/Provider/TTI/artbit.py +0 -0
  154. webscout/Provider/TTI/base.py +64 -0
  155. webscout/Provider/TTI/fastflux.py +200 -0
  156. webscout/Provider/TTI/magicstudio.py +201 -0
  157. webscout/Provider/TTI/piclumen.py +203 -0
  158. webscout/Provider/TTI/pixelmuse.py +225 -0
  159. webscout/Provider/TTI/pollinations.py +221 -0
  160. webscout/Provider/TTI/utils.py +11 -0
  161. webscout/Provider/TTS/README.md +192 -0
  162. webscout/Provider/TTS/__init__.py +10 -0
  163. webscout/Provider/TTS/base.py +159 -0
  164. webscout/Provider/TTS/deepgram.py +156 -0
  165. webscout/Provider/TTS/elevenlabs.py +111 -0
  166. webscout/Provider/TTS/gesserit.py +128 -0
  167. webscout/Provider/TTS/murfai.py +113 -0
  168. webscout/Provider/TTS/openai_fm.py +129 -0
  169. webscout/Provider/TTS/parler.py +111 -0
  170. webscout/Provider/TTS/speechma.py +580 -0
  171. webscout/Provider/TTS/sthir.py +94 -0
  172. webscout/Provider/TTS/streamElements.py +333 -0
  173. webscout/Provider/TTS/utils.py +280 -0
  174. webscout/Provider/TeachAnything.py +229 -0
  175. webscout/Provider/TextPollinationsAI.py +308 -0
  176. webscout/Provider/TwoAI.py +475 -0
  177. webscout/Provider/TypliAI.py +305 -0
  178. webscout/Provider/UNFINISHED/ChatHub.py +209 -0
  179. webscout/Provider/UNFINISHED/Youchat.py +330 -0
  180. webscout/Provider/UNFINISHED/liner_api_request.py +263 -0
  181. webscout/Provider/UNFINISHED/puterjs.py +635 -0
  182. webscout/Provider/UNFINISHED/test_lmarena.py +119 -0
  183. webscout/Provider/Venice.py +258 -0
  184. webscout/Provider/VercelAI.py +253 -0
  185. webscout/Provider/WiseCat.py +233 -0
  186. webscout/Provider/WrDoChat.py +370 -0
  187. webscout/Provider/Writecream.py +246 -0
  188. webscout/Provider/WritingMate.py +269 -0
  189. webscout/Provider/__init__.py +174 -0
  190. webscout/Provider/ai4chat.py +174 -0
  191. webscout/Provider/akashgpt.py +335 -0
  192. webscout/Provider/asksteve.py +220 -0
  193. webscout/Provider/cerebras.py +290 -0
  194. webscout/Provider/chatglm.py +215 -0
  195. webscout/Provider/cleeai.py +213 -0
  196. webscout/Provider/copilot.py +425 -0
  197. webscout/Provider/elmo.py +283 -0
  198. webscout/Provider/freeaichat.py +285 -0
  199. webscout/Provider/geminiapi.py +208 -0
  200. webscout/Provider/granite.py +235 -0
  201. webscout/Provider/hermes.py +266 -0
  202. webscout/Provider/julius.py +223 -0
  203. webscout/Provider/koala.py +170 -0
  204. webscout/Provider/learnfastai.py +325 -0
  205. webscout/Provider/llama3mitril.py +215 -0
  206. webscout/Provider/llmchat.py +258 -0
  207. webscout/Provider/llmchatco.py +306 -0
  208. webscout/Provider/lmarena.py +198 -0
  209. webscout/Provider/meta.py +801 -0
  210. webscout/Provider/multichat.py +364 -0
  211. webscout/Provider/oivscode.py +309 -0
  212. webscout/Provider/samurai.py +224 -0
  213. webscout/Provider/scira_chat.py +299 -0
  214. webscout/Provider/scnet.py +243 -0
  215. webscout/Provider/searchchat.py +292 -0
  216. webscout/Provider/sonus.py +258 -0
  217. webscout/Provider/talkai.py +194 -0
  218. webscout/Provider/toolbaz.py +353 -0
  219. webscout/Provider/turboseek.py +266 -0
  220. webscout/Provider/typefully.py +202 -0
  221. webscout/Provider/typegpt.py +289 -0
  222. webscout/Provider/uncovr.py +368 -0
  223. webscout/Provider/x0gpt.py +299 -0
  224. webscout/Provider/yep.py +389 -0
  225. webscout/__init__.py +4 -2
  226. webscout/cli.py +3 -28
  227. webscout/client.py +70 -0
  228. webscout/conversation.py +35 -35
  229. webscout/litagent/Readme.md +276 -0
  230. webscout/litagent/__init__.py +29 -0
  231. webscout/litagent/agent.py +455 -0
  232. webscout/litagent/constants.py +60 -0
  233. webscout/litprinter/__init__.py +59 -0
  234. webscout/optimizers.py +419 -419
  235. webscout/scout/README.md +404 -0
  236. webscout/scout/__init__.py +8 -0
  237. webscout/scout/core/__init__.py +7 -0
  238. webscout/scout/core/crawler.py +210 -0
  239. webscout/scout/core/scout.py +607 -0
  240. webscout/scout/core/search_result.py +96 -0
  241. webscout/scout/core/text_analyzer.py +63 -0
  242. webscout/scout/core/text_utils.py +277 -0
  243. webscout/scout/core/web_analyzer.py +52 -0
  244. webscout/scout/element.py +478 -0
  245. webscout/scout/parsers/__init__.py +69 -0
  246. webscout/scout/parsers/html5lib_parser.py +172 -0
  247. webscout/scout/parsers/html_parser.py +236 -0
  248. webscout/scout/parsers/lxml_parser.py +178 -0
  249. webscout/scout/utils.py +37 -0
  250. webscout/swiftcli/Readme.md +323 -0
  251. webscout/swiftcli/__init__.py +95 -0
  252. webscout/swiftcli/core/__init__.py +7 -0
  253. webscout/swiftcli/core/cli.py +297 -0
  254. webscout/swiftcli/core/context.py +104 -0
  255. webscout/swiftcli/core/group.py +241 -0
  256. webscout/swiftcli/decorators/__init__.py +28 -0
  257. webscout/swiftcli/decorators/command.py +221 -0
  258. webscout/swiftcli/decorators/options.py +220 -0
  259. webscout/swiftcli/decorators/output.py +252 -0
  260. webscout/swiftcli/exceptions.py +21 -0
  261. webscout/swiftcli/plugins/__init__.py +9 -0
  262. webscout/swiftcli/plugins/base.py +135 -0
  263. webscout/swiftcli/plugins/manager.py +269 -0
  264. webscout/swiftcli/utils/__init__.py +59 -0
  265. webscout/swiftcli/utils/formatting.py +252 -0
  266. webscout/swiftcli/utils/parsing.py +267 -0
  267. webscout/version.py +1 -1
  268. webscout/webscout_search.py +2 -182
  269. webscout/webscout_search_async.py +1 -179
  270. webscout/zeroart/README.md +89 -0
  271. webscout/zeroart/__init__.py +135 -0
  272. webscout/zeroart/base.py +66 -0
  273. webscout/zeroart/effects.py +101 -0
  274. webscout/zeroart/fonts.py +1239 -0
  275. {webscout-8.2.7.dist-info → webscout-8.2.9.dist-info}/METADATA +262 -83
  276. webscout-8.2.9.dist-info/RECORD +289 -0
  277. {webscout-8.2.7.dist-info → webscout-8.2.9.dist-info}/WHEEL +1 -1
  278. {webscout-8.2.7.dist-info → webscout-8.2.9.dist-info}/entry_points.txt +1 -0
  279. webscout-8.2.7.dist-info/RECORD +0 -26
  280. {webscout-8.2.7.dist-info → webscout-8.2.9.dist-info}/licenses/LICENSE.md +0 -0
  281. {webscout-8.2.7.dist-info → webscout-8.2.9.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,478 @@
1
+ """
2
+ Scout Element Module - Advanced HTML Element Representation
3
+ """
4
+
5
+ import re
6
+ from typing import Any, Dict, List, Optional, Union
7
+
8
+
9
class NavigableString(str):
    """
    Text node that additionally records which element contains it.

    Behaves like a regular ``str`` but carries a ``parent`` attribute,
    in the spirit of BeautifulSoup's NavigableString.
    """

    def __new__(cls, text: str):
        """
        Build the underlying immutable string value.

        Args:
            text (str): String content
        """
        return super().__new__(cls, text)

    def __init__(self, text: str):
        """
        Set up tree bookkeeping for the freshly created string.

        Args:
            text (str): String content (already consumed by ``__new__``)
        """
        # A new string is detached; Tag.append()/insert() assign the parent.
        self.parent = None

    def __repr__(self):
        """Debug representation wrapping the plain string repr."""
        plain_repr = str.__repr__(self)
        return f"NavigableString({plain_repr})"

    def __add__(self, other):
        """
        Concatenate with another value after converting both sides to str.

        Args:
            other: Value to append; it is passed through ``str()`` first

        Returns:
            str: Plain (non-navigable) concatenated string
        """
        left = str(self)
        right = str(other)
        return left + right

    def strip(self, chars=None):
        """
        Strip whitespace (or the given characters) from both ends.

        Args:
            chars (str, optional): Characters to strip

        Returns:
            NavigableString: New, parentless string holding the stripped text
        """
        trimmed = str.strip(self, chars)
        return NavigableString(trimmed)
59
+
60
+ class Tag:
61
+ """
62
+ Represents an HTML tag with advanced traversal and manipulation capabilities.
63
+ Enhanced to closely mimic BeautifulSoup's Tag class.
64
+ """
65
+ def __init__(self, name: str, attrs: Dict[str, str] = None):
66
+ """
67
+ Initialize a Tag with name and attributes.
68
+
69
+ Args:
70
+ name (str): Tag name
71
+ attrs (dict, optional): Tag attributes
72
+ """
73
+ self.name = name
74
+ self.attrs = attrs or {}
75
+ self.contents = []
76
+ self.parent = None
77
+ self.string = None # For single string content
78
+
79
+ def __str__(self):
80
+ """String representation of the tag."""
81
+ return self.decode_contents()
82
+
83
+ def __repr__(self):
84
+ """Detailed representation of the tag."""
85
+ return f"<{self.name} {self.attrs}>"
86
+
87
+ def __call__(self, *args, **kwargs):
88
+ """
89
+ Allows calling find_all directly on the tag.
90
+ Mimics BeautifulSoup's behavior.
91
+ """
92
+ return self.find_all(*args, **kwargs)
93
+
94
+ def __contains__(self, item):
95
+ """
96
+ Check if an item is in the tag's contents.
97
+
98
+ Args:
99
+ item: Item to search for
100
+
101
+ Returns:
102
+ bool: True if item is in contents, False otherwise
103
+ """
104
+ return item in self.contents
105
+
106
+ def __getitem__(self, key):
107
+ """
108
+ Get an attribute value using dictionary-like access.
109
+
110
+ Args:
111
+ key (str): Attribute name
112
+
113
+ Returns:
114
+ Any: Attribute value
115
+ """
116
+ return self.attrs[key]
117
+
118
+ def __iter__(self):
119
+ """
120
+ Iterate through tag's contents.
121
+
122
+ Returns:
123
+ Iterator: Contents of the tag
124
+ """
125
+ return iter(self.contents)
126
+
127
+ def __eq__(self, other):
128
+ """
129
+ Compare tags based on name and attributes.
130
+
131
+ Args:
132
+ other (Tag): Tag to compare
133
+
134
+ Returns:
135
+ bool: True if tags are equivalent
136
+ """
137
+ if not isinstance(other, Tag):
138
+ return False
139
+ return (
140
+ self.name == other.name and
141
+ self.attrs == other.attrs and
142
+ str(self) == str(other)
143
+ )
144
+
145
+ def __hash__(self):
146
+ """
147
+ Generate a hash for the tag.
148
+
149
+ Returns:
150
+ int: Hash value
151
+ """
152
+ return hash((self.name, frozenset(self.attrs.items()), str(self)))
153
+
154
+ def find(self, name=None, attrs={}, recursive=True, text=None, **kwargs) -> Optional['Tag']:
155
+ """
156
+ Find the first matching child element.
157
+ Enhanced with more flexible matching.
158
+
159
+ Args:
160
+ name (str, optional): Tag name to search for
161
+ attrs (dict, optional): Attributes to match
162
+ recursive (bool, optional): Search recursively
163
+ text (str, optional): Text content to match
164
+
165
+ Returns:
166
+ Tag or None: First matching element
167
+ """
168
+ results = self.find_all(name, attrs, recursive, text, limit=1, **kwargs)
169
+ return results[0] if results else None
170
+
171
+ def find_all(self, name=None, attrs={}, recursive=True, text=None, limit=None, **kwargs) -> List['Tag']:
172
+ """
173
+ Find all matching child elements.
174
+ Enhanced with more flexible matching and BeautifulSoup-like features.
175
+
176
+ Args:
177
+ name (str, optional): Tag name to search for
178
+ attrs (dict, optional): Attributes to match
179
+ recursive (bool, optional): Search recursively
180
+ text (str, optional): Text content to match
181
+ limit (int, optional): Maximum number of results
182
+
183
+ Returns:
184
+ List[Tag]: List of matching elements
185
+ """
186
+ results = []
187
+
188
+ def _match(tag):
189
+ # Check tag name with case-insensitive and regex support
190
+ if name:
191
+ if isinstance(name, str):
192
+ if tag.name.lower() != name.lower():
193
+ return False
194
+ elif isinstance(name, re.Pattern):
195
+ if not name.search(tag.name):
196
+ return False
197
+
198
+ # Check attributes with more flexible matching
199
+ for k, v in attrs.items():
200
+ # Handle special attribute matching
201
+ if k == 'class':
202
+ tag_classes = tag.get('class', [])
203
+ if isinstance(v, str) and v not in tag_classes:
204
+ return False
205
+ elif isinstance(v, list) and not all(cls in tag_classes for cls in v):
206
+ return False
207
+ elif k == 'id':
208
+ if tag.get('id') != v:
209
+ return False
210
+ else:
211
+ # Regex or exact match for other attributes
212
+ tag_attr = tag.attrs.get(k)
213
+ if v is True:
214
+ if tag_attr is None:
215
+ return False
216
+ elif isinstance(v, re.Pattern):
217
+ if tag_attr is None or not v.search(str(tag_attr)):
218
+ return False
219
+ elif tag_attr != v:
220
+ return False
221
+
222
+ # Check text content
223
+ if text:
224
+ tag_text = tag.get_text(strip=True)
225
+ if isinstance(text, str) and text.lower() not in tag_text.lower():
226
+ return False
227
+ elif isinstance(text, re.Pattern) and not text.search(tag_text):
228
+ return False
229
+
230
+ return True
231
+
232
+ def _search(element):
233
+ if _match(element):
234
+ results.append(element)
235
+ if limit and len(results) == limit:
236
+ return
237
+
238
+ if recursive:
239
+ for child in element.contents:
240
+ if isinstance(child, Tag):
241
+ _search(child)
242
+
243
+ _search(self)
244
+ return results
245
+
246
+ def select(self, selector: str) -> List['Tag']:
247
+ """
248
+ Select elements using CSS selector.
249
+ Enhanced to support more complex selectors.
250
+
251
+ Args:
252
+ selector (str): CSS selector string
253
+
254
+ Returns:
255
+ List[Tag]: List of matching elements
256
+ """
257
+ # More advanced CSS selector parsing
258
+ # This is a simplified implementation and might need more robust parsing
259
+ parts = re.split(r'\s+', selector.strip())
260
+ results = []
261
+
262
+ def _match_selector(tag, selector_part):
263
+ # Support more complex selectors
264
+ if selector_part.startswith('.'):
265
+ # Class selector
266
+ return selector_part[1:] in tag.get('class', [])
267
+ elif selector_part.startswith('#'):
268
+ # ID selector
269
+ return tag.get('id') == selector_part[1:]
270
+ elif '[' in selector_part and ']' in selector_part:
271
+ # Attribute selector
272
+ attr_match = re.match(r'(\w+)\[([^=]+)(?:=(.+))?\]', selector_part)
273
+ if attr_match:
274
+ tag_name, attr, value = attr_match.groups()
275
+ if tag_name and tag.name != tag_name:
276
+ return False
277
+ if value:
278
+ return tag.get(attr) == value.strip("'\"")
279
+ return attr in tag.attrs
280
+ else:
281
+ # Tag selector
282
+ return tag.name == selector_part
283
+
284
+ def _recursive_select(element, selector_parts):
285
+ if not selector_parts:
286
+ results.append(element)
287
+ return
288
+
289
+ current_selector = selector_parts[0]
290
+ remaining_selectors = selector_parts[1:]
291
+
292
+ if _match_selector(element, current_selector):
293
+ if not remaining_selectors:
294
+ results.append(element)
295
+ else:
296
+ for child in element.contents:
297
+ if isinstance(child, Tag):
298
+ _recursive_select(child, remaining_selectors)
299
+
300
+ for child in self.contents:
301
+ if isinstance(child, Tag):
302
+ _recursive_select(child, parts)
303
+
304
+ return results
305
+
306
+ def select_one(self, selector: str) -> Optional['Tag']:
307
+ """
308
+ Select the first element matching the CSS selector.
309
+
310
+ Args:
311
+ selector (str): CSS selector string
312
+
313
+ Returns:
314
+ Tag or None: First matching element
315
+ """
316
+ results = self.select(selector)
317
+ return results[0] if results else None
318
+
319
+ def get_text(self, separator=' ', strip=False, types=None) -> str:
320
+ """
321
+ Extract text from the tag and its descendants.
322
+ Enhanced to support more flexible text extraction.
323
+
324
+ Args:
325
+ separator (str, optional): Text separator
326
+ strip (bool, optional): Strip whitespace
327
+ types (list, optional): Types of content to extract
328
+
329
+ Returns:
330
+ str: Extracted text
331
+ """
332
+ texts = []
333
+ for content in self.contents:
334
+ # Support filtering by content type
335
+ if types is None or type(content) in types:
336
+ if isinstance(content, NavigableString):
337
+ texts.append(str(content))
338
+ elif isinstance(content, Tag):
339
+ texts.append(content.get_text(separator, strip))
340
+
341
+ text = separator.join(texts)
342
+ text = re.sub(r'\n\n+', '\n', text) # Replace multiple newlines with single newlines
343
+ return text.strip() if strip else text
344
+
345
+ def find_text(self, pattern: Union[str, re.Pattern], **kwargs) -> Optional[str]:
346
+ """
347
+ Find the first text matching a pattern.
348
+
349
+ Args:
350
+ pattern (str or re.Pattern): Pattern to match
351
+ **kwargs: Additional arguments for get_text()
352
+
353
+ Returns:
354
+ str or None: First matching text
355
+ """
356
+ text = self.get_text(**kwargs)
357
+
358
+ if isinstance(pattern, str):
359
+ return pattern if pattern in text else None
360
+ elif isinstance(pattern, re.Pattern):
361
+ match = pattern.search(text)
362
+ return match.group(0) if match else None
363
+
364
+ def replace_text(self, old: Union[str, re.Pattern], new: str, **kwargs) -> str:
365
+ """
366
+ Replace text matching a pattern.
367
+
368
+ Args:
369
+ old (str or re.Pattern): Pattern to replace
370
+ new (str): Replacement text
371
+ **kwargs: Additional arguments for get_text()
372
+
373
+ Returns:
374
+ str: Modified text
375
+ """
376
+ text = self.get_text(**kwargs)
377
+
378
+ if isinstance(old, str):
379
+ return text.replace(old, new)
380
+ elif isinstance(old, re.Pattern):
381
+ return old.sub(new, text)
382
+
383
+ def get(self, key: str, default: Any = None) -> Any:
384
+ """
385
+ Get an attribute value.
386
+
387
+ Args:
388
+ key (str): Attribute name
389
+ default (Any, optional): Default value if attribute not found
390
+
391
+ Returns:
392
+ Any: Attribute value or default
393
+ """
394
+ return self.attrs.get(key, default)
395
+
396
+ def decompose(self) -> None:
397
+ """Remove the tag and its contents from the document."""
398
+ if self.parent:
399
+ self.parent.contents.remove(self)
400
+
401
+ def extract(self) -> 'Tag':
402
+ """
403
+ Remove the tag from the document and return it.
404
+
405
+ Returns:
406
+ Tag: Extracted tag
407
+ """
408
+ self.decompose()
409
+ return self
410
+
411
+ def clear(self) -> None:
412
+ """Remove all contents of the tag."""
413
+ self.contents.clear()
414
+
415
+ def append(self, new_child: Union['Tag', NavigableString, str]) -> None:
416
+ """Append a new child to this tag."""
417
+ if isinstance(new_child, str):
418
+ new_child = NavigableString(new_child)
419
+ new_child.parent = self
420
+ self.contents.append(new_child)
421
+
422
+ def insert(self, index: int, new_child: Union['Tag', NavigableString, str]) -> None:
423
+ """Insert a new child at the given index."""
424
+ if isinstance(new_child, str):
425
+ new_child = NavigableString(new_child)
426
+ new_child.parent = self
427
+ self.contents.insert(index, new_child)
428
+
429
+ def replace_with(self, new_tag: 'Tag') -> None:
430
+ """
431
+ Replace this tag with another tag.
432
+
433
+ Args:
434
+ new_tag (Tag): Tag to replace the current tag
435
+ """
436
+ if self.parent:
437
+ index = self.parent.contents.index(self)
438
+ self.parent.contents[index] = new_tag
439
+ new_tag.parent = self.parent
440
+
441
+ def decode_contents(self, eventual_encoding='utf-8') -> str:
442
+ """
443
+ Decode the contents of the tag to a string.
444
+
445
+ Args:
446
+ eventual_encoding (str, optional): Encoding to use
447
+
448
+ Returns:
449
+ str: Decoded contents
450
+ """
451
+ return ''.join(str(content) for content in self.contents)
452
+
453
+ def prettify(self, formatter='minimal') -> str:
454
+ """
455
+ Return a nicely formatted representation of the tag.
456
+
457
+ Args:
458
+ formatter (str, optional): Formatting style
459
+
460
+ Returns:
461
+ str: Prettified tag representation
462
+ """
463
+ def _prettify(tag, indent=0):
464
+ result = ' ' * indent + f'<{tag.name}'
465
+ for k, v in tag.attrs.items():
466
+ result += f' {k}="{v}"'
467
+ result += '>\n'
468
+
469
+ for content in tag.contents:
470
+ if isinstance(content, Tag):
471
+ result += _prettify(content, indent + 2)
472
+ else:
473
+ result += ' ' * (indent + 2) + str(content) + '\n'
474
+
475
+ result += ' ' * indent + f'</{tag.name}>\n'
476
+ return result
477
+
478
+ return _prettify(self)
@@ -0,0 +1,69 @@
1
+ """
2
+ Scout Parsers - Unified Parsing Interfaces
3
+ """
4
+
5
+ from typing import Dict, Type, Any
6
+
7
+ from .html_parser import HTMLParser
8
+ from .lxml_parser import LXMLParser
9
+ from .html5lib_parser import HTML5Parser
10
+
11
class ParserRegistry:
    """
    Central lookup table of the HTML parsers available to Scout.

    Parsers are stored by name in a class-level mapping, so registrations
    are shared by every user of the class.
    """

    # name -> parser class; pre-populated with the built-in parsers.
    _PARSERS: Dict[str, Type[Any]] = {
        'html.parser': HTMLParser,
        'lxml': LXMLParser,
        'html5lib': HTML5Parser,
    }

    @classmethod
    def get_parser(cls, parser_name: str = 'html.parser') -> Any:
        """
        Instantiate the parser registered under the given name.

        Args:
            parser_name (str): Name of the parser to retrieve

        Returns:
            A new instance of the registered parser class (constructed
            without arguments)

        Raises:
            ValueError: If no parser is registered under that name
        """
        registry = cls._PARSERS
        if parser_name in registry:
            parser_class = registry[parser_name]
            return parser_class()

        available = list(registry.keys())
        raise ValueError(f"Parser '{parser_name}' not found. Available parsers: {available}")

    @classmethod
    def register_parser(cls, name: str, parser_class: Type[Any]):
        """
        Add a parser class to the registry, replacing any existing entry.

        Args:
            name (str): Name of the parser
            parser_class (Type): Parser class to register
        """
        cls._PARSERS.update({name: parser_class})

    @classmethod
    def list_parsers(cls) -> Dict[str, Type[Any]]:
        """
        Return a snapshot of the registered parsers.

        Returns:
            Dict mapping parser names to parser classes; it is a shallow
            copy, so modifying it does not affect the registry
        """
        return dict(cls._PARSERS)
62
+
63
# Public API of the parsers package: the three parser classes and the registry.
__all__ = ['HTMLParser', 'LXMLParser', 'HTML5Parser', 'ParserRegistry']