webscout 7.0__py3-none-any.whl → 7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of webscout might be problematic. Click here for more details.

Files changed (147) hide show
  1. webscout/AIauto.py +191 -191
  2. webscout/AIbase.py +122 -122
  3. webscout/AIutel.py +440 -440
  4. webscout/Bard.py +343 -161
  5. webscout/DWEBS.py +489 -492
  6. webscout/Extra/YTToolkit/YTdownloader.py +995 -995
  7. webscout/Extra/YTToolkit/__init__.py +2 -2
  8. webscout/Extra/YTToolkit/transcriber.py +476 -479
  9. webscout/Extra/YTToolkit/ytapi/channel.py +307 -307
  10. webscout/Extra/YTToolkit/ytapi/playlist.py +58 -58
  11. webscout/Extra/YTToolkit/ytapi/pool.py +7 -7
  12. webscout/Extra/YTToolkit/ytapi/utils.py +62 -62
  13. webscout/Extra/YTToolkit/ytapi/video.py +103 -103
  14. webscout/Extra/autocoder/__init__.py +9 -9
  15. webscout/Extra/autocoder/autocoder_utiles.py +199 -199
  16. webscout/Extra/autocoder/rawdog.py +5 -7
  17. webscout/Extra/autollama.py +230 -230
  18. webscout/Extra/gguf.py +3 -3
  19. webscout/Extra/weather.py +171 -171
  20. webscout/LLM.py +442 -442
  21. webscout/Litlogger/__init__.py +67 -681
  22. webscout/Litlogger/core/__init__.py +6 -0
  23. webscout/Litlogger/core/level.py +20 -0
  24. webscout/Litlogger/core/logger.py +123 -0
  25. webscout/Litlogger/handlers/__init__.py +12 -0
  26. webscout/Litlogger/handlers/console.py +50 -0
  27. webscout/Litlogger/handlers/file.py +143 -0
  28. webscout/Litlogger/handlers/network.py +174 -0
  29. webscout/Litlogger/styles/__init__.py +7 -0
  30. webscout/Litlogger/styles/colors.py +231 -0
  31. webscout/Litlogger/styles/formats.py +377 -0
  32. webscout/Litlogger/styles/text.py +87 -0
  33. webscout/Litlogger/utils/__init__.py +6 -0
  34. webscout/Litlogger/utils/detectors.py +154 -0
  35. webscout/Litlogger/utils/formatters.py +200 -0
  36. webscout/Provider/AISEARCH/DeepFind.py +250 -250
  37. webscout/Provider/Blackboxai.py +136 -137
  38. webscout/Provider/ChatGPTGratis.py +226 -0
  39. webscout/Provider/Cloudflare.py +91 -78
  40. webscout/Provider/DeepSeek.py +218 -0
  41. webscout/Provider/Deepinfra.py +59 -35
  42. webscout/Provider/Free2GPT.py +131 -124
  43. webscout/Provider/Gemini.py +100 -115
  44. webscout/Provider/Glider.py +74 -59
  45. webscout/Provider/Groq.py +30 -18
  46. webscout/Provider/Jadve.py +108 -77
  47. webscout/Provider/Llama3.py +117 -94
  48. webscout/Provider/Marcus.py +191 -137
  49. webscout/Provider/Netwrck.py +62 -50
  50. webscout/Provider/PI.py +79 -124
  51. webscout/Provider/PizzaGPT.py +129 -83
  52. webscout/Provider/QwenLM.py +311 -0
  53. webscout/Provider/TTI/AiForce/__init__.py +22 -22
  54. webscout/Provider/TTI/AiForce/async_aiforce.py +257 -257
  55. webscout/Provider/TTI/AiForce/sync_aiforce.py +242 -242
  56. webscout/Provider/TTI/Nexra/__init__.py +22 -22
  57. webscout/Provider/TTI/Nexra/async_nexra.py +286 -286
  58. webscout/Provider/TTI/Nexra/sync_nexra.py +258 -258
  59. webscout/Provider/TTI/PollinationsAI/__init__.py +23 -23
  60. webscout/Provider/TTI/PollinationsAI/async_pollinations.py +330 -330
  61. webscout/Provider/TTI/PollinationsAI/sync_pollinations.py +285 -285
  62. webscout/Provider/TTI/artbit/__init__.py +22 -22
  63. webscout/Provider/TTI/artbit/async_artbit.py +184 -184
  64. webscout/Provider/TTI/artbit/sync_artbit.py +176 -176
  65. webscout/Provider/TTI/blackbox/__init__.py +4 -4
  66. webscout/Provider/TTI/blackbox/async_blackbox.py +212 -212
  67. webscout/Provider/TTI/blackbox/sync_blackbox.py +199 -199
  68. webscout/Provider/TTI/deepinfra/__init__.py +4 -4
  69. webscout/Provider/TTI/deepinfra/async_deepinfra.py +227 -227
  70. webscout/Provider/TTI/deepinfra/sync_deepinfra.py +199 -199
  71. webscout/Provider/TTI/huggingface/__init__.py +22 -22
  72. webscout/Provider/TTI/huggingface/async_huggingface.py +199 -199
  73. webscout/Provider/TTI/huggingface/sync_huggingface.py +195 -195
  74. webscout/Provider/TTI/imgninza/__init__.py +4 -4
  75. webscout/Provider/TTI/imgninza/async_ninza.py +214 -214
  76. webscout/Provider/TTI/imgninza/sync_ninza.py +209 -209
  77. webscout/Provider/TTI/talkai/__init__.py +4 -4
  78. webscout/Provider/TTI/talkai/async_talkai.py +229 -229
  79. webscout/Provider/TTI/talkai/sync_talkai.py +207 -207
  80. webscout/Provider/TTS/deepgram.py +182 -182
  81. webscout/Provider/TTS/elevenlabs.py +136 -136
  82. webscout/Provider/TTS/gesserit.py +150 -150
  83. webscout/Provider/TTS/murfai.py +138 -138
  84. webscout/Provider/TTS/parler.py +133 -134
  85. webscout/Provider/TTS/streamElements.py +360 -360
  86. webscout/Provider/TTS/utils.py +280 -280
  87. webscout/Provider/TTS/voicepod.py +116 -116
  88. webscout/Provider/TextPollinationsAI.py +74 -47
  89. webscout/Provider/WiseCat.py +193 -0
  90. webscout/Provider/__init__.py +144 -136
  91. webscout/Provider/cerebras.py +242 -227
  92. webscout/Provider/chatglm.py +204 -204
  93. webscout/Provider/dgaf.py +67 -39
  94. webscout/Provider/gaurish.py +105 -66
  95. webscout/Provider/geminiapi.py +208 -208
  96. webscout/Provider/granite.py +223 -0
  97. webscout/Provider/hermes.py +218 -218
  98. webscout/Provider/llama3mitril.py +179 -179
  99. webscout/Provider/llamatutor.py +72 -62
  100. webscout/Provider/llmchat.py +60 -35
  101. webscout/Provider/meta.py +794 -794
  102. webscout/Provider/multichat.py +331 -230
  103. webscout/Provider/typegpt.py +359 -356
  104. webscout/Provider/yep.py +5 -5
  105. webscout/__main__.py +5 -5
  106. webscout/cli.py +319 -319
  107. webscout/conversation.py +241 -242
  108. webscout/exceptions.py +328 -328
  109. webscout/litagent/__init__.py +28 -28
  110. webscout/litagent/agent.py +2 -3
  111. webscout/litprinter/__init__.py +0 -58
  112. webscout/scout/__init__.py +8 -8
  113. webscout/scout/core.py +884 -884
  114. webscout/scout/element.py +459 -459
  115. webscout/scout/parsers/__init__.py +69 -69
  116. webscout/scout/parsers/html5lib_parser.py +172 -172
  117. webscout/scout/parsers/html_parser.py +236 -236
  118. webscout/scout/parsers/lxml_parser.py +178 -178
  119. webscout/scout/utils.py +38 -38
  120. webscout/swiftcli/__init__.py +811 -811
  121. webscout/update_checker.py +2 -12
  122. webscout/version.py +1 -1
  123. webscout/webscout_search.py +1142 -1140
  124. webscout/webscout_search_async.py +635 -635
  125. webscout/zeroart/__init__.py +54 -54
  126. webscout/zeroart/base.py +60 -60
  127. webscout/zeroart/effects.py +99 -99
  128. webscout/zeroart/fonts.py +816 -816
  129. {webscout-7.0.dist-info → webscout-7.2.dist-info}/METADATA +21 -28
  130. webscout-7.2.dist-info/RECORD +217 -0
  131. webstoken/__init__.py +30 -30
  132. webstoken/classifier.py +189 -189
  133. webstoken/keywords.py +216 -216
  134. webstoken/language.py +128 -128
  135. webstoken/ner.py +164 -164
  136. webstoken/normalizer.py +35 -35
  137. webstoken/processor.py +77 -77
  138. webstoken/sentiment.py +206 -206
  139. webstoken/stemmer.py +73 -73
  140. webstoken/tagger.py +60 -60
  141. webstoken/tokenizer.py +158 -158
  142. webscout/Provider/RUBIKSAI.py +0 -272
  143. webscout-7.0.dist-info/RECORD +0 -199
  144. {webscout-7.0.dist-info → webscout-7.2.dist-info}/LICENSE.md +0 -0
  145. {webscout-7.0.dist-info → webscout-7.2.dist-info}/WHEEL +0 -0
  146. {webscout-7.0.dist-info → webscout-7.2.dist-info}/entry_points.txt +0 -0
  147. {webscout-7.0.dist-info → webscout-7.2.dist-info}/top_level.txt +0 -0
webscout/scout/element.py CHANGED
@@ -1,460 +1,460 @@
1
- """
2
- Scout Element Module - Advanced HTML Element Representation
3
- """
4
-
5
- import re
6
- from typing import Optional, List, Dict, Union, Any, Callable, Iterable
7
-
8
- class NavigableString(str):
9
- """
10
- A string that knows its place in the document tree.
11
- Mimics BeautifulSoup's NavigableString for better compatibility.
12
- """
13
- def __new__(cls, text: str):
14
- """
15
- Create a new NavigableString instance.
16
-
17
- Args:
18
- text (str): String content
19
- """
20
- return str.__new__(cls, text)
21
-
22
- def __init__(self, text: str):
23
- """
24
- Initialize a navigable string.
25
-
26
- Args:
27
- text (str): String content
28
- """
29
- self.parent = None
30
-
31
- def __repr__(self):
32
- """String representation."""
33
- return f"NavigableString({super().__repr__()})"
34
-
35
- def __add__(self, other):
36
- """
37
- Allow concatenation of NavigableString with other strings.
38
-
39
- Args:
40
- other (str): String to concatenate
41
-
42
- Returns:
43
- str: Concatenated string
44
- """
45
- return str(self) + str(other)
46
-
47
- def strip(self, chars=None):
48
- """
49
- Strip whitespace or specified characters.
50
-
51
- Args:
52
- chars (str, optional): Characters to strip
53
-
54
- Returns:
55
- str: Stripped string
56
- """
57
- return NavigableString(super().strip(chars))
58
-
59
- class Tag:
60
- """
61
- Represents an HTML tag with advanced traversal and manipulation capabilities.
62
- Enhanced to closely mimic BeautifulSoup's Tag class.
63
- """
64
- def __init__(self, name: str, attrs: Dict[str, str] = None):
65
- """
66
- Initialize a Tag with name and attributes.
67
-
68
- Args:
69
- name (str): Tag name
70
- attrs (dict, optional): Tag attributes
71
- """
72
- self.name = name
73
- self.attrs = attrs or {}
74
- self.contents = []
75
- self.parent = None
76
- self.string = None # For single string content
77
-
78
- def __str__(self):
79
- """String representation of the tag."""
80
- return self.decode_contents()
81
-
82
- def __repr__(self):
83
- """Detailed representation of the tag."""
84
- return f"<{self.name} {self.attrs}>"
85
-
86
- def __call__(self, *args, **kwargs):
87
- """
88
- Allows calling find_all directly on the tag.
89
- Mimics BeautifulSoup's behavior.
90
- """
91
- return self.find_all(*args, **kwargs)
92
-
93
- def __contains__(self, item):
94
- """
95
- Check if an item is in the tag's contents.
96
-
97
- Args:
98
- item: Item to search for
99
-
100
- Returns:
101
- bool: True if item is in contents, False otherwise
102
- """
103
- return item in self.contents
104
-
105
- def __getitem__(self, key):
106
- """
107
- Get an attribute value using dictionary-like access.
108
-
109
- Args:
110
- key (str): Attribute name
111
-
112
- Returns:
113
- Any: Attribute value
114
- """
115
- return self.attrs[key]
116
-
117
- def __iter__(self):
118
- """
119
- Iterate through tag's contents.
120
-
121
- Returns:
122
- Iterator: Contents of the tag
123
- """
124
- return iter(self.contents)
125
-
126
- def __eq__(self, other):
127
- """
128
- Compare tags based on name and attributes.
129
-
130
- Args:
131
- other (Tag): Tag to compare
132
-
133
- Returns:
134
- bool: True if tags are equivalent
135
- """
136
- if not isinstance(other, Tag):
137
- return False
138
- return (
139
- self.name == other.name and
140
- self.attrs == other.attrs and
141
- str(self) == str(other)
142
- )
143
-
144
- def __hash__(self):
145
- """
146
- Generate a hash for the tag.
147
-
148
- Returns:
149
- int: Hash value
150
- """
151
- return hash((self.name, frozenset(self.attrs.items()), str(self)))
152
-
153
- def find(self, name=None, attrs={}, recursive=True, text=None, **kwargs) -> Optional['Tag']:
154
- """
155
- Find the first matching child element.
156
- Enhanced with more flexible matching.
157
-
158
- Args:
159
- name (str, optional): Tag name to search for
160
- attrs (dict, optional): Attributes to match
161
- recursive (bool, optional): Search recursively
162
- text (str, optional): Text content to match
163
-
164
- Returns:
165
- Tag or None: First matching element
166
- """
167
- results = self.find_all(name, attrs, recursive, text, limit=1, **kwargs)
168
- return results[0] if results else None
169
-
170
- def find_all(self, name=None, attrs={}, recursive=True, text=None, limit=None, **kwargs) -> List['Tag']:
171
- """
172
- Find all matching child elements.
173
- Enhanced with more flexible matching and BeautifulSoup-like features.
174
-
175
- Args:
176
- name (str, optional): Tag name to search for
177
- attrs (dict, optional): Attributes to match
178
- recursive (bool, optional): Search recursively
179
- text (str, optional): Text content to match
180
- limit (int, optional): Maximum number of results
181
-
182
- Returns:
183
- List[Tag]: List of matching elements
184
- """
185
- results = []
186
-
187
- def _match(tag):
188
- # Check tag name with case-insensitive and regex support
189
- if name:
190
- if isinstance(name, str):
191
- if tag.name.lower() != name.lower():
192
- return False
193
- elif isinstance(name, re.Pattern):
194
- if not name.search(tag.name):
195
- return False
196
-
197
- # Check attributes with more flexible matching
198
- for k, v in attrs.items():
199
- # Handle special attribute matching
200
- if k == 'class':
201
- tag_classes = tag.get('class', [])
202
- if isinstance(v, str) and v not in tag_classes:
203
- return False
204
- elif isinstance(v, list) and not all(cls in tag_classes for cls in v):
205
- return False
206
- elif k == 'id':
207
- if tag.get('id') != v:
208
- return False
209
- else:
210
- # Regex or exact match for other attributes
211
- tag_attr = tag.attrs.get(k)
212
- if isinstance(v, re.Pattern):
213
- if not v.search(str(tag_attr)):
214
- return False
215
- elif tag_attr != v:
216
- return False
217
-
218
- # Check text content
219
- if text:
220
- tag_text = tag.get_text(strip=True)
221
- if isinstance(text, str) and text.lower() not in tag_text.lower():
222
- return False
223
- elif isinstance(text, re.Pattern) and not text.search(tag_text):
224
- return False
225
-
226
- return True
227
-
228
- def _search(element):
229
- if _match(element):
230
- results.append(element)
231
- if limit and len(results) == limit:
232
- return
233
-
234
- if recursive:
235
- for child in element.contents:
236
- if isinstance(child, Tag):
237
- _search(child)
238
-
239
- _search(self)
240
- return results
241
-
242
- def select(self, selector: str) -> List['Tag']:
243
- """
244
- Select elements using CSS selector.
245
- Enhanced to support more complex selectors.
246
-
247
- Args:
248
- selector (str): CSS selector string
249
-
250
- Returns:
251
- List[Tag]: List of matching elements
252
- """
253
- # More advanced CSS selector parsing
254
- # This is a simplified implementation and might need more robust parsing
255
- parts = re.split(r'\s+', selector.strip())
256
- results = []
257
-
258
- def _match_selector(tag, selector_part):
259
- # Support more complex selectors
260
- if selector_part.startswith('.'):
261
- # Class selector
262
- return selector_part[1:] in tag.get('class', [])
263
- elif selector_part.startswith('#'):
264
- # ID selector
265
- return tag.get('id') == selector_part[1:]
266
- elif '[' in selector_part and ']' in selector_part:
267
- # Attribute selector
268
- attr_match = re.match(r'(\w+)\[([^=]+)(?:=(.+))?\]', selector_part)
269
- if attr_match:
270
- tag_name, attr, value = attr_match.groups()
271
- if tag_name and tag.name != tag_name:
272
- return False
273
- if value:
274
- return tag.get(attr) == value.strip("'\"")
275
- return attr in tag.attrs
276
- else:
277
- # Tag selector
278
- return tag.name == selector_part
279
-
280
- def _recursive_select(element, selector_parts):
281
- if not selector_parts:
282
- results.append(element)
283
- return
284
-
285
- current_selector = selector_parts[0]
286
- remaining_selectors = selector_parts[1:]
287
-
288
- if _match_selector(element, current_selector):
289
- if not remaining_selectors:
290
- results.append(element)
291
- else:
292
- for child in element.contents:
293
- if isinstance(child, Tag):
294
- _recursive_select(child, remaining_selectors)
295
-
296
- for child in self.contents:
297
- if isinstance(child, Tag):
298
- _recursive_select(child, parts)
299
-
300
- return results
301
-
302
- def select_one(self, selector: str) -> Optional['Tag']:
303
- """
304
- Select the first element matching the CSS selector.
305
-
306
- Args:
307
- selector (str): CSS selector string
308
-
309
- Returns:
310
- Tag or None: First matching element
311
- """
312
- results = self.select(selector)
313
- return results[0] if results else None
314
-
315
- def get_text(self, separator=' ', strip=False, types=None) -> str:
316
- """
317
- Extract text from the tag and its descendants.
318
- Enhanced to support more flexible text extraction.
319
-
320
- Args:
321
- separator (str, optional): Text separator
322
- strip (bool, optional): Strip whitespace
323
- types (list, optional): Types of content to extract
324
-
325
- Returns:
326
- str: Extracted text
327
- """
328
- texts = []
329
- for content in self.contents:
330
- # Support filtering by content type
331
- if types is None or type(content) in types:
332
- if isinstance(content, NavigableString):
333
- texts.append(str(content))
334
- elif isinstance(content, Tag):
335
- texts.append(content.get_text(separator, strip))
336
-
337
- text = separator.join(texts)
338
- text = re.sub(r'\n\n+', '\n', text) # Replace multiple newlines with single newlines
339
- return text.strip() if strip else text
340
-
341
- def find_text(self, pattern: Union[str, re.Pattern], **kwargs) -> Optional[str]:
342
- """
343
- Find the first text matching a pattern.
344
-
345
- Args:
346
- pattern (str or re.Pattern): Pattern to match
347
- **kwargs: Additional arguments for get_text()
348
-
349
- Returns:
350
- str or None: First matching text
351
- """
352
- text = self.get_text(**kwargs)
353
-
354
- if isinstance(pattern, str):
355
- return pattern if pattern in text else None
356
- elif isinstance(pattern, re.Pattern):
357
- match = pattern.search(text)
358
- return match.group(0) if match else None
359
-
360
- def replace_text(self, old: Union[str, re.Pattern], new: str, **kwargs) -> str:
361
- """
362
- Replace text matching a pattern.
363
-
364
- Args:
365
- old (str or re.Pattern): Pattern to replace
366
- new (str): Replacement text
367
- **kwargs: Additional arguments for get_text()
368
-
369
- Returns:
370
- str: Modified text
371
- """
372
- text = self.get_text(**kwargs)
373
-
374
- if isinstance(old, str):
375
- return text.replace(old, new)
376
- elif isinstance(old, re.Pattern):
377
- return old.sub(new, text)
378
-
379
- def get(self, key: str, default: Any = None) -> Any:
380
- """
381
- Get an attribute value.
382
-
383
- Args:
384
- key (str): Attribute name
385
- default (Any, optional): Default value if attribute not found
386
-
387
- Returns:
388
- Any: Attribute value or default
389
- """
390
- return self.attrs.get(key, default)
391
-
392
- def decompose(self) -> None:
393
- """Remove the tag and its contents from the document."""
394
- if self.parent:
395
- self.parent.contents.remove(self)
396
-
397
- def extract(self) -> 'Tag':
398
- """
399
- Remove the tag from the document and return it.
400
-
401
- Returns:
402
- Tag: Extracted tag
403
- """
404
- self.decompose()
405
- return self
406
-
407
- def clear(self) -> None:
408
- """Remove all contents of the tag."""
409
- self.contents.clear()
410
-
411
- def replace_with(self, new_tag: 'Tag') -> None:
412
- """
413
- Replace this tag with another tag.
414
-
415
- Args:
416
- new_tag (Tag): Tag to replace the current tag
417
- """
418
- if self.parent:
419
- index = self.parent.contents.index(self)
420
- self.parent.contents[index] = new_tag
421
- new_tag.parent = self.parent
422
-
423
- def decode_contents(self, eventual_encoding='utf-8') -> str:
424
- """
425
- Decode the contents of the tag to a string.
426
-
427
- Args:
428
- eventual_encoding (str, optional): Encoding to use
429
-
430
- Returns:
431
- str: Decoded contents
432
- """
433
- return ''.join(str(content) for content in self.contents)
434
-
435
- def prettify(self, formatter='minimal') -> str:
436
- """
437
- Return a nicely formatted representation of the tag.
438
-
439
- Args:
440
- formatter (str, optional): Formatting style
441
-
442
- Returns:
443
- str: Prettified tag representation
444
- """
445
- def _prettify(tag, indent=0):
446
- result = ' ' * indent + f'<{tag.name}'
447
- for k, v in tag.attrs.items():
448
- result += f' {k}="{v}"'
449
- result += '>\n'
450
-
451
- for content in tag.contents:
452
- if isinstance(content, Tag):
453
- result += _prettify(content, indent + 2)
454
- else:
455
- result += ' ' * (indent + 2) + str(content) + '\n'
456
-
457
- result += ' ' * indent + f'</{tag.name}>\n'
458
- return result
459
-
1
+ """
2
+ Scout Element Module - Advanced HTML Element Representation
3
+ """
4
+
5
+ import re
6
+ from typing import Optional, List, Dict, Union, Any, Callable, Iterable
7
+
8
+ class NavigableString(str):
9
+ """
10
+ A string that knows its place in the document tree.
11
+ Mimics BeautifulSoup's NavigableString for better compatibility.
12
+ """
13
+ def __new__(cls, text: str):
14
+ """
15
+ Create a new NavigableString instance.
16
+
17
+ Args:
18
+ text (str): String content
19
+ """
20
+ return str.__new__(cls, text)
21
+
22
+ def __init__(self, text: str):
23
+ """
24
+ Initialize a navigable string.
25
+
26
+ Args:
27
+ text (str): String content
28
+ """
29
+ self.parent = None
30
+
31
+ def __repr__(self):
32
+ """String representation."""
33
+ return f"NavigableString({super().__repr__()})"
34
+
35
+ def __add__(self, other):
36
+ """
37
+ Allow concatenation of NavigableString with other strings.
38
+
39
+ Args:
40
+ other (str): String to concatenate
41
+
42
+ Returns:
43
+ str: Concatenated string
44
+ """
45
+ return str(self) + str(other)
46
+
47
+ def strip(self, chars=None):
48
+ """
49
+ Strip whitespace or specified characters.
50
+
51
+ Args:
52
+ chars (str, optional): Characters to strip
53
+
54
+ Returns:
55
+ str: Stripped string
56
+ """
57
+ return NavigableString(super().strip(chars))
58
+
59
+ class Tag:
60
+ """
61
+ Represents an HTML tag with advanced traversal and manipulation capabilities.
62
+ Enhanced to closely mimic BeautifulSoup's Tag class.
63
+ """
64
+ def __init__(self, name: str, attrs: Dict[str, str] = None):
65
+ """
66
+ Initialize a Tag with name and attributes.
67
+
68
+ Args:
69
+ name (str): Tag name
70
+ attrs (dict, optional): Tag attributes
71
+ """
72
+ self.name = name
73
+ self.attrs = attrs or {}
74
+ self.contents = []
75
+ self.parent = None
76
+ self.string = None # For single string content
77
+
78
+ def __str__(self):
79
+ """String representation of the tag."""
80
+ return self.decode_contents()
81
+
82
+ def __repr__(self):
83
+ """Detailed representation of the tag."""
84
+ return f"<{self.name} {self.attrs}>"
85
+
86
+ def __call__(self, *args, **kwargs):
87
+ """
88
+ Allows calling find_all directly on the tag.
89
+ Mimics BeautifulSoup's behavior.
90
+ """
91
+ return self.find_all(*args, **kwargs)
92
+
93
+ def __contains__(self, item):
94
+ """
95
+ Check if an item is in the tag's contents.
96
+
97
+ Args:
98
+ item: Item to search for
99
+
100
+ Returns:
101
+ bool: True if item is in contents, False otherwise
102
+ """
103
+ return item in self.contents
104
+
105
+ def __getitem__(self, key):
106
+ """
107
+ Get an attribute value using dictionary-like access.
108
+
109
+ Args:
110
+ key (str): Attribute name
111
+
112
+ Returns:
113
+ Any: Attribute value
114
+ """
115
+ return self.attrs[key]
116
+
117
+ def __iter__(self):
118
+ """
119
+ Iterate through tag's contents.
120
+
121
+ Returns:
122
+ Iterator: Contents of the tag
123
+ """
124
+ return iter(self.contents)
125
+
126
+ def __eq__(self, other):
127
+ """
128
+ Compare tags based on name and attributes.
129
+
130
+ Args:
131
+ other (Tag): Tag to compare
132
+
133
+ Returns:
134
+ bool: True if tags are equivalent
135
+ """
136
+ if not isinstance(other, Tag):
137
+ return False
138
+ return (
139
+ self.name == other.name and
140
+ self.attrs == other.attrs and
141
+ str(self) == str(other)
142
+ )
143
+
144
+ def __hash__(self):
145
+ """
146
+ Generate a hash for the tag.
147
+
148
+ Returns:
149
+ int: Hash value
150
+ """
151
+ return hash((self.name, frozenset(self.attrs.items()), str(self)))
152
+
153
+ def find(self, name=None, attrs={}, recursive=True, text=None, **kwargs) -> Optional['Tag']:
154
+ """
155
+ Find the first matching child element.
156
+ Enhanced with more flexible matching.
157
+
158
+ Args:
159
+ name (str, optional): Tag name to search for
160
+ attrs (dict, optional): Attributes to match
161
+ recursive (bool, optional): Search recursively
162
+ text (str, optional): Text content to match
163
+
164
+ Returns:
165
+ Tag or None: First matching element
166
+ """
167
+ results = self.find_all(name, attrs, recursive, text, limit=1, **kwargs)
168
+ return results[0] if results else None
169
+
170
+ def find_all(self, name=None, attrs={}, recursive=True, text=None, limit=None, **kwargs) -> List['Tag']:
171
+ """
172
+ Find all matching child elements.
173
+ Enhanced with more flexible matching and BeautifulSoup-like features.
174
+
175
+ Args:
176
+ name (str, optional): Tag name to search for
177
+ attrs (dict, optional): Attributes to match
178
+ recursive (bool, optional): Search recursively
179
+ text (str, optional): Text content to match
180
+ limit (int, optional): Maximum number of results
181
+
182
+ Returns:
183
+ List[Tag]: List of matching elements
184
+ """
185
+ results = []
186
+
187
+ def _match(tag):
188
+ # Check tag name with case-insensitive and regex support
189
+ if name:
190
+ if isinstance(name, str):
191
+ if tag.name.lower() != name.lower():
192
+ return False
193
+ elif isinstance(name, re.Pattern):
194
+ if not name.search(tag.name):
195
+ return False
196
+
197
+ # Check attributes with more flexible matching
198
+ for k, v in attrs.items():
199
+ # Handle special attribute matching
200
+ if k == 'class':
201
+ tag_classes = tag.get('class', [])
202
+ if isinstance(v, str) and v not in tag_classes:
203
+ return False
204
+ elif isinstance(v, list) and not all(cls in tag_classes for cls in v):
205
+ return False
206
+ elif k == 'id':
207
+ if tag.get('id') != v:
208
+ return False
209
+ else:
210
+ # Regex or exact match for other attributes
211
+ tag_attr = tag.attrs.get(k)
212
+ if isinstance(v, re.Pattern):
213
+ if not v.search(str(tag_attr)):
214
+ return False
215
+ elif tag_attr != v:
216
+ return False
217
+
218
+ # Check text content
219
+ if text:
220
+ tag_text = tag.get_text(strip=True)
221
+ if isinstance(text, str) and text.lower() not in tag_text.lower():
222
+ return False
223
+ elif isinstance(text, re.Pattern) and not text.search(tag_text):
224
+ return False
225
+
226
+ return True
227
+
228
+ def _search(element):
229
+ if _match(element):
230
+ results.append(element)
231
+ if limit and len(results) == limit:
232
+ return
233
+
234
+ if recursive:
235
+ for child in element.contents:
236
+ if isinstance(child, Tag):
237
+ _search(child)
238
+
239
+ _search(self)
240
+ return results
241
+
242
+ def select(self, selector: str) -> List['Tag']:
243
+ """
244
+ Select elements using CSS selector.
245
+ Enhanced to support more complex selectors.
246
+
247
+ Args:
248
+ selector (str): CSS selector string
249
+
250
+ Returns:
251
+ List[Tag]: List of matching elements
252
+ """
253
+ # More advanced CSS selector parsing
254
+ # This is a simplified implementation and might need more robust parsing
255
+ parts = re.split(r'\s+', selector.strip())
256
+ results = []
257
+
258
+ def _match_selector(tag, selector_part):
259
+ # Support more complex selectors
260
+ if selector_part.startswith('.'):
261
+ # Class selector
262
+ return selector_part[1:] in tag.get('class', [])
263
+ elif selector_part.startswith('#'):
264
+ # ID selector
265
+ return tag.get('id') == selector_part[1:]
266
+ elif '[' in selector_part and ']' in selector_part:
267
+ # Attribute selector
268
+ attr_match = re.match(r'(\w+)\[([^=]+)(?:=(.+))?\]', selector_part)
269
+ if attr_match:
270
+ tag_name, attr, value = attr_match.groups()
271
+ if tag_name and tag.name != tag_name:
272
+ return False
273
+ if value:
274
+ return tag.get(attr) == value.strip("'\"")
275
+ return attr in tag.attrs
276
+ else:
277
+ # Tag selector
278
+ return tag.name == selector_part
279
+
280
+ def _recursive_select(element, selector_parts):
281
+ if not selector_parts:
282
+ results.append(element)
283
+ return
284
+
285
+ current_selector = selector_parts[0]
286
+ remaining_selectors = selector_parts[1:]
287
+
288
+ if _match_selector(element, current_selector):
289
+ if not remaining_selectors:
290
+ results.append(element)
291
+ else:
292
+ for child in element.contents:
293
+ if isinstance(child, Tag):
294
+ _recursive_select(child, remaining_selectors)
295
+
296
+ for child in self.contents:
297
+ if isinstance(child, Tag):
298
+ _recursive_select(child, parts)
299
+
300
+ return results
301
+
302
+ def select_one(self, selector: str) -> Optional['Tag']:
303
+ """
304
+ Select the first element matching the CSS selector.
305
+
306
+ Args:
307
+ selector (str): CSS selector string
308
+
309
+ Returns:
310
+ Tag or None: First matching element
311
+ """
312
+ results = self.select(selector)
313
+ return results[0] if results else None
314
+
315
+ def get_text(self, separator=' ', strip=False, types=None) -> str:
316
+ """
317
+ Extract text from the tag and its descendants.
318
+ Enhanced to support more flexible text extraction.
319
+
320
+ Args:
321
+ separator (str, optional): Text separator
322
+ strip (bool, optional): Strip whitespace
323
+ types (list, optional): Types of content to extract
324
+
325
+ Returns:
326
+ str: Extracted text
327
+ """
328
+ texts = []
329
+ for content in self.contents:
330
+ # Support filtering by content type
331
+ if types is None or type(content) in types:
332
+ if isinstance(content, NavigableString):
333
+ texts.append(str(content))
334
+ elif isinstance(content, Tag):
335
+ texts.append(content.get_text(separator, strip))
336
+
337
+ text = separator.join(texts)
338
+ text = re.sub(r'\n\n+', '\n', text) # Replace multiple newlines with single newlines
339
+ return text.strip() if strip else text
340
+
341
+ def find_text(self, pattern: Union[str, re.Pattern], **kwargs) -> Optional[str]:
342
+ """
343
+ Find the first text matching a pattern.
344
+
345
+ Args:
346
+ pattern (str or re.Pattern): Pattern to match
347
+ **kwargs: Additional arguments for get_text()
348
+
349
+ Returns:
350
+ str or None: First matching text
351
+ """
352
+ text = self.get_text(**kwargs)
353
+
354
+ if isinstance(pattern, str):
355
+ return pattern if pattern in text else None
356
+ elif isinstance(pattern, re.Pattern):
357
+ match = pattern.search(text)
358
+ return match.group(0) if match else None
359
+
360
+ def replace_text(self, old: Union[str, re.Pattern], new: str, **kwargs) -> str:
361
+ """
362
+ Replace text matching a pattern.
363
+
364
+ Args:
365
+ old (str or re.Pattern): Pattern to replace
366
+ new (str): Replacement text
367
+ **kwargs: Additional arguments for get_text()
368
+
369
+ Returns:
370
+ str: Modified text
371
+ """
372
+ text = self.get_text(**kwargs)
373
+
374
+ if isinstance(old, str):
375
+ return text.replace(old, new)
376
+ elif isinstance(old, re.Pattern):
377
+ return old.sub(new, text)
378
+
379
+ def get(self, key: str, default: Any = None) -> Any:
380
+ """
381
+ Get an attribute value.
382
+
383
+ Args:
384
+ key (str): Attribute name
385
+ default (Any, optional): Default value if attribute not found
386
+
387
+ Returns:
388
+ Any: Attribute value or default
389
+ """
390
+ return self.attrs.get(key, default)
391
+
392
+ def decompose(self) -> None:
393
+ """Remove the tag and its contents from the document."""
394
+ if self.parent:
395
+ self.parent.contents.remove(self)
396
+
397
+ def extract(self) -> 'Tag':
398
+ """
399
+ Remove the tag from the document and return it.
400
+
401
+ Returns:
402
+ Tag: Extracted tag
403
+ """
404
+ self.decompose()
405
+ return self
406
+
407
+ def clear(self) -> None:
408
+ """Remove all contents of the tag."""
409
+ self.contents.clear()
410
+
411
+ def replace_with(self, new_tag: 'Tag') -> None:
412
+ """
413
+ Replace this tag with another tag.
414
+
415
+ Args:
416
+ new_tag (Tag): Tag to replace the current tag
417
+ """
418
+ if self.parent:
419
+ index = self.parent.contents.index(self)
420
+ self.parent.contents[index] = new_tag
421
+ new_tag.parent = self.parent
422
+
423
+ def decode_contents(self, eventual_encoding='utf-8') -> str:
424
+ """
425
+ Decode the contents of the tag to a string.
426
+
427
+ Args:
428
+ eventual_encoding (str, optional): Encoding to use
429
+
430
+ Returns:
431
+ str: Decoded contents
432
+ """
433
+ return ''.join(str(content) for content in self.contents)
434
+
435
+ def prettify(self, formatter='minimal') -> str:
436
+ """
437
+ Return a nicely formatted representation of the tag.
438
+
439
+ Args:
440
+ formatter (str, optional): Formatting style
441
+
442
+ Returns:
443
+ str: Prettified tag representation
444
+ """
445
+ def _prettify(tag, indent=0):
446
+ result = ' ' * indent + f'<{tag.name}'
447
+ for k, v in tag.attrs.items():
448
+ result += f' {k}="{v}"'
449
+ result += '>\n'
450
+
451
+ for content in tag.contents:
452
+ if isinstance(content, Tag):
453
+ result += _prettify(content, indent + 2)
454
+ else:
455
+ result += ' ' * (indent + 2) + str(content) + '\n'
456
+
457
+ result += ' ' * indent + f'</{tag.name}>\n'
458
+ return result
459
+
460
460
  return _prettify(self)