webscout 8.2.8__py3-none-any.whl → 8.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- webscout/AIauto.py +32 -14
- webscout/AIbase.py +96 -37
- webscout/AIutel.py +491 -87
- webscout/Bard.py +441 -323
- webscout/Extra/GitToolkit/__init__.py +10 -10
- webscout/Extra/YTToolkit/ytapi/video.py +232 -232
- webscout/Litlogger/README.md +10 -0
- webscout/Litlogger/__init__.py +7 -59
- webscout/Litlogger/formats.py +4 -0
- webscout/Litlogger/handlers.py +103 -0
- webscout/Litlogger/levels.py +13 -0
- webscout/Litlogger/logger.py +92 -0
- webscout/Provider/AISEARCH/Perplexity.py +332 -358
- webscout/Provider/AISEARCH/felo_search.py +9 -35
- webscout/Provider/AISEARCH/genspark_search.py +30 -56
- webscout/Provider/AISEARCH/hika_search.py +4 -16
- webscout/Provider/AISEARCH/iask_search.py +410 -436
- webscout/Provider/AISEARCH/monica_search.py +4 -30
- webscout/Provider/AISEARCH/scira_search.py +6 -32
- webscout/Provider/AISEARCH/webpilotai_search.py +38 -64
- webscout/Provider/Blackboxai.py +153 -35
- webscout/Provider/Deepinfra.py +339 -339
- webscout/Provider/ExaChat.py +358 -358
- webscout/Provider/Gemini.py +169 -169
- webscout/Provider/GithubChat.py +1 -2
- webscout/Provider/Glider.py +3 -3
- webscout/Provider/HeckAI.py +171 -81
- webscout/Provider/OPENAI/BLACKBOXAI.py +766 -735
- webscout/Provider/OPENAI/Cloudflare.py +7 -7
- webscout/Provider/OPENAI/FreeGemini.py +6 -5
- webscout/Provider/OPENAI/NEMOTRON.py +8 -20
- webscout/Provider/OPENAI/Qwen3.py +283 -0
- webscout/Provider/OPENAI/README.md +952 -1253
- webscout/Provider/OPENAI/TwoAI.py +357 -0
- webscout/Provider/OPENAI/__init__.py +5 -1
- webscout/Provider/OPENAI/ai4chat.py +40 -40
- webscout/Provider/OPENAI/api.py +808 -649
- webscout/Provider/OPENAI/c4ai.py +3 -3
- webscout/Provider/OPENAI/chatgpt.py +555 -555
- webscout/Provider/OPENAI/chatgptclone.py +493 -487
- webscout/Provider/OPENAI/chatsandbox.py +4 -3
- webscout/Provider/OPENAI/copilot.py +242 -0
- webscout/Provider/OPENAI/deepinfra.py +5 -2
- webscout/Provider/OPENAI/e2b.py +63 -5
- webscout/Provider/OPENAI/exaai.py +416 -410
- webscout/Provider/OPENAI/exachat.py +444 -443
- webscout/Provider/OPENAI/freeaichat.py +2 -2
- webscout/Provider/OPENAI/glider.py +5 -2
- webscout/Provider/OPENAI/groq.py +5 -2
- webscout/Provider/OPENAI/heckai.py +308 -307
- webscout/Provider/OPENAI/mcpcore.py +8 -2
- webscout/Provider/OPENAI/multichat.py +4 -4
- webscout/Provider/OPENAI/netwrck.py +6 -5
- webscout/Provider/OPENAI/oivscode.py +287 -0
- webscout/Provider/OPENAI/opkfc.py +496 -496
- webscout/Provider/OPENAI/pydantic_imports.py +172 -0
- webscout/Provider/OPENAI/scirachat.py +15 -9
- webscout/Provider/OPENAI/sonus.py +304 -303
- webscout/Provider/OPENAI/standardinput.py +433 -433
- webscout/Provider/OPENAI/textpollinations.py +4 -4
- webscout/Provider/OPENAI/toolbaz.py +413 -413
- webscout/Provider/OPENAI/typefully.py +3 -3
- webscout/Provider/OPENAI/typegpt.py +11 -5
- webscout/Provider/OPENAI/uncovrAI.py +463 -462
- webscout/Provider/OPENAI/utils.py +90 -79
- webscout/Provider/OPENAI/venice.py +431 -425
- webscout/Provider/OPENAI/wisecat.py +387 -381
- webscout/Provider/OPENAI/writecream.py +3 -3
- webscout/Provider/OPENAI/x0gpt.py +365 -378
- webscout/Provider/OPENAI/yep.py +39 -13
- webscout/Provider/TTI/README.md +55 -101
- webscout/Provider/TTI/__init__.py +4 -9
- webscout/Provider/TTI/aiarta.py +365 -0
- webscout/Provider/TTI/artbit.py +0 -0
- webscout/Provider/TTI/base.py +64 -0
- webscout/Provider/TTI/fastflux.py +200 -0
- webscout/Provider/TTI/magicstudio.py +201 -0
- webscout/Provider/TTI/piclumen.py +203 -0
- webscout/Provider/TTI/pixelmuse.py +225 -0
- webscout/Provider/TTI/pollinations.py +221 -0
- webscout/Provider/TTI/utils.py +11 -0
- webscout/Provider/TTS/__init__.py +2 -1
- webscout/Provider/TTS/base.py +159 -159
- webscout/Provider/TTS/openai_fm.py +129 -0
- webscout/Provider/TextPollinationsAI.py +308 -308
- webscout/Provider/TwoAI.py +239 -44
- webscout/Provider/UNFINISHED/Youchat.py +330 -330
- webscout/Provider/UNFINISHED/puterjs.py +635 -0
- webscout/Provider/UNFINISHED/test_lmarena.py +119 -119
- webscout/Provider/Writecream.py +246 -246
- webscout/Provider/__init__.py +2 -0
- webscout/Provider/ai4chat.py +33 -8
- webscout/Provider/koala.py +169 -169
- webscout/Provider/oivscode.py +309 -0
- webscout/Provider/samurai.py +3 -2
- webscout/Provider/typegpt.py +3 -3
- webscout/Provider/uncovr.py +368 -368
- webscout/client.py +70 -0
- webscout/litprinter/__init__.py +58 -58
- webscout/optimizers.py +419 -419
- webscout/scout/README.md +3 -1
- webscout/scout/core/crawler.py +134 -64
- webscout/scout/core/scout.py +148 -109
- webscout/scout/element.py +106 -88
- webscout/swiftcli/Readme.md +323 -323
- webscout/swiftcli/plugins/manager.py +9 -2
- webscout/version.py +1 -1
- webscout/zeroart/__init__.py +134 -134
- webscout/zeroart/effects.py +100 -100
- webscout/zeroart/fonts.py +1238 -1238
- {webscout-8.2.8.dist-info → webscout-8.2.9.dist-info}/METADATA +159 -35
- {webscout-8.2.8.dist-info → webscout-8.2.9.dist-info}/RECORD +116 -161
- {webscout-8.2.8.dist-info → webscout-8.2.9.dist-info}/WHEEL +1 -1
- {webscout-8.2.8.dist-info → webscout-8.2.9.dist-info}/entry_points.txt +1 -0
- webscout/Litlogger/Readme.md +0 -175
- webscout/Litlogger/core/__init__.py +0 -6
- webscout/Litlogger/core/level.py +0 -23
- webscout/Litlogger/core/logger.py +0 -165
- webscout/Litlogger/handlers/__init__.py +0 -12
- webscout/Litlogger/handlers/console.py +0 -33
- webscout/Litlogger/handlers/file.py +0 -143
- webscout/Litlogger/handlers/network.py +0 -173
- webscout/Litlogger/styles/__init__.py +0 -7
- webscout/Litlogger/styles/colors.py +0 -249
- webscout/Litlogger/styles/formats.py +0 -458
- webscout/Litlogger/styles/text.py +0 -87
- webscout/Litlogger/utils/__init__.py +0 -6
- webscout/Litlogger/utils/detectors.py +0 -153
- webscout/Litlogger/utils/formatters.py +0 -200
- webscout/Provider/TTI/AiForce/README.md +0 -159
- webscout/Provider/TTI/AiForce/__init__.py +0 -22
- webscout/Provider/TTI/AiForce/async_aiforce.py +0 -224
- webscout/Provider/TTI/AiForce/sync_aiforce.py +0 -245
- webscout/Provider/TTI/FreeAIPlayground/README.md +0 -99
- webscout/Provider/TTI/FreeAIPlayground/__init__.py +0 -9
- webscout/Provider/TTI/FreeAIPlayground/async_freeaiplayground.py +0 -181
- webscout/Provider/TTI/FreeAIPlayground/sync_freeaiplayground.py +0 -180
- webscout/Provider/TTI/ImgSys/README.md +0 -174
- webscout/Provider/TTI/ImgSys/__init__.py +0 -23
- webscout/Provider/TTI/ImgSys/async_imgsys.py +0 -202
- webscout/Provider/TTI/ImgSys/sync_imgsys.py +0 -195
- webscout/Provider/TTI/MagicStudio/README.md +0 -101
- webscout/Provider/TTI/MagicStudio/__init__.py +0 -2
- webscout/Provider/TTI/MagicStudio/async_magicstudio.py +0 -111
- webscout/Provider/TTI/MagicStudio/sync_magicstudio.py +0 -109
- webscout/Provider/TTI/Nexra/README.md +0 -155
- webscout/Provider/TTI/Nexra/__init__.py +0 -22
- webscout/Provider/TTI/Nexra/async_nexra.py +0 -286
- webscout/Provider/TTI/Nexra/sync_nexra.py +0 -258
- webscout/Provider/TTI/PollinationsAI/README.md +0 -146
- webscout/Provider/TTI/PollinationsAI/__init__.py +0 -23
- webscout/Provider/TTI/PollinationsAI/async_pollinations.py +0 -311
- webscout/Provider/TTI/PollinationsAI/sync_pollinations.py +0 -265
- webscout/Provider/TTI/aiarta/README.md +0 -134
- webscout/Provider/TTI/aiarta/__init__.py +0 -2
- webscout/Provider/TTI/aiarta/async_aiarta.py +0 -482
- webscout/Provider/TTI/aiarta/sync_aiarta.py +0 -440
- webscout/Provider/TTI/artbit/README.md +0 -100
- webscout/Provider/TTI/artbit/__init__.py +0 -22
- webscout/Provider/TTI/artbit/async_artbit.py +0 -155
- webscout/Provider/TTI/artbit/sync_artbit.py +0 -148
- webscout/Provider/TTI/fastflux/README.md +0 -129
- webscout/Provider/TTI/fastflux/__init__.py +0 -22
- webscout/Provider/TTI/fastflux/async_fastflux.py +0 -261
- webscout/Provider/TTI/fastflux/sync_fastflux.py +0 -252
- webscout/Provider/TTI/huggingface/README.md +0 -114
- webscout/Provider/TTI/huggingface/__init__.py +0 -22
- webscout/Provider/TTI/huggingface/async_huggingface.py +0 -199
- webscout/Provider/TTI/huggingface/sync_huggingface.py +0 -195
- webscout/Provider/TTI/piclumen/README.md +0 -161
- webscout/Provider/TTI/piclumen/__init__.py +0 -23
- webscout/Provider/TTI/piclumen/async_piclumen.py +0 -268
- webscout/Provider/TTI/piclumen/sync_piclumen.py +0 -233
- webscout/Provider/TTI/pixelmuse/README.md +0 -79
- webscout/Provider/TTI/pixelmuse/__init__.py +0 -4
- webscout/Provider/TTI/pixelmuse/async_pixelmuse.py +0 -249
- webscout/Provider/TTI/pixelmuse/sync_pixelmuse.py +0 -182
- webscout/Provider/TTI/talkai/README.md +0 -139
- webscout/Provider/TTI/talkai/__init__.py +0 -4
- webscout/Provider/TTI/talkai/async_talkai.py +0 -229
- webscout/Provider/TTI/talkai/sync_talkai.py +0 -207
- webscout/Provider/UNFINISHED/oivscode.py +0 -351
- {webscout-8.2.8.dist-info → webscout-8.2.9.dist-info}/licenses/LICENSE.md +0 -0
- {webscout-8.2.8.dist-info → webscout-8.2.9.dist-info}/top_level.txt +0 -0
webscout/scout/element.py
CHANGED
|
@@ -3,7 +3,8 @@ Scout Element Module - Advanced HTML Element Representation
|
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
5
|
import re
|
|
6
|
-
from typing import
|
|
6
|
+
from typing import Any, Dict, List, Optional, Union
|
|
7
|
+
|
|
7
8
|
|
|
8
9
|
class NavigableString(str):
|
|
9
10
|
"""
|
|
@@ -13,44 +14,44 @@ class NavigableString(str):
|
|
|
13
14
|
def __new__(cls, text: str):
|
|
14
15
|
"""
|
|
15
16
|
Create a new NavigableString instance.
|
|
16
|
-
|
|
17
|
+
|
|
17
18
|
Args:
|
|
18
19
|
text (str): String content
|
|
19
20
|
"""
|
|
20
21
|
return str.__new__(cls, text)
|
|
21
|
-
|
|
22
|
+
|
|
22
23
|
def __init__(self, text: str):
|
|
23
24
|
"""
|
|
24
25
|
Initialize a navigable string.
|
|
25
|
-
|
|
26
|
+
|
|
26
27
|
Args:
|
|
27
28
|
text (str): String content
|
|
28
29
|
"""
|
|
29
30
|
self.parent = None
|
|
30
|
-
|
|
31
|
+
|
|
31
32
|
def __repr__(self):
|
|
32
33
|
"""String representation."""
|
|
33
34
|
return f"NavigableString({super().__repr__()})"
|
|
34
|
-
|
|
35
|
+
|
|
35
36
|
def __add__(self, other):
|
|
36
37
|
"""
|
|
37
38
|
Allow concatenation of NavigableString with other strings.
|
|
38
|
-
|
|
39
|
+
|
|
39
40
|
Args:
|
|
40
41
|
other (str): String to concatenate
|
|
41
|
-
|
|
42
|
+
|
|
42
43
|
Returns:
|
|
43
44
|
str: Concatenated string
|
|
44
45
|
"""
|
|
45
46
|
return str(self) + str(other)
|
|
46
|
-
|
|
47
|
+
|
|
47
48
|
def strip(self, chars=None):
|
|
48
49
|
"""
|
|
49
50
|
Strip whitespace or specified characters.
|
|
50
|
-
|
|
51
|
+
|
|
51
52
|
Args:
|
|
52
53
|
chars (str, optional): Characters to strip
|
|
53
|
-
|
|
54
|
+
|
|
54
55
|
Returns:
|
|
55
56
|
str: Stripped string
|
|
56
57
|
"""
|
|
@@ -64,7 +65,7 @@ class Tag:
|
|
|
64
65
|
def __init__(self, name: str, attrs: Dict[str, str] = None):
|
|
65
66
|
"""
|
|
66
67
|
Initialize a Tag with name and attributes.
|
|
67
|
-
|
|
68
|
+
|
|
68
69
|
Args:
|
|
69
70
|
name (str): Tag name
|
|
70
71
|
attrs (dict, optional): Tag attributes
|
|
@@ -74,116 +75,116 @@ class Tag:
|
|
|
74
75
|
self.contents = []
|
|
75
76
|
self.parent = None
|
|
76
77
|
self.string = None # For single string content
|
|
77
|
-
|
|
78
|
+
|
|
78
79
|
def __str__(self):
|
|
79
80
|
"""String representation of the tag."""
|
|
80
81
|
return self.decode_contents()
|
|
81
|
-
|
|
82
|
+
|
|
82
83
|
def __repr__(self):
|
|
83
84
|
"""Detailed representation of the tag."""
|
|
84
85
|
return f"<{self.name} {self.attrs}>"
|
|
85
|
-
|
|
86
|
+
|
|
86
87
|
def __call__(self, *args, **kwargs):
|
|
87
88
|
"""
|
|
88
89
|
Allows calling find_all directly on the tag.
|
|
89
90
|
Mimics BeautifulSoup's behavior.
|
|
90
91
|
"""
|
|
91
92
|
return self.find_all(*args, **kwargs)
|
|
92
|
-
|
|
93
|
+
|
|
93
94
|
def __contains__(self, item):
|
|
94
95
|
"""
|
|
95
96
|
Check if an item is in the tag's contents.
|
|
96
|
-
|
|
97
|
+
|
|
97
98
|
Args:
|
|
98
99
|
item: Item to search for
|
|
99
|
-
|
|
100
|
+
|
|
100
101
|
Returns:
|
|
101
102
|
bool: True if item is in contents, False otherwise
|
|
102
103
|
"""
|
|
103
104
|
return item in self.contents
|
|
104
|
-
|
|
105
|
+
|
|
105
106
|
def __getitem__(self, key):
|
|
106
107
|
"""
|
|
107
108
|
Get an attribute value using dictionary-like access.
|
|
108
|
-
|
|
109
|
+
|
|
109
110
|
Args:
|
|
110
111
|
key (str): Attribute name
|
|
111
|
-
|
|
112
|
+
|
|
112
113
|
Returns:
|
|
113
114
|
Any: Attribute value
|
|
114
115
|
"""
|
|
115
116
|
return self.attrs[key]
|
|
116
|
-
|
|
117
|
+
|
|
117
118
|
def __iter__(self):
|
|
118
119
|
"""
|
|
119
120
|
Iterate through tag's contents.
|
|
120
|
-
|
|
121
|
+
|
|
121
122
|
Returns:
|
|
122
123
|
Iterator: Contents of the tag
|
|
123
124
|
"""
|
|
124
125
|
return iter(self.contents)
|
|
125
|
-
|
|
126
|
+
|
|
126
127
|
def __eq__(self, other):
|
|
127
128
|
"""
|
|
128
129
|
Compare tags based on name and attributes.
|
|
129
|
-
|
|
130
|
+
|
|
130
131
|
Args:
|
|
131
132
|
other (Tag): Tag to compare
|
|
132
|
-
|
|
133
|
+
|
|
133
134
|
Returns:
|
|
134
135
|
bool: True if tags are equivalent
|
|
135
136
|
"""
|
|
136
137
|
if not isinstance(other, Tag):
|
|
137
138
|
return False
|
|
138
139
|
return (
|
|
139
|
-
self.name == other.name and
|
|
140
|
-
self.attrs == other.attrs and
|
|
140
|
+
self.name == other.name and
|
|
141
|
+
self.attrs == other.attrs and
|
|
141
142
|
str(self) == str(other)
|
|
142
143
|
)
|
|
143
|
-
|
|
144
|
+
|
|
144
145
|
def __hash__(self):
|
|
145
146
|
"""
|
|
146
147
|
Generate a hash for the tag.
|
|
147
|
-
|
|
148
|
+
|
|
148
149
|
Returns:
|
|
149
150
|
int: Hash value
|
|
150
151
|
"""
|
|
151
152
|
return hash((self.name, frozenset(self.attrs.items()), str(self)))
|
|
152
|
-
|
|
153
|
+
|
|
153
154
|
def find(self, name=None, attrs={}, recursive=True, text=None, **kwargs) -> Optional['Tag']:
|
|
154
155
|
"""
|
|
155
156
|
Find the first matching child element.
|
|
156
157
|
Enhanced with more flexible matching.
|
|
157
|
-
|
|
158
|
+
|
|
158
159
|
Args:
|
|
159
160
|
name (str, optional): Tag name to search for
|
|
160
161
|
attrs (dict, optional): Attributes to match
|
|
161
162
|
recursive (bool, optional): Search recursively
|
|
162
163
|
text (str, optional): Text content to match
|
|
163
|
-
|
|
164
|
+
|
|
164
165
|
Returns:
|
|
165
166
|
Tag or None: First matching element
|
|
166
167
|
"""
|
|
167
168
|
results = self.find_all(name, attrs, recursive, text, limit=1, **kwargs)
|
|
168
169
|
return results[0] if results else None
|
|
169
|
-
|
|
170
|
+
|
|
170
171
|
def find_all(self, name=None, attrs={}, recursive=True, text=None, limit=None, **kwargs) -> List['Tag']:
|
|
171
172
|
"""
|
|
172
173
|
Find all matching child elements.
|
|
173
174
|
Enhanced with more flexible matching and BeautifulSoup-like features.
|
|
174
|
-
|
|
175
|
+
|
|
175
176
|
Args:
|
|
176
177
|
name (str, optional): Tag name to search for
|
|
177
178
|
attrs (dict, optional): Attributes to match
|
|
178
179
|
recursive (bool, optional): Search recursively
|
|
179
180
|
text (str, optional): Text content to match
|
|
180
181
|
limit (int, optional): Maximum number of results
|
|
181
|
-
|
|
182
|
+
|
|
182
183
|
Returns:
|
|
183
184
|
List[Tag]: List of matching elements
|
|
184
185
|
"""
|
|
185
186
|
results = []
|
|
186
|
-
|
|
187
|
+
|
|
187
188
|
def _match(tag):
|
|
188
189
|
# Check tag name with case-insensitive and regex support
|
|
189
190
|
if name:
|
|
@@ -193,7 +194,7 @@ class Tag:
|
|
|
193
194
|
elif isinstance(name, re.Pattern):
|
|
194
195
|
if not name.search(tag.name):
|
|
195
196
|
return False
|
|
196
|
-
|
|
197
|
+
|
|
197
198
|
# Check attributes with more flexible matching
|
|
198
199
|
for k, v in attrs.items():
|
|
199
200
|
# Handle special attribute matching
|
|
@@ -209,12 +210,15 @@ class Tag:
|
|
|
209
210
|
else:
|
|
210
211
|
# Regex or exact match for other attributes
|
|
211
212
|
tag_attr = tag.attrs.get(k)
|
|
212
|
-
if
|
|
213
|
-
if
|
|
213
|
+
if v is True:
|
|
214
|
+
if tag_attr is None:
|
|
215
|
+
return False
|
|
216
|
+
elif isinstance(v, re.Pattern):
|
|
217
|
+
if tag_attr is None or not v.search(str(tag_attr)):
|
|
214
218
|
return False
|
|
215
219
|
elif tag_attr != v:
|
|
216
220
|
return False
|
|
217
|
-
|
|
221
|
+
|
|
218
222
|
# Check text content
|
|
219
223
|
if text:
|
|
220
224
|
tag_text = tag.get_text(strip=True)
|
|
@@ -222,31 +226,31 @@ class Tag:
|
|
|
222
226
|
return False
|
|
223
227
|
elif isinstance(text, re.Pattern) and not text.search(tag_text):
|
|
224
228
|
return False
|
|
225
|
-
|
|
229
|
+
|
|
226
230
|
return True
|
|
227
|
-
|
|
231
|
+
|
|
228
232
|
def _search(element):
|
|
229
233
|
if _match(element):
|
|
230
234
|
results.append(element)
|
|
231
235
|
if limit and len(results) == limit:
|
|
232
236
|
return
|
|
233
|
-
|
|
237
|
+
|
|
234
238
|
if recursive:
|
|
235
239
|
for child in element.contents:
|
|
236
240
|
if isinstance(child, Tag):
|
|
237
241
|
_search(child)
|
|
238
|
-
|
|
242
|
+
|
|
239
243
|
_search(self)
|
|
240
244
|
return results
|
|
241
|
-
|
|
245
|
+
|
|
242
246
|
def select(self, selector: str) -> List['Tag']:
|
|
243
247
|
"""
|
|
244
248
|
Select elements using CSS selector.
|
|
245
249
|
Enhanced to support more complex selectors.
|
|
246
|
-
|
|
250
|
+
|
|
247
251
|
Args:
|
|
248
252
|
selector (str): CSS selector string
|
|
249
|
-
|
|
253
|
+
|
|
250
254
|
Returns:
|
|
251
255
|
List[Tag]: List of matching elements
|
|
252
256
|
"""
|
|
@@ -254,7 +258,7 @@ class Tag:
|
|
|
254
258
|
# This is a simplified implementation and might need more robust parsing
|
|
255
259
|
parts = re.split(r'\s+', selector.strip())
|
|
256
260
|
results = []
|
|
257
|
-
|
|
261
|
+
|
|
258
262
|
def _match_selector(tag, selector_part):
|
|
259
263
|
# Support more complex selectors
|
|
260
264
|
if selector_part.startswith('.'):
|
|
@@ -276,15 +280,15 @@ class Tag:
|
|
|
276
280
|
else:
|
|
277
281
|
# Tag selector
|
|
278
282
|
return tag.name == selector_part
|
|
279
|
-
|
|
283
|
+
|
|
280
284
|
def _recursive_select(element, selector_parts):
|
|
281
285
|
if not selector_parts:
|
|
282
286
|
results.append(element)
|
|
283
287
|
return
|
|
284
|
-
|
|
288
|
+
|
|
285
289
|
current_selector = selector_parts[0]
|
|
286
290
|
remaining_selectors = selector_parts[1:]
|
|
287
|
-
|
|
291
|
+
|
|
288
292
|
if _match_selector(element, current_selector):
|
|
289
293
|
if not remaining_selectors:
|
|
290
294
|
results.append(element)
|
|
@@ -292,36 +296,36 @@ class Tag:
|
|
|
292
296
|
for child in element.contents:
|
|
293
297
|
if isinstance(child, Tag):
|
|
294
298
|
_recursive_select(child, remaining_selectors)
|
|
295
|
-
|
|
299
|
+
|
|
296
300
|
for child in self.contents:
|
|
297
301
|
if isinstance(child, Tag):
|
|
298
302
|
_recursive_select(child, parts)
|
|
299
|
-
|
|
303
|
+
|
|
300
304
|
return results
|
|
301
|
-
|
|
305
|
+
|
|
302
306
|
def select_one(self, selector: str) -> Optional['Tag']:
|
|
303
307
|
"""
|
|
304
308
|
Select the first element matching the CSS selector.
|
|
305
|
-
|
|
309
|
+
|
|
306
310
|
Args:
|
|
307
311
|
selector (str): CSS selector string
|
|
308
|
-
|
|
312
|
+
|
|
309
313
|
Returns:
|
|
310
314
|
Tag or None: First matching element
|
|
311
315
|
"""
|
|
312
316
|
results = self.select(selector)
|
|
313
317
|
return results[0] if results else None
|
|
314
|
-
|
|
318
|
+
|
|
315
319
|
def get_text(self, separator=' ', strip=False, types=None) -> str:
|
|
316
320
|
"""
|
|
317
321
|
Extract text from the tag and its descendants.
|
|
318
322
|
Enhanced to support more flexible text extraction.
|
|
319
|
-
|
|
323
|
+
|
|
320
324
|
Args:
|
|
321
325
|
separator (str, optional): Text separator
|
|
322
326
|
strip (bool, optional): Strip whitespace
|
|
323
327
|
types (list, optional): Types of content to extract
|
|
324
|
-
|
|
328
|
+
|
|
325
329
|
Returns:
|
|
326
330
|
str: Extracted text
|
|
327
331
|
"""
|
|
@@ -333,85 +337,99 @@ class Tag:
|
|
|
333
337
|
texts.append(str(content))
|
|
334
338
|
elif isinstance(content, Tag):
|
|
335
339
|
texts.append(content.get_text(separator, strip))
|
|
336
|
-
|
|
340
|
+
|
|
337
341
|
text = separator.join(texts)
|
|
338
342
|
text = re.sub(r'\n\n+', '\n', text) # Replace multiple newlines with single newlines
|
|
339
343
|
return text.strip() if strip else text
|
|
340
|
-
|
|
344
|
+
|
|
341
345
|
def find_text(self, pattern: Union[str, re.Pattern], **kwargs) -> Optional[str]:
|
|
342
346
|
"""
|
|
343
347
|
Find the first text matching a pattern.
|
|
344
|
-
|
|
348
|
+
|
|
345
349
|
Args:
|
|
346
350
|
pattern (str or re.Pattern): Pattern to match
|
|
347
351
|
**kwargs: Additional arguments for get_text()
|
|
348
|
-
|
|
352
|
+
|
|
349
353
|
Returns:
|
|
350
354
|
str or None: First matching text
|
|
351
355
|
"""
|
|
352
356
|
text = self.get_text(**kwargs)
|
|
353
|
-
|
|
357
|
+
|
|
354
358
|
if isinstance(pattern, str):
|
|
355
359
|
return pattern if pattern in text else None
|
|
356
360
|
elif isinstance(pattern, re.Pattern):
|
|
357
361
|
match = pattern.search(text)
|
|
358
362
|
return match.group(0) if match else None
|
|
359
|
-
|
|
363
|
+
|
|
360
364
|
def replace_text(self, old: Union[str, re.Pattern], new: str, **kwargs) -> str:
|
|
361
365
|
"""
|
|
362
366
|
Replace text matching a pattern.
|
|
363
|
-
|
|
367
|
+
|
|
364
368
|
Args:
|
|
365
369
|
old (str or re.Pattern): Pattern to replace
|
|
366
370
|
new (str): Replacement text
|
|
367
371
|
**kwargs: Additional arguments for get_text()
|
|
368
|
-
|
|
372
|
+
|
|
369
373
|
Returns:
|
|
370
374
|
str: Modified text
|
|
371
375
|
"""
|
|
372
376
|
text = self.get_text(**kwargs)
|
|
373
|
-
|
|
377
|
+
|
|
374
378
|
if isinstance(old, str):
|
|
375
379
|
return text.replace(old, new)
|
|
376
380
|
elif isinstance(old, re.Pattern):
|
|
377
381
|
return old.sub(new, text)
|
|
378
|
-
|
|
382
|
+
|
|
379
383
|
def get(self, key: str, default: Any = None) -> Any:
|
|
380
384
|
"""
|
|
381
385
|
Get an attribute value.
|
|
382
|
-
|
|
386
|
+
|
|
383
387
|
Args:
|
|
384
388
|
key (str): Attribute name
|
|
385
389
|
default (Any, optional): Default value if attribute not found
|
|
386
|
-
|
|
390
|
+
|
|
387
391
|
Returns:
|
|
388
392
|
Any: Attribute value or default
|
|
389
393
|
"""
|
|
390
394
|
return self.attrs.get(key, default)
|
|
391
|
-
|
|
395
|
+
|
|
392
396
|
def decompose(self) -> None:
|
|
393
397
|
"""Remove the tag and its contents from the document."""
|
|
394
398
|
if self.parent:
|
|
395
399
|
self.parent.contents.remove(self)
|
|
396
|
-
|
|
400
|
+
|
|
397
401
|
def extract(self) -> 'Tag':
|
|
398
402
|
"""
|
|
399
403
|
Remove the tag from the document and return it.
|
|
400
|
-
|
|
404
|
+
|
|
401
405
|
Returns:
|
|
402
406
|
Tag: Extracted tag
|
|
403
407
|
"""
|
|
404
408
|
self.decompose()
|
|
405
409
|
return self
|
|
406
|
-
|
|
410
|
+
|
|
407
411
|
def clear(self) -> None:
|
|
408
412
|
"""Remove all contents of the tag."""
|
|
409
413
|
self.contents.clear()
|
|
410
|
-
|
|
414
|
+
|
|
415
|
+
def append(self, new_child: Union['Tag', NavigableString, str]) -> None:
|
|
416
|
+
"""Append a new child to this tag."""
|
|
417
|
+
if isinstance(new_child, str):
|
|
418
|
+
new_child = NavigableString(new_child)
|
|
419
|
+
new_child.parent = self
|
|
420
|
+
self.contents.append(new_child)
|
|
421
|
+
|
|
422
|
+
def insert(self, index: int, new_child: Union['Tag', NavigableString, str]) -> None:
|
|
423
|
+
"""Insert a new child at the given index."""
|
|
424
|
+
if isinstance(new_child, str):
|
|
425
|
+
new_child = NavigableString(new_child)
|
|
426
|
+
new_child.parent = self
|
|
427
|
+
self.contents.insert(index, new_child)
|
|
428
|
+
|
|
411
429
|
def replace_with(self, new_tag: 'Tag') -> None:
|
|
412
430
|
"""
|
|
413
431
|
Replace this tag with another tag.
|
|
414
|
-
|
|
432
|
+
|
|
415
433
|
Args:
|
|
416
434
|
new_tag (Tag): Tag to replace the current tag
|
|
417
435
|
"""
|
|
@@ -419,26 +437,26 @@ class Tag:
|
|
|
419
437
|
index = self.parent.contents.index(self)
|
|
420
438
|
self.parent.contents[index] = new_tag
|
|
421
439
|
new_tag.parent = self.parent
|
|
422
|
-
|
|
440
|
+
|
|
423
441
|
def decode_contents(self, eventual_encoding='utf-8') -> str:
|
|
424
442
|
"""
|
|
425
443
|
Decode the contents of the tag to a string.
|
|
426
|
-
|
|
444
|
+
|
|
427
445
|
Args:
|
|
428
446
|
eventual_encoding (str, optional): Encoding to use
|
|
429
|
-
|
|
447
|
+
|
|
430
448
|
Returns:
|
|
431
449
|
str: Decoded contents
|
|
432
450
|
"""
|
|
433
451
|
return ''.join(str(content) for content in self.contents)
|
|
434
|
-
|
|
452
|
+
|
|
435
453
|
def prettify(self, formatter='minimal') -> str:
|
|
436
454
|
"""
|
|
437
455
|
Return a nicely formatted representation of the tag.
|
|
438
|
-
|
|
456
|
+
|
|
439
457
|
Args:
|
|
440
458
|
formatter (str, optional): Formatting style
|
|
441
|
-
|
|
459
|
+
|
|
442
460
|
Returns:
|
|
443
461
|
str: Prettified tag representation
|
|
444
462
|
"""
|
|
@@ -447,14 +465,14 @@ class Tag:
|
|
|
447
465
|
for k, v in tag.attrs.items():
|
|
448
466
|
result += f' {k}="{v}"'
|
|
449
467
|
result += '>\n'
|
|
450
|
-
|
|
468
|
+
|
|
451
469
|
for content in tag.contents:
|
|
452
470
|
if isinstance(content, Tag):
|
|
453
471
|
result += _prettify(content, indent + 2)
|
|
454
472
|
else:
|
|
455
473
|
result += ' ' * (indent + 2) + str(content) + '\n'
|
|
456
|
-
|
|
474
|
+
|
|
457
475
|
result += ' ' * indent + f'</{tag.name}>\n'
|
|
458
476
|
return result
|
|
459
|
-
|
|
460
|
-
return _prettify(self)
|
|
477
|
+
|
|
478
|
+
return _prettify(self)
|