webscout 8.3__py3-none-any.whl → 8.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of webscout might be problematic. Click here for more details.
- webscout/AIauto.py +4 -4
- webscout/AIbase.py +61 -1
- webscout/AIutel.py +46 -53
- webscout/Bing_search.py +418 -0
- webscout/Extra/YTToolkit/ytapi/patterns.py +45 -45
- webscout/Extra/YTToolkit/ytapi/stream.py +1 -1
- webscout/Extra/YTToolkit/ytapi/video.py +10 -10
- webscout/Extra/autocoder/autocoder_utiles.py +1 -1
- webscout/Extra/gguf.py +706 -177
- webscout/Litlogger/formats.py +9 -0
- webscout/Litlogger/handlers.py +18 -0
- webscout/Litlogger/logger.py +43 -1
- webscout/Provider/AISEARCH/genspark_search.py +7 -7
- webscout/Provider/AISEARCH/scira_search.py +3 -2
- webscout/Provider/GeminiProxy.py +140 -0
- webscout/Provider/LambdaChat.py +7 -1
- webscout/Provider/MCPCore.py +78 -75
- webscout/Provider/OPENAI/BLACKBOXAI.py +1046 -1017
- webscout/Provider/OPENAI/GeminiProxy.py +328 -0
- webscout/Provider/OPENAI/Qwen3.py +303 -303
- webscout/Provider/OPENAI/README.md +5 -0
- webscout/Provider/OPENAI/README_AUTOPROXY.md +238 -0
- webscout/Provider/OPENAI/TogetherAI.py +355 -0
- webscout/Provider/OPENAI/__init__.py +16 -1
- webscout/Provider/OPENAI/autoproxy.py +332 -0
- webscout/Provider/OPENAI/base.py +101 -14
- webscout/Provider/OPENAI/chatgpt.py +15 -2
- webscout/Provider/OPENAI/chatgptclone.py +14 -3
- webscout/Provider/OPENAI/deepinfra.py +339 -328
- webscout/Provider/OPENAI/e2b.py +295 -74
- webscout/Provider/OPENAI/mcpcore.py +109 -70
- webscout/Provider/OPENAI/opkfc.py +18 -6
- webscout/Provider/OPENAI/scirachat.py +59 -50
- webscout/Provider/OPENAI/toolbaz.py +2 -10
- webscout/Provider/OPENAI/writecream.py +166 -166
- webscout/Provider/OPENAI/x0gpt.py +367 -367
- webscout/Provider/OPENAI/xenai.py +514 -0
- webscout/Provider/OPENAI/yep.py +389 -383
- webscout/Provider/STT/__init__.py +3 -0
- webscout/Provider/STT/base.py +281 -0
- webscout/Provider/STT/elevenlabs.py +265 -0
- webscout/Provider/TTI/__init__.py +4 -1
- webscout/Provider/TTI/aiarta.py +399 -365
- webscout/Provider/TTI/base.py +74 -2
- webscout/Provider/TTI/bing.py +231 -0
- webscout/Provider/TTI/fastflux.py +63 -30
- webscout/Provider/TTI/gpt1image.py +149 -0
- webscout/Provider/TTI/imagen.py +196 -0
- webscout/Provider/TTI/magicstudio.py +60 -29
- webscout/Provider/TTI/piclumen.py +43 -32
- webscout/Provider/TTI/pixelmuse.py +232 -225
- webscout/Provider/TTI/pollinations.py +43 -32
- webscout/Provider/TTI/together.py +287 -0
- webscout/Provider/TTI/utils.py +2 -1
- webscout/Provider/TTS/README.md +1 -0
- webscout/Provider/TTS/__init__.py +2 -1
- webscout/Provider/TTS/freetts.py +140 -0
- webscout/Provider/TTS/speechma.py +45 -39
- webscout/Provider/TogetherAI.py +366 -0
- webscout/Provider/UNFINISHED/ChutesAI.py +314 -0
- webscout/Provider/UNFINISHED/fetch_together_models.py +95 -0
- webscout/Provider/XenAI.py +324 -0
- webscout/Provider/__init__.py +8 -0
- webscout/Provider/deepseek_assistant.py +378 -0
- webscout/Provider/scira_chat.py +3 -2
- webscout/Provider/toolbaz.py +0 -1
- webscout/auth/__init__.py +44 -0
- webscout/auth/api_key_manager.py +189 -0
- webscout/auth/auth_system.py +100 -0
- webscout/auth/config.py +76 -0
- webscout/auth/database.py +400 -0
- webscout/auth/exceptions.py +67 -0
- webscout/auth/middleware.py +248 -0
- webscout/auth/models.py +130 -0
- webscout/auth/providers.py +257 -0
- webscout/auth/rate_limiter.py +254 -0
- webscout/auth/request_models.py +127 -0
- webscout/auth/request_processing.py +226 -0
- webscout/auth/routes.py +526 -0
- webscout/auth/schemas.py +103 -0
- webscout/auth/server.py +312 -0
- webscout/auth/static/favicon.svg +11 -0
- webscout/auth/swagger_ui.py +203 -0
- webscout/auth/templates/components/authentication.html +237 -0
- webscout/auth/templates/components/base.html +103 -0
- webscout/auth/templates/components/endpoints.html +750 -0
- webscout/auth/templates/components/examples.html +491 -0
- webscout/auth/templates/components/footer.html +75 -0
- webscout/auth/templates/components/header.html +27 -0
- webscout/auth/templates/components/models.html +286 -0
- webscout/auth/templates/components/navigation.html +70 -0
- webscout/auth/templates/static/api.js +455 -0
- webscout/auth/templates/static/icons.js +168 -0
- webscout/auth/templates/static/main.js +784 -0
- webscout/auth/templates/static/particles.js +201 -0
- webscout/auth/templates/static/styles.css +3353 -0
- webscout/auth/templates/static/ui.js +374 -0
- webscout/auth/templates/swagger_ui.html +170 -0
- webscout/client.py +49 -3
- webscout/litagent/Readme.md +12 -3
- webscout/litagent/agent.py +99 -62
- webscout/scout/core/scout.py +104 -26
- webscout/scout/element.py +139 -18
- webscout/swiftcli/core/cli.py +14 -3
- webscout/swiftcli/decorators/output.py +59 -9
- webscout/update_checker.py +31 -49
- webscout/version.py +1 -1
- webscout/webscout_search.py +4 -12
- webscout/webscout_search_async.py +3 -10
- webscout/yep_search.py +2 -11
- {webscout-8.3.dist-info → webscout-8.3.2.dist-info}/METADATA +41 -11
- {webscout-8.3.dist-info → webscout-8.3.2.dist-info}/RECORD +116 -68
- {webscout-8.3.dist-info → webscout-8.3.2.dist-info}/entry_points.txt +1 -1
- webscout/Provider/HF_space/__init__.py +0 -0
- webscout/Provider/HF_space/qwen_qwen2.py +0 -206
- webscout/Provider/OPENAI/api.py +0 -1035
- webscout/Provider/TTI/artbit.py +0 -0
- {webscout-8.3.dist-info → webscout-8.3.2.dist-info}/WHEEL +0 -0
- {webscout-8.3.dist-info → webscout-8.3.2.dist-info}/licenses/LICENSE.md +0 -0
- {webscout-8.3.dist-info → webscout-8.3.2.dist-info}/top_level.txt +0 -0
webscout/litagent/agent.py
CHANGED
|
@@ -2,9 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
import random
|
|
4
4
|
import threading
|
|
5
|
-
from typing import
|
|
5
|
+
from typing import Any, Dict, List, Optional
|
|
6
6
|
|
|
7
|
-
from webscout.litagent.constants import BROWSERS,
|
|
7
|
+
from webscout.litagent.constants import BROWSERS, DEVICES, FINGERPRINTS, OS_VERSIONS
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
class LitAgent:
|
|
@@ -19,6 +19,8 @@ class LitAgent:
|
|
|
19
19
|
self.agents = self._generate_agents(100) # Keep 100 agents in memory
|
|
20
20
|
self.thread_safe = thread_safe
|
|
21
21
|
self.lock = threading.RLock() if thread_safe else None
|
|
22
|
+
self.ip_pool = self._generate_ip_pool(20)
|
|
23
|
+
self._ip_index = 0
|
|
22
24
|
self._refresh_timer = None
|
|
23
25
|
self._stats = {
|
|
24
26
|
"total_generated": 100,
|
|
@@ -33,18 +35,18 @@ class LitAgent:
|
|
|
33
35
|
for _ in range(count):
|
|
34
36
|
browser = random.choice(list(BROWSERS.keys()))
|
|
35
37
|
version = random.randint(*BROWSERS[browser])
|
|
36
|
-
|
|
38
|
+
|
|
37
39
|
if browser in ['chrome', 'firefox', 'edge', 'opera', 'brave', 'vivaldi']:
|
|
38
40
|
os_type = random.choice(['windows', 'mac', 'linux'])
|
|
39
41
|
os_ver = random.choice(OS_VERSIONS[os_type])
|
|
40
|
-
|
|
42
|
+
|
|
41
43
|
if os_type == 'windows':
|
|
42
44
|
platform = f"Windows NT {os_ver}"
|
|
43
45
|
elif os_type == 'mac':
|
|
44
46
|
platform = f"Macintosh; Intel Mac OS X {os_ver}"
|
|
45
47
|
else:
|
|
46
48
|
platform = f"X11; Linux {os_ver}"
|
|
47
|
-
|
|
49
|
+
|
|
48
50
|
agent = f"Mozilla/5.0 ({platform}) AppleWebKit/537.36 (KHTML, like Gecko) "
|
|
49
51
|
if browser == 'chrome':
|
|
50
52
|
agent += f"Chrome/{version}.0.0.0 Safari/537.36"
|
|
@@ -58,7 +60,7 @@ class LitAgent:
|
|
|
58
60
|
agent += f"Chrome/{version}.0.0.0 Safari/537.36 Brave/{version}.0.0.0"
|
|
59
61
|
elif browser == 'vivaldi':
|
|
60
62
|
agent += f"Chrome/{version}.0.0.0 Safari/537.36 Vivaldi/{version}.0.{random.randint(1000, 9999)}"
|
|
61
|
-
|
|
63
|
+
|
|
62
64
|
elif browser == 'safari':
|
|
63
65
|
device = random.choice(['mac', 'ios'])
|
|
64
66
|
if device == 'mac':
|
|
@@ -69,9 +71,9 @@ class LitAgent:
|
|
|
69
71
|
device = random.choice(['iPhone', 'iPad'])
|
|
70
72
|
agent = f"Mozilla/5.0 ({device}; CPU OS {ver} like Mac OS X) "
|
|
71
73
|
agent += f"AppleWebKit/{version}.1.15 (KHTML, like Gecko) Version/{version//100}.0 Safari/{version}.1.15"
|
|
72
|
-
|
|
74
|
+
|
|
73
75
|
agents.append(agent)
|
|
74
|
-
|
|
76
|
+
|
|
75
77
|
return list(set(agents)) # Remove any duplicates
|
|
76
78
|
|
|
77
79
|
def _update_stats(self, browser_type=None, device_type=None):
|
|
@@ -107,7 +109,7 @@ class LitAgent:
|
|
|
107
109
|
name = name.lower()
|
|
108
110
|
if name not in BROWSERS:
|
|
109
111
|
return self.random()
|
|
110
|
-
|
|
112
|
+
|
|
111
113
|
if self.thread_safe and self.lock:
|
|
112
114
|
with self.lock:
|
|
113
115
|
agents = [a for a in self.agents if name in a.lower()]
|
|
@@ -153,12 +155,12 @@ class LitAgent:
|
|
|
153
155
|
if self.thread_safe and self.lock:
|
|
154
156
|
with self.lock:
|
|
155
157
|
# Focus on iPad and Android tablets
|
|
156
|
-
agents = [a for a in self.agents if 'iPad' in a or 'Android' in a and
|
|
158
|
+
agents = [a for a in self.agents if 'iPad' in a or 'Android' in a and 'Mobile' not in a]
|
|
157
159
|
agent = random.choice(agents) if agents else self.random()
|
|
158
160
|
self._update_stats(device_type="tablet")
|
|
159
161
|
return agent
|
|
160
162
|
else:
|
|
161
|
-
agents = [a for a in self.agents if 'iPad' in a or 'Android' in a and
|
|
163
|
+
agents = [a for a in self.agents if 'iPad' in a or 'Android' in a and 'Mobile' not in a]
|
|
162
164
|
agent = random.choice(agents) if agents else self.random()
|
|
163
165
|
self._update_stats(device_type="tablet")
|
|
164
166
|
return agent
|
|
@@ -174,10 +176,10 @@ class LitAgent:
|
|
|
174
176
|
elif 'Android' in tv_type:
|
|
175
177
|
agent = f"Mozilla/5.0 (Linux; Android 9; {tv_type}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36"
|
|
176
178
|
elif 'Apple' in tv_type:
|
|
177
|
-
agent =
|
|
179
|
+
agent = "Mozilla/5.0 (AppleTV; CPU like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148"
|
|
178
180
|
else:
|
|
179
181
|
agent = f"Mozilla/5.0 (Linux; {tv_type}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36"
|
|
180
|
-
|
|
182
|
+
|
|
181
183
|
self._update_stats(device_type="tv")
|
|
182
184
|
return agent
|
|
183
185
|
|
|
@@ -192,7 +194,7 @@ class LitAgent:
|
|
|
192
194
|
agent = f"Mozilla/5.0 (Nintendo Switch; {console_type}) AppleWebKit/601.6 (KHTML, like Gecko) NintendoBrowser/5.1.0.13343"
|
|
193
195
|
else:
|
|
194
196
|
agent = self.random()
|
|
195
|
-
|
|
197
|
+
|
|
196
198
|
self._update_stats(device_type="console")
|
|
197
199
|
return agent
|
|
198
200
|
|
|
@@ -215,15 +217,15 @@ class LitAgent:
|
|
|
215
217
|
def opera(self) -> str:
    """Return an Opera user agent by delegating to ``self.browser``."""
    browser_name = 'opera'
    return self.browser(browser_name)
|
|
218
|
-
|
|
220
|
+
|
|
219
221
|
def brave(self) -> str:
    """Return a Brave user agent by delegating to ``self.browser``."""
    browser_name = 'brave'
    return self.browser(browser_name)
|
|
222
|
-
|
|
224
|
+
|
|
223
225
|
def vivaldi(self) -> str:
    """Return a Vivaldi user agent by delegating to ``self.browser``."""
    browser_name = 'vivaldi'
    return self.browser(browser_name)
|
|
226
|
-
|
|
228
|
+
|
|
227
229
|
# OS-specific agents
|
|
228
230
|
def windows(self) -> str:
|
|
229
231
|
"""Get a Windows agent! 🪟"""
|
|
@@ -231,28 +233,28 @@ class LitAgent:
|
|
|
231
233
|
agent = random.choice(agents) if agents else self.random()
|
|
232
234
|
self._update_stats()
|
|
233
235
|
return agent
|
|
234
|
-
|
|
236
|
+
|
|
235
237
|
def macos(self) -> str:
    """Return a pooled user agent that mentions ``Macintosh``.

    Falls back to ``self.random()`` when the pool has no such agent, and
    records the call through ``self._update_stats()`` before returning.
    """
    mac_agents = list(filter(lambda ua: 'Macintosh' in ua, self.agents))
    if mac_agents:
        chosen = random.choice(mac_agents)
    else:
        chosen = self.random()
    self._update_stats()
    return chosen
|
|
241
|
-
|
|
243
|
+
|
|
242
244
|
def linux(self) -> str:
    """Return a pooled desktop-Linux user agent.

    An agent qualifies when it contains ``Linux`` but not ``Android``.
    Falls back to ``self.random()`` when nothing qualifies, and records the
    call through ``self._update_stats()`` before returning.
    """
    linux_agents = []
    for ua in self.agents:
        if 'Linux' in ua and 'Android' not in ua:
            linux_agents.append(ua)
    if linux_agents:
        chosen = random.choice(linux_agents)
    else:
        chosen = self.random()
    self._update_stats()
    return chosen
|
|
248
|
-
|
|
250
|
+
|
|
249
251
|
def android(self) -> str:
    """Return a pooled user agent that mentions ``Android``.

    Falls back to ``self.random()`` when the pool has no such agent, and
    records the call through ``self._update_stats()`` before returning.
    """
    android_agents = list(filter(lambda ua: 'Android' in ua, self.agents))
    if android_agents:
        chosen = random.choice(android_agents)
    else:
        chosen = self.random()
    self._update_stats()
    return chosen
|
|
255
|
-
|
|
257
|
+
|
|
256
258
|
def ios(self) -> str:
|
|
257
259
|
"""Get an iOS agent! 📱"""
|
|
258
260
|
agents = [a for a in self.agents if 'iPhone' in a or 'iPad' in a]
|
|
@@ -260,25 +262,24 @@ class LitAgent:
|
|
|
260
262
|
self._update_stats()
|
|
261
263
|
return agent
|
|
262
264
|
|
|
263
|
-
def custom(self, browser: str, version: Optional[str] = None,
|
|
264
|
-
os: Optional[str] = None, os_version: Optional[str] = None,
|
|
265
|
+
def custom(self, browser: str, version: Optional[str] = None,
|
|
266
|
+
os: Optional[str] = None, os_version: Optional[str] = None,
|
|
265
267
|
device_type: Optional[str] = None) -> str:
|
|
266
268
|
"""Generate a custom user agent with specified parameters! 🛠️
|
|
267
|
-
|
|
269
|
+
|
|
268
270
|
Args:
|
|
269
271
|
browser: Browser name (chrome, firefox, safari, edge, opera)
|
|
270
272
|
version: Browser version (optional)
|
|
271
273
|
os: Operating system (windows, mac, linux, android, ios)
|
|
272
274
|
os_version: OS version (optional)
|
|
273
275
|
device_type: Device type (desktop, mobile, tablet)
|
|
274
|
-
|
|
275
276
|
Returns:
|
|
276
277
|
Customized user agent string
|
|
277
278
|
"""
|
|
278
279
|
browser = browser.lower() if browser else 'chrome'
|
|
279
280
|
if browser not in BROWSERS:
|
|
280
281
|
browser = 'chrome'
|
|
281
|
-
|
|
282
|
+
|
|
282
283
|
if version:
|
|
283
284
|
try:
|
|
284
285
|
version_num = int(version.split('.')[0])
|
|
@@ -286,15 +287,15 @@ class LitAgent:
|
|
|
286
287
|
version_num = random.randint(*BROWSERS[browser])
|
|
287
288
|
else:
|
|
288
289
|
version_num = random.randint(*BROWSERS[browser])
|
|
289
|
-
|
|
290
|
+
|
|
290
291
|
os = os.lower() if os else random.choice(['windows', 'mac', 'linux'])
|
|
291
292
|
if os not in OS_VERSIONS:
|
|
292
293
|
os = 'windows'
|
|
293
|
-
|
|
294
|
+
|
|
294
295
|
os_ver = os_version or random.choice(OS_VERSIONS[os])
|
|
295
|
-
|
|
296
|
+
|
|
296
297
|
device_type = device_type.lower() if device_type else 'desktop'
|
|
297
|
-
|
|
298
|
+
|
|
298
299
|
# Build the user agent
|
|
299
300
|
if os == 'windows':
|
|
300
301
|
platform = f"Windows NT {os_ver}"
|
|
@@ -308,10 +309,10 @@ class LitAgent:
|
|
|
308
309
|
device = 'iPhone' if device_type == 'mobile' else 'iPad'
|
|
309
310
|
platform = f"{device}; CPU OS {os_ver} like Mac OS X"
|
|
310
311
|
else:
|
|
311
|
-
platform =
|
|
312
|
-
|
|
312
|
+
platform = "Windows NT 10.0" # Default fallback
|
|
313
|
+
|
|
313
314
|
agent = f"Mozilla/5.0 ({platform}) AppleWebKit/537.36 (KHTML, like Gecko) "
|
|
314
|
-
|
|
315
|
+
|
|
315
316
|
if browser == 'chrome':
|
|
316
317
|
agent += f"Chrome/{version_num}.0.0.0 Safari/537.36"
|
|
317
318
|
elif browser == 'firefox':
|
|
@@ -325,34 +326,39 @@ class LitAgent:
|
|
|
325
326
|
agent += f"Chrome/{version_num}.0.0.0 Safari/537.36 OPR/{version_num}.0.0.0"
|
|
326
327
|
elif browser == 'brave':
|
|
327
328
|
agent += f"Chrome/{version_num}.0.0.0 Safari/537.36 Brave/{version_num}.1.0"
|
|
328
|
-
|
|
329
|
+
|
|
329
330
|
self._update_stats(browser_type=browser, device_type=device_type)
|
|
330
331
|
return agent
|
|
331
332
|
|
|
332
333
|
def generate_fingerprint(self, browser: Optional[str] = None) -> Dict[str, str]:
|
|
333
|
-
"""
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
334
|
+
"""
|
|
335
|
+
Generate a consistent browser fingerprint for anti-fingerprinting purposes.
|
|
336
|
+
|
|
337
|
+
This method creates a dictionary of HTTP headers and related values that simulate
|
|
338
|
+
a realistic browser fingerprint, including user agent, accept headers, platform,
|
|
339
|
+
sec-ch-ua, and various IP-related headers. Optionally, a specific browser type
|
|
340
|
+
can be requested.
|
|
341
|
+
|
|
337
342
|
Args:
|
|
338
|
-
browser:
|
|
339
|
-
|
|
343
|
+
browser (Optional[str]): The browser name to generate the fingerprint for.
|
|
344
|
+
If not specified, a random browser is used.
|
|
345
|
+
|
|
340
346
|
Returns:
|
|
341
|
-
|
|
347
|
+
Dict[str, str]: A dictionary containing fingerprinting headers and values.
|
|
342
348
|
"""
|
|
343
349
|
# Get a random user agent using the random() method
|
|
344
350
|
user_agent = self.random()
|
|
345
|
-
|
|
351
|
+
|
|
346
352
|
# If browser is specified, try to get a matching one
|
|
347
353
|
if browser:
|
|
348
354
|
browser = browser.lower()
|
|
349
355
|
if browser in BROWSERS:
|
|
350
356
|
user_agent = self.browser(browser)
|
|
351
|
-
|
|
357
|
+
|
|
352
358
|
accept_language = random.choice(FINGERPRINTS["accept_language"])
|
|
353
359
|
accept = random.choice(FINGERPRINTS["accept"])
|
|
354
360
|
platform = random.choice(FINGERPRINTS["platforms"])
|
|
355
|
-
|
|
361
|
+
|
|
356
362
|
# Generate sec-ch-ua based on the user agent
|
|
357
363
|
sec_ch_ua = ""
|
|
358
364
|
for browser_name in FINGERPRINTS["sec_ch_ua"]:
|
|
@@ -360,15 +366,22 @@ class LitAgent:
|
|
|
360
366
|
version = random.randint(*BROWSERS[browser_name])
|
|
361
367
|
sec_ch_ua = FINGERPRINTS["sec_ch_ua"][browser_name].format(version, version)
|
|
362
368
|
break
|
|
363
|
-
|
|
369
|
+
|
|
370
|
+
ip = self.rotate_ip()
|
|
364
371
|
fingerprint = {
|
|
365
372
|
"user_agent": user_agent,
|
|
366
373
|
"accept_language": accept_language,
|
|
367
374
|
"accept": accept,
|
|
368
375
|
"sec_ch_ua": sec_ch_ua,
|
|
369
|
-
"platform": platform
|
|
376
|
+
"platform": platform,
|
|
377
|
+
"x-forwarded-for": ip,
|
|
378
|
+
"x-real-ip": ip,
|
|
379
|
+
"x-client-ip": ip,
|
|
380
|
+
"forwarded": f"for={ip};proto=https",
|
|
381
|
+
"x-forwarded-proto": "https",
|
|
382
|
+
"x-request-id": self.random_id(8) if hasattr(self, 'random_id') else ''.join(random.choices('0123456789abcdef', k=8)),
|
|
370
383
|
}
|
|
371
|
-
|
|
384
|
+
|
|
372
385
|
self._update_stats(browser_type=browser)
|
|
373
386
|
return fingerprint
|
|
374
387
|
|
|
@@ -381,30 +394,30 @@ class LitAgent:
|
|
|
381
394
|
else:
|
|
382
395
|
self.agents = self._generate_agents(100)
|
|
383
396
|
self._stats["total_generated"] += 100
|
|
384
|
-
|
|
397
|
+
|
|
385
398
|
|
|
386
399
|
def auto_refresh(self, interval_minutes: int = 30) -> None:
    """Start a repeating background refresh of the agent pool.

    Any timer already stored in ``self._refresh_timer`` is cancelled first.
    A daemon ``threading.Timer`` is then armed; when it fires it calls
    ``self.refresh()`` and arms the next timer with the same delay.

    Args:
        interval_minutes: Minutes between refreshes.
    """
    previous = self._refresh_timer
    if previous:
        previous.cancel()

    def _arm():
        # Store the timer on the instance first so a later call can cancel it.
        self._refresh_timer = threading.Timer(interval_minutes * 60, _refresh_task)
        self._refresh_timer.daemon = True
        self._refresh_timer.start()

    def _refresh_task():
        self.refresh()
        _arm()

    _arm()
|
|
404
|
-
|
|
417
|
+
|
|
405
418
|
def get_stats(self) -> Dict[str, Any]:
|
|
406
419
|
"""Get statistics about agent usage! 📊
|
|
407
|
-
|
|
420
|
+
|
|
408
421
|
Returns:
|
|
409
422
|
Dictionary with usage statistics
|
|
410
423
|
"""
|
|
@@ -412,18 +425,16 @@ class LitAgent:
|
|
|
412
425
|
# Calculate top browser
|
|
413
426
|
top_browser = max(stats_copy["browser_usage"].items(), key=lambda x: x[1])[0] if stats_copy["browser_usage"] else None
|
|
414
427
|
stats_copy["top_browser"] = top_browser
|
|
415
|
-
|
|
428
|
+
|
|
416
429
|
# Calculate fake detection avoidance rate (just for fun)
|
|
417
430
|
stats_copy["avoidance_rate"] = min(99.9, 90 + (stats_copy["total_generated"] / 1000))
|
|
418
|
-
|
|
431
|
+
|
|
419
432
|
return stats_copy
|
|
420
|
-
|
|
433
|
+
|
|
421
434
|
def export_stats(self, filename: str) -> bool:
|
|
422
435
|
"""Export usage statistics to a file! 💾
|
|
423
|
-
|
|
424
436
|
Args:
|
|
425
437
|
filename: Path to export the stats
|
|
426
|
-
|
|
427
438
|
Returns:
|
|
428
439
|
True if export was successful, False otherwise
|
|
429
440
|
"""
|
|
@@ -432,9 +443,35 @@ class LitAgent:
|
|
|
432
443
|
with open(filename, 'w') as f:
|
|
433
444
|
json.dump(self.get_stats(), f, indent=2)
|
|
434
445
|
return True
|
|
435
|
-
except Exception
|
|
446
|
+
except Exception:
|
|
436
447
|
return False
|
|
437
448
|
|
|
449
|
+
def random_crypto_ip(self) -> str:
    """Return a random, globally routable IPv4 address in dotted-quad form.

    Despite the name, the address comes from the module-level ``random``
    generator and is NOT cryptographically secure.

    Candidates are redrawn until ``ipaddress`` reports the address as
    global. This excludes unspecified (0.0.0.0/8), loopback, private,
    link-local, multicast and reserved ranges, which a uniform draw over
    0-255 per octet would otherwise return.

    Returns:
        A string such as ``"93.184.216.34"``.
    """
    # Local import: the enclosing module is not known to import ipaddress.
    import ipaddress

    while True:
        candidate = ipaddress.IPv4Address(random.getrandbits(32))
        if candidate.is_global:
            return str(candidate)
|
|
452
|
+
|
|
453
|
+
def _generate_ip_pool(self, count: int = 20) -> List[str]:
|
|
454
|
+
"""Generate a pool of random IP addresses."""
|
|
455
|
+
return [self.random_crypto_ip() for _ in range(count)]
|
|
456
|
+
|
|
457
|
+
def rotate_ip(self) -> str:
    """Return the next address from ``self.ip_pool`` in round-robin order.

    An empty pool is regenerated with 20 addresses and the cursor is reset.
    The cursor is wrapped to the current pool length before use, so a pool
    that was replaced or shortened from outside does not raise IndexError.
    When the instance was created thread-safe, the read-and-advance step
    runs under ``self.lock``, matching the other pool accessors in the class.

    Returns:
        The selected IP address string.
    """
    def _advance() -> str:
        if not self.ip_pool:
            self.ip_pool = self._generate_ip_pool(20)
            self._ip_index = 0
        size = len(self.ip_pool)
        # Wrap first: the stored index may be stale relative to the pool.
        position = self._ip_index % size
        self._ip_index = (position + 1) % size
        return self.ip_pool[position]

    # getattr keeps this usable on instances that predate these attributes.
    lock = getattr(self, "lock", None)
    if getattr(self, "thread_safe", False) and lock:
        with lock:
            return _advance()
    return _advance()
|
|
466
|
+
|
|
467
|
+
# Backwards compatibility for older versions expecting _random_ip
|
|
468
|
+
def _random_ip(self) -> str:
|
|
469
|
+
return self.rotate_ip()
|
|
470
|
+
|
|
471
|
+
def random_id(self, length: int = 16) -> str:
    """Return ``length`` random lowercase hexadecimal characters.

    Characters are drawn with ``random.choices`` from the module-level
    generator, so the result is reproducible under ``random.seed``.

    Args:
        length: Number of characters to produce.
    """
    hex_alphabet = '0123456789abcdef'
    picked = random.choices(hex_alphabet, k=length)
    # The alphabet is already lowercase, so no case folding is needed.
    return ''.join(picked)
|
|
474
|
+
|
|
438
475
|
if __name__ == "__main__":
|
|
439
476
|
# Test it out! 🧪
|
|
440
477
|
agent = LitAgent()
|
|
@@ -447,9 +484,9 @@ if __name__ == "__main__":
|
|
|
447
484
|
print("Tablet:", agent.tablet())
|
|
448
485
|
print("Smart TV:", agent.smart_tv())
|
|
449
486
|
print("Gaming:", agent.gaming())
|
|
450
|
-
|
|
487
|
+
|
|
451
488
|
# Test custom agent
|
|
452
489
|
print("Custom:", agent.custom(browser="chrome", os="windows", os_version="10.0"))
|
|
453
|
-
|
|
490
|
+
|
|
454
491
|
# Test fingerprinting
|
|
455
|
-
print("Fingerprint:", agent.generate_fingerprint("chrome"))
|
|
492
|
+
print("Fingerprint:", agent.generate_fingerprint("chrome"))
|
webscout/scout/core/scout.py
CHANGED
|
@@ -265,7 +265,7 @@ class Scout:
|
|
|
265
265
|
|
|
266
266
|
return json.dumps(_tag_to_dict(self._soup), indent=indent)
|
|
267
267
|
|
|
268
|
-
def find(self, name=None, attrs={}, recursive=True, text=None, **kwargs) -> ScoutSearchResult:
|
|
268
|
+
def find(self, name=None, attrs={}, recursive=True, text=None, class_=None, **kwargs) -> ScoutSearchResult:
|
|
269
269
|
"""
|
|
270
270
|
Find the first matching element.
|
|
271
271
|
|
|
@@ -278,10 +278,10 @@ class Scout:
|
|
|
278
278
|
Returns:
|
|
279
279
|
ScoutSearchResult: First matching element
|
|
280
280
|
"""
|
|
281
|
-
result = self._soup.find(name, attrs, recursive, text, **kwargs)
|
|
281
|
+
result = self._soup.find(name, attrs, recursive, text, limit=1, class_=class_, **kwargs)
|
|
282
282
|
return ScoutSearchResult([result]) if result else ScoutSearchResult([])
|
|
283
283
|
|
|
284
|
-
def find_all(self, name=None, attrs={}, recursive=True, text=None, limit=None, **kwargs) -> ScoutSearchResult:
|
|
284
|
+
def find_all(self, name=None, attrs={}, recursive=True, text=None, limit=None, class_=None, **kwargs) -> ScoutSearchResult:
|
|
285
285
|
"""
|
|
286
286
|
Find all matching elements.
|
|
287
287
|
|
|
@@ -295,7 +295,7 @@ class Scout:
|
|
|
295
295
|
Returns:
|
|
296
296
|
ScoutSearchResult: List of matching elements
|
|
297
297
|
"""
|
|
298
|
-
results = self._soup.find_all(name, attrs, recursive, text, limit, **kwargs)
|
|
298
|
+
results = self._soup.find_all(name, attrs, recursive, text, limit, class_=class_, **kwargs)
|
|
299
299
|
return ScoutSearchResult(results)
|
|
300
300
|
|
|
301
301
|
def find_parent(self, name=None, attrs={}, **kwargs) -> Optional[Tag]:
|
|
@@ -474,6 +474,19 @@ class Scout:
|
|
|
474
474
|
sentences = tokenizer.tokenize(text)
|
|
475
475
|
return "\n\n".join(sentences)
|
|
476
476
|
|
|
477
|
+
def get_text_robust(self, separator=' ', strip=False, types=None, encoding_fallbacks=None) -> str:
    """Extract document text, retrying through fallback encodings.

    ``self.get_text`` is tried first. If it raises ``UnicodeDecodeError``,
    each entry of ``encoding_fallbacks`` is tried in order: the text from
    ``self._soup.get_text`` is encoded and decoded with that codec, and the
    first attempt that succeeds is returned. If none succeeds, or no
    fallbacks were given, the original ``UnicodeDecodeError`` is re-raised.

    Args:
        separator: Passed through to ``get_text``.
        strip: Passed through to ``get_text``.
        types: Passed through to ``get_text``.
        encoding_fallbacks: Optional iterable of codec names to try.
    """
    try:
        return self.get_text(separator, strip, types)
    except UnicodeDecodeError:
        for codec in encoding_fallbacks or ():
            try:
                raw_text = self._soup.get_text(separator, strip, types)
                return raw_text.encode(codec).decode(codec)
            except Exception:
                # This codec cannot represent the text; try the next one.
                pass
        raise
|
|
489
|
+
|
|
477
490
|
def remove_tags(self, tags: List[str]) -> None:
|
|
478
491
|
"""
|
|
479
492
|
Remove specified tags and their contents from the document.
|
|
@@ -543,29 +556,19 @@ class Scout:
|
|
|
543
556
|
"""
|
|
544
557
|
old_tag.replace_with(new_tag)
|
|
545
558
|
|
|
546
|
-
def encode(self, encoding='utf-8') -> bytes:
|
|
547
|
-
"""
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
Returns:
|
|
554
|
-
bytes: Encoded document
|
|
555
|
-
"""
|
|
556
|
-
return str(self._soup).encode(encoding)
|
|
557
|
-
|
|
558
|
-
def decode(self, encoding='utf-8') -> str:
|
|
559
|
-
"""
|
|
560
|
-
Decode the document from a specific encoding.
|
|
561
|
-
|
|
562
|
-
Args:
|
|
563
|
-
encoding (str, optional): Encoding to use
|
|
559
|
+
def encode(self, encoding='utf-8', errors='strict') -> bytes:
    """Serialize the document with ``str()`` and encode it to bytes.

    If encoding with the requested codec fails for any reason, the document
    is encoded as UTF-8 instead, using the same ``errors`` policy.

    Args:
        encoding: Target codec name.
        errors: Error handling scheme passed to ``str.encode``.
    """
    try:
        payload = str(self._soup).encode(encoding, errors)
    except Exception:
        # Best-effort fallback: UTF-8 can represent any ordinary text.
        payload = str(self._soup).encode('utf-8', errors)
    return payload
|
|
564
565
|
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
566
|
+
def decode(self, encoding='utf-8', errors='strict') -> str:
    """Return the document as text.

    ``str(self._soup)`` is already a ``str``, so there is nothing to decode.
    Python 3 ``str`` has no ``decode`` method, so calling it would raise
    AttributeError on every invocation. This returns the serialized
    document directly and serializes it only once.

    Args:
        encoding: Accepted for signature compatibility; not used.
        errors: Accepted for signature compatibility; not used.

    Returns:
        The string form of the underlying document.
    """
    return str(self._soup)
|
|
569
572
|
|
|
570
573
|
def __str__(self) -> str:
|
|
571
574
|
"""
|
|
@@ -605,3 +608,78 @@ class Scout:
|
|
|
605
608
|
decoded_markup = re.sub(r'\s+', ' ', decoded_markup)
|
|
606
609
|
|
|
607
610
|
return decoded_markup
|
|
611
|
+
|
|
612
|
+
def wrap(self, wrapper_tag: Tag) -> Tag:
    """Wrap the root tag in ``wrapper_tag``.

    Returns whatever ``self._soup.wrap`` returns. If that call raises,
    ``wrapper_tag`` is returned unchanged instead.
    """
    try:
        wrapped = self._soup.wrap(wrapper_tag)
    except Exception:
        # Best-effort: hand the wrapper back untouched on any failure.
        wrapped = wrapper_tag
    return wrapped
|
|
618
|
+
|
|
619
|
+
def unwrap(self) -> None:
    """Unwrap the root tag via ``self._soup.unwrap()``.

    Best-effort: any exception raised by the underlying call is suppressed.
    """
    try:
        self._soup.unwrap()
    except Exception:
        # Deliberately ignored; the caller gets None either way.
        return None
    return None
|
|
625
|
+
|
|
626
|
+
def insert_before(self, new_element: Tag) -> None:
    """Insert ``new_element`` before the root tag via ``self._soup``.

    Best-effort: any exception raised by the underlying call is suppressed.
    """
    try:
        self._soup.insert_before(new_element)
    except Exception:
        # Deliberately ignored; the caller gets None either way.
        return None
    return None
|
|
632
|
+
|
|
633
|
+
def insert_after(self, new_element: Tag) -> None:
    """Insert ``new_element`` after the root tag via ``self._soup``.

    Best-effort: any exception raised by the underlying call is suppressed.
    """
    try:
        self._soup.insert_after(new_element)
    except Exception:
        # Deliberately ignored; the caller gets None either way.
        return None
    return None
|
|
639
|
+
|
|
640
|
+
def append(self, tag: Tag) -> None:
    """Append ``tag`` to the root tag via ``self._soup.append``.

    Best-effort: any exception raised by the underlying call is suppressed.
    """
    try:
        self._soup.append(tag)
    except Exception:
        # Deliberately ignored; the caller gets None either way.
        return None
    return None
|
|
646
|
+
|
|
647
|
+
@property
def descendants(self):
    """Expose the ``descendants`` attribute of the underlying root tag."""
    root = self._soup
    return root.descendants
|
|
651
|
+
|
|
652
|
+
@property
def parents(self):
    """Expose the ``parents`` attribute of the underlying root tag."""
    root = self._soup
    return root.parents
|
|
656
|
+
|
|
657
|
+
@property
def next_element(self):
    """Expose the ``next_element`` attribute of the underlying root tag."""
    root = self._soup
    return root.next_element
|
|
661
|
+
|
|
662
|
+
@property
def previous_element(self):
    """Expose the ``previous_element`` attribute of the underlying root tag."""
    root = self._soup
    return root.previous_element
|
|
666
|
+
|
|
667
|
+
def fetch_and_parse(self, url: str, requests_session=None, **kwargs) -> 'Scout':
    """Fetch ``url`` and parse the response body into a new ``Scout``.

    A caller-supplied session is used as-is and left open. When no session
    is supplied, a temporary ``requests.Session`` is created and closed
    before returning, so its connection pool is not leaked.

    Args:
        url: Address to fetch with an HTTP GET.
        requests_session: Optional session object exposing ``get``.
        **kwargs: Forwarded unchanged to ``session.get`` (for example
            ``timeout`` or ``headers``). No timeout is applied by default.

    Returns:
        A ``Scout`` built from the response bytes with this instance's
        ``features``.
    """
    import requests

    if requests_session:
        markup = requests_session.get(url, **kwargs).content
    else:
        with requests.Session() as owned_session:
            # Read the body while the session is open; with stream=True the
            # content is fetched lazily and needs the live connection.
            markup = owned_session.get(url, **kwargs).content
    return Scout(markup, features=self.features)
|
|
673
|
+
|
|
674
|
+
def tables_to_dataframe(self, table_index=0, pandas_module=None):
    """Convert one ``<table>`` in the document to a DataFrame.

    pandas is imported only when ``pandas_module`` is not supplied, so an
    injected replacement works even where pandas is not installed.

    Args:
        table_index: Position of the table among ``self.find_all('table')``.
        pandas_module: Optional object exposing ``DataFrame``; used instead
            of importing pandas.

    Returns:
        ``DataFrame(rows)``, where each row is the stripped text of its
        ``td``/``th`` cells, or ``None`` when the document has no tables or
        ``table_index`` is past the last table.
    """
    if pandas_module:
        pd = pandas_module
    else:
        import pandas as pd

    tables = self.find_all('table')
    if not tables or table_index >= len(tables):
        return None

    table = tables[table_index]
    data = [
        [cell.get_text(strip=True) for cell in row.find_all(['td', 'th'])]
        for row in table.find_all('tr')
    ]
    return pd.DataFrame(data)
|