webscout 2025.10.14.1__py3-none-any.whl → 2025.10.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of webscout might be problematic. Click here for more details.
- webscout/Extra/YTToolkit/README.md +1 -1
- webscout/Extra/tempmail/README.md +3 -3
- webscout/Provider/OPENAI/README.md +1 -1
- webscout/Provider/TTI/bing.py +4 -4
- webscout/__init__.py +1 -1
- webscout/cli.py +0 -147
- webscout/client.py +4 -5
- webscout/litprinter/__init__.py +0 -42
- webscout/scout/README.md +59 -8
- webscout/scout/core/scout.py +62 -0
- webscout/scout/element.py +251 -45
- webscout/search/__init__.py +5 -8
- webscout/search/bing_main.py +42 -0
- webscout/search/engines/bing/__init__.py +1 -0
- webscout/search/engines/bing/base.py +33 -0
- webscout/search/engines/bing/images.py +108 -0
- webscout/search/engines/bing/news.py +91 -0
- webscout/search/engines/bing/suggestions.py +34 -0
- webscout/search/engines/bing/text.py +106 -0
- webscout/search/engines/duckduckgo/maps.py +13 -0
- webscout/search/engines/yahoo/__init__.py +41 -0
- webscout/search/engines/yahoo/answers.py +16 -0
- webscout/search/engines/yahoo/base.py +34 -0
- webscout/search/engines/yahoo/images.py +324 -0
- webscout/search/engines/yahoo/maps.py +16 -0
- webscout/search/engines/yahoo/news.py +258 -0
- webscout/search/engines/yahoo/suggestions.py +140 -0
- webscout/search/engines/yahoo/text.py +273 -0
- webscout/search/engines/yahoo/translate.py +16 -0
- webscout/search/engines/yahoo/videos.py +302 -0
- webscout/search/engines/yahoo/weather.py +220 -0
- webscout/search/http_client.py +1 -1
- webscout/search/yahoo_main.py +54 -0
- webscout/{auth → server}/__init__.py +2 -23
- webscout/server/config.py +84 -0
- webscout/{auth → server}/request_processing.py +3 -28
- webscout/{auth → server}/routes.py +14 -170
- webscout/server/schemas.py +23 -0
- webscout/{auth → server}/server.py +11 -43
- webscout/server/simple_logger.py +84 -0
- webscout/version.py +1 -1
- webscout/version.py.bak +1 -1
- webscout/zeroart/README.md +17 -9
- webscout/zeroart/__init__.py +78 -6
- webscout/zeroart/effects.py +51 -1
- webscout/zeroart/fonts.py +559 -1
- {webscout-2025.10.14.1.dist-info → webscout-2025.10.16.dist-info}/METADATA +15 -332
- {webscout-2025.10.14.1.dist-info → webscout-2025.10.16.dist-info}/RECORD +55 -48
- {webscout-2025.10.14.1.dist-info → webscout-2025.10.16.dist-info}/entry_points.txt +1 -1
- webscout/Bing_search.py +0 -417
- webscout/DWEBS.py +0 -529
- webscout/auth/api_key_manager.py +0 -189
- webscout/auth/auth_system.py +0 -85
- webscout/auth/config.py +0 -175
- webscout/auth/database.py +0 -755
- webscout/auth/middleware.py +0 -248
- webscout/auth/models.py +0 -185
- webscout/auth/rate_limiter.py +0 -254
- webscout/auth/schemas.py +0 -103
- webscout/auth/simple_logger.py +0 -236
- webscout/search/engines/bing.py +0 -84
- webscout/search/engines/bing_news.py +0 -52
- webscout/search/engines/yahoo.py +0 -65
- webscout/search/engines/yahoo_news.py +0 -64
- /webscout/{auth → server}/exceptions.py +0 -0
- /webscout/{auth → server}/providers.py +0 -0
- /webscout/{auth → server}/request_models.py +0 -0
- {webscout-2025.10.14.1.dist-info → webscout-2025.10.16.dist-info}/WHEEL +0 -0
- {webscout-2025.10.14.1.dist-info → webscout-2025.10.16.dist-info}/licenses/LICENSE.md +0 -0
- {webscout-2025.10.14.1.dist-info → webscout-2025.10.16.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
<div align="center">
|
|
2
|
-
<a href="https://github.com/
|
|
2
|
+
<a href="https://github.com/pyscout/Webscout">
|
|
3
3
|
<img src="https://img.shields.io/badge/YTToolkit-YouTube%20Toolkit-red?style=for-the-badge&logo=youtube&logoColor=white" alt="YTToolkit Logo">
|
|
4
4
|
</a>
|
|
5
5
|
<h1>YTToolkit</h1>
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
<div align="center">
|
|
2
|
-
<a href="https://github.com/
|
|
3
|
-
<img src="https://img.shields.io/badge/WebScout-TempMail%
|
|
2
|
+
<a href="https://github.com/pyscout/Webscout">
|
|
3
|
+
<img src="https://img.shields.io/badge/WebScout-TempMail%20Toolkit-blue?style=for-the-badge&logo=maildotru&logoColor=white" alt="WebScout TempMail Toolkit">
|
|
4
4
|
</a>
|
|
5
5
|
|
|
6
6
|
<h1>📧 TempMail</h1>
|
|
@@ -484,5 +484,5 @@ Please refer to the main Webscout project's contributing guidelines if you plan
|
|
|
484
484
|
<a href="https://buymeacoffee.com/oevortex"><img alt="Buy Me A Coffee" src="https://img.shields.io/badge/Buy%20Me%20A%20Coffee-FFDD00?style=for-the-badge&logo=buymeacoffee&logoColor=black"></a>
|
|
485
485
|
</div>
|
|
486
486
|
<p>📧 TempMail - Part of the Webscout Toolkit</p>
|
|
487
|
-
<a href="https://github.com/
|
|
487
|
+
<a href="https://github.com/pyscout/Webscout">Back to Main Webscout Project</a>
|
|
488
488
|
</div>
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
<div align="center">
|
|
2
|
-
<a href="https://github.com/
|
|
2
|
+
<a href="https://github.com/pyscout/Webscout">
|
|
3
3
|
<img src="https://img.shields.io/badge/WebScout-OpenAI%20Compatible%20Providers-4285F4?style=for-the-badge&logo=openai&logoColor=white" alt="WebScout OpenAI Compatible Providers">
|
|
4
4
|
</a>
|
|
5
5
|
<br/>
|
webscout/Provider/TTI/bing.py
CHANGED
|
@@ -7,6 +7,7 @@ from webscout.Provider.TTI.utils import ImageData, ImageResponse
|
|
|
7
7
|
from webscout.Provider.TTI.base import TTICompatibleProvider, BaseImages
|
|
8
8
|
from io import BytesIO
|
|
9
9
|
from webscout.litagent import LitAgent
|
|
10
|
+
from webscout.scout import Scout
|
|
10
11
|
|
|
11
12
|
try:
|
|
12
13
|
from PIL import Image
|
|
@@ -81,9 +82,8 @@ class Images(BaseImages):
|
|
|
81
82
|
time.sleep(3)
|
|
82
83
|
try:
|
|
83
84
|
poll_resp = session.get(polling_url, headers=headers, timeout=timeout)
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
imgs = [img["src"].split("?")[0] for img in soup.select(".img_cont .mimg") if img.get("src")]
|
|
85
|
+
scout = Scout(poll_resp.text, features='html.parser')
|
|
86
|
+
imgs = [img["src"].split("?")[0] for img in scout.select(".img_cont .mimg") if img.attrs.get("src")]
|
|
87
87
|
if imgs:
|
|
88
88
|
img_url = imgs[0]
|
|
89
89
|
break
|
|
@@ -232,7 +232,7 @@ class BingImageAI(TTICompatibleProvider):
|
|
|
232
232
|
|
|
233
233
|
if __name__ == "__main__":
|
|
234
234
|
from rich import print
|
|
235
|
-
client = BingImageAI(cookie="
|
|
235
|
+
client = BingImageAI(cookie="1Fw9daLSZzVBJXgevTDuc0jHZ60l4m5IiQEwjRCFOwEkpEBDmw3b8CEAALFSwZ1QBu-rATNkfD0i0gfJmVHeFlogqIriGwxNwT9T6fVREgAQD4_qn0VnQYP681NN4K80t6o-eJXnK1MBhdjxTIaok8173LGmLkEWLqHC0k3dYnF7m2kHRhf1dxjEH3WDI56hxiSPZtnggdzrfnuFAmOgCQQ")
|
|
236
236
|
response = client.images.create(
|
|
237
237
|
model="gpt4o",
|
|
238
238
|
prompt="A cat riding a bicycle",
|
webscout/__init__.py
CHANGED
webscout/cli.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import sys
|
|
2
2
|
from .swiftcli import CLI, option
|
|
3
3
|
from .search import DuckDuckGoSearch, YepSearch # Import search classes
|
|
4
|
-
from .DWEBS import GoogleSearch # Import GoogleSearch from DWEBS
|
|
5
4
|
from .version import __version__
|
|
6
5
|
|
|
7
6
|
# Alias for backward compatibility
|
|
@@ -262,152 +261,6 @@ def weather(location: str, language: str, proxy: str = None, timeout: int = 10):
|
|
|
262
261
|
raise e
|
|
263
262
|
|
|
264
263
|
@app.command()
|
|
265
|
-
@option("--keywords", "-k", help="Search keywords", required=True)
|
|
266
|
-
@option("--region", "-r", help="Region for search results (ISO country code)", default="all")
|
|
267
|
-
@option("--safesearch", "-s", help="SafeSearch setting (on, moderate, off)", default="moderate")
|
|
268
|
-
@option("--max-results", "-m", help="Maximum number of results", type=int, default=10)
|
|
269
|
-
@option("--start-num", "-start", help="Starting position for pagination", type=int, default=0)
|
|
270
|
-
@option("--unique", "-u", help="Filter duplicate results", type=bool, default=True)
|
|
271
|
-
@option("--timeout", "-timeout", help="Timeout value for requests", type=int, default=10)
|
|
272
|
-
@option("--proxy", "-p", help="Proxy URL to use for requests")
|
|
273
|
-
@option("--impersonate", "-i", help="Browser to impersonate", default="chrome110")
|
|
274
|
-
def google_text(
|
|
275
|
-
keywords: str,
|
|
276
|
-
region: str,
|
|
277
|
-
safesearch: str,
|
|
278
|
-
max_results: int,
|
|
279
|
-
start_num: int,
|
|
280
|
-
unique: bool,
|
|
281
|
-
timeout: int = 10,
|
|
282
|
-
proxy: str = None,
|
|
283
|
-
impersonate: str = "chrome110"
|
|
284
|
-
):
|
|
285
|
-
"""Perform a text search using Google Search."""
|
|
286
|
-
google = GoogleSearch(
|
|
287
|
-
timeout=timeout,
|
|
288
|
-
proxies={"https": proxy, "http": proxy} if proxy else None,
|
|
289
|
-
verify=True,
|
|
290
|
-
lang="en",
|
|
291
|
-
sleep_interval=0.0,
|
|
292
|
-
impersonate=impersonate
|
|
293
|
-
)
|
|
294
|
-
|
|
295
|
-
try:
|
|
296
|
-
results = google.text(
|
|
297
|
-
keywords=keywords,
|
|
298
|
-
region=region,
|
|
299
|
-
safesearch=safesearch,
|
|
300
|
-
max_results=max_results,
|
|
301
|
-
start_num=start_num,
|
|
302
|
-
unique=unique
|
|
303
|
-
)
|
|
304
|
-
|
|
305
|
-
# Convert SearchResult objects to dictionaries for printing
|
|
306
|
-
formatted_results = []
|
|
307
|
-
for result in results:
|
|
308
|
-
result_dict = {
|
|
309
|
-
"title": result.title,
|
|
310
|
-
"url": result.url,
|
|
311
|
-
"description": result.description,
|
|
312
|
-
}
|
|
313
|
-
# Add any metadata to the result dictionary
|
|
314
|
-
for k, v in result.metadata.items():
|
|
315
|
-
result_dict[k] = v
|
|
316
|
-
|
|
317
|
-
formatted_results.append(result_dict)
|
|
318
|
-
|
|
319
|
-
_print_data(formatted_results)
|
|
320
|
-
except Exception as e:
|
|
321
|
-
raise e
|
|
322
|
-
|
|
323
|
-
@app.command()
|
|
324
|
-
@option("--keywords", "-k", help="Search keywords", required=True)
|
|
325
|
-
@option("--region", "-r", help="Region for search results (ISO country code)", default="all")
|
|
326
|
-
@option("--safesearch", "-s", help="SafeSearch setting (on, moderate, off)", default="moderate")
|
|
327
|
-
@option("--max-results", "-m", help="Maximum number of results", type=int, default=10)
|
|
328
|
-
@option("--timeout", "-timeout", help="Timeout value for requests", type=int, default=10)
|
|
329
|
-
@option("--proxy", "-p", help="Proxy URL to use for requests")
|
|
330
|
-
@option("--impersonate", "-i", help="Browser to impersonate", default="chrome110")
|
|
331
|
-
def google_news(
|
|
332
|
-
keywords: str,
|
|
333
|
-
region: str,
|
|
334
|
-
safesearch: str,
|
|
335
|
-
max_results: int,
|
|
336
|
-
timeout: int = 10,
|
|
337
|
-
proxy: str = None,
|
|
338
|
-
impersonate: str = "chrome110"
|
|
339
|
-
):
|
|
340
|
-
"""Perform a news search using Google Search."""
|
|
341
|
-
google = GoogleSearch(
|
|
342
|
-
timeout=timeout,
|
|
343
|
-
proxies={"https": proxy, "http": proxy} if proxy else None,
|
|
344
|
-
verify=True,
|
|
345
|
-
lang="en",
|
|
346
|
-
sleep_interval=0.0,
|
|
347
|
-
impersonate=impersonate
|
|
348
|
-
)
|
|
349
|
-
|
|
350
|
-
try:
|
|
351
|
-
results = google.news(
|
|
352
|
-
keywords=keywords,
|
|
353
|
-
region=region,
|
|
354
|
-
safesearch=safesearch,
|
|
355
|
-
max_results=max_results
|
|
356
|
-
)
|
|
357
|
-
|
|
358
|
-
# Convert SearchResult objects to dictionaries for printing
|
|
359
|
-
formatted_results = []
|
|
360
|
-
for result in results:
|
|
361
|
-
result_dict = {
|
|
362
|
-
"title": result.title,
|
|
363
|
-
"url": result.url,
|
|
364
|
-
"description": result.description,
|
|
365
|
-
}
|
|
366
|
-
# Add any metadata to the result dictionary
|
|
367
|
-
for k, v in result.metadata.items():
|
|
368
|
-
result_dict[k] = v
|
|
369
|
-
|
|
370
|
-
formatted_results.append(result_dict)
|
|
371
|
-
|
|
372
|
-
_print_data(formatted_results)
|
|
373
|
-
except Exception as e:
|
|
374
|
-
raise e
|
|
375
|
-
|
|
376
|
-
@app.command()
|
|
377
|
-
@option("--query", "-q", help="Search query", required=True)
|
|
378
|
-
@option("--region", "-r", help="Region for suggestions (ISO country code)", default="all")
|
|
379
|
-
@option("--timeout", "-timeout", help="Timeout value for requests", type=int, default=10)
|
|
380
|
-
@option("--proxy", "-p", help="Proxy URL to use for requests")
|
|
381
|
-
@option("--impersonate", "-i", help="Browser to impersonate", default="chrome110")
|
|
382
|
-
def google_suggestions(
|
|
383
|
-
query: str,
|
|
384
|
-
region: str,
|
|
385
|
-
timeout: int = 10,
|
|
386
|
-
proxy: str = None,
|
|
387
|
-
impersonate: str = "chrome110"
|
|
388
|
-
):
|
|
389
|
-
"""Get search suggestions from Google Search."""
|
|
390
|
-
google = GoogleSearch(
|
|
391
|
-
timeout=timeout,
|
|
392
|
-
proxies={"https": proxy, "http": proxy} if proxy else None,
|
|
393
|
-
verify=True,
|
|
394
|
-
lang="en",
|
|
395
|
-
sleep_interval=0.0,
|
|
396
|
-
impersonate=impersonate
|
|
397
|
-
)
|
|
398
|
-
|
|
399
|
-
try:
|
|
400
|
-
results = google.suggestions(query=query, region=region)
|
|
401
|
-
|
|
402
|
-
# Format suggestions for printing
|
|
403
|
-
formatted_results = []
|
|
404
|
-
for i, suggestion in enumerate(results, 1):
|
|
405
|
-
formatted_results.append({"position": i, "suggestion": suggestion})
|
|
406
|
-
|
|
407
|
-
_print_data(formatted_results)
|
|
408
|
-
except Exception as e:
|
|
409
|
-
raise e
|
|
410
|
-
|
|
411
264
|
@app.command()
|
|
412
265
|
@option("--keywords", "-k", help="Search keywords", required=True)
|
|
413
266
|
@option("--region", "-r", help="Region for search results", default="all")
|
webscout/client.py
CHANGED
|
@@ -34,20 +34,19 @@ from webscout.Provider.OPENAI import *
|
|
|
34
34
|
try:
|
|
35
35
|
# Use lazy import to avoid module execution issues
|
|
36
36
|
def run_api(*args, **kwargs):
|
|
37
|
-
|
|
38
|
-
from webscout.auth.server import run_api as _run_api
|
|
37
|
+
from webscout.server.server import run_api as _run_api
|
|
39
38
|
return _run_api(*args, **kwargs)
|
|
40
39
|
|
|
41
40
|
def start_server(**kwargs):
|
|
42
41
|
"""Start the Webscout OpenAI-compatible API server (FastAPI backend)."""
|
|
43
|
-
from webscout.
|
|
42
|
+
from webscout.server.server import run_api as _run_api
|
|
44
43
|
return _run_api(**kwargs)
|
|
45
44
|
except ImportError:
|
|
46
45
|
# Fallback for environments where the backend is not available
|
|
47
46
|
def run_api(*args, **kwargs):
|
|
48
|
-
raise ImportError("webscout.
|
|
47
|
+
raise ImportError("webscout.server.server.run_api is not available in this environment.")
|
|
49
48
|
def start_server(*args, **kwargs):
|
|
50
|
-
raise ImportError("webscout.
|
|
49
|
+
raise ImportError("webscout.server.server.start_server is not available in this environment.")
|
|
51
50
|
|
|
52
51
|
# ---
|
|
53
52
|
# API Documentation
|
webscout/litprinter/__init__.py
CHANGED
|
@@ -1,45 +1,3 @@
|
|
|
1
|
-
"""
|
|
2
|
-
>>> from litprinter import litprint
|
|
3
|
-
>>> from litprinter import lit
|
|
4
|
-
>>> from litprinter import install, uninstall
|
|
5
|
-
>>>
|
|
6
|
-
>>> litprint("Hello, world!")
|
|
7
|
-
LIT -> [__main__.py:1] in () >>> Hello, world!
|
|
8
|
-
>>>
|
|
9
|
-
>>> def my_function():
|
|
10
|
-
... lit(1, 2, 3)
|
|
11
|
-
>>> my_function()
|
|
12
|
-
LIT -> [__main__.py:4] in my_function() >>> 1, 2, 3
|
|
13
|
-
>>> install()
|
|
14
|
-
>>> ic("This is now the builtins.ic()")
|
|
15
|
-
LIT -> [__main__.py:7] in () >>> This is now the builtins.ic()
|
|
16
|
-
>>> uninstall()
|
|
17
|
-
|
|
18
|
-
This module provides enhanced print and logging functionalities for Python,
|
|
19
|
-
allowing developers to debug their code with style and precision. It
|
|
20
|
-
includes the litprint and lit functions for debugging, log for logging, and
|
|
21
|
-
install/uninstall functions for integration into the builtins module.
|
|
22
|
-
It also handles colorizing output and provides different styles and customizable
|
|
23
|
-
options.
|
|
24
|
-
|
|
25
|
-
LITPRINTER is inspired by the icecream package and provides similar functionality
|
|
26
|
-
with additional features:
|
|
27
|
-
- Variable inspection with expression display
|
|
28
|
-
- Return value handling for inline usage
|
|
29
|
-
- Support for custom formatters for specific data types
|
|
30
|
-
- Execution context tracking
|
|
31
|
-
- Rich-like colorized output with multiple themes (JARVIS, RICH, MODERN, NEON, CYBERPUNK)
|
|
32
|
-
- Better JSON formatting with indent=2 by default
|
|
33
|
-
- Advanced pretty printing for complex data structures with smart truncation
|
|
34
|
-
- Clickable file paths in supported terminals and editors (VSCode compatible)
|
|
35
|
-
- Enhanced visual formatting with better spacing and separators
|
|
36
|
-
- Special formatters for common types (Exception, bytes, set, frozenset, etc.)
|
|
37
|
-
- Smart object introspection for custom classes
|
|
38
|
-
- Logging capabilities with timestamp and log levels
|
|
39
|
-
"""
|
|
40
|
-
|
|
41
|
-
# Try to import from the standalone litprinter package first
|
|
42
|
-
# If it's not installed
|
|
43
1
|
try:
|
|
44
2
|
import litprinter
|
|
45
3
|
# If standalone package is found, re-export all its components
|
webscout/scout/README.md
CHANGED
|
@@ -43,7 +43,7 @@ pip install webscout
|
|
|
43
43
|
Or install the latest version from GitHub:
|
|
44
44
|
|
|
45
45
|
```bash
|
|
46
|
-
pip install git+https://github.com/
|
|
46
|
+
pip install git+https://github.com/pyscout/Webscout.git
|
|
47
47
|
```
|
|
48
48
|
|
|
49
49
|
## 🚀 Quick Start
|
|
@@ -147,10 +147,57 @@ Scout provides powerful tools for navigating and manipulating HTML/XML documents
|
|
|
147
147
|
- **Document Manipulation**: Modify, replace, or remove elements
|
|
148
148
|
- **Dynamic Building**: Easily append or insert new nodes
|
|
149
149
|
|
|
150
|
+
#### CSS Selector Support
|
|
151
|
+
|
|
152
|
+
Scout includes a comprehensive CSS selector engine that supports all common selector types:
|
|
153
|
+
|
|
150
154
|
```python
|
|
151
|
-
#
|
|
152
|
-
|
|
155
|
+
# Tag selectors
|
|
156
|
+
paragraphs = scout.select('p')
|
|
157
|
+
divs = scout.select('div')
|
|
158
|
+
|
|
159
|
+
# Class selectors
|
|
160
|
+
items = scout.select('.item') # Single class
|
|
161
|
+
cards = scout.select('div.card') # Tag + class
|
|
162
|
+
special = scout.select('.card.special') # Multiple classes
|
|
163
|
+
|
|
164
|
+
# ID selectors
|
|
165
|
+
header = scout.select_one('#header') # Single element by ID
|
|
166
|
+
menu = scout.select('nav#main-menu') # Tag + ID
|
|
167
|
+
|
|
168
|
+
# Attribute selectors
|
|
169
|
+
links = scout.select('a[href]') # Has attribute
|
|
170
|
+
external = scout.select('a[rel="nofollow"]') # Attribute value
|
|
171
|
+
images = scout.select('img[alt]') # Has alt attribute
|
|
172
|
+
|
|
173
|
+
# Descendant selectors (space)
|
|
174
|
+
nested = scout.select('div p') # Any p inside div
|
|
175
|
+
deep = scout.select('article section p') # Deeply nested
|
|
176
|
+
|
|
177
|
+
# Child selectors (>)
|
|
178
|
+
direct = scout.select('ul > li') # Direct children only
|
|
179
|
+
menu_items = scout.select('nav#menu > ul > li') # Multiple levels
|
|
180
|
+
|
|
181
|
+
# Combined selectors
|
|
182
|
+
complex = scout.select('div.container > p.text[lang="en"]')
|
|
183
|
+
links = scout.select('ol#results > li.item a[href]')
|
|
184
|
+
|
|
185
|
+
# Get first match only
|
|
186
|
+
first = scout.select_one('p.intro')
|
|
187
|
+
```
|
|
153
188
|
|
|
189
|
+
**Supported Selector Types:**
|
|
190
|
+
- **Tag**: `p`, `div`, `a`
|
|
191
|
+
- **Class**: `.class`, `div.class`, `.class1.class2`
|
|
192
|
+
- **ID**: `#id`, `div#id`
|
|
193
|
+
- **Attribute**: `[attr]`, `[attr="value"]`
|
|
194
|
+
- **Descendant**: `div p`, `article section p`
|
|
195
|
+
- **Child**: `div > p`, `ul > li`
|
|
196
|
+
- **Combined**: `p.class#id[attr="value"]`
|
|
197
|
+
|
|
198
|
+
#### Element Navigation
|
|
199
|
+
|
|
200
|
+
```python
|
|
154
201
|
# Advanced find with attribute matching
|
|
155
202
|
results = scout.find_all('a', attrs={'class': 'external', 'rel': 'nofollow'})
|
|
156
203
|
|
|
@@ -340,6 +387,10 @@ cached_data = scout.cache('parsed_data')
|
|
|
340
387
|
- `__init__(markup, features='html.parser', from_encoding=None)`: Initialize with HTML content
|
|
341
388
|
- `find(name, attrs={}, recursive=True, text=None)`: Find first matching element
|
|
342
389
|
- `find_all(name, attrs={}, recursive=True, text=None, limit=None)`: Find all matching elements
|
|
390
|
+
- `find_next(name, attrs={}, text=None)`: Find next element in document order
|
|
391
|
+
- `find_all_next(name, attrs={}, text=None, limit=None)`: Find all next elements in document order
|
|
392
|
+
- `find_previous(name, attrs={}, text=None)`: Find previous element in document order
|
|
393
|
+
- `find_all_previous(name, attrs={}, text=None, limit=None)`: Find all previous elements in document order
|
|
343
394
|
- `select(selector)`: Find elements using CSS selector
|
|
344
395
|
- `get_text(separator=' ', strip=False)`: Extract text from document
|
|
345
396
|
- `analyze_text()`: Perform text analysis
|
|
@@ -358,7 +409,7 @@ cached_data = scout.cache('parsed_data')
|
|
|
358
409
|
- `_crawl_page(url, depth=0)`: Crawl a single page (internal method)
|
|
359
410
|
- `_is_valid_url(url)`: Check if a URL is valid (internal method)
|
|
360
411
|
|
|
361
|
-
For detailed API documentation, please refer to the [documentation](https://github.com/
|
|
412
|
+
For detailed API documentation, please refer to the [documentation](https://github.com/pyscout/Webscout/wiki).
|
|
362
413
|
|
|
363
414
|
## 🔧 Dependencies
|
|
364
415
|
|
|
@@ -393,9 +444,9 @@ This project is licensed under the MIT License - see the LICENSE file for detail
|
|
|
393
444
|
<div align="center">
|
|
394
445
|
<p>Made with ❤️ by the Webscout team</p>
|
|
395
446
|
<p>
|
|
396
|
-
<a href="https://github.com/
|
|
397
|
-
<a href="https://github.com/
|
|
398
|
-
<a href="https://github.com/
|
|
399
|
-
<a href="https://github.com/
|
|
447
|
+
<a href="https://github.com/pyscout/Webscout">GitHub</a> •
|
|
448
|
+
<a href="https://github.com/pyscout/Webscout/wiki">Documentation</a> •
|
|
449
|
+
<a href="https://github.com/pyscout/Webscout/issues">Report Bug</a> •
|
|
450
|
+
<a href="https://github.com/pyscout/Webscout/issues">Request Feature</a>
|
|
400
451
|
</p>
|
|
401
452
|
</div>
|
webscout/scout/core/scout.py
CHANGED
|
@@ -454,6 +454,68 @@ class Scout:
|
|
|
454
454
|
pass
|
|
455
455
|
return siblings
|
|
456
456
|
|
|
457
|
+
def find_next(self, name=None, attrs={}, text=None, **kwargs) -> Optional[Tag]:
|
|
458
|
+
"""
|
|
459
|
+
Find the next element in document order.
|
|
460
|
+
|
|
461
|
+
Args:
|
|
462
|
+
name: Tag name to search for
|
|
463
|
+
attrs: Attributes to match
|
|
464
|
+
text: Text content to match
|
|
465
|
+
**kwargs: Additional attributes
|
|
466
|
+
|
|
467
|
+
Returns:
|
|
468
|
+
Optional[Tag]: Next matching element or None
|
|
469
|
+
"""
|
|
470
|
+
return self._soup.find_next(name, attrs, text, **kwargs)
|
|
471
|
+
|
|
472
|
+
def find_all_next(self, name=None, attrs={}, text=None, limit=None, **kwargs) -> List[Tag]:
|
|
473
|
+
"""
|
|
474
|
+
Find all next elements in document order.
|
|
475
|
+
|
|
476
|
+
Args:
|
|
477
|
+
name: Tag name to search for
|
|
478
|
+
attrs: Attributes to match
|
|
479
|
+
text: Text content to match
|
|
480
|
+
limit: Maximum number of results
|
|
481
|
+
**kwargs: Additional attributes
|
|
482
|
+
|
|
483
|
+
Returns:
|
|
484
|
+
List[Tag]: List of matching elements
|
|
485
|
+
"""
|
|
486
|
+
return self._soup.find_all_next(name, attrs, text, limit, **kwargs)
|
|
487
|
+
|
|
488
|
+
def find_previous(self, name=None, attrs={}, text=None, **kwargs) -> Optional[Tag]:
|
|
489
|
+
"""
|
|
490
|
+
Find the previous element in document order.
|
|
491
|
+
|
|
492
|
+
Args:
|
|
493
|
+
name: Tag name to search for
|
|
494
|
+
attrs: Attributes to match
|
|
495
|
+
text: Text content to match
|
|
496
|
+
**kwargs: Additional attributes
|
|
497
|
+
|
|
498
|
+
Returns:
|
|
499
|
+
Optional[Tag]: Previous matching element or None
|
|
500
|
+
"""
|
|
501
|
+
return self._soup.find_previous(name, attrs, text, **kwargs)
|
|
502
|
+
|
|
503
|
+
def find_all_previous(self, name=None, attrs={}, text=None, limit=None, **kwargs) -> List[Tag]:
|
|
504
|
+
"""
|
|
505
|
+
Find all previous elements in document order.
|
|
506
|
+
|
|
507
|
+
Args:
|
|
508
|
+
name: Tag name to search for
|
|
509
|
+
attrs: Attributes to match
|
|
510
|
+
text: Text content to match
|
|
511
|
+
limit: Maximum number of results
|
|
512
|
+
**kwargs: Additional attributes
|
|
513
|
+
|
|
514
|
+
Returns:
|
|
515
|
+
List[Tag]: List of matching elements
|
|
516
|
+
"""
|
|
517
|
+
return self._soup.find_all_previous(name, attrs, text, limit, **kwargs)
|
|
518
|
+
|
|
457
519
|
def select(self, selector: str) -> List[Tag]:
|
|
458
520
|
"""
|
|
459
521
|
Select elements using CSS selector.
|