webscout 2025.10.11__py3-none-any.whl → 2025.10.13__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
Potentially problematic release.
This version of webscout might be problematic.
- webscout/Provider/Andi.py +1 -1
- webscout/Provider/ChatGPTClone.py +2 -1
- webscout/__init__.py +1 -4
- webscout/auth/routes.py +2 -3
- webscout/cli.py +1 -1
- webscout/search/__init__.py +51 -0
- webscout/search/base.py +195 -0
- webscout/search/duckduckgo_main.py +54 -0
- webscout/search/engines/__init__.py +48 -0
- webscout/search/engines/bing.py +84 -0
- webscout/search/engines/bing_news.py +52 -0
- webscout/search/engines/brave.py +43 -0
- webscout/search/engines/duckduckgo/__init__.py +25 -0
- webscout/search/engines/duckduckgo/answers.py +78 -0
- webscout/search/engines/duckduckgo/base.py +187 -0
- webscout/search/engines/duckduckgo/images.py +97 -0
- webscout/search/engines/duckduckgo/maps.py +168 -0
- webscout/search/engines/duckduckgo/news.py +68 -0
- webscout/search/engines/duckduckgo/suggestions.py +21 -0
- webscout/search/engines/duckduckgo/text.py +211 -0
- webscout/search/engines/duckduckgo/translate.py +47 -0
- webscout/search/engines/duckduckgo/videos.py +63 -0
- webscout/search/engines/duckduckgo/weather.py +74 -0
- webscout/search/engines/mojeek.py +37 -0
- webscout/search/engines/wikipedia.py +56 -0
- webscout/search/engines/yahoo.py +65 -0
- webscout/search/engines/yahoo_news.py +64 -0
- webscout/search/engines/yandex.py +43 -0
- webscout/search/engines/yep/__init__.py +13 -0
- webscout/search/engines/yep/base.py +32 -0
- webscout/search/engines/yep/images.py +99 -0
- webscout/search/engines/yep/suggestions.py +35 -0
- webscout/search/engines/yep/text.py +114 -0
- webscout/search/http_client.py +156 -0
- webscout/search/results.py +137 -0
- webscout/search/yep_main.py +44 -0
- webscout/version.py +1 -1
- webscout/version.py.bak +2 -0
- {webscout-2025.10.11.dist-info → webscout-2025.10.13.dist-info}/METADATA +3 -4
- {webscout-2025.10.11.dist-info → webscout-2025.10.13.dist-info}/RECORD +44 -15
- webscout/webscout_search.py +0 -1183
- webscout/webscout_search_async.py +0 -649
- webscout/yep_search.py +0 -346
- {webscout-2025.10.11.dist-info → webscout-2025.10.13.dist-info}/WHEEL +0 -0
- {webscout-2025.10.11.dist-info → webscout-2025.10.13.dist-info}/entry_points.txt +0 -0
- {webscout-2025.10.11.dist-info → webscout-2025.10.13.dist-info}/licenses/LICENSE.md +0 -0
- {webscout-2025.10.11.dist-info → webscout-2025.10.13.dist-info}/top_level.txt +0 -0
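Taken together, the listing shows the old monolithic search modules (webscout_search.py, webscout_search_async.py, yep_search.py) being removed in favor of a structured webscout/search package with one module per engine. A minimal usage sketch of the new entry points, based on the README change and the yep_main.py module shown below; the exact re-exports of webscout/search/__init__.py are not visible in this diff, so treat the import paths as assumptions:

from webscout.search import DuckDuckGoSearch   # import shown in the updated README
from webscout.search.yep_main import YepSearch  # path inferred from the file list above

yep = YepSearch()  # constructor arguments, if any, come from BaseSearch (not shown here)
for hit in yep.text("open source", max_results=3):
    print(hit["title"], hit["href"])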
webscout/search/engines/yep/base.py
ADDED
@@ -0,0 +1,32 @@
+from __future__ import annotations
+
+from ....litagent import LitAgent
+from curl_cffi.requests import Session
+
+
+class YepBase:
+    """Base class for Yep search engines."""
+
+    def __init__(
+        self,
+        timeout: int = 20,
+        proxies: dict[str, str] | None = None,
+        verify: bool = True,
+        impersonate: str = "chrome110",
+    ):
+        self.base_url = "https://api.yep.com/fs/2/search"
+        self.timeout = timeout
+        self.session = Session(
+            proxies=proxies,
+            verify=verify,
+            impersonate=impersonate,
+            timeout=timeout,
+        )
+        self.session.headers.update(
+            {
+                **LitAgent().generate_fingerprint(),
+                "Origin": "https://yep.com",
+                "Referer": "https://yep.com/",
+            }
+        )
+
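The base class centralizes session setup: curl_cffi's impersonate="chrome110" gives requests a Chrome-like TLS fingerprint, while LitAgent().generate_fingerprint() supplies matching browser headers. A minimal sketch of an engine built on it (hypothetical subclass name; the real engines follow below):

from urllib.parse import urlencode

class YepDemo(YepBase):  # hypothetical, for illustration only
    def run(self, keywords: str) -> list:
        # Reuse the shared session and base URL configured by YepBase
        url = f"{self.base_url}?{urlencode({'client': 'web', 'q': keywords, 'type': 'web'})}"
        response = self.session.get(url)
        response.raise_for_status()
        return response.json()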
webscout/search/engines/yep/images.py
ADDED
@@ -0,0 +1,99 @@
+from __future__ import annotations
+
+from typing import Dict, List, Optional
+from urllib.parse import urlencode
+
+from .base import YepBase
+
+
+class YepImages(YepBase):
+    def run(self, *args, **kwargs) -> List[Dict[str, str]]:
+        keywords = args[0] if args else kwargs.get("keywords")
+        region = args[1] if len(args) > 1 else kwargs.get("region", "all")
+        safesearch = args[2] if len(args) > 2 else kwargs.get("safesearch", "moderate")
+        max_results = args[3] if len(args) > 3 else kwargs.get("max_results")
+
+        safe_search_map = {
+            "on": "on",
+            "moderate": "moderate",
+            "off": "off"
+        }
+        safe_setting = safe_search_map.get(safesearch.lower(), "moderate")
+
+        params = {
+            "client": "web",
+            "gl": region,
+            "limit": str(max_results) if max_results else "10",
+            "no_correct": "false",
+            "q": keywords,
+            "safeSearch": safe_setting,
+            "type": "images"
+        }
+
+        url = f"{self.base_url}?{urlencode(params)}"
+        try:
+            response = self.session.get(url)
+            response.raise_for_status()
+            raw_results = response.json()
+
+            if not raw_results or len(raw_results) < 2:
+                return []
+
+            formatted_results = []
+            results = raw_results[1].get('results', [])
+
+            for result in results:
+                if result.get("type") != "Image":
+                    continue
+
+                formatted_result = {
+                    "title": self._remove_html_tags(result.get("title", "")),
+                    "image": result.get("image_id", ""),
+                    "thumbnail": result.get("src", ""),
+                    "url": result.get("host_page", ""),
+                    "height": result.get("height", 0),
+                    "width": result.get("width", 0),
+                    "source": result.get("visual_url", "")
+                }
+
+                if "srcset" in result:
+                    formatted_result["thumbnail_hd"] = result["srcset"].split(",")[1].strip().split(" ")[0]
+
+                formatted_results.append(formatted_result)
+
+            if max_results:
+                return formatted_results[:max_results]
+            return formatted_results
+
+        except Exception as e:
+            if hasattr(e, 'response') and e.response is not None:
+                raise Exception(f"Yep image search failed with status {e.response.status_code}: {str(e)}")
+            else:
+                raise Exception(f"Yep image search failed: {str(e)}")
+
+    def _remove_html_tags(self, text: str) -> str:
+        result = ""
+        in_tag = False
+
+        for char in text:
+            if char == '<':
+                in_tag = True
+            elif char == '>':
+                in_tag = False
+            elif not in_tag:
+                result += char
+
+        replacements = {
+            '&nbsp;': ' ',
+            '&amp;': '&',
+            '&lt;': '<',
+            '&gt;': '>',
+            '&quot;': '"',
+            '&#39;': "'",
+        }
+
+        for entity, replacement in replacements.items():
+            result = result.replace(entity, replacement)
+
+        return result.strip()
+
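run() accepts its arguments positionally or as keywords. A hedged usage sketch; the result keys follow the formatting code above:

images = YepImages(timeout=30)
results = images.run("northern lights", "all", "off", 5)
for r in results:
    # "image" holds the full-size URL (image_id), "thumbnail" the preview (src)
    print(f'{r["title"]}: {r["image"]} ({r["width"]}x{r["height"]})')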
webscout/search/engines/yep/suggestions.py
ADDED
@@ -0,0 +1,35 @@
+from __future__ import annotations
+
+from typing import List
+from urllib.parse import urlencode
+
+from .base import YepBase
+
+
+class YepSuggestions(YepBase):
+    def run(self, *args, **kwargs) -> List[str]:
+        keywords = args[0] if args else kwargs.get("keywords")
+        region = args[1] if len(args) > 1 else kwargs.get("region", "all")
+
+        params = {
+            "query": keywords,
+            "type": "web",
+            "gl": region
+        }
+
+        url = f"https://api.yep.com/ac/?{urlencode(params)}"
+
+        try:
+            response = self.session.get(url)
+            response.raise_for_status()
+            data = response.json()
+            if isinstance(data, list) and len(data) > 1 and isinstance(data[1], list):
+                return data[1]
+            return []
+
+        except Exception as e:
+            if hasattr(e, 'response') and e.response is not None:
+                raise Exception(f"Yep suggestions failed with status {e.response.status_code}: {str(e)}")
+            else:
+                raise Exception(f"Yep suggestions failed: {str(e)}")
+
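The autocomplete endpoint returns a [query, [suggestions, ...]] pair, so run() simply unwraps the second element:

suggestions = YepSuggestions().run("how to lea")
print(suggestions)  # a plain list of completion strings, e.g. ["how to learn python", ...]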
webscout/search/engines/yep/text.py
ADDED
@@ -0,0 +1,114 @@
+from __future__ import annotations
+
+from typing import Dict, List, Optional
+from urllib.parse import urlencode
+
+from .base import YepBase
+
+
+class YepSearch(YepBase):
+    def run(self, *args, **kwargs) -> List[Dict[str, str]]:
+        keywords = args[0] if args else kwargs.get("keywords")
+        region = args[1] if len(args) > 1 else kwargs.get("region", "all")
+        safesearch = args[2] if len(args) > 2 else kwargs.get("safesearch", "moderate")
+        max_results = args[3] if len(args) > 3 else kwargs.get("max_results")
+
+        safe_search_map = {
+            "on": "on",
+            "moderate": "moderate",
+            "off": "off"
+        }
+        safe_setting = safe_search_map.get(safesearch.lower(), "moderate")
+
+        params = {
+            "client": "web",
+            "gl": region,
+            "limit": str(max_results) if max_results else "10",
+            "no_correct": "false",
+            "q": keywords,
+            "safeSearch": safe_setting,
+            "type": "web"
+        }
+
+        url = f"{self.base_url}?{urlencode(params)}"
+        try:
+            response = self.session.get(url)
+            response.raise_for_status()
+            raw_results = response.json()
+
+            formatted_results = self.format_results(raw_results)
+
+            if max_results:
+                return formatted_results[:max_results]
+            return formatted_results
+        except Exception as e:
+            if hasattr(e, 'response') and e.response is not None:
+                raise Exception(f"Yep search failed with status {e.response.status_code}: {str(e)}")
+            else:
+                raise Exception(f"Yep search failed: {str(e)}")
+
+    def format_results(self, raw_results: dict) -> List[Dict]:
+        formatted_results = []
+
+        if not raw_results or len(raw_results) < 2:
+            return formatted_results
+
+        results = raw_results[1].get('results', [])
+
+        for result in results:
+            formatted_result = {
+                "title": self._remove_html_tags(result.get("title", "")),
+                "href": result.get("url", ""),
+                "body": self._remove_html_tags(result.get("snippet", "")),
+                "source": result.get("visual_url", ""),
+                "position": len(formatted_results) + 1,
+                "type": result.get("type", "organic"),
+                "first_seen": result.get("first_seen", None)
+            }
+
+            if "sitelinks" in result:
+                sitelinks = []
+                if "full" in result["sitelinks"]:
+                    sitelinks.extend(result["sitelinks"]["full"])
+                if "short" in result["sitelinks"]:
+                    sitelinks.extend(result["sitelinks"]["short"])
+
+                if sitelinks:
+                    formatted_result["sitelinks"] = [
+                        {
+                            "title": self._remove_html_tags(link.get("title", "")),
+                            "href": link.get("url", "")
+                        }
+                        for link in sitelinks
+                    ]
+
+            formatted_results.append(formatted_result)
+
+        return formatted_results
+
+    def _remove_html_tags(self, text: str) -> str:
+        result = ""
+        in_tag = False
+
+        for char in text:
+            if char == '<':
+                in_tag = True
+            elif char == '>':
+                in_tag = False
+            elif not in_tag:
+                result += char
+
+        replacements = {
+            '&nbsp;': ' ',
+            '&amp;': '&',
+            '&lt;': '<',
+            '&gt;': '>',
+            '&quot;': '"',
+            '&#39;': "'",
+        }
+
+        for entity, replacement in replacements.items():
+            result = result.replace(entity, replacement)
+
+        return result.strip()
+
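A short usage sketch for the text engine; note that max_results is applied both as the API "limit" parameter and as a final slice:

search = YepSearch()
for hit in search.run("privacy focused search engines", max_results=5):
    print(hit["position"], hit["title"], hit["href"])
    for link in hit.get("sitelinks", []):
        print("  ->", link["title"], link["href"])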
webscout/search/http_client.py
ADDED
@@ -0,0 +1,156 @@
+"""HTTP client for search engines."""
+
+from __future__ import annotations
+
+import logging
+from random import choice
+from typing import Any, Literal
+
+try:
+    import trio  # noqa: F401
+except ImportError:
+    pass
+
+import curl_cffi.requests
+
+from ..exceptions import RatelimitE, TimeoutE, WebscoutE
+
+logger = logging.getLogger(__name__)
+
+
+class HttpClient:
+    """HTTP client wrapper for search engines."""
+
+    # curl_cffi supported browser impersonations
+    _impersonates = (
+        "chrome99", "chrome100", "chrome101", "chrome104", "chrome107", "chrome110",
+        "chrome116", "chrome119", "chrome120", "chrome123", "chrome124", "chrome131", "chrome133a",
+        "chrome99_android", "chrome131_android",
+        "safari15_3", "safari15_5", "safari17_0", "safari17_2_ios", "safari18_0", "safari18_0_ios",
+        "edge99", "edge101",
+        "firefox133", "firefox135",
+    )
+
+    def __init__(
+        self,
+        proxy: str | None = None,
+        timeout: int | None = 10,
+        verify: bool = True,
+        headers: dict[str, str] | None = None,
+    ) -> None:
+        """Initialize HTTP client.
+
+        Args:
+            proxy: Proxy URL (supports http/https/socks5).
+            timeout: Request timeout in seconds.
+            verify: Whether to verify SSL certificates.
+            headers: Default headers for requests.
+        """
+        self.proxy = proxy
+        self.timeout = timeout
+        self.verify = verify
+
+        # Choose random browser to impersonate
+        impersonate_browser = choice(self._impersonates)
+
+        # Initialize curl_cffi session
+        self.client = curl_cffi.requests.Session(
+            headers=headers or {},
+            proxies={'http': self.proxy, 'https': self.proxy} if self.proxy else None,
+            timeout=timeout,
+            impersonate=impersonate_browser,
+            verify=verify,
+        )
+
+    def request(
+        self,
+        method: Literal["GET", "POST", "HEAD", "OPTIONS", "DELETE", "PUT", "PATCH"],
+        url: str,
+        params: dict[str, Any] | None = None,
+        data: dict[str, Any] | bytes | None = None,
+        json: Any = None,
+        headers: dict[str, str] | None = None,
+        cookies: dict[str, str] | None = None,
+        timeout: int | None = None,
+        **kwargs: Any,
+    ) -> curl_cffi.requests.Response:
+        """Make HTTP request.
+
+        Args:
+            method: HTTP method.
+            url: Request URL.
+            params: URL parameters.
+            data: Request body data.
+            json: JSON data to send.
+            headers: Request headers.
+            cookies: Request cookies.
+            timeout: Request timeout (overrides default).
+            **kwargs: Additional arguments passed to curl_cffi.
+
+        Returns:
+            Response object.
+
+        Raises:
+            TimeoutE: Request timeout.
+            RatelimitE: Rate limit exceeded.
+            WebscoutE: Other request errors.
+        """
+        try:
+            request_kwargs: dict[str, Any] = {
+                "params": params,
+                "headers": headers,
+                "json": json,
+                "timeout": timeout or self.timeout,
+                **kwargs,
+            }
+
+            if isinstance(cookies, dict):
+                request_kwargs["cookies"] = cookies
+
+            if data is not None:
+                request_kwargs["data"] = data
+
+            resp = self.client.request(method, url, **request_kwargs)
+
+            # Check response status
+            if resp.status_code == 200:
+                return resp
+            elif resp.status_code in (202, 301, 403, 400, 429, 418):
+                raise RatelimitE(f"{resp.url} {resp.status_code} Rate limit")
+            else:
+                raise WebscoutE(f"{resp.url} returned {resp.status_code}")
+
+        except curl_cffi.requests.RequestException as ex:
+            if "time" in str(ex).lower() or "timeout" in str(ex).lower():
+                raise TimeoutE(f"{url} {type(ex).__name__}: {ex}") from ex
+            raise WebscoutE(f"{url} {type(ex).__name__}: {ex}") from ex
+
+    def get(self, url: str, **kwargs: Any) -> curl_cffi.requests.Response:
+        """Make GET request."""
+        return self.request("GET", url, **kwargs)
+
+    def post(self, url: str, **kwargs: Any) -> curl_cffi.requests.Response:
+        """Make POST request."""
+        return self.request("POST", url, **kwargs)
+
+    def set_cookies(self, url: str, cookies: dict[str, str]) -> None:
+        """Set cookies for a domain.
+
+        Args:
+            url: URL to set cookies for.
+            cookies: Cookie dictionary.
+        """
+        self.client.cookies.update(cookies)
+
+    def close(self) -> None:
+        """Close the HTTP client."""
+        if hasattr(self.client, 'close'):
+            self.client.close()
+
+    def __enter__(self) -> HttpClient:
+        """Context manager entry."""
+        return self
+
+    def __exit__(self, *args: Any) -> None:
+        """Context manager exit."""
+        self.close()
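HttpClient is a context manager, and request() maps failures onto the package's exception hierarchy: 200 passes through, a fixed set of status codes (202, 301, 400, 403, 418, 429) becomes RatelimitE, anything else WebscoutE, and timeouts TimeoutE. A hedged sketch, with import paths inferred from the package layout rather than confirmed by this diff:

from webscout.search.http_client import HttpClient  # assumed path
from webscout.exceptions import RatelimitE, TimeoutE, WebscoutE  # matches the relative import above

try:
    with HttpClient(proxy=None, timeout=15) as client:
        resp = client.get("https://example.com", params={"q": "test"})
        print(resp.status_code, len(resp.text))
except RatelimitE as exc:
    print("rate limited:", exc)
except (TimeoutE, WebscoutE) as exc:
    print("request failed:", exc)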
webscout/search/results.py
ADDED
@@ -0,0 +1,137 @@
+"""Result models for search engines."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any
+
+
+@dataclass
+class TextResult:
+    """Text search result."""
+
+    title: str = ""
+    href: str = ""
+    body: str = ""
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary."""
+        return {
+            "title": self.title,
+            "href": self.href,
+            "body": self.body,
+        }
+
+
+@dataclass
+class ImagesResult:
+    """Images search result."""
+
+    title: str = ""
+    image: str = ""
+    thumbnail: str = ""
+    url: str = ""
+    height: int = 0
+    width: int = 0
+    source: str = ""
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary."""
+        return {
+            "title": self.title,
+            "image": self.image,
+            "thumbnail": self.thumbnail,
+            "url": self.url,
+            "height": self.height,
+            "width": self.width,
+            "source": self.source,
+        }
+
+
+@dataclass
+class VideosResult:
+    """Videos search result."""
+
+    content: str = ""
+    description: str = ""
+    duration: str = ""
+    embed_html: str = ""
+    embed_url: str = ""
+    image_token: str = ""
+    images: dict[str, str] = field(default_factory=dict)
+    provider: str = ""
+    published: str = ""
+    publisher: str = ""
+    statistics: dict[str, int] = field(default_factory=dict)
+    title: str = ""
+    uploader: str = ""
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary."""
+        return {
+            "content": self.content,
+            "description": self.description,
+            "duration": self.duration,
+            "embed_html": self.embed_html,
+            "embed_url": self.embed_url,
+            "image_token": self.image_token,
+            "images": self.images,
+            "provider": self.provider,
+            "published": self.published,
+            "publisher": self.publisher,
+            "statistics": self.statistics,
+            "title": self.title,
+            "uploader": self.uploader,
+        }
+
+
+@dataclass
+class NewsResult:
+    """News search result."""
+
+    date: str = ""
+    title: str = ""
+    body: str = ""
+    url: str = ""
+    image: str = ""
+    source: str = ""
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary."""
+        return {
+            "date": self.date,
+            "title": self.title,
+            "body": self.body,
+            "url": self.url,
+            "image": self.image,
+            "source": self.source,
+        }
+
+
+@dataclass
+class BooksResult:
+    """Books search result."""
+
+    title: str = ""
+    author: str = ""
+    href: str = ""
+    thumbnail: str = ""
+    year: str = ""
+    publisher: str = ""
+    language: str = ""
+    filesize: str = ""
+    extension: str = ""
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary."""
+        return {
+            "title": self.title,
+            "author": self.author,
+            "href": self.href,
+            "thumbnail": self.thumbnail,
+            "year": self.year,
+            "publisher": self.publisher,
+            "language": self.language,
+            "filesize": self.filesize,
+            "extension": self.extension,
+        }
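These dataclasses default every field, so partially populated results still serialize cleanly. For example:

result = TextResult(title="Example Domain", href="https://example.com")
print(result.to_dict())
# {'title': 'Example Domain', 'href': 'https://example.com', 'body': ''}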
webscout/search/yep_main.py
ADDED
@@ -0,0 +1,44 @@
+"""Yep unified search interface."""
+
+from __future__ import annotations
+from typing import Dict, List, Optional
+from .base import BaseSearch
+from .engines.yep.text import YepSearch as YepTextSearch
+from .engines.yep.images import YepImages
+from .engines.yep.suggestions import YepSuggestions
+
+
+class YepSearch(BaseSearch):
+    """Unified Yep search interface."""
+
+    def text(self, keywords: str, region: str = "all", safesearch: str = "moderate", max_results: Optional[int] = None) -> List[Dict[str, str]]:
+        search = YepTextSearch()
+        return search.run(keywords, region, safesearch, max_results)
+
+    def images(self, keywords: str, region: str = "all", safesearch: str = "moderate", max_results: Optional[int] = None) -> List[Dict[str, str]]:
+        search = YepImages()
+        return search.run(keywords, region, safesearch, max_results)
+
+    def suggestions(self, keywords: str, region: str = "all") -> List[str]:
+        search = YepSuggestions()
+        return search.run(keywords, region)
+
+    def videos(self, *args, **kwargs) -> List[Dict[str, str]]:
+        """Videos search not supported by Yep."""
+        raise NotImplementedError("Yep does not support video search")
+
+    def news(self, *args, **kwargs) -> List[Dict[str, str]]:
+        """News search not supported by Yep."""
+        raise NotImplementedError("Yep does not support news search")
+
+    def answers(self, *args, **kwargs) -> List[Dict[str, str]]:
+        """Instant answers not supported by Yep."""
+        raise NotImplementedError("Yep does not support instant answers")
+
+    def maps(self, *args, **kwargs) -> List[Dict[str, str]]:
+        """Maps search not supported by Yep."""
+        raise NotImplementedError("Yep does not support maps search")
+
+    def translate(self, *args, **kwargs) -> List[Dict[str, str]]:
+        """Translation not supported by Yep."""
+        raise NotImplementedError("Yep does not support translation")
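Note the naming: engines/yep/text.py also defines a class called YepSearch, so it is imported here under the alias YepTextSearch. Unsupported verticals fail loudly rather than returning empty lists:

yep = YepSearch()
try:
    yep.videos("anything")
except NotImplementedError as exc:
    print(exc)  # Yep does not support video search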
webscout/version.py
CHANGED
@@ -1,2 +1,2 @@
-__version__ = "2025.10.11"
+__version__ = "2025.10.13"
 __prog__ = "webscout"
webscout/version.py.bak
ADDED

{webscout-2025.10.11.dist-info → webscout-2025.10.13.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: webscout
-Version: 2025.10.11
+Version: 2025.10.13
 Summary: Search for anything using Google, DuckDuckGo, phind.com, Contains AI models, can transcribe yt videos, temporary email and phone number generation, has TTS support, webai (terminal gpt and open interpreter) and offline LLMs and more
 Author-email: OEvortex <helpingai5@gmail.com>
 License: HelpingAI
@@ -477,7 +477,7 @@ results = search("Python programming", num_results=5)
 
 <hr/>
 
-## 🦆 DuckDuckGo Search with WEBS
+## 🦆 DuckDuckGo Search with WEBS
 
 Webscout provides powerful interfaces to DuckDuckGo's search capabilities through the `WEBS` and `AsyncWEBS` classes.
 
@@ -586,8 +586,7 @@ with WEBS() as webs:
 <p>
 
 ```python
-from webscout import
-import datetime
+from webscout.search import DuckDuckGoSearch
 
 def fetch_formatted_news(keywords, timelimit='d', max_results=20):
     """Fetch and format news articles"""
|