webscout 1.1.7__py3-none-any.whl → 1.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of webscout might be problematic. Click here for more details.
- webscout/AIbase.py +69 -69
- webscout/AIutel.py +654 -654
- webscout/__init__.py +0 -4
- webscout/cli.py +9 -8
- webscout/exceptions.py +9 -1
- webscout/utils.py +4 -18
- webscout/version.py +1 -1
- webscout/webscout_search.py +17 -3
- webscout/webscout_search_async.py +107 -55
- {webscout-1.1.7.dist-info → webscout-1.1.9.dist-info}/METADATA +3 -9
- webscout-1.1.9.dist-info/RECORD +20 -0
- webscout-1.1.7.dist-info/RECORD +0 -20
- {webscout-1.1.7.dist-info → webscout-1.1.9.dist-info}/LICENSE.md +0 -0
- {webscout-1.1.7.dist-info → webscout-1.1.9.dist-info}/WHEEL +0 -0
- {webscout-1.1.7.dist-info → webscout-1.1.9.dist-info}/entry_points.txt +0 -0
- {webscout-1.1.7.dist-info → webscout-1.1.9.dist-info}/top_level.txt +0 -0
webscout/__init__.py
CHANGED
|
@@ -5,14 +5,10 @@ using the DuckDuckGo.com search engine.
|
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
7
|
import logging
|
|
8
|
-
|
|
9
|
-
# ruff: noqa: F401
|
|
10
8
|
from .webscout_search import WEBS
|
|
11
9
|
from .webscout_search_async import AsyncWEBS
|
|
12
10
|
from .version import __version__
|
|
13
11
|
|
|
14
12
|
__all__ = ["WEBS", "AsyncWEBS", "__version__", "cli"]
|
|
15
13
|
|
|
16
|
-
# A do-nothing logging handler
|
|
17
|
-
# https://docs.python.org/3.3/howto/logging.html#configuring-logging-for-a-library
|
|
18
14
|
logging.getLogger("webscout").addHandler(logging.NullHandler())
|
webscout/cli.py
CHANGED
|
@@ -8,7 +8,7 @@ from urllib.parse import unquote
|
|
|
8
8
|
import click
|
|
9
9
|
from curl_cffi import requests
|
|
10
10
|
|
|
11
|
-
from .
|
|
11
|
+
from .webscout_search import WEBS
|
|
12
12
|
from .utils import json_dumps
|
|
13
13
|
from .version import __version__
|
|
14
14
|
|
|
@@ -114,18 +114,19 @@ def _download_results(keywords, results, images=False, proxy=None, threads=None)
|
|
|
114
114
|
|
|
115
115
|
@click.group(chain=True)
|
|
116
116
|
def cli():
|
|
117
|
-
"""
|
|
118
|
-
|
|
119
|
-
This decorator is used to create a group of commands.
|
|
120
|
-
The `chain=True` parameter allows the commands in the group to be chained together,
|
|
121
|
-
meaning the output of one command can be used as the input for the next command.
|
|
122
|
-
"""
|
|
117
|
+
"""dukduckgo_search CLI tool"""
|
|
123
118
|
pass
|
|
124
119
|
|
|
125
120
|
|
|
121
|
+
def safe_entry_point():
|
|
122
|
+
try:
|
|
123
|
+
cli()
|
|
124
|
+
except Exception as ex:
|
|
125
|
+
click.echo(f"{type(ex).__name__}: {ex}")
|
|
126
|
+
|
|
127
|
+
|
|
126
128
|
@cli.command()
|
|
127
129
|
def version():
|
|
128
|
-
"""A command-line interface command that prints and returns the current version of the program."""
|
|
129
130
|
print(__version__)
|
|
130
131
|
return __version__
|
|
131
132
|
|
webscout/exceptions.py
CHANGED
|
@@ -1,2 +1,10 @@
|
|
|
1
1
|
class WebscoutE(Exception):
|
|
2
|
-
"""Base exception class for
|
|
2
|
+
"""Base exception class for duckduckgo_search."""
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class RatelimitE(Exception):
|
|
6
|
+
"""Raised for rate limit exceeded errors during API requests."""
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class TimeoutE(Exception):
|
|
10
|
+
"""Raised for timeout errors during API requests."""
|
webscout/utils.py
CHANGED
|
@@ -1,35 +1,27 @@
|
|
|
1
|
-
import json
|
|
2
1
|
import re
|
|
3
2
|
from decimal import Decimal
|
|
4
|
-
from functools import lru_cache
|
|
5
3
|
from html import unescape
|
|
6
4
|
from math import atan2, cos, radians, sin, sqrt
|
|
7
5
|
from typing import Any, Dict, List, Union
|
|
8
6
|
from urllib.parse import unquote
|
|
9
7
|
|
|
10
|
-
|
|
8
|
+
import orjson
|
|
11
9
|
|
|
12
|
-
|
|
13
|
-
import orjson
|
|
14
|
-
except ModuleNotFoundError:
|
|
15
|
-
HAS_ORJSON = False
|
|
16
|
-
else:
|
|
17
|
-
HAS_ORJSON = True
|
|
10
|
+
from .exceptions import WebscoutE
|
|
18
11
|
|
|
19
|
-
REGEX_500_IN_URL = re.compile(r"(?:\d{3}-\d{2}\.js)")
|
|
20
12
|
REGEX_STRIP_TAGS = re.compile("<.*?>")
|
|
21
13
|
|
|
22
14
|
|
|
23
15
|
def json_dumps(obj: Any) -> str:
|
|
24
16
|
try:
|
|
25
|
-
return orjson.dumps(obj).decode("utf-8")
|
|
17
|
+
return orjson.dumps(obj).decode("utf-8")
|
|
26
18
|
except Exception as ex:
|
|
27
19
|
raise WebscoutE(f"{type(ex).__name__}: {ex}") from ex
|
|
28
20
|
|
|
29
21
|
|
|
30
22
|
def json_loads(obj: Union[str, bytes]) -> Any:
|
|
31
23
|
try:
|
|
32
|
-
return orjson.loads(obj)
|
|
24
|
+
return orjson.loads(obj)
|
|
33
25
|
except Exception as ex:
|
|
34
26
|
raise WebscoutE(f"{type(ex).__name__}: {ex}") from ex
|
|
35
27
|
|
|
@@ -63,12 +55,6 @@ def _text_extract_json(html_bytes: bytes, keywords: str) -> List[Dict[str, str]]
|
|
|
63
55
|
raise WebscoutE(f"_text_extract_json() {keywords=} return None")
|
|
64
56
|
|
|
65
57
|
|
|
66
|
-
@lru_cache
|
|
67
|
-
def _is_500_in_url(url: str) -> bool:
|
|
68
|
-
"""Something like '506-00.js' inside the url."""
|
|
69
|
-
return bool(REGEX_500_IN_URL.search(url))
|
|
70
|
-
|
|
71
|
-
|
|
72
58
|
def _normalize(raw_html: str) -> str:
|
|
73
59
|
"""Strip HTML tags from the raw_html string."""
|
|
74
60
|
return unescape(REGEX_STRIP_TAGS.sub("", raw_html)) if raw_html else ""
|
webscout/version.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
__version__ = "1.1.7"
|
|
1
|
+
__version__ = "1.1.9"
|
|
2
2
|
|
webscout/webscout_search.py
CHANGED
|
@@ -3,7 +3,7 @@ from concurrent.futures import Future
|
|
|
3
3
|
from threading import Thread
|
|
4
4
|
from types import TracebackType
|
|
5
5
|
from typing import Any, Awaitable, Dict, Optional, Type, Union
|
|
6
|
-
|
|
6
|
+
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
|
|
7
7
|
from .webscout_search_async import AsyncWEBS
|
|
8
8
|
|
|
9
9
|
|
|
@@ -18,6 +18,7 @@ class WEBS(AsyncWEBS):
|
|
|
18
18
|
timeout: Optional[int] = 10,
|
|
19
19
|
) -> None:
|
|
20
20
|
super().__init__(headers=headers, proxies=proxies, timeout=timeout)
|
|
21
|
+
self._exit_done = False
|
|
21
22
|
|
|
22
23
|
def __enter__(self) -> "WEBS":
|
|
23
24
|
return self
|
|
@@ -27,8 +28,21 @@ class WEBS(AsyncWEBS):
|
|
|
27
28
|
exc_type: Optional[Type[BaseException]],
|
|
28
29
|
exc_val: Optional[BaseException],
|
|
29
30
|
exc_tb: Optional[TracebackType],
|
|
30
|
-
) ->
|
|
31
|
-
|
|
31
|
+
) -> None:
|
|
32
|
+
self._close_session()
|
|
33
|
+
|
|
34
|
+
def __del__(self) -> None:
|
|
35
|
+
self._close_session()
|
|
36
|
+
|
|
37
|
+
def _close_session(self) -> None:
|
|
38
|
+
"""Close the curl-cffi async session."""
|
|
39
|
+
if self._exit_done is False:
|
|
40
|
+
# Ensure self._asession.close() is a coroutine
|
|
41
|
+
coro = self._asession.close()
|
|
42
|
+
# Check if coro is a coroutine object
|
|
43
|
+
if asyncio.iscoroutine(coro):
|
|
44
|
+
self._run_async_in_thread(coro)
|
|
45
|
+
self._exit_done = True
|
|
32
46
|
|
|
33
47
|
def _run_async_in_thread(self, coro: Awaitable[Any]) -> Any:
|
|
34
48
|
"""Runs an async coroutine in a separate thread."""
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import logging
|
|
3
|
+
import warnings
|
|
3
4
|
from concurrent.futures import ThreadPoolExecutor
|
|
4
5
|
from contextlib import suppress
|
|
5
6
|
from datetime import datetime, timezone
|
|
@@ -10,24 +11,30 @@ from types import TracebackType
|
|
|
10
11
|
from typing import Dict, List, Optional, Tuple, Union
|
|
11
12
|
|
|
12
13
|
from curl_cffi import requests
|
|
13
|
-
from lxml import html
|
|
14
14
|
|
|
15
|
-
|
|
15
|
+
try:
|
|
16
|
+
from lxml.html import HTMLParser as LHTMLParser
|
|
17
|
+
from lxml.html import document_fromstring
|
|
18
|
+
|
|
19
|
+
LXML_AVAILABLE = True
|
|
20
|
+
except ImportError:
|
|
21
|
+
LXML_AVAILABLE = False
|
|
22
|
+
|
|
23
|
+
from .exceptions import WebscoutE, RatelimitE, TimeoutE
|
|
16
24
|
from .utils import (
|
|
17
25
|
_calculate_distance,
|
|
18
26
|
_extract_vqd,
|
|
19
|
-
_is_500_in_url,
|
|
20
27
|
_normalize,
|
|
21
28
|
_normalize_url,
|
|
22
29
|
_text_extract_json,
|
|
23
30
|
json_loads,
|
|
24
31
|
)
|
|
25
32
|
|
|
26
|
-
logger = logging.getLogger("AsyncWEBS")
|
|
33
|
+
logger = logging.getLogger("webscout_search.AsyncWEBS")
|
|
27
34
|
|
|
28
35
|
|
|
29
36
|
class AsyncWEBS:
|
|
30
|
-
"""
|
|
37
|
+
"""Webscout async class to get search results from duckduckgo.com."""
|
|
31
38
|
|
|
32
39
|
_executor: Optional[ThreadPoolExecutor] = None
|
|
33
40
|
|
|
@@ -43,40 +50,52 @@ class AsyncWEBS:
|
|
|
43
50
|
headers (dict, optional): Dictionary of headers for the HTTP client. Defaults to None.
|
|
44
51
|
proxies (Union[dict, str], optional): Proxies for the HTTP client (can be dict or str). Defaults to None.
|
|
45
52
|
timeout (int, optional): Timeout value for the HTTP client. Defaults to 10.
|
|
46
|
-
|
|
47
|
-
Raises:
|
|
48
|
-
webscoutE: Raised when there is a generic exception during the API request.
|
|
49
53
|
"""
|
|
50
54
|
self.proxies = {"all": proxies} if isinstance(proxies, str) else proxies
|
|
51
55
|
self._asession = requests.AsyncSession(
|
|
52
|
-
headers=headers,
|
|
56
|
+
headers=headers,
|
|
57
|
+
proxies=self.proxies,
|
|
58
|
+
timeout=timeout,
|
|
59
|
+
impersonate="chrome",
|
|
60
|
+
allow_redirects=False,
|
|
53
61
|
)
|
|
54
62
|
self._asession.headers["Referer"] = "https://duckduckgo.com/"
|
|
55
|
-
self._parser: Optional[
|
|
63
|
+
self._parser: Optional[LHTMLParser] = None
|
|
64
|
+
self._exception_event = asyncio.Event()
|
|
65
|
+
self._exit_done = False
|
|
56
66
|
|
|
57
67
|
async def __aenter__(self) -> "AsyncWEBS":
|
|
58
|
-
"""A context manager method that is called when entering the 'with' statement."""
|
|
59
68
|
return self
|
|
60
69
|
|
|
61
70
|
async def __aexit__(
|
|
62
|
-
self,
|
|
71
|
+
self,
|
|
72
|
+
exc_type: Optional[BaseException] = None,
|
|
73
|
+
exc_val: Optional[BaseException] = None,
|
|
74
|
+
exc_tb: Optional[TracebackType] = None,
|
|
63
75
|
) -> None:
|
|
64
|
-
|
|
65
|
-
|
|
76
|
+
await self._session_close()
|
|
77
|
+
|
|
78
|
+
def __del__(self) -> None:
|
|
79
|
+
if self._exit_done is False:
|
|
80
|
+
asyncio.create_task(self._session_close())
|
|
66
81
|
|
|
67
|
-
def
|
|
82
|
+
async def _session_close(self) -> None:
|
|
83
|
+
"""Close the curl-cffi async session."""
|
|
84
|
+
if self._exit_done is False:
|
|
85
|
+
await self._asession.close()
|
|
86
|
+
self._exit_done = True
|
|
87
|
+
|
|
88
|
+
def _get_parser(self) -> "LHTMLParser":
|
|
68
89
|
"""Get HTML parser."""
|
|
69
90
|
if self._parser is None:
|
|
70
|
-
self._parser =
|
|
71
|
-
remove_blank_text=True, remove_comments=True, remove_pis=True, collect_ids=False
|
|
72
|
-
)
|
|
91
|
+
self._parser = LHTMLParser(remove_blank_text=True, remove_comments=True, remove_pis=True, collect_ids=False)
|
|
73
92
|
return self._parser
|
|
74
93
|
|
|
75
94
|
def _get_executor(self, max_workers: int = 1) -> ThreadPoolExecutor:
|
|
76
95
|
"""Get ThreadPoolExecutor. Default max_workers=1, because >=2 leads to a big overhead"""
|
|
77
|
-
if
|
|
78
|
-
|
|
79
|
-
return
|
|
96
|
+
if AsyncWEBS._executor is None:
|
|
97
|
+
AsyncWEBS._executor = ThreadPoolExecutor(max_workers=max_workers)
|
|
98
|
+
return AsyncWEBS._executor
|
|
80
99
|
|
|
81
100
|
async def _aget_url(
|
|
82
101
|
self,
|
|
@@ -85,18 +104,23 @@ class AsyncWEBS:
|
|
|
85
104
|
data: Optional[Union[Dict[str, str], bytes]] = None,
|
|
86
105
|
params: Optional[Dict[str, str]] = None,
|
|
87
106
|
) -> bytes:
|
|
107
|
+
if self._exception_event.is_set():
|
|
108
|
+
raise WebscoutE("Exception occurred in previous call.")
|
|
88
109
|
try:
|
|
89
110
|
resp = await self._asession.request(method, url, data=data, params=params, stream=True)
|
|
90
|
-
resp.raise_for_status()
|
|
91
111
|
resp_content: bytes = await resp.acontent()
|
|
92
|
-
logger.debug(f"_aget_url() {resp.status_code} {resp.elapsed:.2f} {len(resp_content)} {resp.url}")
|
|
93
|
-
if _is_500_in_url(resp.url) or resp.status_code == 202:
|
|
94
|
-
raise WebscoutE("Ratelimit")
|
|
95
|
-
if resp.status_code == 200:
|
|
96
|
-
return resp_content
|
|
97
112
|
except Exception as ex:
|
|
98
|
-
|
|
99
|
-
|
|
113
|
+
self._exception_event.set()
|
|
114
|
+
if "time" in str(ex).lower():
|
|
115
|
+
raise TimeoutE(f"{url} {type(ex).__name__}: {ex}") from ex
|
|
116
|
+
raise WebscoutE(f"{url} {type(ex).__name__}: {ex}") from ex
|
|
117
|
+
logger.debug(f"_aget_url() {resp.url} {resp.status_code} {resp.elapsed:.2f} {len(resp_content)}")
|
|
118
|
+
if resp.status_code == 200:
|
|
119
|
+
return resp_content
|
|
120
|
+
self._exception_event.set()
|
|
121
|
+
if resp.status_code in (202, 301, 403):
|
|
122
|
+
raise RatelimitE(f"{resp.url} {resp.status_code}")
|
|
123
|
+
raise WebscoutE(f"{resp.url} return None. {params=} {data=}")
|
|
100
124
|
|
|
101
125
|
async def _aget_vqd(self, keywords: str) -> str:
|
|
102
126
|
"""Get vqd value for a search query."""
|
|
@@ -129,8 +153,14 @@ class AsyncWEBS:
|
|
|
129
153
|
List of dictionaries with search results, or None if there was an error.
|
|
130
154
|
|
|
131
155
|
Raises:
|
|
132
|
-
WebscoutE:
|
|
156
|
+
WebscoutE: Base exception for Webscout errors.
|
|
157
|
+
RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
|
|
158
|
+
TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
|
|
133
159
|
"""
|
|
160
|
+
if LXML_AVAILABLE is False and backend != "api":
|
|
161
|
+
backend = "api"
|
|
162
|
+
warnings.warn("lxml is not installed. Using backend='api'.", stacklevel=2)
|
|
163
|
+
|
|
134
164
|
if backend == "api":
|
|
135
165
|
results = await self._text_api(keywords, region, safesearch, timelimit, max_results)
|
|
136
166
|
elif backend == "html":
|
|
@@ -147,7 +177,7 @@ class AsyncWEBS:
|
|
|
147
177
|
timelimit: Optional[str] = None,
|
|
148
178
|
max_results: Optional[int] = None,
|
|
149
179
|
) -> List[Dict[str, str]]:
|
|
150
|
-
"""
|
|
180
|
+
"""Webscout text search generator. Query params: https://duckduckgo.com/params.
|
|
151
181
|
|
|
152
182
|
Args:
|
|
153
183
|
keywords: keywords for query.
|
|
@@ -160,7 +190,9 @@ class AsyncWEBS:
|
|
|
160
190
|
List of dictionaries with search results.
|
|
161
191
|
|
|
162
192
|
Raises:
|
|
163
|
-
WebscoutE:
|
|
193
|
+
WebscoutE: Base exception for Webscout errors.
|
|
194
|
+
RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
|
|
195
|
+
TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
|
|
164
196
|
"""
|
|
165
197
|
assert keywords, "keywords is mandatory"
|
|
166
198
|
|
|
@@ -170,9 +202,11 @@ class AsyncWEBS:
|
|
|
170
202
|
"q": keywords,
|
|
171
203
|
"kl": region,
|
|
172
204
|
"l": region,
|
|
205
|
+
"p": "",
|
|
206
|
+
"s": "0",
|
|
207
|
+
"df": "",
|
|
173
208
|
"vqd": vqd,
|
|
174
|
-
"
|
|
175
|
-
"a": "ftsa", # something
|
|
209
|
+
"ex": "",
|
|
176
210
|
}
|
|
177
211
|
safesearch = safesearch.lower()
|
|
178
212
|
if safesearch == "moderate":
|
|
@@ -223,7 +257,7 @@ class AsyncWEBS:
|
|
|
223
257
|
timelimit: Optional[str] = None,
|
|
224
258
|
max_results: Optional[int] = None,
|
|
225
259
|
) -> List[Dict[str, str]]:
|
|
226
|
-
"""
|
|
260
|
+
"""Webscout text search generator. Query params: https://duckduckgo.com/params.
|
|
227
261
|
|
|
228
262
|
Args:
|
|
229
263
|
keywords: keywords for query.
|
|
@@ -236,7 +270,9 @@ class AsyncWEBS:
|
|
|
236
270
|
List of dictionaries with search results.
|
|
237
271
|
|
|
238
272
|
Raises:
|
|
239
|
-
WebscoutE:
|
|
273
|
+
WebscoutE: Base exception for Webscout errors.
|
|
274
|
+
RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
|
|
275
|
+
TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
|
|
240
276
|
"""
|
|
241
277
|
assert keywords, "keywords is mandatory"
|
|
242
278
|
|
|
@@ -266,7 +302,7 @@ class AsyncWEBS:
|
|
|
266
302
|
return
|
|
267
303
|
|
|
268
304
|
tree = await self._asession.loop.run_in_executor(
|
|
269
|
-
self._get_executor(), partial(
|
|
305
|
+
self._get_executor(), partial(document_fromstring, resp_content, self._get_parser())
|
|
270
306
|
)
|
|
271
307
|
|
|
272
308
|
for e in tree.xpath("//div[h2]"):
|
|
@@ -306,7 +342,7 @@ class AsyncWEBS:
|
|
|
306
342
|
timelimit: Optional[str] = None,
|
|
307
343
|
max_results: Optional[int] = None,
|
|
308
344
|
) -> List[Dict[str, str]]:
|
|
309
|
-
"""
|
|
345
|
+
"""Webscout text search generator. Query params: https://duckduckgo.com/params.
|
|
310
346
|
|
|
311
347
|
Args:
|
|
312
348
|
keywords: keywords for query.
|
|
@@ -318,7 +354,9 @@ class AsyncWEBS:
|
|
|
318
354
|
List of dictionaries with search results.
|
|
319
355
|
|
|
320
356
|
Raises:
|
|
321
|
-
WebscoutE:
|
|
357
|
+
WebscoutE: Base exception for Webscout errors.
|
|
358
|
+
RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
|
|
359
|
+
TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
|
|
322
360
|
"""
|
|
323
361
|
assert keywords, "keywords is mandatory"
|
|
324
362
|
|
|
@@ -343,7 +381,7 @@ class AsyncWEBS:
|
|
|
343
381
|
return
|
|
344
382
|
|
|
345
383
|
tree = await self._asession.loop.run_in_executor(
|
|
346
|
-
self._get_executor(), partial(
|
|
384
|
+
self._get_executor(), partial(document_fromstring, resp_content, self._get_parser())
|
|
347
385
|
)
|
|
348
386
|
|
|
349
387
|
data = zip(cycle(range(1, 5)), tree.xpath("//table[last()]//tr"))
|
|
@@ -393,7 +431,7 @@ class AsyncWEBS:
|
|
|
393
431
|
license_image: Optional[str] = None,
|
|
394
432
|
max_results: Optional[int] = None,
|
|
395
433
|
) -> List[Dict[str, str]]:
|
|
396
|
-
"""
|
|
434
|
+
"""Webscout images search. Query params: https://duckduckgo.com/params.
|
|
397
435
|
|
|
398
436
|
Args:
|
|
399
437
|
keywords: keywords for query.
|
|
@@ -416,7 +454,9 @@ class AsyncWEBS:
|
|
|
416
454
|
List of dictionaries with images search results.
|
|
417
455
|
|
|
418
456
|
Raises:
|
|
419
|
-
WebscoutE:
|
|
457
|
+
WebscoutE: Base exception for Webscout errors.
|
|
458
|
+
RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
|
|
459
|
+
TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
|
|
420
460
|
"""
|
|
421
461
|
assert keywords, "keywords is mandatory"
|
|
422
462
|
|
|
@@ -484,7 +524,7 @@ class AsyncWEBS:
|
|
|
484
524
|
license_videos: Optional[str] = None,
|
|
485
525
|
max_results: Optional[int] = None,
|
|
486
526
|
) -> List[Dict[str, str]]:
|
|
487
|
-
"""
|
|
527
|
+
"""Webscout videos search. Query params: https://duckduckgo.com/params.
|
|
488
528
|
|
|
489
529
|
Args:
|
|
490
530
|
keywords: keywords for query.
|
|
@@ -500,7 +540,9 @@ class AsyncWEBS:
|
|
|
500
540
|
List of dictionaries with videos search results.
|
|
501
541
|
|
|
502
542
|
Raises:
|
|
503
|
-
WebscoutE:
|
|
543
|
+
WebscoutE: Base exception for Webscout errors.
|
|
544
|
+
RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
|
|
545
|
+
TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
|
|
504
546
|
"""
|
|
505
547
|
assert keywords, "keywords is mandatory"
|
|
506
548
|
|
|
@@ -553,7 +595,7 @@ class AsyncWEBS:
|
|
|
553
595
|
timelimit: Optional[str] = None,
|
|
554
596
|
max_results: Optional[int] = None,
|
|
555
597
|
) -> List[Dict[str, str]]:
|
|
556
|
-
"""
|
|
598
|
+
"""Webscout news search. Query params: https://duckduckgo.com/params.
|
|
557
599
|
|
|
558
600
|
Args:
|
|
559
601
|
keywords: keywords for query.
|
|
@@ -566,7 +608,9 @@ class AsyncWEBS:
|
|
|
566
608
|
List of dictionaries with news search results.
|
|
567
609
|
|
|
568
610
|
Raises:
|
|
569
|
-
|
|
611
|
+
WebscoutE: Base exception for Webscout errors.
|
|
612
|
+
RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
|
|
613
|
+
TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
|
|
570
614
|
"""
|
|
571
615
|
assert keywords, "keywords is mandatory"
|
|
572
616
|
|
|
@@ -618,7 +662,7 @@ class AsyncWEBS:
|
|
|
618
662
|
return list(islice(filter(None, results), max_results))
|
|
619
663
|
|
|
620
664
|
async def answers(self, keywords: str) -> List[Dict[str, str]]:
|
|
621
|
-
"""
|
|
665
|
+
"""Webscout instant answers. Query params: https://duckduckgo.com/params.
|
|
622
666
|
|
|
623
667
|
Args:
|
|
624
668
|
keywords: keywords for query,
|
|
@@ -627,7 +671,9 @@ class AsyncWEBS:
|
|
|
627
671
|
List of dictionaries with instant answers results.
|
|
628
672
|
|
|
629
673
|
Raises:
|
|
630
|
-
WebscoutE:
|
|
674
|
+
WebscoutE: Base exception for Webscout errors.
|
|
675
|
+
RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
|
|
676
|
+
TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
|
|
631
677
|
"""
|
|
632
678
|
assert keywords, "keywords is mandatory"
|
|
633
679
|
|
|
@@ -687,7 +733,7 @@ class AsyncWEBS:
|
|
|
687
733
|
return results
|
|
688
734
|
|
|
689
735
|
async def suggestions(self, keywords: str, region: str = "wt-wt") -> List[Dict[str, str]]:
|
|
690
|
-
"""
|
|
736
|
+
"""Webscout suggestions. Query params: https://duckduckgo.com/params.
|
|
691
737
|
|
|
692
738
|
Args:
|
|
693
739
|
keywords: keywords for query.
|
|
@@ -697,7 +743,9 @@ class AsyncWEBS:
|
|
|
697
743
|
List of dictionaries with suggestions results.
|
|
698
744
|
|
|
699
745
|
Raises:
|
|
700
|
-
WebscoutE:
|
|
746
|
+
WebscoutE: Base exception for Webscout errors.
|
|
747
|
+
RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
|
|
748
|
+
TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
|
|
701
749
|
"""
|
|
702
750
|
assert keywords, "keywords is mandatory"
|
|
703
751
|
|
|
@@ -705,7 +753,7 @@ class AsyncWEBS:
|
|
|
705
753
|
"q": keywords,
|
|
706
754
|
"kl": region,
|
|
707
755
|
}
|
|
708
|
-
resp_content = await self._aget_url("GET", "https://duckduckgo.com/ac", params=payload)
|
|
756
|
+
resp_content = await self._aget_url("GET", "https://duckduckgo.com/ac/", params=payload)
|
|
709
757
|
page_data = json_loads(resp_content)
|
|
710
758
|
return [r for r in page_data]
|
|
711
759
|
|
|
@@ -724,7 +772,7 @@ class AsyncWEBS:
|
|
|
724
772
|
radius: int = 0,
|
|
725
773
|
max_results: Optional[int] = None,
|
|
726
774
|
) -> List[Dict[str, str]]:
|
|
727
|
-
"""
|
|
775
|
+
"""Webscout maps search. Query params: https://duckduckgo.com/params.
|
|
728
776
|
|
|
729
777
|
Args:
|
|
730
778
|
keywords: keywords for query
|
|
@@ -745,7 +793,9 @@ class AsyncWEBS:
|
|
|
745
793
|
List of dictionaries with maps search results, or None if there was an error.
|
|
746
794
|
|
|
747
795
|
Raises:
|
|
748
|
-
|
|
796
|
+
WebscoutE: Base exception for Webscout errors.
|
|
797
|
+
RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
|
|
798
|
+
TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
|
|
749
799
|
"""
|
|
750
800
|
assert keywords, "keywords is mandatory"
|
|
751
801
|
|
|
@@ -895,7 +945,7 @@ class AsyncWEBS:
|
|
|
895
945
|
async def translate(
|
|
896
946
|
self, keywords: Union[List[str], str], from_: Optional[str] = None, to: str = "en"
|
|
897
947
|
) -> List[Dict[str, str]]:
|
|
898
|
-
"""
|
|
948
|
+
"""Webscout translate.
|
|
899
949
|
|
|
900
950
|
Args:
|
|
901
951
|
keywords: string or list of strings to translate.
|
|
@@ -906,7 +956,9 @@ class AsyncWEBS:
|
|
|
906
956
|
List of dictionaries with translated keywords.
|
|
907
957
|
|
|
908
958
|
Raises:
|
|
909
|
-
|
|
959
|
+
WebscoutE: Base exception for Webscout errors.
|
|
960
|
+
RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
|
|
961
|
+
TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
|
|
910
962
|
"""
|
|
911
963
|
assert keywords, "keywords is mandatory"
|
|
912
964
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: webscout
|
|
3
|
-
Version: 1.1.7
|
|
3
|
+
Version: 1.1.9
|
|
4
4
|
Summary: Search for words, documents, images, videos, news, maps and text translation using the DuckDuckGo.com, yep.com, phind.com, you.com, etc Also containes AI models
|
|
5
5
|
Author: OEvortex
|
|
6
6
|
Author-email: helpingai5@gmail.com
|
|
@@ -376,14 +376,8 @@ Here is an example of initializing the WEBS class:
|
|
|
376
376
|
```python3
|
|
377
377
|
from webscout import WEBS
|
|
378
378
|
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
# Fetching text results for the query "python programming" with a maximum of 5 results
|
|
383
|
-
results = [result for result in WEBS_instance.text("python programming", max_results=5)]
|
|
384
|
-
|
|
385
|
-
# Displaying the obtained results
|
|
386
|
-
print(results)
|
|
379
|
+
R = WEBS().text("python programming", max_results=5)
|
|
380
|
+
print(R)
|
|
387
381
|
```
|
|
388
382
|
Here is an example of initializing the AsyncWEBS class:
|
|
389
383
|
```python3
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
webscout/AI.py,sha256=CwUCeGnNRL9STd5bAZSyIiLysorBMu065HrkY8UCzAQ,49618
|
|
2
|
+
webscout/AIbase.py,sha256=vQi2ougu5bG-QdmoYmxCQsOg7KTEgG7EF6nZh5qqUGw,2343
|
|
3
|
+
webscout/AIutel.py,sha256=cvsuw57hq3GirAiT-PjqwhAiLPf1urOzDb2szJ4bwmo,24124
|
|
4
|
+
webscout/HelpingAI.py,sha256=YeZw0zYVHMcBFFPNdd3_Ghpm9ebt_EScQjHO_IIs4lg,8103
|
|
5
|
+
webscout/LLM.py,sha256=XByJPiATLA_57FBWKw18Xx_PGRCPOj-GJE96aQH1k2Y,3309
|
|
6
|
+
webscout/__init__.py,sha256=RrvYG33maRS0U7J-M99FroQLzVkt_gxDJ6WZ0_BkmTk,398
|
|
7
|
+
webscout/__main__.py,sha256=ZtTRgsRjUi2JOvYFLF1ZCh55Sdoz94I-BS-TlJC7WDU,126
|
|
8
|
+
webscout/cli.py,sha256=F888fdrFUQgczMBN4yMOSf6Nh-IbvkqpPhDsbnA2FtQ,17059
|
|
9
|
+
webscout/exceptions.py,sha256=4AOO5wexeL96nvUS-badcckcwrPS7UpZyAgB9vknHZE,276
|
|
10
|
+
webscout/models.py,sha256=5iQIdtedT18YuTZ3npoG7kLMwcrKwhQ7928dl_7qZW0,692
|
|
11
|
+
webscout/utils.py,sha256=c_98M4oqpb54pUun3fpGGlCerFD6ZHUbghyp5b7Mwgo,2605
|
|
12
|
+
webscout/version.py,sha256=9Xh1hfMTJ3cNE-lJfuf9gTs-1e4Ze1MpQ16B7bymBJM,25
|
|
13
|
+
webscout/webscout_search.py,sha256=TnpcL3ZvLPiCUD8TikFzvH_1UFnFcZVI6W8PUb7BB-8,2910
|
|
14
|
+
webscout/webscout_search_async.py,sha256=Y5frH0k3hLqBCR-8dn7a_b7EvxdYxn6wHiKl3jWosE0,40670
|
|
15
|
+
webscout-1.1.9.dist-info/LICENSE.md,sha256=mRVwJuT4SXC5O93BFdsfWBjlXjGn2Np90Zm5SocUzM0,3150
|
|
16
|
+
webscout-1.1.9.dist-info/METADATA,sha256=ROIYp7c8Qa9-rqdnVMOU0x6FgA5Ko-Qp3NqRvNDYOWM,24121
|
|
17
|
+
webscout-1.1.9.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
|
18
|
+
webscout-1.1.9.dist-info/entry_points.txt,sha256=8-93eRslYrzTHs5E-6yFRJrve00C9q-SkXJD113jzRY,197
|
|
19
|
+
webscout-1.1.9.dist-info/top_level.txt,sha256=nYIw7OKBQDr_Z33IzZUKidRD3zQEo8jOJYkMVMeN334,9
|
|
20
|
+
webscout-1.1.9.dist-info/RECORD,,
|
webscout-1.1.7.dist-info/RECORD
DELETED
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
webscout/AI.py,sha256=CwUCeGnNRL9STd5bAZSyIiLysorBMu065HrkY8UCzAQ,49618
|
|
2
|
-
webscout/AIbase.py,sha256=jBqTcFRq3IqDKoKTkciy4HaurrL-Iw4SAQIbtkLY4UU,2274
|
|
3
|
-
webscout/AIutel.py,sha256=Su41bjO5rVo7JlICs8sKvRYHX0wcxzy-73hClCTWxyw,23470
|
|
4
|
-
webscout/HelpingAI.py,sha256=YeZw0zYVHMcBFFPNdd3_Ghpm9ebt_EScQjHO_IIs4lg,8103
|
|
5
|
-
webscout/LLM.py,sha256=XByJPiATLA_57FBWKw18Xx_PGRCPOj-GJE96aQH1k2Y,3309
|
|
6
|
-
webscout/__init__.py,sha256=vHJGZexYIaWDTHfMimqA7enct9b7zPDf6jLsS7NDBiA,536
|
|
7
|
-
webscout/__main__.py,sha256=ZtTRgsRjUi2JOvYFLF1ZCh55Sdoz94I-BS-TlJC7WDU,126
|
|
8
|
-
webscout/cli.py,sha256=AI0irt6FtZVWiCEyrDOY78OrXBoQT8pXmct5KN_vPhc,17295
|
|
9
|
-
webscout/exceptions.py,sha256=7u52Mt5iyEUCZvaZuEYwQVV8HL8IdZBv1r5s5Ss_xU0,75
|
|
10
|
-
webscout/models.py,sha256=5iQIdtedT18YuTZ3npoG7kLMwcrKwhQ7928dl_7qZW0,692
|
|
11
|
-
webscout/utils.py,sha256=-YKF2Qw1tYRvPSZpJur7b2lHiXOFDY4iRQryxNxQ7co,3025
|
|
12
|
-
webscout/version.py,sha256=gA8f8N2jHLqj_ji0TxZUT14uhd1nNP2PTHuPHDLPkOM,25
|
|
13
|
-
webscout/webscout_search.py,sha256=WpA64xu1FZfkD6Ry3y0-w1eV0-gVeZjy8wmQWX4Mz2c,2320
|
|
14
|
-
webscout/webscout_search_async.py,sha256=Q5-1NuYhr756aMFZuNpUlUZkz9PifuV8tyDWaTuYrn8,38135
|
|
15
|
-
webscout-1.1.7.dist-info/LICENSE.md,sha256=mRVwJuT4SXC5O93BFdsfWBjlXjGn2Np90Zm5SocUzM0,3150
|
|
16
|
-
webscout-1.1.7.dist-info/METADATA,sha256=Hk4LmPsKmBMpV2PcTX-_uKze_dUd8GU0xnZOltpXysM,24367
|
|
17
|
-
webscout-1.1.7.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
|
18
|
-
webscout-1.1.7.dist-info/entry_points.txt,sha256=8-93eRslYrzTHs5E-6yFRJrve00C9q-SkXJD113jzRY,197
|
|
19
|
-
webscout-1.1.7.dist-info/top_level.txt,sha256=nYIw7OKBQDr_Z33IzZUKidRD3zQEo8jOJYkMVMeN334,9
|
|
20
|
-
webscout-1.1.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|