webscout-2.6-py3-none-any.whl → webscout-2.8-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of webscout has been flagged; see the registry advisory for details.
- webscout/LLM.py +56 -1
- webscout/Local/_version.py +1 -1
- webscout/Local/formats.py +154 -88
- webscout/Local/model.py +4 -4
- webscout/Local/thread.py +166 -156
- webscout/Provider/BasedGPT.py +226 -0
- webscout/Provider/__init__.py +1 -0
- webscout/__init__.py +2 -2
- webscout/cli.py +39 -3
- webscout/version.py +1 -1
- webscout/webscout_search.py +1018 -40
- webscout/webscout_search_async.py +151 -839
- {webscout-2.6.dist-info → webscout-2.8.dist-info}/METADATA +37 -21
- {webscout-2.6.dist-info → webscout-2.8.dist-info}/RECORD +18 -17
- {webscout-2.6.dist-info → webscout-2.8.dist-info}/LICENSE.md +0 -0
- {webscout-2.6.dist-info → webscout-2.8.dist-info}/WHEEL +0 -0
- {webscout-2.6.dist-info → webscout-2.8.dist-info}/entry_points.txt +0 -0
- {webscout-2.6.dist-info → webscout-2.8.dist-info}/top_level.txt +0 -0
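
The most consequential change is in webscout/webscout_search_async.py, reproduced below: the standalone AsyncWEBS implementation, with its own HTTP session, lxml parsing, and per-endpoint pagination, is deleted, and AsyncWEBS becomes a thin subclass of the synchronous WEBS that runs each call on a shared thread-pool executor. A minimal usage sketch of the new surface follows; it assumes AsyncWEBS is still exported from the package root (the diff shows only the class itself) and uses the atext coroutine and result keys ("title", "href") visible in the diff.

import asyncio

from webscout import AsyncWEBS  # assumed export path; not shown in this diff


async def main() -> None:
    # AsyncWEBS.__init__ now calls asyncio.get_running_loop(), so instances
    # must be created inside a running event loop, as done here.
    async with AsyncWEBS() as webs:
        # atext() schedules the synchronous WEBS.text() on the shared
        # thread-pool executor and awaits its result.
        results = await webs.atext("python web scraping", max_results=5)
    for r in results:
        print(r["title"], "->", r["href"])


asyncio.run(main())
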
webscout/webscout_search_async.py
@@ -1,43 +1,11 @@
 import asyncio
-import logging
-import warnings
-from concurrent.futures import ThreadPoolExecutor
-from contextlib import suppress
-from datetime import datetime, timezone
-from decimal import Decimal
-from functools import cached_property, partial
-from itertools import cycle, islice
 from types import TracebackType
-from typing import Dict, List, Optional,
+from typing import Dict, List, Optional, Type, Union

-from
+from .webscout_search import WEBS

-try:
-    from lxml.html import HTMLParser as LHTMLParser
-    from lxml.html import document_fromstring
-
-    LXML_AVAILABLE = True
-except ImportError:
-    LXML_AVAILABLE = False
-
-from .exceptions import WebscoutE, RatelimitE, TimeoutE
-from .utils import (
-    _calculate_distance,
-    _extract_vqd,
-    _normalize,
-    _normalize_url,
-    _text_extract_json,
-    json_loads,
-)
-
-logger = logging.getLogger("webscout_search.AsyncWEBS")
-
-
-class AsyncWEBS:
-    """webscout_search async class to get search results from duckduckgo.com."""
-
-    _executor: Optional[ThreadPoolExecutor] = None

+class AsyncWEBS(WEBS):
     def __init__(
         self,
         headers: Optional[Dict[str, str]] = None,
@@ -53,83 +21,35 @@ class AsyncWEBS:
                 example: "http://user:pass@example.com:3128". Defaults to None.
             timeout (int, optional): Timeout value for the HTTP client. Defaults to 10.
         """
-
-
-
-            warnings.warn("'proxies' is deprecated, use 'proxy' instead.", stacklevel=1)
-            self.proxy = proxies.get("http") or proxies.get("https") if isinstance(proxies, dict) else proxies
-        self._asession = requests.AsyncSession(
-            headers=headers,
-            proxy=self.proxy,
-            timeout=timeout,
-            impersonate="chrome",
-            allow_redirects=False,
-        )
-        self._asession.headers["Referer"] = "https://duckduckgo.com/"
-        self._exception_event = asyncio.Event()
+        super().__init__(headers=headers, proxy=proxy, proxies=proxies, timeout=timeout)
+        self._loop = asyncio.get_running_loop()
+        self._executor = super()._executor

     async def __aenter__(self) -> "AsyncWEBS":
         return self

     async def __aexit__(
         self,
-        exc_type: Optional[Type[BaseException]]
-        exc_val: Optional[BaseException]
-        exc_tb: Optional[TracebackType]
+        exc_type: Optional[Type[BaseException]],
+        exc_val: Optional[BaseException],
+        exc_tb: Optional[TracebackType],
     ) -> None:
-
-
-    def __del__(self) -> None:
-        if hasattr(self, "_asession") and self._asession._closed is False:
-            with suppress(RuntimeError, RuntimeWarning):
-                asyncio.create_task(self._asession.close())  # type: ignore
-
-    @cached_property
-    def parser(self) -> Optional["LHTMLParser"]:
-        """Get HTML parser."""
-        return LHTMLParser(remove_blank_text=True, remove_comments=True, remove_pis=True, collect_ids=False)
-
-    @classmethod
-    def _get_executor(cls, max_workers: int = 1) -> ThreadPoolExecutor:
-        """Get ThreadPoolExecutor. Default max_workers=1, because >=2 leads to a big overhead"""
-        if cls._executor is None:
-            cls._executor = ThreadPoolExecutor(max_workers=max_workers)
-        return cls._executor
+        pass

-
-
-        return cls._get_executor()
+    async def achat(self, keywords: str, model: str = "gpt-3.5") -> str:
+        """Initiates async chat session with Webscout AI.

-
-
-
-        url: str,
-        data: Optional[Union[Dict[str, str], bytes]] = None,
-        params: Optional[Dict[str, str]] = None,
-    ) -> bytes:
-        if self._exception_event.is_set():
-            raise WebscoutE("Exception occurred in previous call.")
-        try:
-            resp = await self._asession.request(method, url, data=data, params=params)
-        except Exception as ex:
-            self._exception_event.set()
-            if "time" in str(ex).lower():
-                raise TimeoutE(f"{url} {type(ex).__name__}: {ex}") from ex
-            raise WebscoutE(f"{url} {type(ex).__name__}: {ex}") from ex
-        logger.debug(f"_aget_url() {resp.url} {resp.status_code} {resp.elapsed:.2f} {len(resp.content)}")
-        if resp.status_code == 200:
-            return cast(bytes, resp.content)
-        self._exception_event.set()
-        if resp.status_code in (202, 301, 403):
-            raise RatelimitE(f"{resp.url} {resp.status_code} Ratelimit")
-        raise WebscoutE(f"{resp.url} return None. {params=} {data=}")
+        Args:
+            keywords (str): The initial message or question to send to the AI.
+            model (str): The model to use: "gpt-3.5", "claude-3-haiku". Defaults to "gpt-3.5".

-
-
-
-
+        Returns:
+            str: The response from the AI.
+        """
+        result = await self._loop.run_in_executor(self._executor, super().chat, keywords, model)
+        return result

-    async def
+    async def atext(
         self,
         keywords: str,
         region: str = "wt-wt",
@@ -138,7 +58,7 @@ class AsyncWEBS:
         backend: str = "api",
         max_results: Optional[int] = None,
     ) -> List[Dict[str, str]]:
-        """
+        """Webscout async text search. Query params: https://duckduckgo.com/params.

         Args:
             keywords: keywords for query.
@@ -155,296 +75,16 @@ class AsyncWEBS:
             List of dictionaries with search results, or None if there was an error.

         Raises:
-
-
-
-        """
-        if LXML_AVAILABLE is False and backend != "api":
-            backend = "api"
-            warnings.warn("lxml is not installed. Using backend='api'.", stacklevel=2)
-
-        if backend == "api":
-            results = await self._text_api(keywords, region, safesearch, timelimit, max_results)
-        elif backend == "html":
-            results = await self._text_html(keywords, region, safesearch, timelimit, max_results)
-        elif backend == "lite":
-            results = await self._text_lite(keywords, region, timelimit, max_results)
-        return results
-
-    async def _text_api(
-        self,
-        keywords: str,
-        region: str = "wt-wt",
-        safesearch: str = "moderate",
-        timelimit: Optional[str] = None,
-        max_results: Optional[int] = None,
-    ) -> List[Dict[str, str]]:
-        """webscout text search generator. Query params: https://duckduckgo.com/params.
-
-        Args:
-            keywords: keywords for query.
-            region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt".
-            safesearch: on, moderate, off. Defaults to "moderate".
-            timelimit: d, w, m, y. Defaults to None.
-            max_results: max number of results. If None, returns results only from the first response. Defaults to None.
-
-        Returns:
-            List of dictionaries with search results.
-
-        Raises:
-            WebscoutE: Base exception for webscout_search errors.
-            RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
-            TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
-        """
-        assert keywords, "keywords is mandatory"
-
-        vqd = await self._aget_vqd(keywords)
-
-        payload = {
-            "q": keywords,
-            "kl": region,
-            "l": region,
-            "p": "",
-            "s": "0",
-            "df": "",
-            "vqd": vqd,
-            "ex": "",
-        }
-        safesearch = safesearch.lower()
-        if safesearch == "moderate":
-            payload["ex"] = "-1"
-        elif safesearch == "off":
-            payload["ex"] = "-2"
-        elif safesearch == "on":  # strict
-            payload["p"] = "1"
-        if timelimit:
-            payload["df"] = timelimit
-
-        cache = set()
-        results: List[Optional[Dict[str, str]]] = [None] * 1100
-
-        async def _text_api_page(s: int, page: int) -> None:
-            priority = page * 100
-            payload["s"] = f"{s}"
-            resp_content = await self._aget_url("GET", "https://links.duckduckgo.com/d.js", params=payload)
-            page_data = _text_extract_json(resp_content, keywords)
-
-            for row in page_data:
-                href = row.get("u", None)
-                if href and href not in cache and href != f"http://www.google.com/search?q={keywords}":
-                    cache.add(href)
-                    body = _normalize(row["a"])
-                    if body:
-                        priority += 1
-                        result = {
-                            "title": _normalize(row["t"]),
-                            "href": _normalize_url(href),
-                            "body": body,
-                        }
-                        results[priority] = result
-
-        tasks = [asyncio.create_task(_text_api_page(0, 0))]
-        if max_results:
-            max_results = min(max_results, 500)
-            tasks.extend(
-                asyncio.create_task(_text_api_page(s, i)) for i, s in enumerate(range(23, max_results, 50), start=1)
-            )
-        try:
-            await asyncio.gather(*tasks)
-        except Exception as e:
-            for task in tasks:
-                task.cancel()
-            await asyncio.gather(*tasks, return_exceptions=True)
-            raise e
-
-        return list(islice(filter(None, results), max_results))
-
-    async def _text_html(
-        self,
-        keywords: str,
-        region: str = "wt-wt",
-        safesearch: str = "moderate",
-        timelimit: Optional[str] = None,
-        max_results: Optional[int] = None,
-    ) -> List[Dict[str, str]]:
-        """webscout text search generator. Query params: https://duckduckgo.com/params.
-
-        Args:
-            keywords: keywords for query.
-            region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt".
-            safesearch: on, moderate, off. Defaults to "moderate".
-            timelimit: d, w, m, y. Defaults to None.
-            max_results: max number of results. If None, returns results only from the first response. Defaults to None.
-
-        Returns:
-            List of dictionaries with search results.
-
-        Raises:
-            WebscoutE: Base exception for webscout_search errors.
-            RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
-            TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
-        """
-        assert keywords, "keywords is mandatory"
-
-        self._asession.headers["Referer"] = "https://html.duckduckgo.com/"
-        safesearch_base = {"on": "1", "moderate": "-1", "off": "-2"}
-        payload = {
-            "q": keywords,
-            "kl": region,
-            "p": safesearch_base[safesearch.lower()],
-            "o": "json",
-            "api": "d.js",
-        }
-        if timelimit:
-            payload["df"] = timelimit
-        if max_results and max_results > 20:
-            vqd = await self._aget_vqd(keywords)
-            payload["vqd"] = vqd
-
-        cache = set()
-        results: List[Optional[Dict[str, str]]] = [None] * 1100
-
-        async def _text_html_page(s: int, page: int) -> None:
-            priority = page * 100
-            payload["s"] = f"{s}"
-            resp_content = await self._aget_url("POST", "https://html.duckduckgo.com/html", data=payload)
-            if b"No results." in resp_content:
-                return
-
-            tree = await self._asession.loop.run_in_executor(
-                self.executor, partial(document_fromstring, resp_content, self.parser)
-            )
-
-            for e in tree.xpath("//div[h2]"):
-                href = e.xpath("./a/@href")
-                href = href[0] if href else None
-                if (
-                    href
-                    and href not in cache
-                    and not href.startswith(
-                        ("http://www.google.com/search?q=", "https://duckduckgo.com/y.js?ad_domain")
-                    )
-                ):
-                    cache.add(href)
-                    title = e.xpath("./h2/a/text()")
-                    body = e.xpath("./a//text()")
-
-                    priority += 1
-                    result = {
-                        "title": _normalize(title[0]),
-                        "href": _normalize_url(href),
-                        "body": _normalize("".join(body)),
-                    }
-                    results[priority] = result
-
-        tasks = [asyncio.create_task(_text_html_page(0, 0))]
-        if max_results:
-            max_results = min(max_results, 500)
-            tasks.extend(
-                asyncio.create_task(_text_html_page(s, i)) for i, s in enumerate(range(23, max_results, 50), start=1)
-            )
-        try:
-            await asyncio.gather(*tasks)
-        except Exception as e:
-            for task in tasks:
-                task.cancel()
-            await asyncio.gather(*tasks, return_exceptions=True)
-            raise e
-
-        return list(islice(filter(None, results), max_results))
-
-    async def _text_lite(
-        self,
-        keywords: str,
-        region: str = "wt-wt",
-        timelimit: Optional[str] = None,
-        max_results: Optional[int] = None,
-    ) -> List[Dict[str, str]]:
-        """webscout text search generator. Query params: https://duckduckgo.com/params.
-
-        Args:
-            keywords: keywords for query.
-            region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt".
-            timelimit: d, w, m, y. Defaults to None.
-            max_results: max number of results. If None, returns results only from the first response. Defaults to None.
-
-        Returns:
-            List of dictionaries with search results.
-
-        Raises:
-            WebscoutE: Base exception for webscout_search errors.
-            RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
-            TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
+            DuckDuckGoSearchException: Base exception for duckduckgo_search errors.
+            RatelimitException: Inherits from DuckDuckGoSearchException, raised for exceeding API request rate limits.
+            TimeoutException: Inherits from DuckDuckGoSearchException, raised for API request timeouts.
         """
-
-
-
-
-            "q": keywords,
-            "o": "json",
-            "api": "d.js",
-            "kl": region,
-        }
-        if timelimit:
-            payload["df"] = timelimit
-
-        cache = set()
-        results: List[Optional[Dict[str, str]]] = [None] * 1100
-
-        async def _text_lite_page(s: int, page: int) -> None:
-            priority = page * 100
-            payload["s"] = f"{s}"
-            resp_content = await self._aget_url("POST", "https://lite.duckduckgo.com/lite/", data=payload)
-            if b"No more results." in resp_content:
-                return
-
-            tree = await self._asession.loop.run_in_executor(
-                self.executor, partial(document_fromstring, resp_content, self.parser)
-            )
-
-            data = zip(cycle(range(1, 5)), tree.xpath("//table[last()]//tr"))
-            for i, e in data:
-                if i == 1:
-                    href = e.xpath(".//a//@href")
-                    href = href[0] if href else None
-                    if (
-                        href is None
-                        or href in cache
-                        or href.startswith(("http://www.google.com/search?q=", "https://duckduckgo.com/y.js?ad_domain"))
-                    ):
-                        [next(data, None) for _ in range(3)]  # skip block(i=1,2,3,4)
-                    else:
-                        cache.add(href)
-                        title = e.xpath(".//a//text()")[0]
-                elif i == 2:
-                    body = e.xpath(".//td[@class='result-snippet']//text()")
-                    body = "".join(body).strip()
-                elif i == 3:
-                    priority += 1
-                    result = {
-                        "title": _normalize(title),
-                        "href": _normalize_url(href),
-                        "body": _normalize(body),
-                    }
-                    results[priority] = result
-
-        tasks = [asyncio.create_task(_text_lite_page(0, 0))]
-        if max_results:
-            max_results = min(max_results, 500)
-            tasks.extend(
-                asyncio.create_task(_text_lite_page(s, i)) for i, s in enumerate(range(23, max_results, 50), start=1)
-            )
-        try:
-            await asyncio.gather(*tasks)
-        except Exception as e:
-            for task in tasks:
-                task.cancel()
-            await asyncio.gather(*tasks, return_exceptions=True)
-            raise e
-
-        return list(islice(filter(None, results), max_results))
+        result = await self._loop.run_in_executor(
+            self._executor, super().text, keywords, region, safesearch, timelimit, backend, max_results
+        )
+        return result

-    async def
+    async def aimages(
         self,
         keywords: str,
         region: str = "wt-wt",
@@ -457,7 +97,7 @@ class AsyncWEBS:
         license_image: Optional[str] = None,
         max_results: Optional[int] = None,
     ) -> List[Dict[str, str]]:
-        """
+        """Webscout async images search. Query params: https://duckduckgo.com/params.

         Args:
             keywords: keywords for query.
@@ -480,74 +120,27 @@ class AsyncWEBS:
             List of dictionaries with images search results.

         Raises:
-
-
-
+            DuckDuckGoSearchException: Base exception for duckduckgo_search errors.
+            RatelimitException: Inherits from DuckDuckGoSearchException, raised for exceeding API request rate limits.
+            TimeoutException: Inherits from DuckDuckGoSearchException, raised for API request timeouts.
         """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            "vqd": vqd,
-            "f": f"{timelimit},{size},{color},{type_image},{layout},{license_image}",
-            "p": safesearch_base[safesearch.lower()],
-        }
-
-        cache = set()
-        results: List[Optional[Dict[str, str]]] = [None] * 600
-
-        async def _images_page(s: int, page: int) -> None:
-            priority = page * 100
-            payload["s"] = f"{s}"
-            resp_content = await self._aget_url("GET", "https://duckduckgo.com/i.js", params=payload)
-            resp_json = json_loads(resp_content)
-
-            page_data = resp_json.get("results", [])
-
-            for row in page_data:
-                image_url = row.get("image")
-                if image_url and image_url not in cache:
-                    cache.add(image_url)
-                    priority += 1
-                    result = {
-                        "title": row["title"],
-                        "image": _normalize_url(image_url),
-                        "thumbnail": _normalize_url(row["thumbnail"]),
-                        "url": _normalize_url(row["url"]),
-                        "height": row["height"],
-                        "width": row["width"],
-                        "source": row["source"],
-                    }
-                    results[priority] = result
-
-        tasks = [asyncio.create_task(_images_page(0, page=0))]
-        if max_results:
-            max_results = min(max_results, 500)
-            tasks.extend(
-                asyncio.create_task(_images_page(s, i)) for i, s in enumerate(range(100, max_results, 100), start=1)
-            )
-        try:
-            await asyncio.gather(*tasks)
-        except Exception as e:
-            for task in tasks:
-                task.cancel()
-            await asyncio.gather(*tasks, return_exceptions=True)
-            raise e
-
-        return list(islice(filter(None, results), max_results))
+        result = await self._loop.run_in_executor(
+            self._executor,
+            super().images,
+            keywords,
+            region,
+            safesearch,
+            timelimit,
+            size,
+            color,
+            type_image,
+            layout,
+            license_image,
+            max_results,
+        )
+        return result

-    async def
+    async def avideos(
         self,
         keywords: str,
         region: str = "wt-wt",
@@ -558,7 +151,7 @@ class AsyncWEBS:
         license_videos: Optional[str] = None,
         max_results: Optional[int] = None,
     ) -> List[Dict[str, str]]:
-        """
+        """Webscout async videos search. Query params: https://duckduckgo.com/params.

         Args:
             keywords: keywords for query.
@@ -574,62 +167,25 @@ class AsyncWEBS:
             List of dictionaries with videos search results.

         Raises:
-
-
-
+            DuckDuckGoSearchException: Base exception for duckduckgo_search errors.
+            RatelimitException: Inherits from DuckDuckGoSearchException, raised for exceeding API request rate limits.
+            TimeoutException: Inherits from DuckDuckGoSearchException, raised for API request timeouts.
         """
-
-
-
-
-
-
-
-
-
-
-
-
-
-            "vqd": vqd,
-            "f": f"{timelimit},{resolution},{duration},{license_videos}",
-            "p": safesearch_base[safesearch.lower()],
-        }
-
-        cache = set()
-        results: List[Optional[Dict[str, str]]] = [None] * 700
-
-        async def _videos_page(s: int, page: int) -> None:
-            priority = page * 100
-            payload["s"] = f"{s}"
-            resp_content = await self._aget_url("GET", "https://duckduckgo.com/v.js", params=payload)
-            resp_json = json_loads(resp_content)
-
-            page_data = resp_json.get("results", [])
-
-            for row in page_data:
-                if row["content"] not in cache:
-                    cache.add(row["content"])
-                    priority += 1
-                    results[priority] = row
-
-        tasks = [asyncio.create_task(_videos_page(0, 0))]
-        if max_results:
-            max_results = min(max_results, 400)
-            tasks.extend(
-                asyncio.create_task(_videos_page(s, i)) for i, s in enumerate(range(59, max_results, 59), start=1)
-            )
-        try:
-            await asyncio.gather(*tasks)
-        except Exception as e:
-            for task in tasks:
-                task.cancel()
-            await asyncio.gather(*tasks, return_exceptions=True)
-            raise e
-
-        return list(islice(filter(None, results), max_results))
+        result = await self._loop.run_in_executor(
+            self._executor,
+            super().videos,
+            keywords,
+            region,
+            safesearch,
+            timelimit,
+            resolution,
+            duration,
+            license_videos,
+            max_results,
+        )
+        return result

-    async def
+    async def anews(
         self,
         keywords: str,
         region: str = "wt-wt",
@@ -637,7 +193,7 @@ class AsyncWEBS:
         timelimit: Optional[str] = None,
         max_results: Optional[int] = None,
     ) -> List[Dict[str, str]]:
-        """
+        """Webscout async news search. Query params: https://duckduckgo.com/params.

         Args:
             keywords: keywords for query.
@@ -650,69 +206,26 @@ class AsyncWEBS:
             List of dictionaries with news search results.

         Raises:
-
-
-
+            DuckDuckGoSearchException: Base exception for duckduckgo_search errors.
+            RatelimitException: Inherits from DuckDuckGoSearchException, raised for exceeding API request rate limits.
+            TimeoutException: Inherits from DuckDuckGoSearchException, raised for API request timeouts.
         """
-
-
-
-
-
-
-
-
-
-
-            "vqd": vqd,
-            "p": safesearch_base[safesearch.lower()],
-        }
-        if timelimit:
-            payload["df"] = timelimit
-
-        cache = set()
-        results: List[Optional[Dict[str, str]]] = [None] * 700
-
-        async def _news_page(s: int, page: int) -> None:
-            priority = page * 100
-            payload["s"] = f"{s}"
-            resp_content = await self._aget_url("GET", "https://duckduckgo.com/news.js", params=payload)
-            resp_json = json_loads(resp_content)
-            page_data = resp_json.get("results", [])
-
-            for row in page_data:
-                if row["url"] not in cache:
-                    cache.add(row["url"])
-                    image_url = row.get("image", None)
-                    priority += 1
-                    result = {
-                        "date": datetime.fromtimestamp(row["date"], timezone.utc).isoformat(),
-                        "title": row["title"],
-                        "body": _normalize(row["excerpt"]),
-                        "url": _normalize_url(row["url"]),
-                        "image": _normalize_url(image_url),
-                        "source": row["source"],
-                    }
-                    results[priority] = result
-
-        tasks = [asyncio.create_task(_news_page(0, 0))]
-        if max_results:
-            max_results = min(max_results, 200)
-            tasks.extend(
-                asyncio.create_task(_news_page(s, i)) for i, s in enumerate(range(29, max_results, 29), start=1)
-            )
-        try:
-            await asyncio.gather(*tasks)
-        except Exception as e:
-            for task in tasks:
-                task.cancel()
-            await asyncio.gather(*tasks, return_exceptions=True)
-            raise e
-
-        return list(islice(filter(None, results), max_results))
+        result = await self._loop.run_in_executor(
+            self._executor,
+            super().news,
+            keywords,
+            region,
+            safesearch,
+            timelimit,
+            max_results,
+        )
+        return result

-    async def
-
+    async def aanswers(
+        self,
+        keywords: str,
+    ) -> List[Dict[str, str]]:
+        """Webscout async instant answers. Query params: https://duckduckgo.com/params.

         Args:
             keywords: keywords for query,
@@ -721,69 +234,23 @@ class AsyncWEBS:
             List of dictionaries with instant answers results.

         Raises:
-
-
-
+            DuckDuckGoSearchException: Base exception for duckduckgo_search errors.
+            RatelimitException: Inherits from DuckDuckGoSearchException, raised for exceeding API request rate limits.
+            TimeoutException: Inherits from DuckDuckGoSearchException, raised for API request timeouts.
         """
-
-
-
-
-
-
-        resp_content = await self._aget_url("GET", "https://api.duckduckgo.com/", params=payload)
-        page_data = json_loads(resp_content)
-
-        results = []
-        answer = page_data.get("AbstractText")
-        url = page_data.get("AbstractURL")
-        if answer:
-            results.append(
-                {
-                    "icon": None,
-                    "text": answer,
-                    "topic": None,
-                    "url": url,
-                }
-            )
-
-        # related
-        payload = {
-            "q": f"{keywords}",
-            "format": "json",
-        }
-        resp_content = await self._aget_url("GET", "https://api.duckduckgo.com/", params=payload)
-        resp_json = json_loads(resp_content)
-        page_data = resp_json.get("RelatedTopics", [])
-
-        for row in page_data:
-            topic = row.get("Name")
-            if not topic:
-                icon = row["Icon"].get("URL")
-                results.append(
-                    {
-                        "icon": f"https://duckduckgo.com{icon}" if icon else "",
-                        "text": row["Text"],
-                        "topic": None,
-                        "url": row["FirstURL"],
-                    }
-                )
-            else:
-                for subrow in row["Topics"]:
-                    icon = subrow["Icon"].get("URL")
-                    results.append(
-                        {
-                            "icon": f"https://duckduckgo.com{icon}" if icon else "",
-                            "text": subrow["Text"],
-                            "topic": topic,
-                            "url": subrow["FirstURL"],
-                        }
-                    )
-
-        return results
+        result = await self._loop.run_in_executor(
+            self._executor,
+            super().answers,
+            keywords,
+        )
+        return result

-    async def
-
+    async def asuggestions(
+        self,
+        keywords: str,
+        region: str = "wt-wt",
+    ) -> List[Dict[str, str]]:
+        """Webscout async suggestions. Query params: https://duckduckgo.com/params.

         Args:
             keywords: keywords for query.
@@ -793,21 +260,19 @@ class AsyncWEBS:
             List of dictionaries with suggestions results.

         Raises:
-
-
-
+            DuckDuckGoSearchException: Base exception for duckduckgo_search errors.
+            RatelimitException: Inherits from DuckDuckGoSearchException, raised for exceeding API request rate limits.
+            TimeoutException: Inherits from DuckDuckGoSearchException, raised for API request timeouts.
         """
-
-
-
-
-
-
-
-        page_data = json_loads(resp_content)
-        return [r for r in page_data]
+        result = await self._loop.run_in_executor(
+            self._executor,
+            super().suggestions,
+            keywords,
+            region,
+        )
+        return result

-    async def
+    async def amaps(
         self,
         keywords: str,
         place: Optional[str] = None,
@@ -822,7 +287,7 @@ class AsyncWEBS:
         radius: int = 0,
         max_results: Optional[int] = None,
     ) -> List[Dict[str, str]]:
-        """
+        """Webscout async maps search. Query params: https://duckduckgo.com/params.

         Args:
             keywords: keywords for query
@@ -843,159 +308,35 @@ class AsyncWEBS:
             List of dictionaries with maps search results, or None if there was an error.

         Raises:
-
-
-
+            DuckDuckGoSearchException: Base exception for duckduckgo_search errors.
+            RatelimitException: Inherits from DuckDuckGoSearchException, raised for exceeding API request rate limits.
+            TimeoutException: Inherits from DuckDuckGoSearchException, raised for API request timeouts.
         """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                "polygon_geojson": "0",
-                "format": "jsonv2",
-            }
-        else:
-            params = {
-                "polygon_geojson": "0",
-                "format": "jsonv2",
-            }
-            if street:
-                params["street"] = street
-            if city:
-                params["city"] = city
-            if county:
-                params["county"] = county
-            if state:
-                params["state"] = state
-            if country:
-                params["country"] = country
-            if postalcode:
-                params["postalcode"] = postalcode
-            # request nominatim api to get coordinates box
-            resp_content = await self._aget_url(
-                "GET",
-                "https://nominatim.openstreetmap.org/search.php",
-                params=params,
-            )
-            if resp_content == b"[]":
-                raise WebscoutE("maps() Сoordinates are not found, check function parameters.")
-            resp_json = json_loads(resp_content)
-            coordinates = resp_json[0]["boundingbox"]
-            lat_t, lon_l = Decimal(coordinates[1]), Decimal(coordinates[2])
-            lat_b, lon_r = Decimal(coordinates[0]), Decimal(coordinates[3])
-
-        # if a radius is specified, expand the search square
-        lat_t += Decimal(radius) * Decimal(0.008983)
-        lat_b -= Decimal(radius) * Decimal(0.008983)
-        lon_l -= Decimal(radius) * Decimal(0.008983)
-        lon_r += Decimal(radius) * Decimal(0.008983)
-        logger.debug(f"bbox coordinates\n{lat_t} {lon_l}\n{lat_b} {lon_r}")
-
-        cache = set()
-        results: List[Dict[str, str]] = []
-
-        async def _maps_page(
-            bbox: Tuple[Decimal, Decimal, Decimal, Decimal],
-        ) -> Optional[List[Dict[str, str]]]:
-            if max_results and len(results) >= max_results:
-                return None
-            lat_t, lon_l, lat_b, lon_r = bbox
-            params = {
-                "q": keywords,
-                "vqd": vqd,
-                "tg": "maps_places",
-                "rt": "D",
-                "mkexp": "b",
-                "wiki_info": "1",
-                "is_requery": "1",
-                "bbox_tl": f"{lat_t},{lon_l}",
-                "bbox_br": f"{lat_b},{lon_r}",
-                "strict_bbox": "1",
-            }
-            resp_content = await self._aget_url("GET", "https://duckduckgo.com/local.js", params=params)
-            resp_json = json_loads(resp_content)
-            page_data = resp_json.get("results", [])
-
-            page_results = []
-            for res in page_data:
-                r_name = f'{res["name"]} {res["address"]}'
-                if r_name in cache:
-                    continue
-                else:
-                    cache.add(r_name)
-                    result = {
-                        "title": res["name"],
-                        "address": res["address"],
-                        "country_code": res["country_code"],
-                        "url": _normalize_url(res["website"]),
-                        "phone": res["phone"] or "",
-                        "latitude": res["coordinates"]["latitude"],
-                        "longitude": res["coordinates"]["longitude"],
-                        "source": _normalize_url(res["url"]),
-                        "image": x.get("image", "") if (x := res["embed"]) else "",
-                        "desc": x.get("description", "") if (x := res["embed"]) else "",
-                        "hours": res["hours"] or "",
-                        "category": res["ddg_category"] or "",
-                        "facebook": f"www.facebook.com/profile.php?id={x}" if (x := res["facebook_id"]) else "",
-                        "instagram": f"https://www.instagram.com/{x}" if (x := res["instagram_id"]) else "",
-                        "twitter": f"https://twitter.com/{x}" if (x := res["twitter_id"]) else "",
-                    }
-                    page_results.append(result)
-
-            return page_results
-
-        # search squares (bboxes)
-        start_bbox = (lat_t, lon_l, lat_b, lon_r)
-        work_bboxes = [start_bbox]
-        while work_bboxes:
-            queue_bboxes = []  # for next iteration, at the end of the iteration work_bboxes = queue_bboxes
-            tasks = []
-            for bbox in work_bboxes:
-                tasks.append(asyncio.create_task(_maps_page(bbox)))
-                # if distance between coordinates > 1, divide the square into 4 parts and save them in queue_bboxes
-                if _calculate_distance(lat_t, lon_l, lat_b, lon_r) > 1:
-                    lat_t, lon_l, lat_b, lon_r = bbox
-                    lat_middle = (lat_t + lat_b) / 2
-                    lon_middle = (lon_l + lon_r) / 2
-                    bbox1 = (lat_t, lon_l, lat_middle, lon_middle)
-                    bbox2 = (lat_t, lon_middle, lat_middle, lon_r)
-                    bbox3 = (lat_middle, lon_l, lat_b, lon_middle)
-                    bbox4 = (lat_middle, lon_middle, lat_b, lon_r)
-                    queue_bboxes.extend([bbox1, bbox2, bbox3, bbox4])
-
-            # gather tasks using asyncio.wait_for and timeout
-            with suppress(Exception):
-                work_bboxes_results = await asyncio.gather(*[asyncio.wait_for(task, timeout=10) for task in tasks])
-
-            for x in work_bboxes_results:
-                if isinstance(x, list):
-                    results.extend(x)
-                elif isinstance(x, dict):
-                    results.append(x)
-
-            work_bboxes = queue_bboxes
-            if not max_results or len(results) >= max_results or len(work_bboxes_results) == 0:
-                break
-
-        return list(islice(results, max_results))
+        result = await self._loop.run_in_executor(
+            self._executor,
+            super().maps,
+            keywords,
+            place,
+            street,
+            city,
+            county,
+            state,
+            country,
+            postalcode,
+            latitude,
+            longitude,
+            radius,
+            max_results,
+        )
+        return result

-    async def
-        self,
+    async def atranslate(
+        self,
+        keywords: Union[List[str], str],
+        from_: Optional[str] = None,
+        to: str = "en",
     ) -> List[Dict[str, str]]:
-        """
+        """Webscout async translate.

         Args:
             keywords: string or list of strings to translate.
@@ -1006,44 +347,15 @@ class AsyncWEBS:
             List od dictionaries with translated keywords.

         Raises:
-
-
-
+            DuckDuckGoSearchException: Base exception for duckduckgo_search errors.
+            RatelimitException: Inherits from DuckDuckGoSearchException, raised for exceeding API request rate limits.
+            TimeoutException: Inherits from DuckDuckGoSearchException, raised for API request timeouts.
         """
-
-
-
-
-
-
-
-
-        }
-        if from_:
-            payload["from"] = from_
-
-        results = []
-
-        async def _translate_keyword(keyword: str) -> None:
-            resp_content = await self._aget_url(
-                "POST",
-                "https://duckduckgo.com/translation.js",
-                params=payload,
-                data=keyword.encode(),
-            )
-            page_data = json_loads(resp_content)
-            page_data["original"] = keyword
-            results.append(page_data)
-
-        if isinstance(keywords, str):
-            keywords = [keywords]
-        tasks = [asyncio.create_task(_translate_keyword(keyword)) for keyword in keywords]
-        try:
-            await asyncio.gather(*tasks)
-        except Exception as e:
-            for task in tasks:
-                task.cancel()
-            await asyncio.gather(*tasks, return_exceptions=True)
-            raise e
-
-        return results
+        result = await self._loop.run_in_executor(
+            self._executor,
+            super().translate,
+            keywords,
+            from_,
+            to,
+        )
+        return result