webscout-7.8-py3-none-any.whl → webscout-7.9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of webscout might be problematic; more details are linked from the original diff page.

Files changed (41)
  1. webscout/Bard.py +5 -25
  2. webscout/DWEBS.py +476 -476
  3. webscout/Extra/__init__.py +2 -0
  4. webscout/Extra/autocoder/__init__.py +1 -1
  5. webscout/Extra/autocoder/{rawdog.py → autocoder.py} +849 -849
  6. webscout/Extra/tempmail/__init__.py +26 -0
  7. webscout/Extra/tempmail/async_utils.py +141 -0
  8. webscout/Extra/tempmail/base.py +156 -0
  9. webscout/Extra/tempmail/cli.py +187 -0
  10. webscout/Extra/tempmail/mail_tm.py +361 -0
  11. webscout/Extra/tempmail/temp_mail_io.py +292 -0
  12. webscout/Provider/Deepinfra.py +288 -286
  13. webscout/Provider/ElectronHub.py +709 -716
  14. webscout/Provider/ExaChat.py +20 -5
  15. webscout/Provider/Gemini.py +167 -165
  16. webscout/Provider/Groq.py +38 -24
  17. webscout/Provider/LambdaChat.py +2 -1
  18. webscout/Provider/TextPollinationsAI.py +232 -230
  19. webscout/Provider/__init__.py +0 -4
  20. webscout/Provider/copilot.py +427 -427
  21. webscout/Provider/freeaichat.py +8 -1
  22. webscout/Provider/uncovr.py +312 -299
  23. webscout/Provider/yep.py +64 -12
  24. webscout/__init__.py +38 -36
  25. webscout/cli.py +293 -293
  26. webscout/conversation.py +350 -17
  27. webscout/litprinter/__init__.py +59 -667
  28. webscout/optimizers.py +419 -419
  29. webscout/update_checker.py +14 -12
  30. webscout/version.py +1 -1
  31. webscout/webscout_search.py +1282 -1282
  32. webscout/webscout_search_async.py +813 -813
  33. {webscout-7.8.dist-info → webscout-7.9.dist-info}/METADATA +44 -39
  34. {webscout-7.8.dist-info → webscout-7.9.dist-info}/RECORD +38 -35
  35. webscout/Provider/DARKAI.py +0 -225
  36. webscout/Provider/EDITEE.py +0 -192
  37. webscout/litprinter/colors.py +0 -54
  38. {webscout-7.8.dist-info → webscout-7.9.dist-info}/LICENSE.md +0 -0
  39. {webscout-7.8.dist-info → webscout-7.9.dist-info}/WHEEL +0 -0
  40. {webscout-7.8.dist-info → webscout-7.9.dist-info}/entry_points.txt +0 -0
  41. {webscout-7.8.dist-info → webscout-7.9.dist-info}/top_level.txt +0 -0
webscout/webscout_search_async.py
@@ -1,813 +1,813 @@
1
- from __future__ import annotations
2
-
3
- import asyncio
4
- import os
5
- import warnings
6
- from datetime import datetime, timezone
7
- from functools import cached_property
8
- from itertools import cycle
9
- from random import choice, shuffle
10
- from time import time
11
- from types import TracebackType
12
- from typing import Any, Dict, List, Optional, Type, Union, cast, AsyncIterator
13
-
14
- import httpx
15
- from lxml.etree import _Element
16
- from lxml.html import HTMLParser as LHTMLParser
17
- from lxml.html import document_fromstring
18
-
19
- from .exceptions import ConversationLimitException, RatelimitE, TimeoutE, WebscoutE
20
- from .utils import (
21
- _expand_proxy_tb_alias,
22
- _extract_vqd,
23
- _normalize,
24
- _normalize_url,
25
- json_loads,
26
- )
27
-
28
-
29
-
30
-
31
- class AsyncWEBS:
32
- """Asynchronous webscout class to get search results."""
33
-
34
- _impersonates = (
35
- "chrome_100", "chrome_101", "chrome_104", "chrome_105", "chrome_106", "chrome_107",
36
- "chrome_108", "chrome_109", "chrome_114", "chrome_116", "chrome_117", "chrome_118",
37
- "chrome_119", "chrome_120", "chrome_123", "chrome_124", "chrome_126", "chrome_127",
38
- "chrome_128", "chrome_129", "chrome_130", "chrome_131", "chrome_133",
39
- "safari_ios_16.5", "safari_ios_17.2", "safari_ios_17.4.1", "safari_ios_18.1.1",
40
- "safari_15.3", "safari_15.5", "safari_15.6.1", "safari_16", "safari_16.5",
41
- "safari_17.0", "safari_17.2.1", "safari_17.4.1", "safari_17.5",
42
- "safari_18", "safari_18.2",
43
- "safari_ipad_18",
44
- "edge_101", "edge_122", "edge_127", "edge_131",
45
- "firefox_109", "firefox_117", "firefox_128", "firefox_133", "firefox_135",
46
- )
47
- _impersonates_os = ("android", "ios", "linux", "macos", "windows")
48
- _chat_models = {
49
- "gpt-4o-mini": "gpt-4o-mini",
50
- "llama-3.3-70b": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
51
- "claude-3-haiku": "claude-3-haiku-20240307",
52
- "o3-mini": "o3-mini",
53
- "mistral-small-3": "mistralai/Mistral-Small-24B-Instruct-2501",
54
- }
55
-
56
- def __init__(
57
- self,
58
- headers: Optional[Dict[str, str]] = None,
59
- proxy: Optional[str] = None,
60
- proxies: Union[Dict[str, str], str, None] = None, # deprecated
61
- timeout: Optional[int] = 10,
62
- verify: bool = True,
63
- ) -> None:
64
- """Initialize the AsyncWEBS object.
65
-
66
- Args:
67
- headers (dict, optional): Dictionary of headers for the HTTP client. Defaults to None.
68
- proxy (str, optional): proxy for the HTTP client, supports http/https/socks5 protocols.
69
- example: "http://user:pass@example.com:3128". Defaults to None.
70
- timeout (int, optional): Timeout value for the HTTP client. Defaults to 10.
71
- verify (bool): SSL verification when making the request. Defaults to True.
72
- """
73
- ddgs_proxy: Optional[str] = os.environ.get("DDGS_PROXY")
74
- self.proxy: Optional[str] = ddgs_proxy if ddgs_proxy else _expand_proxy_tb_alias(proxy)
75
- assert self.proxy is None or isinstance(self.proxy, str), "proxy must be a str"
76
- if not proxy and proxies:
77
- warnings.warn("'proxies' is deprecated, use 'proxy' instead.", stacklevel=1)
78
- self.proxy = proxies.get("http") or proxies.get("https") if isinstance(proxies, dict) else proxies
79
-
80
- default_headers = {
81
- "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
82
- "Accept-Language": "en-US,en;q=0.5",
83
- "Accept-Encoding": "gzip, deflate, br",
84
- "DNT": "1",
85
- "Connection": "keep-alive",
86
- "Upgrade-Insecure-Requests": "1",
87
- "Sec-Fetch-Dest": "document",
88
- "Sec-Fetch-Mode": "navigate",
89
- "Sec-Fetch-Site": "none",
90
- "Sec-Fetch-User": "?1",
91
- "Referer": "https://duckduckgo.com/",
92
- }
93
-
94
- self.headers = headers if headers else {}
95
- self.headers.update(default_headers)
96
-
97
- self.client = httpx.AsyncClient(
98
- headers=self.headers,
99
- proxies=self.proxy,
100
- timeout=timeout,
101
- follow_redirects=False,
102
- verify=verify,
103
- )
104
- self.sleep_timestamp = 0.0
105
-
106
- self._exception_event = asyncio.Event()
107
- self._chat_messages: List[Dict[str, str]] = []
108
- self._chat_tokens_count = 0
109
- self._chat_vqd: str = ""
110
- self._chat_vqd_hash: str = ""
111
- self._chat_xfe: str = ""
112
-
113
- async def __aenter__(self) -> AsyncWEBS:
114
- return self
115
-
116
- async def __aexit__(
117
- self,
118
- exc_type: Optional[Type[BaseException]] = None,
119
- exc_val: Optional[BaseException] = None,
120
- exc_tb: Optional[TracebackType] = None,
121
- ) -> None:
122
- await self.client.aclose()
123
-
124
- @cached_property
125
- def parser(self) -> LHTMLParser:
126
- """Get HTML parser."""
127
- return LHTMLParser(remove_blank_text=True, remove_comments=True, remove_pis=True, collect_ids=False)
128
-
129
- async def _sleep(self, sleeptime: float = 0.75) -> None:
130
- """Sleep between API requests."""
131
- delay = 0.0 if not self.sleep_timestamp else 0.0 if time() - self.sleep_timestamp >= 20 else sleeptime
132
- self.sleep_timestamp = time()
133
- await asyncio.sleep(delay)
134
-
135
- async def _get_url(
136
- self,
137
- method: str,
138
- url: str,
139
- params: Optional[Dict[str, str]] = None,
140
- content: Optional[bytes] = None,
141
- data: Optional[Dict[str, str]] = None,
142
- headers: Optional[Dict[str, str]] = None,
143
- cookies: Optional[Dict[str, str]] = None,
144
- json: Any = None,
145
- timeout: Optional[float] = None,
146
- ) -> Any:
147
- """Make HTTP request with proper rate limiting."""
148
- await self._sleep()
149
- try:
150
- resp = await self.client.request(
151
- method,
152
- url,
153
- params=params,
154
- content=content,
155
- data=data,
156
- headers=headers,
157
- cookies=cookies,
158
- json=json,
159
- timeout=timeout or self.timeout,
160
- )
161
- except Exception as ex:
162
- if "time" in str(ex).lower():
163
- raise TimeoutE(f"{url} {type(ex).__name__}: {ex}") from ex
164
- raise WebscoutE(f"{url} {type(ex).__name__}: {ex}") from ex
165
-
166
- if resp.status_code == 200:
167
- return resp
168
- elif resp.status_code in (202, 301, 403, 400, 429, 418):
169
- raise RatelimitE(f"{resp.url} {resp.status_code} Ratelimit")
170
- raise WebscoutE(f"{resp.url} return None. {params=} {content=} {data=}")
171
-
172
- async def _get_vqd(self, keywords: str) -> str:
173
- """Get vqd value for a search query."""
174
- resp_content = (await self._get_url("GET", "https://duckduckgo.com", params={"q": keywords})).content
175
- return _extract_vqd(resp_content, keywords)
176
-
177
- async def achat_yield(self, keywords: str, model: str = "gpt-4o-mini", timeout: int = 30) -> AsyncIterator[str]:
178
- """Initiates an async chat session with webscout AI.
179
-
180
- Args:
181
- keywords (str): The initial message or question to send to the AI.
182
- model (str): The model to use: "gpt-4o-mini", "llama-3.3-70b", "claude-3-haiku",
183
- "o3-mini", "mistral-small-3". Defaults to "gpt-4o-mini".
184
- timeout (int): Timeout value for the HTTP client. Defaults to 30.
185
-
186
- Yields:
187
- str: Chunks of the response from the AI.
188
- """
189
- # x-fe-version
190
- if not self._chat_xfe:
191
- resp_content = (await self._get_url(
192
- method="GET",
193
- url="https://duckduckgo.com/?q=DuckDuckGo+AI+Chat&ia=chat&duckai=1",
194
- )).content
195
- try:
196
- xfe1 = resp_content.split(b'__DDG_BE_VERSION__="', maxsplit=1)[1].split(b'"', maxsplit=1)[0].decode()
197
- xfe2 = resp_content.split(b'__DDG_FE_CHAT_HASH__="', maxsplit=1)[1].split(b'"', maxsplit=1)[0].decode()
198
- self._chat_xfe = f"{xfe1}-{xfe2}"
199
- except Exception as ex:
200
- raise WebscoutE(
201
- f"achat_yield() Error to get _chat_xfe: {type(ex).__name__}: {ex}"
202
- ) from ex
203
- # vqd
204
- if not self._chat_vqd:
205
- resp = await self._get_url(
206
- method="GET", url="https://duckduckgo.com/duckchat/v1/status", headers={"x-vqd-accept": "1"}
207
- )
208
- self._chat_vqd = resp.headers.get("x-vqd-4", "")
209
- self._chat_vqd_hash = resp.headers.get("x-vqd-hash-1", "")
210
-
211
- self._chat_messages.append({"role": "user", "content": keywords})
212
- self._chat_tokens_count += max(len(keywords) // 4, 1) # approximate number of tokens
213
- if model not in self._chat_models:
214
- warnings.warn(f"{model=} is unavailable. Using 'gpt-4o-mini'", stacklevel=1)
215
- model = "gpt-4o-mini"
216
- json_data = {
217
- "model": self._chat_models[model],
218
- "messages": self._chat_messages,
219
- }
220
- resp = await self._get_url(
221
- method="POST",
222
- url="https://duckduckgo.com/duckchat/v1/chat",
223
- headers={
224
- "x-fe-version": self._chat_xfe,
225
- "x-vqd-4": self._chat_vqd,
226
- "x-vqd-hash-1": "",
227
- },
228
- json=json_data,
229
- timeout=timeout,
230
- )
231
- self._chat_vqd = resp.headers.get("x-vqd-4", "")
232
- self._chat_vqd_hash = resp.headers.get("x-vqd-hash-1", "")
233
- chunks = []
234
- try:
235
- async for chunk in resp.aiter_bytes():
236
- lines = chunk.split(b"data:")
237
- for line in lines:
238
- if line := line.strip():
239
- if line == b"[DONE]":
240
- break
241
- if line == b"[DONE][LIMIT_CONVERSATION]":
242
- raise ConversationLimitException("ERR_CONVERSATION_LIMIT")
243
- x = json_loads(line)
244
- if isinstance(x, dict):
245
- if x.get("action") == "error":
246
- err_message = x.get("type", "")
247
- if x.get("status") == 429:
248
- raise (
249
- ConversationLimitException(err_message)
250
- if err_message == "ERR_CONVERSATION_LIMIT"
251
- else RatelimitE(err_message)
252
- )
253
- raise WebscoutE(err_message)
254
- elif message := x.get("message"):
255
- chunks.append(message)
256
- yield message
257
- except Exception as ex:
258
- raise WebscoutE(f"achat_yield() {type(ex).__name__}: {ex}") from ex
259
-
260
- result = "".join(chunks)
261
- self._chat_messages.append({"role": "assistant", "content": result})
262
- self._chat_tokens_count += len(result)
263
-
264
- async def achat(self, keywords: str, model: str = "gpt-4o-mini", timeout: int = 30) -> str:
265
- """Initiates an async chat session with webscout AI.
266
-
267
- Args:
268
- keywords (str): The initial message or question to send to the AI.
269
- model (str): The model to use: "gpt-4o-mini", "llama-3.3-70b", "claude-3-haiku",
270
- "o3-mini", "mistral-small-3". Defaults to "gpt-4o-mini".
271
- timeout (int): Timeout value for the HTTP client. Defaults to 30.
272
-
273
- Returns:
274
- str: The response from the AI.
275
- """
276
- chunks = []
277
- async for chunk in self.achat_yield(keywords, model, timeout):
278
- chunks.append(chunk)
279
- return "".join(chunks)
280
-
281
- async def atext(
282
- self,
283
- keywords: str,
284
- region: str = "wt-wt",
285
- safesearch: str = "moderate",
286
- timelimit: Optional[str] = None,
287
- backend: str = "auto",
288
- max_results: Optional[int] = None,
289
- ) -> List[Dict[str, str]]:
290
- """webscout async text search. Query params: https://duckduckgo.com/params.
291
-
292
- Args:
293
- keywords: keywords for query.
294
- region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt".
295
- safesearch: on, moderate, off. Defaults to "moderate".
296
- timelimit: d, w, m, y. Defaults to None.
297
- backend: auto, html, lite. Defaults to auto.
298
- auto - try all backends in random order,
299
- html - collect data from https://html.duckduckgo.com,
300
- lite - collect data from https://lite.duckduckgo.com.
301
- max_results: max number of results. If None, returns results only from the first response. Defaults to None.
302
-
303
- Returns:
304
- List of dictionaries with search results.
305
-
306
- Raises:
307
- WebscoutE: Base exception for webscout errors.
308
- RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
309
- TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
310
- """
311
- if backend in ("api", "ecosia"):
312
- warnings.warn(f"{backend=} is deprecated, using backend='auto'", stacklevel=2)
313
- backend = "auto"
314
- backends = ["html", "lite"] if backend == "auto" else [backend]
315
- shuffle(backends)
316
-
317
- results, err = [], None
318
- for b in backends:
319
- try:
320
- if b == "html":
321
- results = await self._text_html(keywords, region, timelimit, max_results)
322
- elif b == "lite":
323
- results = await self._text_lite(keywords, region, timelimit, max_results)
324
- return results
325
- except Exception as ex:
326
- err = ex
327
-
328
- raise WebscoutE(err)
329
-
330
- async def _text_html(
331
- self,
332
- keywords: str,
333
- region: str = "wt-wt",
334
- timelimit: Optional[str] = None,
335
- max_results: Optional[int] = None,
336
- ) -> List[Dict[str, str]]:
337
- """HTML backend for text search."""
338
- assert keywords, "keywords is mandatory"
339
-
340
- payload = {
341
- "q": keywords,
342
- "s": "0",
343
- "o": "json",
344
- "api": "d.js",
345
- "vqd": "",
346
- "kl": region,
347
- "bing_market": region,
348
- }
349
- if timelimit:
350
- payload["df"] = timelimit
351
-
352
- cache = set()
353
- results: List[Dict[str, str]] = []
354
-
355
- for _ in range(5):
356
- resp_content = await self._get_url("POST", "https://html.duckduckgo.com/html", data=payload)
357
- if b"No results." in resp_content:
358
- return results
359
-
360
- tree = document_fromstring(resp_content, self.parser)
361
- elements = tree.xpath("//div[h2]")
362
- if not isinstance(elements, list):
363
- return results
364
-
365
- for e in elements:
366
- if isinstance(e, _Element):
367
- hrefxpath = e.xpath("./a/@href")
368
- href = str(hrefxpath[0]) if hrefxpath and isinstance(hrefxpath, list) else None
369
- if (
370
- href
371
- and href not in cache
372
- and not href.startswith(
373
- ("http://www.google.com/search?q=", "https://duckduckgo.com/y.js?ad_domain")
374
- )
375
- ):
376
- cache.add(href)
377
- titlexpath = e.xpath("./h2/a/text()")
378
- title = str(titlexpath[0]) if titlexpath and isinstance(titlexpath, list) else ""
379
- bodyxpath = e.xpath("./a//text()")
380
- body = "".join(str(x) for x in bodyxpath) if bodyxpath and isinstance(bodyxpath, list) else ""
381
- results.append(
382
- {
383
- "title": _normalize(title),
384
- "href": _normalize_url(href),
385
- "body": _normalize(body),
386
- }
387
- )
388
- if max_results and len(results) >= max_results:
389
- return results
390
-
391
- npx = tree.xpath('.//div[@class="nav-link"]')
392
- if not npx or not max_results:
393
- return results
394
- next_page = npx[-1] if isinstance(npx, list) else None
395
- if isinstance(next_page, _Element):
396
- names = next_page.xpath('.//input[@type="hidden"]/@name')
397
- values = next_page.xpath('.//input[@type="hidden"]/@value')
398
- if isinstance(names, list) and isinstance(values, list):
399
- payload = {str(n): str(v) for n, v in zip(names, values)}
400
-
401
- return results
402
-
403
- async def _text_lite(
404
- self,
405
- keywords: str,
406
- region: str = "wt-wt",
407
- timelimit: Optional[str] = None,
408
- max_results: Optional[int] = None,
409
- ) -> List[Dict[str, str]]:
410
- """Lite backend for text search."""
411
- assert keywords, "keywords is mandatory"
412
-
413
- payload = {
414
- "q": keywords,
415
- "s": "0",
416
- "o": "json",
417
- "api": "d.js",
418
- "vqd": "",
419
- "kl": region,
420
- "bing_market": region,
421
- }
422
- if timelimit:
423
- payload["df"] = timelimit
424
-
425
- cache = set()
426
- results: List[Dict[str, str]] = []
427
-
428
- for _ in range(5):
429
- resp_content = await self._get_url("POST", "https://lite.duckduckgo.com/lite/", data=payload)
430
- if b"No more results." in resp_content:
431
- return results
432
-
433
- tree = document_fromstring(resp_content, self.parser)
434
- elements = tree.xpath("//table[last()]//tr")
435
- if not isinstance(elements, list):
436
- return results
437
-
438
- data = zip(cycle(range(1, 5)), elements)
439
- for i, e in data:
440
- if isinstance(e, _Element):
441
- if i == 1:
442
- hrefxpath = e.xpath(".//a//@href")
443
- href = str(hrefxpath[0]) if hrefxpath and isinstance(hrefxpath, list) else None
444
- if (
445
- href is None
446
- or href in cache
447
- or href.startswith(
448
- ("http://www.google.com/search?q=", "https://duckduckgo.com/y.js?ad_domain")
449
- )
450
- ):
451
- [next(data, None) for _ in range(3)] # skip block(i=1,2,3,4)
452
- else:
453
- cache.add(href)
454
- titlexpath = e.xpath(".//a//text()")
455
- title = str(titlexpath[0]) if titlexpath and isinstance(titlexpath, list) else ""
456
- elif i == 2:
457
- bodyxpath = e.xpath(".//td[@class='result-snippet']//text()")
458
- body = (
459
- "".join(str(x) for x in bodyxpath).strip()
460
- if bodyxpath and isinstance(bodyxpath, list)
461
- else ""
462
- )
463
- if href:
464
- results.append(
465
- {
466
- "title": _normalize(title),
467
- "href": _normalize_url(href),
468
- "body": _normalize(body),
469
- }
470
- )
471
- if max_results and len(results) >= max_results:
472
- return results
473
-
474
- next_page_s = tree.xpath("//form[./input[contains(@value, 'ext')]]/input[@name='s']/@value")
475
- if not next_page_s or not max_results:
476
- return results
477
- elif isinstance(next_page_s, list):
478
- payload["s"] = str(next_page_s[0])
479
-
480
- return results
481
-
482
- async def aimages(
483
- self,
484
- keywords: str,
485
- region: str = "wt-wt",
486
- safesearch: str = "moderate",
487
- timelimit: Optional[str] = None,
488
- size: Optional[str] = None,
489
- color: Optional[str] = None,
490
- type_image: Optional[str] = None,
491
- layout: Optional[str] = None,
492
- license_image: Optional[str] = None,
493
- max_results: Optional[int] = None,
494
- ) -> List[Dict[str, str]]:
495
- """webscout async images search. Query params: https://duckduckgo.com/params.
496
-
497
- Args:
498
- keywords: keywords for query.
499
- region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt".
500
- safesearch: on, moderate, off. Defaults to "moderate".
501
- timelimit: Day, Week, Month, Year. Defaults to None.
502
- size: Small, Medium, Large, Wallpaper. Defaults to None.
503
- color: color, Monochrome, Red, Orange, Yellow, Green, Blue,
504
- Purple, Pink, Brown, Black, Gray, Teal, White. Defaults to None.
505
- type_image: photo, clipart, gif, transparent, line.
506
- Defaults to None.
507
- layout: Square, Tall, Wide. Defaults to None.
508
- license_image: any (All Creative Commons), Public (PublicDomain),
509
- Share (Free to Share and Use), ShareCommercially (Free to Share and Use Commercially),
510
- Modify (Free to Modify, Share, and Use), ModifyCommercially (Free to Modify, Share, and
511
- Use Commercially). Defaults to None.
512
- max_results: max number of results. If None, returns results only from the first response. Defaults to None.
513
-
514
- Returns:
515
- List of dictionaries with images search results.
516
-
517
- Raises:
518
- WebscoutE: Base exception for webscout errors.
519
- RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
520
- TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
521
- """
522
- result = await self._loop.run_in_executor(
523
- self._executor,
524
- super().images,
525
- keywords,
526
- region,
527
- safesearch,
528
- timelimit,
529
- size,
530
- color,
531
- type_image,
532
- layout,
533
- license_image,
534
- max_results,
535
- )
536
- return result
537
-
538
- async def avideos(
539
- self,
540
- keywords: str,
541
- region: str = "wt-wt",
542
- safesearch: str = "moderate",
543
- timelimit: Optional[str] = None,
544
- resolution: Optional[str] = None,
545
- duration: Optional[str] = None,
546
- license_videos: Optional[str] = None,
547
- max_results: Optional[int] = None,
548
- ) -> List[Dict[str, str]]:
549
- """webscout async videos search. Query params: https://duckduckgo.com/params.
550
-
551
- Args:
552
- keywords: keywords for query.
553
- region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt".
554
- safesearch: on, moderate, off. Defaults to "moderate".
555
- timelimit: d, w, m. Defaults to None.
556
- resolution: high, standart. Defaults to None.
557
- duration: short, medium, long. Defaults to None.
558
- license_videos: creativeCommon, youtube. Defaults to None.
559
- max_results: max number of results. If None, returns results only from the first response. Defaults to None.
560
-
561
- Returns:
562
- List of dictionaries with videos search results.
563
-
564
- Raises:
565
- WebscoutE: Base exception for webscout errors.
566
- RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
567
- TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
568
- """
569
- result = await self._loop.run_in_executor(
570
- self._executor,
571
- super().videos,
572
- keywords,
573
- region,
574
- safesearch,
575
- timelimit,
576
- resolution,
577
- duration,
578
- license_videos,
579
- max_results,
580
- )
581
- return result
582
-
583
- async def anews(
584
- self,
585
- keywords: str,
586
- region: str = "wt-wt",
587
- safesearch: str = "moderate",
588
- timelimit: Optional[str] = None,
589
- max_results: Optional[int] = None,
590
- ) -> List[Dict[str, str]]:
591
- """webscout async news search. Query params: https://duckduckgo.com/params.
592
-
593
- Args:
594
- keywords: keywords for query.
595
- region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt".
596
- safesearch: on, moderate, off. Defaults to "moderate".
597
- timelimit: d, w, m. Defaults to None.
598
- max_results: max number of results. If None, returns results only from the first response. Defaults to None.
599
-
600
- Returns:
601
- List of dictionaries with news search results.
602
-
603
- Raises:
604
- WebscoutE: Base exception for webscout errors.
605
- RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
606
- TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
607
- """
608
- result = await self._loop.run_in_executor(
609
- self._executor,
610
- super().news,
611
- keywords,
612
- region,
613
- safesearch,
614
- timelimit,
615
- max_results,
616
- )
617
- return result
618
-
619
- async def aanswers(
620
- self,
621
- keywords: str,
622
- ) -> List[Dict[str, str]]:
623
- """webscout async instant answers. Query params: https://duckduckgo.com/params.
624
-
625
- Args:
626
- keywords: keywords for query,
627
-
628
- Returns:
629
- List of dictionaries with instant answers results.
630
-
631
- Raises:
632
- WebscoutE: Base exception for webscout errors.
633
- RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
634
- TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
635
- """
636
- result = await self._loop.run_in_executor(
637
- self._executor,
638
- super().answers,
639
- keywords,
640
- )
641
- return result
642
-
643
- async def asuggestions(
644
- self,
645
- keywords: str,
646
- region: str = "wt-wt",
647
- ) -> List[Dict[str, str]]:
648
- """webscout async suggestions. Query params: https://duckduckgo.com/params.
649
-
650
- Args:
651
- keywords: keywords for query.
652
- region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt".
653
-
654
- Returns:
655
- List of dictionaries with suggestions results.
656
-
657
- Raises:
658
- WebscoutE: Base exception for webscout errors.
659
- RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
660
- TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
661
- """
662
- result = await self._loop.run_in_executor(
663
- self._executor,
664
- super().suggestions,
665
- keywords,
666
- region,
667
- )
668
- return result
669
-
670
- async def amaps(
671
- self,
672
- keywords: str,
673
- place: Optional[str] = None,
674
- street: Optional[str] = None,
675
- city: Optional[str] = None,
676
- county: Optional[str] = None,
677
- state: Optional[str] = None,
678
- country: Optional[str] = None,
679
- postalcode: Optional[str] = None,
680
- latitude: Optional[str] = None,
681
- longitude: Optional[str] = None,
682
- radius: int = 0,
683
- max_results: Optional[int] = None,
684
- ) -> List[Dict[str, str]]:
685
- """webscout async maps search. Query params: https://duckduckgo.com/params.
686
-
687
- Args:
688
- keywords: keywords for query
689
- place: if set, the other parameters are not used. Defaults to None.
690
- street: house number/street. Defaults to None.
691
- city: city of search. Defaults to None.
692
- county: county of search. Defaults to None.
693
- state: state of search. Defaults to None.
694
- country: country of search. Defaults to None.
695
- postalcode: postalcode of search. Defaults to None.
696
- latitude: geographic coordinate (north-south position). Defaults to None.
697
- longitude: geographic coordinate (east-west position); if latitude and
698
- longitude are set, the other parameters are not used. Defaults to None.
699
- radius: expand the search square by the distance in kilometers. Defaults to 0.
700
- max_results: max number of results. If None, returns results only from the first response. Defaults to None.
701
-
702
- Returns:
703
- List of dictionaries with maps search results, or None if there was an error.
704
-
705
- Raises:
706
- WebscoutE: Base exception for webscout errors.
707
- RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
708
- TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
709
- """
710
- result = await self._loop.run_in_executor(
711
- self._executor,
712
- super().maps,
713
- keywords,
714
- place,
715
- street,
716
- city,
717
- county,
718
- state,
719
- country,
720
- postalcode,
721
- latitude,
722
- longitude,
723
- radius,
724
- max_results,
725
- )
726
- return result
727
-
728
- async def atranslate(
729
- self,
730
- keywords: Union[List[str], str],
731
- from_: Optional[str] = None,
732
- to: str = "en",
733
- ) -> List[Dict[str, str]]:
734
- """webscout async translate.
735
-
736
- Args:
737
- keywords: string or list of strings to translate.
738
- from_: translate from (defaults automatically). Defaults to None.
739
- to: what language to translate. Defaults to "en".
740
-
741
- Returns:
742
- List od dictionaries with translated keywords.
743
-
744
- Raises:
745
- WebscoutE: Base exception for webscout errors.
746
- RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
747
- TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
748
- """
749
- result = await self._loop.run_in_executor(
750
- self._executor,
751
- super().translate,
752
- keywords,
753
- from_,
754
- to,
755
- )
756
- return result
757
-
758
- async def aweather(
759
- self,
760
- location: str,
761
- language: str = "en",
762
- ) -> dict[str, Any]:
763
- """Async version of weather information retrieval from DuckDuckGo.
764
-
765
- Args:
766
- location: Location to get weather for.
767
- language: Language code (e.g. 'en', 'es'). Defaults to "en".
768
-
769
- Returns:
770
- Dictionary containing weather data with the following structure:
771
- {
772
- "location": str,
773
- "current": {
774
- "condition": str,
775
- "temperature_c": float,
776
- "feels_like_c": float,
777
- "humidity": float,
778
- "wind_speed_ms": float,
779
- "wind_direction": float,
780
- "visibility_m": float
781
- },
782
- "daily_forecast": List[{
783
- "date": str,
784
- "condition": str,
785
- "max_temp_c": float,
786
- "min_temp_c": float,
787
- "sunrise": str,
788
- "sunset": str
789
- }],
790
- "hourly_forecast": List[{
791
- "time": str,
792
- "condition": str,
793
- "temperature_c": float,
794
- "feels_like_c": float,
795
- "humidity": float,
796
- "wind_speed_ms": float,
797
- "wind_direction": float,
798
- "visibility_m": float
799
- }]
800
- }
801
-
802
- Raises:
803
- WebscoutE: Base exception for webscout errors.
804
- RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
805
- TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
806
- """
807
- result = await self._loop.run_in_executor(
808
- self._executor,
809
- super().weather,
810
- location,
811
- language,
812
- )
813
- return result
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import os
5
+ import warnings
6
+ from datetime import datetime, timezone
7
+ from functools import cached_property
8
+ from itertools import cycle
9
+ from random import choice, shuffle
10
+ from time import time
11
+ from types import TracebackType
12
+ from typing import Any, Dict, List, Optional, Type, Union, cast, AsyncIterator
13
+
14
+ import httpx
15
+ from lxml.etree import _Element
16
+ from lxml.html import HTMLParser as LHTMLParser
17
+ from lxml.html import document_fromstring
18
+
19
+ from .exceptions import ConversationLimitException, RatelimitE, TimeoutE, WebscoutE
20
+ from .utils import (
21
+ _expand_proxy_tb_alias,
22
+ _extract_vqd,
23
+ _normalize,
24
+ _normalize_url,
25
+ json_loads,
26
+ )
27
+
28
+
29
+
30
+
31
class AsyncWEBS:
    """Asynchronous webscout class to get search results."""

    # Browser fingerprint identifiers.
    # NOTE(review): not referenced by any method visible in this class — confirm whether still needed.
    _impersonates = (
        "chrome_100", "chrome_101", "chrome_104", "chrome_105", "chrome_106", "chrome_107",
        "chrome_108", "chrome_109", "chrome_114", "chrome_116", "chrome_117", "chrome_118",
        "chrome_119", "chrome_120", "chrome_123", "chrome_124", "chrome_126", "chrome_127",
        "chrome_128", "chrome_129", "chrome_130", "chrome_131", "chrome_133",
        "safari_ios_16.5", "safari_ios_17.2", "safari_ios_17.4.1", "safari_ios_18.1.1",
        "safari_15.3", "safari_15.5", "safari_15.6.1", "safari_16", "safari_16.5",
        "safari_17.0", "safari_17.2.1", "safari_17.4.1", "safari_17.5",
        "safari_18", "safari_18.2",
        "safari_ipad_18",
        "edge_101", "edge_122", "edge_127", "edge_131",
        "firefox_109", "firefox_117", "firefox_128", "firefox_133", "firefox_135",
    )
    # Operating-system identifiers.
    # NOTE(review): likewise not referenced by the methods visible in this class.
    _impersonates_os = ("android", "ios", "linux", "macos", "windows")
    # Maps the short model names accepted by achat()/achat_yield() to the
    # identifier placed in the "model" field of the chat request body.
    _chat_models = {
        "gpt-4o-mini": "gpt-4o-mini",
        "llama-3.3-70b": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "claude-3-haiku": "claude-3-haiku-20240307",
        "o3-mini": "o3-mini",
        "mistral-small-3": "mistralai/Mistral-Small-24B-Instruct-2501",
    }
55
+
56
def __init__(
    self,
    headers: Optional[Dict[str, str]] = None,
    proxy: Optional[str] = None,
    proxies: Union[Dict[str, str], str, None] = None,  # deprecated
    timeout: Optional[int] = 10,
    verify: bool = True,
) -> None:
    """Initialize the AsyncWEBS object.

    Args:
        headers (dict, optional): Dictionary of headers for the HTTP client. They are
            layered on top of the built-in browser-like defaults, so a key supplied here
            wins over the default. The caller's dict is not modified. Defaults to None.
        proxy (str, optional): proxy for the HTTP client, supports http/https/socks5 protocols.
            example: "http://user:pass@example.com:3128". Defaults to None.
        proxies (dict | str, optional): Deprecated alias for ``proxy``; only consulted when
            ``proxy`` is not given. Defaults to None.
        timeout (int, optional): Timeout value for the HTTP client. Defaults to 10.
        verify (bool): SSL verification when making the request. Defaults to True.
    """
    # The DDGS_PROXY environment variable takes precedence over the `proxy` argument.
    ddgs_proxy: Optional[str] = os.environ.get("DDGS_PROXY")
    self.proxy: Optional[str] = ddgs_proxy if ddgs_proxy else _expand_proxy_tb_alias(proxy)
    assert self.proxy is None or isinstance(self.proxy, str), "proxy must be a str"
    if not proxy and proxies:
        # stacklevel=2 attributes the warning to the code constructing AsyncWEBS.
        warnings.warn("'proxies' is deprecated, use 'proxy' instead.", stacklevel=2)
        self.proxy = proxies.get("http") or proxies.get("https") if isinstance(proxies, dict) else proxies

    default_headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
        "Accept-Encoding": "gzip, deflate, br",
        "DNT": "1",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "none",
        "Sec-Fetch-User": "?1",
        "Referer": "https://duckduckgo.com/",
    }

    # Build a fresh dict: defaults first, caller-supplied values last so they take
    # precedence, and the caller's own dict is never mutated.
    self.headers = {**default_headers, **(headers or {})}

    # Stored on the instance because _get_url() falls back to `self.timeout`
    # when a request does not specify its own timeout.
    self.timeout = timeout

    # NOTE(review): recent httpx releases deprecate/remove the `proxies` keyword in
    # favour of `proxy` — confirm against the httpx version this package pins.
    self.client = httpx.AsyncClient(
        headers=self.headers,
        proxies=self.proxy,
        timeout=timeout,
        follow_redirects=False,
        verify=verify,
    )
    # Wall-clock time of the previous request; 0.0 means "no request made yet".
    self.sleep_timestamp = 0.0

    self._exception_event = asyncio.Event()
    # Conversation state for achat()/achat_yield().
    self._chat_messages: List[Dict[str, str]] = []
    self._chat_tokens_count = 0
    self._chat_vqd: str = ""
    self._chat_vqd_hash: str = ""
    self._chat_xfe: str = ""
112
+
113
async def __aenter__(self) -> AsyncWEBS:
    """Enter the async context manager and return this instance unchanged."""
    return self
115
+
116
async def __aexit__(
    self,
    exc_type: Optional[Type[BaseException]] = None,
    exc_val: Optional[BaseException] = None,
    exc_tb: Optional[TracebackType] = None,
) -> None:
    """Leave the async context manager by closing the underlying HTTP client.

    The exception arguments are ignored; returning None means any exception
    raised inside the ``async with`` body is not suppressed.
    """
    await self.client.aclose()
123
+
124
@cached_property
def parser(self) -> LHTMLParser:
    """Get HTML parser.

    Built on first access and then cached on the instance by ``cached_property``.
    The parser is configured to drop blank text, comments and processing
    instructions, and not to collect element ids.
    """
    return LHTMLParser(remove_blank_text=True, remove_comments=True, remove_pis=True, collect_ids=False)
128
+
129
async def _sleep(self, sleeptime: float = 0.75) -> None:
    """Pause between consecutive API requests.

    No pause is applied for the very first request (timestamp still 0.0) or when
    at least 20 seconds have passed since the previous one; otherwise the
    coroutine waits ``sleeptime`` seconds. The timestamp is refreshed before
    the wait starts.
    """
    previous = self.sleep_timestamp
    if previous and time() - previous < 20:
        pause = sleeptime
    else:
        pause = 0.0
    self.sleep_timestamp = time()
    await asyncio.sleep(pause)
134
+
135
async def _get_url(
    self,
    method: str,
    url: str,
    params: Optional[Dict[str, str]] = None,
    content: Optional[bytes] = None,
    data: Optional[Dict[str, str]] = None,
    headers: Optional[Dict[str, str]] = None,
    cookies: Optional[Dict[str, str]] = None,
    json: Any = None,
    timeout: Optional[float] = None,
) -> Any:
    """Make HTTP request with proper rate limiting.

    Args:
        method: HTTP method name passed to the client.
        url: Target URL.
        params, content, data, headers, cookies, json: Forwarded unchanged to
            ``self.client.request``.
        timeout: Per-request timeout. When falsy, the instance's ``timeout``
            attribute is used if present; if neither is set the argument is
            omitted so the client's own configured timeout applies.

    Returns:
        The response object when the status code is 200.

    Raises:
        TimeoutE: The request failed with a timeout.
        RatelimitE: The status code is one of 202, 301, 403, 400, 429, 418.
        WebscoutE: Any other request failure or unexpected status code.
    """
    await self._sleep()

    request_kwargs: Dict[str, Any] = {
        "params": params,
        "content": content,
        "data": data,
        "headers": headers,
        "cookies": cookies,
        "json": json,
    }
    # The instance is not guaranteed to carry a `timeout` attribute; reading it
    # unconditionally would raise AttributeError, which the handler below would
    # then misreport as a timeout because the message contains "time".
    effective_timeout = timeout or getattr(self, "timeout", None)
    if effective_timeout:
        request_kwargs["timeout"] = effective_timeout

    try:
        resp = await self.client.request(method, url, **request_kwargs)
    except Exception as ex:
        # httpx timeout exceptions can have an empty message, so check the type
        # first and keep the message heuristic only as a fallback.
        if isinstance(ex, httpx.TimeoutException) or "time" in str(ex).lower():
            raise TimeoutE(f"{url} {type(ex).__name__}: {ex}") from ex
        raise WebscoutE(f"{url} {type(ex).__name__}: {ex}") from ex

    if resp.status_code == 200:
        return resp
    elif resp.status_code in (202, 301, 403, 400, 429, 418):
        raise RatelimitE(f"{resp.url} {resp.status_code} Ratelimit")
    raise WebscoutE(f"{resp.url} return None. {params=} {content=} {data=}")
171
+
172
async def _get_vqd(self, keywords: str) -> str:
    """Get vqd value for a search query.

    Requests https://duckduckgo.com with ``keywords`` as the ``q`` parameter and
    hands the raw response body to ``_extract_vqd``.
    """
    response = await self._get_url("GET", "https://duckduckgo.com", params={"q": keywords})
    body = response.content
    return _extract_vqd(body, keywords)
176
+
177
async def achat_yield(self, keywords: str, model: str = "gpt-4o-mini", timeout: int = 30) -> AsyncIterator[str]:
    """Initiates an async chat session with webscout AI.

    Args:
        keywords (str): The initial message or question to send to the AI.
        model (str): The model to use: "gpt-4o-mini", "llama-3.3-70b", "claude-3-haiku",
            "o3-mini", "mistral-small-3". Defaults to "gpt-4o-mini".
        timeout (int): Timeout value for the HTTP client. Defaults to 30.

    Yields:
        str: Chunks of the response from the AI.

    Raises:
        ConversationLimitException: The service reported the conversation limit.
        RatelimitE: The service answered with a 429 error event.
        WebscoutE: Any other error event or failure while reading the response.
    """
    # x-fe-version: scraped once from the chat page and cached on the instance.
    if not self._chat_xfe:
        resp_content = (
            await self._get_url(
                method="GET",
                url="https://duckduckgo.com/?q=DuckDuckGo+AI+Chat&ia=chat&duckai=1",
            )
        ).content
        try:
            xfe1 = resp_content.split(b'__DDG_BE_VERSION__="', maxsplit=1)[1].split(b'"', maxsplit=1)[0].decode()
            xfe2 = resp_content.split(b'__DDG_FE_CHAT_HASH__="', maxsplit=1)[1].split(b'"', maxsplit=1)[0].decode()
            self._chat_xfe = f"{xfe1}-{xfe2}"
        except Exception as ex:
            raise WebscoutE(
                f"achat_yield() Error to get _chat_xfe: {type(ex).__name__}: {ex}"
            ) from ex
    # vqd: obtained from the status endpoint on the first call, then refreshed
    # from every chat response below.
    if not self._chat_vqd:
        resp = await self._get_url(
            method="GET", url="https://duckduckgo.com/duckchat/v1/status", headers={"x-vqd-accept": "1"}
        )
        self._chat_vqd = resp.headers.get("x-vqd-4", "")
        self._chat_vqd_hash = resp.headers.get("x-vqd-hash-1", "")

    self._chat_messages.append({"role": "user", "content": keywords})
    self._chat_tokens_count += max(len(keywords) // 4, 1)  # approximate number of tokens
    if model not in self._chat_models:
        warnings.warn(f"{model=} is unavailable. Using 'gpt-4o-mini'", stacklevel=1)
        model = "gpt-4o-mini"
    json_data = {
        "model": self._chat_models[model],
        "messages": self._chat_messages,
    }
    resp = await self._get_url(
        method="POST",
        url="https://duckduckgo.com/duckchat/v1/chat",
        headers={
            "x-fe-version": self._chat_xfe,
            "x-vqd-4": self._chat_vqd,
            "x-vqd-hash-1": "",
        },
        json=json_data,
        timeout=timeout,
    )
    self._chat_vqd = resp.headers.get("x-vqd-4", "")
    self._chat_vqd_hash = resp.headers.get("x-vqd-hash-1", "")
    chunks = []
    try:
        async for chunk in resp.aiter_bytes():
            lines = chunk.split(b"data:")
            for line in lines:
                if line := line.strip():
                    if line == b"[DONE]":
                        # Only leaves the per-chunk loop; the outer loop ends
                        # when the response body is exhausted.
                        break
                    if line == b"[DONE][LIMIT_CONVERSATION]":
                        raise ConversationLimitException("ERR_CONVERSATION_LIMIT")
                    x = json_loads(line)
                    if isinstance(x, dict):
                        if x.get("action") == "error":
                            err_message = x.get("type", "")
                            if x.get("status") == 429:
                                raise (
                                    ConversationLimitException(err_message)
                                    if err_message == "ERR_CONVERSATION_LIMIT"
                                    else RatelimitE(err_message)
                                )
                            raise WebscoutE(err_message)
                        elif message := x.get("message"):
                            chunks.append(message)
                            yield message
    except (ConversationLimitException, WebscoutE):
        # Errors raised deliberately above already carry the right type; let them
        # through unchanged so callers can tell rate/conversation limits apart
        # instead of receiving a generic WebscoutE wrapper.
        raise
    except Exception as ex:
        raise WebscoutE(f"achat_yield() {type(ex).__name__}: {ex}") from ex

    # Record the assistant reply so the next call sends the full history.
    result = "".join(chunks)
    self._chat_messages.append({"role": "assistant", "content": result})
    self._chat_tokens_count += len(result)
263
+
264
async def achat(self, keywords: str, model: str = "gpt-4o-mini", timeout: int = 30) -> str:
    """Send one chat message and return the complete reply as a single string.

    Args:
        keywords (str): The initial message or question to send to the AI.
        model (str): The model to use: "gpt-4o-mini", "llama-3.3-70b", "claude-3-haiku",
            "o3-mini", "mistral-small-3". Defaults to "gpt-4o-mini".
        timeout (int): Timeout value for the HTTP client. Defaults to 30.

    Returns:
        str: The response from the AI, i.e. every chunk produced by
        ``achat_yield`` concatenated in order.
    """
    pieces = [piece async for piece in self.achat_yield(keywords, model, timeout)]
    return "".join(pieces)
280
+
281
async def atext(
    self,
    keywords: str,
    region: str = "wt-wt",
    safesearch: str = "moderate",
    timelimit: Optional[str] = None,
    backend: str = "auto",
    max_results: Optional[int] = None,
) -> List[Dict[str, str]]:
    """webscout async text search. Query params: https://duckduckgo.com/params.

    Args:
        keywords: keywords for query.
        region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt".
        safesearch: on, moderate, off. Defaults to "moderate".
            Accepted for interface compatibility; not forwarded to either backend here.
        timelimit: d, w, m, y. Defaults to None.
        backend: auto, html, lite. Defaults to auto.
            auto - try all backends in random order,
            html - collect data from https://html.duckduckgo.com,
            lite - collect data from https://lite.duckduckgo.com.
        max_results: max number of results. If None, returns results only from the first response. Defaults to None.

    Returns:
        List of dictionaries with search results.

    Raises:
        WebscoutE: Base exception for webscout errors. Raised when every selected
            backend failed; the last backend error is attached as ``__cause__``.
        RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
        TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
    """
    if backend in ("api", "ecosia"):
        warnings.warn(f"{backend=} is deprecated, using backend='auto'", stacklevel=2)
        backend = "auto"
    backends = ["html", "lite"] if backend == "auto" else [backend]
    shuffle(backends)

    results, err = [], None
    for b in backends:
        try:
            if b == "html":
                results = await self._text_html(keywords, region, timelimit, max_results)
            elif b == "lite":
                results = await self._text_lite(keywords, region, timelimit, max_results)
            # First backend that completes without raising wins, even if empty.
            return results
        except Exception as ex:
            err = ex

    # Chain explicitly: the raise happens outside the `except` block, so without
    # `from err` the original traceback would not be attached.
    raise WebscoutE(err) from err
329
+
330
async def _text_html(
    self,
    keywords: str,
    region: str = "wt-wt",
    timelimit: Optional[str] = None,
    max_results: Optional[int] = None,
) -> List[Dict[str, str]]:
    """HTML backend for text search.

    Posts the query to https://html.duckduckgo.com/html and parses up to five
    result pages.

    Args:
        keywords: Query string; must be non-empty.
        region: Region code sent as ``kl`` and ``bing_market``.
        timelimit: Optional time filter sent as ``df``.
        max_results: Stop once this many results are collected. When falsy, only
            the first page is parsed.

    Returns:
        List of ``{"title", "href", "body"}`` dictionaries, de-duplicated by href.
    """
    assert keywords, "keywords is mandatory"

    payload = {
        "q": keywords,
        "s": "0",
        "o": "json",
        "api": "d.js",
        "vqd": "",
        "kl": region,
        "bing_market": region,
    }
    if timelimit:
        payload["df"] = timelimit

    cache = set()
    results: List[Dict[str, str]] = []

    for _ in range(5):
        # _get_url returns the response object; the checks and the parser below
        # need its raw body bytes.
        resp = await self._get_url("POST", "https://html.duckduckgo.com/html", data=payload)
        resp_content = resp.content
        if b"No results." in resp_content:
            return results

        tree = document_fromstring(resp_content, self.parser)
        elements = tree.xpath("//div[h2]")
        if not isinstance(elements, list):
            return results

        for e in elements:
            if isinstance(e, _Element):
                hrefxpath = e.xpath("./a/@href")
                href = str(hrefxpath[0]) if hrefxpath and isinstance(hrefxpath, list) else None
                # Skip duplicates and the Google-fallback / ad redirect links.
                if (
                    href
                    and href not in cache
                    and not href.startswith(
                        ("http://www.google.com/search?q=", "https://duckduckgo.com/y.js?ad_domain")
                    )
                ):
                    cache.add(href)
                    titlexpath = e.xpath("./h2/a/text()")
                    title = str(titlexpath[0]) if titlexpath and isinstance(titlexpath, list) else ""
                    bodyxpath = e.xpath("./a//text()")
                    body = "".join(str(x) for x in bodyxpath) if bodyxpath and isinstance(bodyxpath, list) else ""
                    results.append(
                        {
                            "title": _normalize(title),
                            "href": _normalize_url(href),
                            "body": _normalize(body),
                        }
                    )
                    if max_results and len(results) >= max_results:
                        return results

        npx = tree.xpath('.//div[@class="nav-link"]')
        if not npx or not max_results:
            return results
        # The last nav-link block holds the hidden form fields for the next page;
        # they replace the payload for the following request.
        next_page = npx[-1] if isinstance(npx, list) else None
        if isinstance(next_page, _Element):
            names = next_page.xpath('.//input[@type="hidden"]/@name')
            values = next_page.xpath('.//input[@type="hidden"]/@value')
            if isinstance(names, list) and isinstance(values, list):
                payload = {str(n): str(v) for n, v in zip(names, values)}

    return results
402
+
403
async def _text_lite(
    self,
    keywords: str,
    region: str = "wt-wt",
    timelimit: Optional[str] = None,
    max_results: Optional[int] = None,
) -> List[Dict[str, str]]:
    """Lite backend for text search.

    Posts the query to https://lite.duckduckgo.com/lite/ and parses up to five
    result pages. The rows of the last table are read in groups of four:
    row 1 carries the link and title, row 2 the snippet.

    Args:
        keywords: Query string; must be non-empty.
        region: Region code sent as ``kl`` and ``bing_market``.
        timelimit: Optional time filter sent as ``df``.
        max_results: Stop once this many results are collected. When falsy, only
            the first page is parsed.

    Returns:
        List of ``{"title", "href", "body"}`` dictionaries, de-duplicated by href.
    """
    assert keywords, "keywords is mandatory"

    payload = {
        "q": keywords,
        "s": "0",
        "o": "json",
        "api": "d.js",
        "vqd": "",
        "kl": region,
        "bing_market": region,
    }
    if timelimit:
        payload["df"] = timelimit

    cache = set()
    results: List[Dict[str, str]] = []

    for _ in range(5):
        # _get_url returns the response object; the checks and the parser below
        # need its raw body bytes.
        resp = await self._get_url("POST", "https://lite.duckduckgo.com/lite/", data=payload)
        resp_content = resp.content
        if b"No more results." in resp_content:
            return results

        tree = document_fromstring(resp_content, self.parser)
        elements = tree.xpath("//table[last()]//tr")
        if not isinstance(elements, list):
            return results

        # Defined up front so the i == 2 branch never reads an unbound name if a
        # page's first row was not processed by the i == 1 branch.
        href: Optional[str] = None
        title = ""

        data = zip(cycle(range(1, 5)), elements)
        for i, e in data:
            if isinstance(e, _Element):
                if i == 1:
                    hrefxpath = e.xpath(".//a//@href")
                    href = str(hrefxpath[0]) if hrefxpath and isinstance(hrefxpath, list) else None
                    if (
                        href is None
                        or href in cache
                        or href.startswith(
                            ("http://www.google.com/search?q=", "https://duckduckgo.com/y.js?ad_domain")
                        )
                    ):
                        # skip block(i=1,2,3,4): consume the remaining three rows
                        for _skip in range(3):
                            next(data, None)
                    else:
                        cache.add(href)
                        titlexpath = e.xpath(".//a//text()")
                        title = str(titlexpath[0]) if titlexpath and isinstance(titlexpath, list) else ""
                elif i == 2:
                    bodyxpath = e.xpath(".//td[@class='result-snippet']//text()")
                    body = (
                        "".join(str(x) for x in bodyxpath).strip()
                        if bodyxpath and isinstance(bodyxpath, list)
                        else ""
                    )
                    if href:
                        results.append(
                            {
                                "title": _normalize(title),
                                "href": _normalize_url(href),
                                "body": _normalize(body),
                            }
                        )
                        if max_results and len(results) >= max_results:
                            return results

        # The "next page" form exposes the offset for the following request.
        next_page_s = tree.xpath("//form[./input[contains(@value, 'ext')]]/input[@name='s']/@value")
        if not next_page_s or not max_results:
            return results
        elif isinstance(next_page_s, list):
            payload["s"] = str(next_page_s[0])

    return results
481
+
482
async def aimages(
    self,
    keywords: str,
    region: str = "wt-wt",
    safesearch: str = "moderate",
    timelimit: Optional[str] = None,
    size: Optional[str] = None,
    color: Optional[str] = None,
    type_image: Optional[str] = None,
    layout: Optional[str] = None,
    license_image: Optional[str] = None,
    max_results: Optional[int] = None,
) -> List[Dict[str, str]]:
    """webscout async images search. Query params: https://duckduckgo.com/params.

    Args:
        keywords: keywords for query.
        region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt".
        safesearch: on, moderate, off. Defaults to "moderate".
        timelimit: Day, Week, Month, Year. Defaults to None.
        size: Small, Medium, Large, Wallpaper. Defaults to None.
        color: color, Monochrome, Red, Orange, Yellow, Green, Blue,
            Purple, Pink, Brown, Black, Gray, Teal, White. Defaults to None.
        type_image: photo, clipart, gif, transparent, line.
            Defaults to None.
        layout: Square, Tall, Wide. Defaults to None.
        license_image: any (All Creative Commons), Public (PublicDomain),
            Share (Free to Share and Use), ShareCommercially (Free to Share and Use Commercially),
            Modify (Free to Modify, Share, and Use), ModifyCommercially (Free to Modify, Share, and
            Use Commercially). Defaults to None.
        max_results: max number of results. If None, returns results only from the first response. Defaults to None.

    Returns:
        List of dictionaries with images search results.

    Raises:
        WebscoutE: Base exception for webscout errors.
        RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
        TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
    """
    # Delegates to a synchronous `images` implementation on a parent class,
    # run through an executor so the event loop is not blocked.
    # NOTE(review): `self._loop` and `self._executor` are not assigned in this class's
    # `__init__`, and `AsyncWEBS` declares no base class, so `super().images` has
    # nothing to resolve to. As written this call appears to fail with AttributeError —
    # confirm the intended synchronous implementation and restore the wiring.
    result = await self._loop.run_in_executor(
        self._executor,
        super().images,
        keywords,
        region,
        safesearch,
        timelimit,
        size,
        color,
        type_image,
        layout,
        license_image,
        max_results,
    )
    return result
537
+
538
async def avideos(
    self,
    keywords: str,
    region: str = "wt-wt",
    safesearch: str = "moderate",
    timelimit: Optional[str] = None,
    resolution: Optional[str] = None,
    duration: Optional[str] = None,
    license_videos: Optional[str] = None,
    max_results: Optional[int] = None,
) -> List[Dict[str, str]]:
    """webscout async videos search. Query params: https://duckduckgo.com/params.

    Args:
        keywords: keywords for query.
        region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt".
        safesearch: on, moderate, off. Defaults to "moderate".
        timelimit: d, w, m. Defaults to None.
        resolution: high, standart. Defaults to None.
        duration: short, medium, long. Defaults to None.
        license_videos: creativeCommon, youtube. Defaults to None.
        max_results: max number of results. If None, returns results only from the first response. Defaults to None.

    Returns:
        List of dictionaries with videos search results.

    Raises:
        WebscoutE: Base exception for webscout errors.
        RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
        TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
    """
    # Delegates to a synchronous `videos` implementation on a parent class,
    # run through an executor so the event loop is not blocked.
    # NOTE(review): `self._loop` and `self._executor` are not assigned in this class's
    # `__init__`, and `AsyncWEBS` declares no base class, so `super().videos` has
    # nothing to resolve to. As written this call appears to fail with AttributeError —
    # confirm the intended synchronous implementation and restore the wiring.
    result = await self._loop.run_in_executor(
        self._executor,
        super().videos,
        keywords,
        region,
        safesearch,
        timelimit,
        resolution,
        duration,
        license_videos,
        max_results,
    )
    return result
582
+
583
async def anews(
    self,
    keywords: str,
    region: str = "wt-wt",
    safesearch: str = "moderate",
    timelimit: Optional[str] = None,
    max_results: Optional[int] = None,
) -> List[Dict[str, str]]:
    """webscout async news search. Query params: https://duckduckgo.com/params.

    Args:
        keywords: keywords for query.
        region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt".
        safesearch: on, moderate, off. Defaults to "moderate".
        timelimit: d, w, m. Defaults to None.
        max_results: max number of results. If None, returns results only from the first response. Defaults to None.

    Returns:
        List of dictionaries with news search results.

    Raises:
        WebscoutE: Base exception for webscout errors.
        RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
        TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
    """
    # Delegates to a synchronous `news` implementation on a parent class,
    # run through an executor so the event loop is not blocked.
    # NOTE(review): `self._loop` and `self._executor` are not assigned in this class's
    # `__init__`, and `AsyncWEBS` declares no base class, so `super().news` has
    # nothing to resolve to. As written this call appears to fail with AttributeError —
    # confirm the intended synchronous implementation and restore the wiring.
    result = await self._loop.run_in_executor(
        self._executor,
        super().news,
        keywords,
        region,
        safesearch,
        timelimit,
        max_results,
    )
    return result
618
+
619
async def aanswers(
    self,
    keywords: str,
) -> List[Dict[str, str]]:
    """webscout async instant answers. Query params: https://duckduckgo.com/params.

    Args:
        keywords: keywords for query.

    Returns:
        List of dictionaries with instant answers results.

    Raises:
        WebscoutE: Base exception for webscout errors.
        RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
        TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
    """
    # Delegates to a synchronous `answers` implementation on a parent class,
    # run through an executor so the event loop is not blocked.
    # NOTE(review): `self._loop` and `self._executor` are not assigned in this class's
    # `__init__`, and `AsyncWEBS` declares no base class, so `super().answers` has
    # nothing to resolve to. As written this call appears to fail with AttributeError —
    # confirm the intended synchronous implementation and restore the wiring.
    result = await self._loop.run_in_executor(
        self._executor,
        super().answers,
        keywords,
    )
    return result
642
+
643
async def asuggestions(
    self,
    keywords: str,
    region: str = "wt-wt",
) -> List[Dict[str, str]]:
    """webscout async suggestions. Query params: https://duckduckgo.com/params.

    Args:
        keywords: keywords for query.
        region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt".

    Returns:
        List of dictionaries with suggestions results.

    Raises:
        WebscoutE: Base exception for webscout errors.
        RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
        TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
    """
    # Delegates to a synchronous `suggestions` implementation on a parent class,
    # run through an executor so the event loop is not blocked.
    # NOTE(review): `self._loop` and `self._executor` are not assigned in this class's
    # `__init__`, and `AsyncWEBS` declares no base class, so `super().suggestions` has
    # nothing to resolve to. As written this call appears to fail with AttributeError —
    # confirm the intended synchronous implementation and restore the wiring.
    result = await self._loop.run_in_executor(
        self._executor,
        super().suggestions,
        keywords,
        region,
    )
    return result
669
+
670
async def amaps(
    self,
    keywords: str,
    place: Optional[str] = None,
    street: Optional[str] = None,
    city: Optional[str] = None,
    county: Optional[str] = None,
    state: Optional[str] = None,
    country: Optional[str] = None,
    postalcode: Optional[str] = None,
    latitude: Optional[str] = None,
    longitude: Optional[str] = None,
    radius: int = 0,
    max_results: Optional[int] = None,
) -> List[Dict[str, str]]:
    """webscout async maps search. Query params: https://duckduckgo.com/params.

    Args:
        keywords: keywords for query
        place: if set, the other parameters are not used. Defaults to None.
        street: house number/street. Defaults to None.
        city: city of search. Defaults to None.
        county: county of search. Defaults to None.
        state: state of search. Defaults to None.
        country: country of search. Defaults to None.
        postalcode: postalcode of search. Defaults to None.
        latitude: geographic coordinate (north-south position). Defaults to None.
        longitude: geographic coordinate (east-west position); if latitude and
            longitude are set, the other parameters are not used. Defaults to None.
        radius: expand the search square by the distance in kilometers. Defaults to 0.
        max_results: max number of results. If None, returns results only from the first response. Defaults to None.

    Returns:
        List of dictionaries with maps search results, or None if there was an error.

    Raises:
        WebscoutE: Base exception for webscout errors.
        RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
        TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
    """
    # Delegates to a synchronous `maps` implementation on a parent class,
    # run through an executor so the event loop is not blocked.
    # NOTE(review): `self._loop` and `self._executor` are not assigned in this class's
    # `__init__`, and `AsyncWEBS` declares no base class, so `super().maps` has
    # nothing to resolve to. As written this call appears to fail with AttributeError —
    # confirm the intended synchronous implementation and restore the wiring.
    result = await self._loop.run_in_executor(
        self._executor,
        super().maps,
        keywords,
        place,
        street,
        city,
        county,
        state,
        country,
        postalcode,
        latitude,
        longitude,
        radius,
        max_results,
    )
    return result
727
+
728
async def atranslate(
    self,
    keywords: Union[List[str], str],
    from_: Optional[str] = None,
    to: str = "en",
) -> List[Dict[str, str]]:
    """webscout async translate.

    Args:
        keywords: string or list of strings to translate.
        from_: translate from (defaults automatically). Defaults to None.
        to: what language to translate. Defaults to "en".

    Returns:
        List of dictionaries with translated keywords.

    Raises:
        WebscoutE: Base exception for webscout errors.
        RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
        TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
    """
    # Delegates to a synchronous `translate` implementation on a parent class,
    # run through an executor so the event loop is not blocked.
    # NOTE(review): `self._loop` and `self._executor` are not assigned in this class's
    # `__init__`, and `AsyncWEBS` declares no base class, so `super().translate` has
    # nothing to resolve to. As written this call appears to fail with AttributeError —
    # confirm the intended synchronous implementation and restore the wiring.
    result = await self._loop.run_in_executor(
        self._executor,
        super().translate,
        keywords,
        from_,
        to,
    )
    return result
757
+
758
async def aweather(
    self,
    location: str,
    language: str = "en",
) -> dict[str, Any]:
    """Async version of weather information retrieval from DuckDuckGo.

    Args:
        location: Location to get weather for.
        language: Language code (e.g. 'en', 'es'). Defaults to "en".

    Returns:
        Dictionary containing weather data with the following structure:
        {
            "location": str,
            "current": {
                "condition": str,
                "temperature_c": float,
                "feels_like_c": float,
                "humidity": float,
                "wind_speed_ms": float,
                "wind_direction": float,
                "visibility_m": float
            },
            "daily_forecast": List[{
                "date": str,
                "condition": str,
                "max_temp_c": float,
                "min_temp_c": float,
                "sunrise": str,
                "sunset": str
            }],
            "hourly_forecast": List[{
                "time": str,
                "condition": str,
                "temperature_c": float,
                "feels_like_c": float,
                "humidity": float,
                "wind_speed_ms": float,
                "wind_direction": float,
                "visibility_m": float
            }]
        }

    Raises:
        WebscoutE: Base exception for webscout errors.
        RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
        TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
    """
    # Delegates to a synchronous `weather` implementation on a parent class,
    # run through an executor so the event loop is not blocked.
    # NOTE(review): `self._loop` and `self._executor` are not assigned in this class's
    # `__init__`, and `AsyncWEBS` declares no base class, so `super().weather` has
    # nothing to resolve to. As written this call appears to fail with AttributeError —
    # confirm the intended synchronous implementation and restore the wiring.
    result = await self._loop.run_in_executor(
        self._executor,
        super().weather,
        location,
        language,
    )
    return result