webscout 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of webscout might be problematic; see the advisory on the package registry page for more details.

@@ -0,0 +1,861 @@
1
+ import asyncio
2
+ import json
3
+ import logging
4
+ import sys
5
+ from collections import deque
6
+ from datetime import datetime, timezone
7
+ from decimal import Decimal
8
+ from itertools import cycle
9
+ from typing import AsyncGenerator, Deque, Dict, Optional, Set, Tuple
10
+
11
+ from curl_cffi import requests
12
+ from docstring_inheritance import GoogleDocstringInheritanceMeta
13
+ from lxml import html
14
+
15
+ from .exceptions import DuckDuckGoSearchException
16
+ from .models import MapsResult
17
+ from .utils import _extract_vqd, _is_500_in_url, _normalize, _normalize_url, _text_extract_json
18
+
19
logger = logging.getLogger("duckduckgo_search.AsyncDDGS")

# curl_cffi does not support the default proactor event loop on Windows
# (raises NotImplementedError, see https://curl-cffi.readthedocs.io/en/latest/faq/),
# so fall back to the selector-based event loop policy there.
if sys.platform.lower().startswith("win"):
    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
23
+
24
+
25
+ class AsyncDDGS(metaclass=GoogleDocstringInheritanceMeta):
26
+ """webscout_search async class to get search results from duckduckgo.com."""
27
+
28
+ def __init__(self, headers=None, proxies=None, timeout=10) -> None:
29
+ """Initialize the AsyncDDGS object.
30
+
31
+ Args:
32
+ headers (dict, optional): Dictionary of headers for the HTTP client. Defaults to None.
33
+ proxies (Union[dict, str], optional): Proxies for the HTTP client (can be dict or str). Defaults to None.
34
+ timeout (int, optional): Timeout value for the HTTP client. Defaults to 10.
35
+ """
36
+ self.proxies = proxies if proxies and isinstance(proxies, dict) else {"http": proxies, "https": proxies}
37
+ self._asession = requests.AsyncSession(
38
+ headers=headers, proxies=self.proxies, timeout=timeout, impersonate="chrome"
39
+ )
40
+ self._asession.headers["Referer"] = "https://duckduckgo.com/"
41
+
42
+ async def __aenter__(self) -> "AsyncDDGS":
43
+ """A context manager method that is called when entering the 'with' statement."""
44
+ return self
45
+
46
+ async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
47
+ """Closes the session."""
48
+ return self._asession.close()
49
+
50
+ async def _aget_url(self, method: str, url: str, **kwargs) -> Optional[requests.Response]:
51
+ try:
52
+ resp = await self._asession.request(method, url, stream=True, **kwargs)
53
+ resp.raise_for_status()
54
+ resp_content = await resp.acontent()
55
+ logger.debug(f"_aget_url() {url} {resp.status_code} {resp.http_version} {resp.elapsed} {len(resp_content)}")
56
+ if _is_500_in_url(str(resp.url)) or resp.status_code == 202:
57
+ raise DuckDuckGoSearchException("Ratelimit")
58
+ if resp.status_code == 200:
59
+ return resp_content
60
+ except Exception as ex:
61
+ raise DuckDuckGoSearchException(f"_aget_url() {url} {type(ex).__name__}: {ex}") from ex
62
+
63
+ async def _aget_vqd(self, keywords: str) -> Optional[str]:
64
+ """Get vqd value for a search query."""
65
+ resp_content = await self._aget_url("POST", "https://duckduckgo.com", data={"q": keywords})
66
+ if resp_content:
67
+ return _extract_vqd(resp_content, keywords)
68
+
69
+ async def text(
70
+ self,
71
+ keywords: str,
72
+ region: str = "wt-wt",
73
+ safesearch: str = "moderate",
74
+ timelimit: Optional[str] = None,
75
+ backend: str = "api",
76
+ max_results: Optional[int] = None,
77
+ ) -> AsyncGenerator[Dict[str, Optional[str]], None]:
78
+ """DuckDuckGo text search generator. Query params: https://duckduckgo.com/params.
79
+
80
+ Args:
81
+ keywords: keywords for query.
82
+ region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt".
83
+ safesearch: on, moderate, off. Defaults to "moderate".
84
+ timelimit: d, w, m, y. Defaults to None.
85
+ backend: api, html, lite. Defaults to api.
86
+ api - collect data from https://duckduckgo.com,
87
+ html - collect data from https://html.duckduckgo.com,
88
+ lite - collect data from https://lite.duckduckgo.com.
89
+ max_results: max number of results. If None, returns results only from the first response. Defaults to None.
90
+
91
+ Yields:
92
+ dict with search results.
93
+
94
+ """
95
+ if backend == "api":
96
+ results = self._text_api(keywords, region, safesearch, timelimit, max_results)
97
+ elif backend == "html":
98
+ results = self._text_html(keywords, region, safesearch, timelimit, max_results)
99
+ elif backend == "lite":
100
+ results = self._text_lite(keywords, region, timelimit, max_results)
101
+
102
+ async for result in results:
103
+ yield result
104
+
105
+ async def _text_api(
106
+ self,
107
+ keywords: str,
108
+ region: str = "wt-wt",
109
+ safesearch: str = "moderate",
110
+ timelimit: Optional[str] = None,
111
+ max_results: Optional[int] = None,
112
+ ) -> AsyncGenerator[Dict[str, Optional[str]], None]:
113
+ """webscout text search generator. Query params: https://duckduckgo.com/params.
114
+
115
+ Args:
116
+ keywords: keywords for query.
117
+ region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt".
118
+ safesearch: on, moderate, off. Defaults to "moderate".
119
+ timelimit: d, w, m, y. Defaults to None.
120
+ max_results: max number of results. If None, returns results only from the first response. Defaults to None.
121
+
122
+ Yields:
123
+ dict with search results.
124
+
125
+ """
126
+ assert keywords, "keywords is mandatory"
127
+
128
+ vqd = await self._aget_vqd(keywords)
129
+
130
+ payload = {
131
+ "q": keywords,
132
+ "kl": region,
133
+ "l": region,
134
+ "bing_market": region,
135
+ "s": "0",
136
+ "df": timelimit,
137
+ "vqd": vqd,
138
+ # "o": "json",
139
+ "sp": "0",
140
+ }
141
+ safesearch = safesearch.lower()
142
+ if safesearch == "moderate":
143
+ payload["ex"] = "-1"
144
+ elif safesearch == "off":
145
+ payload["ex"] = "-2"
146
+ elif safesearch == "on": # strict
147
+ payload["p"] = "1"
148
+
149
+ cache = set()
150
+ for _ in range(11):
151
+ resp_content = await self._aget_url("GET", "https://links.duckduckgo.com/d.js", params=payload)
152
+ if resp_content is None:
153
+ return
154
+
155
+ page_data = _text_extract_json(resp_content, keywords)
156
+ if page_data is None:
157
+ return
158
+
159
+ result_exists, next_page_url = False, None
160
+ for row in page_data:
161
+ href = row.get("u", None)
162
+ if href and href not in cache and href != f"http://www.google.com/search?q={keywords}":
163
+ cache.add(href)
164
+ body = _normalize(row["a"])
165
+ if body:
166
+ result_exists = True
167
+ yield {
168
+ "title": _normalize(row["t"]),
169
+ "href": _normalize_url(href),
170
+ "body": body,
171
+ }
172
+ if max_results and len(cache) >= max_results:
173
+ return
174
+ else:
175
+ next_page_url = row.get("n", None)
176
+ if max_results is None or result_exists is False or next_page_url is None:
177
+ return
178
+ payload["s"] = next_page_url.split("s=")[1].split("&")[0]
179
+
180
+ async def _text_html(
181
+ self,
182
+ keywords: str,
183
+ region: str = "wt-wt",
184
+ safesearch: str = "moderate",
185
+ timelimit: Optional[str] = None,
186
+ max_results: Optional[int] = None,
187
+ ) -> AsyncGenerator[Dict[str, Optional[str]], None]:
188
+ """webscout text search generator. Query params: https://duckduckgo.com/params.
189
+
190
+ Args:
191
+ keywords: keywords for query.
192
+ region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt".
193
+ safesearch: on, moderate, off. Defaults to "moderate".
194
+ timelimit: d, w, m, y. Defaults to None.
195
+ max_results: max number of results. If None, returns results only from the first response. Defaults to None.
196
+
197
+ Yields:
198
+ dict with search results.
199
+
200
+ """
201
+ assert keywords, "keywords is mandatory"
202
+
203
+ self._asession.headers["Referer"] = "https://html.duckduckgo.com/"
204
+ safesearch_base = {"on": 1, "moderate": -1, "off": -2}
205
+ payload = {
206
+ "q": keywords,
207
+ "s": "0",
208
+ "kl": region,
209
+ "p": safesearch_base[safesearch.lower()],
210
+ "df": timelimit,
211
+ }
212
+ cache: Set[str] = set()
213
+ for _ in range(11):
214
+ resp_content = await self._aget_url("POST", "https://html.duckduckgo.com/html", data=payload)
215
+ if resp_content is None:
216
+ return
217
+
218
+ tree = html.fromstring(resp_content)
219
+ if tree.xpath('//div[@class="no-results"]/text()'):
220
+ return
221
+
222
+ result_exists = False
223
+ for e in tree.xpath('//div[contains(@class, "results_links")]'):
224
+ href = e.xpath('.//a[contains(@class, "result__a")]/@href')
225
+ href = href[0] if href else None
226
+ if (
227
+ href
228
+ and href not in cache
229
+ and href != f"http://www.google.com/search?q={keywords}"
230
+ and not href.startswith("https://duckduckgo.com/y.js?ad_domain")
231
+ ):
232
+ cache.add(href)
233
+ title = e.xpath('.//a[contains(@class, "result__a")]/text()')
234
+ body = e.xpath('.//a[contains(@class, "result__snippet")]//text()')
235
+ result_exists = True
236
+ yield {
237
+ "title": _normalize(title[0]) if title else None,
238
+ "href": _normalize_url(href),
239
+ "body": _normalize("".join(body)) if body else None,
240
+ }
241
+ if max_results and len(cache) >= max_results:
242
+ return
243
+ if max_results is None or result_exists is False:
244
+ return
245
+ next_page = tree.xpath('.//div[@class="nav-link"]')
246
+ next_page = next_page[-1] if next_page else None
247
+ if next_page is None:
248
+ return
249
+
250
+ names = next_page.xpath('.//input[@type="hidden"]/@name')
251
+ values = next_page.xpath('.//input[@type="hidden"]/@value')
252
+ payload = {n: v for n, v in zip(names, values)}
253
+
254
+ async def _text_lite(
255
+ self,
256
+ keywords: str,
257
+ region: str = "wt-wt",
258
+ timelimit: Optional[str] = None,
259
+ max_results: Optional[int] = None,
260
+ ) -> AsyncGenerator[Dict[str, Optional[str]], None]:
261
+ """webscout text search generator. Query params: https://duckduckgo.com/params.
262
+
263
+ Args:
264
+ keywords: keywords for query.
265
+ region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt".
266
+ timelimit: d, w, m, y. Defaults to None.
267
+ max_results: max number of results. If None, returns results only from the first response. Defaults to None.
268
+
269
+ Yields:
270
+ dict with search results.
271
+
272
+ """
273
+ assert keywords, "keywords is mandatory"
274
+
275
+ self._asession.headers["Referer"] = "https://lite.duckduckgo.com/"
276
+ payload = {
277
+ "q": keywords,
278
+ "s": "0",
279
+ "o": "json",
280
+ "api": "d.js",
281
+ "kl": region,
282
+ "df": timelimit,
283
+ }
284
+ cache: Set[str] = set()
285
+ for _ in range(11):
286
+ resp_content = await self._aget_url("POST", "https://lite.duckduckgo.com/lite/", data=payload)
287
+ if resp_content is None:
288
+ return
289
+
290
+ if b"No more results." in resp_content:
291
+ return
292
+
293
+ tree = html.fromstring(resp_content)
294
+
295
+ result_exists = False
296
+ data = zip(cycle(range(1, 5)), tree.xpath("//table[last()]//tr"))
297
+ for i, e in data:
298
+ if i == 1:
299
+ href = e.xpath(".//a//@href")
300
+ href = href[0] if href else None
301
+ if (
302
+ href is None
303
+ or href in cache
304
+ or href == f"http://www.google.com/search?q={keywords}"
305
+ or href.startswith("https://duckduckgo.com/y.js?ad_domain")
306
+ ):
307
+ [next(data, None) for _ in range(3)] # skip block(i=1,2,3,4)
308
+ else:
309
+ cache.add(href)
310
+ title = e.xpath(".//a//text()")[0]
311
+ elif i == 2:
312
+ body = e.xpath(".//td[@class='result-snippet']//text()")
313
+ body = "".join(body).strip()
314
+ elif i == 3:
315
+ result_exists = True
316
+ yield {
317
+ "title": _normalize(title),
318
+ "href": _normalize_url(href),
319
+ "body": _normalize(body),
320
+ }
321
+ if max_results and len(cache) >= max_results:
322
+ return
323
+ if max_results is None or result_exists is False:
324
+ return
325
+ next_page_s = tree.xpath("//form[./input[contains(@value, 'ext')]]/input[@name='s']/@value")
326
+ if not next_page_s:
327
+ return
328
+ payload["s"] = next_page_s[0]
329
+ payload["vqd"] = _extract_vqd(resp_content, keywords)
330
+
331
+ async def images(
332
+ self,
333
+ keywords: str,
334
+ region: str = "wt-wt",
335
+ safesearch: str = "moderate",
336
+ timelimit: Optional[str] = None,
337
+ size: Optional[str] = None,
338
+ color: Optional[str] = None,
339
+ type_image: Optional[str] = None,
340
+ layout: Optional[str] = None,
341
+ license_image: Optional[str] = None,
342
+ max_results: Optional[int] = None,
343
+ ) -> AsyncGenerator[Dict[str, Optional[str]], None]:
344
+ """webscout images search. Query params: https://duckduckgo.com/params.
345
+
346
+ Args:
347
+ keywords: keywords for query.
348
+ region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt".
349
+ safesearch: on, moderate, off. Defaults to "moderate".
350
+ timelimit: Day, Week, Month, Year. Defaults to None.
351
+ size: Small, Medium, Large, Wallpaper. Defaults to None.
352
+ color: color, Monochrome, Red, Orange, Yellow, Green, Blue,
353
+ Purple, Pink, Brown, Black, Gray, Teal, White. Defaults to None.
354
+ type_image: photo, clipart, gif, transparent, line.
355
+ Defaults to None.
356
+ layout: Square, Tall, Wide. Defaults to None.
357
+ license_image: any (All Creative Commons), Public (PublicDomain),
358
+ Share (Free to Share and Use), ShareCommercially (Free to Share and Use Commercially),
359
+ Modify (Free to Modify, Share, and Use), ModifyCommercially (Free to Modify, Share, and
360
+ Use Commercially). Defaults to None.
361
+ max_results: max number of results. If None, returns results only from the first response. Defaults to None.
362
+
363
+ Yields:
364
+ dict with image search results.
365
+
366
+ """
367
+ assert keywords, "keywords is mandatory"
368
+
369
+ vqd = await self._aget_vqd(keywords)
370
+
371
+ safesearch_base = {"on": 1, "moderate": 1, "off": -1}
372
+ timelimit = f"time:{timelimit}" if timelimit else ""
373
+ size = f"size:{size}" if size else ""
374
+ color = f"color:{color}" if color else ""
375
+ type_image = f"type:{type_image}" if type_image else ""
376
+ layout = f"layout:{layout}" if layout else ""
377
+ license_image = f"license:{license_image}" if license_image else ""
378
+ payload = {
379
+ "l": region,
380
+ "o": "json",
381
+ "q": keywords,
382
+ "vqd": vqd,
383
+ "f": f"{timelimit},{size},{color},{type_image},{layout},{license_image}",
384
+ "p": safesearch_base[safesearch.lower()],
385
+ }
386
+
387
+ cache = set()
388
+ for _ in range(10):
389
+ resp_content = await self._aget_url("GET", "https://duckduckgo.com/i.js", params=payload)
390
+ if resp_content is None:
391
+ return
392
+ try:
393
+ resp_json = json.loads(resp_content)
394
+ except Exception:
395
+ return
396
+ page_data = resp_json.get("results", None)
397
+ if page_data is None:
398
+ return
399
+
400
+ result_exists = False
401
+ for row in page_data:
402
+ image_url = row.get("image", None)
403
+ if image_url and image_url not in cache:
404
+ cache.add(image_url)
405
+ result_exists = True
406
+ yield {
407
+ "title": row["title"],
408
+ "image": _normalize_url(image_url),
409
+ "thumbnail": _normalize_url(row["thumbnail"]),
410
+ "url": _normalize_url(row["url"]),
411
+ "height": row["height"],
412
+ "width": row["width"],
413
+ "source": row["source"],
414
+ }
415
+ if max_results and len(cache) >= max_results:
416
+ return
417
+ if max_results is None or result_exists is False:
418
+ return
419
+ next = resp_json.get("next", None)
420
+ if next is None:
421
+ return
422
+ payload["s"] = next.split("s=")[-1].split("&")[0]
423
+
424
+ async def videos(
425
+ self,
426
+ keywords: str,
427
+ region: str = "wt-wt",
428
+ safesearch: str = "moderate",
429
+ timelimit: Optional[str] = None,
430
+ resolution: Optional[str] = None,
431
+ duration: Optional[str] = None,
432
+ license_videos: Optional[str] = None,
433
+ max_results: Optional[int] = None,
434
+ ) -> AsyncGenerator[Dict[str, Optional[str]], None]:
435
+ """webscout videos search. Query params: https://duckduckgo.com/params.
436
+
437
+ Args:
438
+ keywords: keywords for query.
439
+ region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt".
440
+ safesearch: on, moderate, off. Defaults to "moderate".
441
+ timelimit: d, w, m. Defaults to None.
442
+ resolution: high, standart. Defaults to None.
443
+ duration: short, medium, long. Defaults to None.
444
+ license_videos: creativeCommon, youtube. Defaults to None.
445
+ max_results: max number of results. If None, returns results only from the first response. Defaults to None.
446
+
447
+ Yields:
448
+ dict with videos search results
449
+
450
+ """
451
+ assert keywords, "keywords is mandatory"
452
+
453
+ vqd = await self._aget_vqd(keywords)
454
+
455
+ safesearch_base = {"on": 1, "moderate": -1, "off": -2}
456
+ timelimit = f"publishedAfter:{timelimit}" if timelimit else ""
457
+ resolution = f"videoDefinition:{resolution}" if resolution else ""
458
+ duration = f"videoDuration:{duration}" if duration else ""
459
+ license_videos = f"videoLicense:{license_videos}" if license_videos else ""
460
+ payload = {
461
+ "l": region,
462
+ "o": "json",
463
+ "s": 0,
464
+ "q": keywords,
465
+ "vqd": vqd,
466
+ "f": f"{timelimit},{resolution},{duration},{license_videos}",
467
+ "p": safesearch_base[safesearch.lower()],
468
+ }
469
+
470
+ cache = set()
471
+ for _ in range(10):
472
+ resp_content = await self._aget_url("GET", "https://duckduckgo.com/v.js", params=payload)
473
+ if resp_content is None:
474
+ return
475
+ try:
476
+ resp_json = json.loads(resp_content)
477
+ except Exception:
478
+ return
479
+ page_data = resp_json.get("results", None)
480
+ if page_data is None:
481
+ return
482
+
483
+ result_exists = False
484
+ for row in page_data:
485
+ if row["content"] not in cache:
486
+ cache.add(row["content"])
487
+ result_exists = True
488
+ yield row
489
+ if max_results and len(cache) >= max_results:
490
+ return
491
+ if max_results is None or result_exists is False:
492
+ return
493
+ next = resp_json.get("next", None)
494
+ if next is None:
495
+ return
496
+ payload["s"] = next.split("s=")[-1].split("&")[0]
497
+
498
+ async def news(
499
+ self,
500
+ keywords: str,
501
+ region: str = "wt-wt",
502
+ safesearch: str = "moderate",
503
+ timelimit: Optional[str] = None,
504
+ max_results: Optional[int] = None,
505
+ ) -> AsyncGenerator[Dict[str, Optional[str]], None]:
506
+ """webscout news search. Query params: https://duckduckgo.com/params.
507
+
508
+ Args:
509
+ keywords: keywords for query.
510
+ region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt".
511
+ safesearch: on, moderate, off. Defaults to "moderate".
512
+ timelimit: d, w, m. Defaults to None.
513
+ max_results: max number of results. If None, returns results only from the first response. Defaults to None.
514
+
515
+ Yields:
516
+ dict with news search results.
517
+
518
+ """
519
+ assert keywords, "keywords is mandatory"
520
+
521
+ vqd = await self._aget_vqd(keywords)
522
+
523
+ safesearch_base = {"on": 1, "moderate": -1, "off": -2}
524
+ payload = {
525
+ "l": region,
526
+ "o": "json",
527
+ "noamp": "1",
528
+ "q": keywords,
529
+ "vqd": vqd,
530
+ "p": safesearch_base[safesearch.lower()],
531
+ "df": timelimit,
532
+ "s": 0,
533
+ }
534
+
535
+ cache = set()
536
+ for _ in range(10):
537
+ resp_content = await self._aget_url("GET", "https://duckduckgo.com/news.js", params=payload)
538
+ if resp_content is None:
539
+ return
540
+ try:
541
+ resp_json = json.loads(resp_content)
542
+ except Exception:
543
+ return
544
+ page_data = resp_json.get("results", None)
545
+ if page_data is None:
546
+ return
547
+
548
+ result_exists = False
549
+ for row in page_data:
550
+ if row["url"] not in cache:
551
+ cache.add(row["url"])
552
+ image_url = row.get("image", None)
553
+ result_exists = True
554
+ yield {
555
+ "date": datetime.fromtimestamp(row["date"], timezone.utc).isoformat(),
556
+ "title": row["title"],
557
+ "body": _normalize(row["excerpt"]),
558
+ "url": _normalize_url(row["url"]),
559
+ "image": _normalize_url(image_url) if image_url else None,
560
+ "source": row["source"],
561
+ }
562
+ if max_results and len(cache) >= max_results:
563
+ return
564
+ if max_results is None or result_exists is False:
565
+ return
566
+ next = resp_json.get("next", None)
567
+ if next is None:
568
+ return
569
+ payload["s"] = next.split("s=")[-1].split("&")[0]
570
+
571
+ async def answers(self, keywords: str) -> AsyncGenerator[Dict[str, Optional[str]], None]:
572
+ """webscout instant answers. Query params: https://duckduckgo.com/params.
573
+
574
+ Args:
575
+ keywords: keywords for query.
576
+
577
+ Yields:
578
+ dict with instant answers results.
579
+
580
+ """
581
+ assert keywords, "keywords is mandatory"
582
+
583
+ payload = {
584
+ "q": f"what is {keywords}",
585
+ "format": "json",
586
+ }
587
+
588
+ resp_content = await self._aget_url("GET", "https://api.duckduckgo.com/", params=payload)
589
+ if resp_content is None:
590
+ yield None
591
+ try:
592
+ page_data = json.loads(resp_content)
593
+ except Exception:
594
+ page_data = None
595
+
596
+ if page_data:
597
+ answer = page_data.get("AbstractText", None)
598
+ url = page_data.get("AbstractURL", None)
599
+ if answer:
600
+ yield {
601
+ "icon": None,
602
+ "text": answer,
603
+ "topic": None,
604
+ "url": url,
605
+ }
606
+
607
+ # related:
608
+ payload = {
609
+ "q": f"{keywords}",
610
+ "format": "json",
611
+ }
612
+ resp_content = await self._aget_url("GET", "https://api.duckduckgo.com/", params=payload)
613
+ if resp_content is None:
614
+ yield None
615
+ try:
616
+ page_data = json.loads(resp_content).get("RelatedTopics", None)
617
+ except Exception:
618
+ page_data = None
619
+
620
+ if page_data:
621
+ for row in page_data:
622
+ topic = row.get("Name", None)
623
+ if not topic:
624
+ icon = row["Icon"].get("URL", None)
625
+ yield {
626
+ "icon": f"https://duckduckgo.com{icon}" if icon else None,
627
+ "text": row["Text"],
628
+ "topic": None,
629
+ "url": row["FirstURL"],
630
+ }
631
+ else:
632
+ for subrow in row["Topics"]:
633
+ icon = subrow["Icon"].get("URL", None)
634
+ yield {
635
+ "icon": f"https://duckduckgo.com{icon}" if icon else None,
636
+ "text": subrow["Text"],
637
+ "topic": topic,
638
+ "url": subrow["FirstURL"],
639
+ }
640
+
641
+ async def suggestions(self, keywords: str, region: str = "wt-wt") -> AsyncGenerator[Dict[str, Optional[str]], None]:
642
+ """webscout suggestions. Query params: https://duckduckgo.com/params.
643
+
644
+ Args:
645
+ keywords: keywords for query.
646
+ region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt".
647
+
648
+ Yields:
649
+ dict with suggestions results.
650
+ """
651
+ assert keywords, "keywords is mandatory"
652
+
653
+ payload = {
654
+ "q": keywords,
655
+ "kl": region,
656
+ }
657
+ resp_content = await self._aget_url("GET", "https://duckduckgo.com/ac", params=payload)
658
+ if resp_content is None:
659
+ yield None
660
+ try:
661
+ page_data = json.loads(resp_content)
662
+ for r in page_data:
663
+ yield r
664
+ except Exception:
665
+ pass
666
+
667
+ async def maps(
668
+ self,
669
+ keywords: str,
670
+ place: Optional[str] = None,
671
+ street: Optional[str] = None,
672
+ city: Optional[str] = None,
673
+ county: Optional[str] = None,
674
+ state: Optional[str] = None,
675
+ country: Optional[str] = None,
676
+ postalcode: Optional[str] = None,
677
+ latitude: Optional[str] = None,
678
+ longitude: Optional[str] = None,
679
+ radius: int = 0,
680
+ max_results: Optional[int] = None,
681
+ ) -> AsyncGenerator[Dict[str, Optional[str]], None]:
682
+ """webscout maps search. Query params: https://duckduckgo.com/params.
683
+
684
+ Args:
685
+ keywords: keywords for query
686
+ place: if set, the other parameters are not used. Defaults to None.
687
+ street: house number/street. Defaults to None.
688
+ city: city of search. Defaults to None.
689
+ county: county of search. Defaults to None.
690
+ state: state of search. Defaults to None.
691
+ country: country of search. Defaults to None.
692
+ postalcode: postalcode of search. Defaults to None.
693
+ latitude: geographic coordinate (north-south position). Defaults to None.
694
+ longitude: geographic coordinate (east-west position); if latitude and
695
+ longitude are set, the other parameters are not used. Defaults to None.
696
+ radius: expand the search square by the distance in kilometers. Defaults to 0.
697
+ max_results: max number of results. If None, returns results only from the first response. Defaults to None.
698
+
699
+ Yields:
700
+ dict with maps search results
701
+ """
702
+ assert keywords, "keywords is mandatory"
703
+
704
+ vqd = await self._aget_vqd(keywords)
705
+
706
+ # if longitude and latitude are specified, skip the request about bbox to the nominatim api
707
+ if latitude and longitude:
708
+ lat_t = Decimal(latitude.replace(",", "."))
709
+ lat_b = Decimal(latitude.replace(",", "."))
710
+ lon_l = Decimal(longitude.replace(",", "."))
711
+ lon_r = Decimal(longitude.replace(",", "."))
712
+ if radius == 0:
713
+ radius = 1
714
+ # otherwise request about bbox to nominatim api
715
+ else:
716
+ if place:
717
+ params: Dict[str, Optional[str]] = {
718
+ "q": place,
719
+ "polygon_geojson": "0",
720
+ "format": "jsonv2",
721
+ }
722
+ else:
723
+ params = {
724
+ "street": street,
725
+ "city": city,
726
+ "county": county,
727
+ "state": state,
728
+ "country": country,
729
+ "postalcode": postalcode,
730
+ "polygon_geojson": "0",
731
+ "format": "jsonv2",
732
+ }
733
+ try:
734
+ resp_content = await self._aget_url(
735
+ "GET",
736
+ "https://nominatim.openstreetmap.org/search.php",
737
+ params=params,
738
+ )
739
+ if resp_content is None:
740
+ yield None
741
+
742
+ coordinates = json.loads(resp_content)[0]["boundingbox"]
743
+ lat_t, lon_l = Decimal(coordinates[1]), Decimal(coordinates[2])
744
+ lat_b, lon_r = Decimal(coordinates[0]), Decimal(coordinates[3])
745
+ except Exception as ex:
746
+ logger.debug(f"ddg_maps() keywords={keywords} {type(ex).__name__} {ex}")
747
+ return
748
+
749
+ # if a radius is specified, expand the search square
750
+ lat_t += Decimal(radius) * Decimal(0.008983)
751
+ lat_b -= Decimal(radius) * Decimal(0.008983)
752
+ lon_l -= Decimal(radius) * Decimal(0.008983)
753
+ lon_r += Decimal(radius) * Decimal(0.008983)
754
+ logger.debug(f"bbox coordinates\n{lat_t} {lon_l}\n{lat_b} {lon_r}")
755
+
756
+ # сreate a queue of search squares (bboxes)
757
+ work_bboxes: Deque[Tuple[Decimal, Decimal, Decimal, Decimal]] = deque()
758
+ work_bboxes.append((lat_t, lon_l, lat_b, lon_r))
759
+
760
+ # bbox iterate
761
+ cache = set()
762
+ while work_bboxes:
763
+ lat_t, lon_l, lat_b, lon_r = work_bboxes.pop()
764
+ params = {
765
+ "q": keywords,
766
+ "vqd": vqd,
767
+ "tg": "maps_places",
768
+ "rt": "D",
769
+ "mkexp": "b",
770
+ "wiki_info": "1",
771
+ "is_requery": "1",
772
+ "bbox_tl": f"{lat_t},{lon_l}",
773
+ "bbox_br": f"{lat_b},{lon_r}",
774
+ "strict_bbox": "1",
775
+ }
776
+ resp_content = await self._aget_url("GET", "https://duckduckgo.com/local.js", params=params)
777
+ if resp_content is None:
778
+ return
779
+ try:
780
+ page_data = json.loads(resp_content).get("results", [])
781
+ except Exception:
782
+ return
783
+ if page_data is None:
784
+ return
785
+
786
+ for res in page_data:
787
+ result = MapsResult()
788
+ result.title = res["name"]
789
+ result.address = res["address"]
790
+ if f"{result.title} {result.address}" in cache:
791
+ continue
792
+ else:
793
+ cache.add(f"{result.title} {result.address}")
794
+ result.country_code = res["country_code"]
795
+ result.url = _normalize_url(res["website"])
796
+ result.phone = res["phone"]
797
+ result.latitude = res["coordinates"]["latitude"]
798
+ result.longitude = res["coordinates"]["longitude"]
799
+ result.source = _normalize_url(res["url"])
800
+ if res["embed"]:
801
+ result.image = res["embed"].get("image", "")
802
+ result.desc = res["embed"].get("description", "")
803
+ result.hours = res["hours"]
804
+ result.category = res["ddg_category"]
805
+ result.facebook = f"www.facebook.com/profile.php?id={x}" if (x := res["facebook_id"]) else None
806
+ result.instagram = f"https://www.instagram.com/{x}" if (x := res["instagram_id"]) else None
807
+ result.twitter = f"https://twitter.com/{x}" if (x := res["twitter_id"]) else None
808
+ yield result.__dict__
809
+ if max_results and len(cache) >= max_results:
810
+ return
811
+ if max_results is None:
812
+ return
813
+ # divide the square into 4 parts and add to the queue
814
+ if len(page_data) >= 15:
815
+ lat_middle = (lat_t + lat_b) / 2
816
+ lon_middle = (lon_l + lon_r) / 2
817
+ bbox1 = (lat_t, lon_l, lat_middle, lon_middle)
818
+ bbox2 = (lat_t, lon_middle, lat_middle, lon_r)
819
+ bbox3 = (lat_middle, lon_l, lat_b, lon_middle)
820
+ bbox4 = (lat_middle, lon_middle, lat_b, lon_r)
821
+ work_bboxes.extendleft([bbox1, bbox2, bbox3, bbox4])
822
+
823
+ async def translate(
824
+ self, keywords: str, from_: Optional[str] = None, to: str = "en"
825
+ ) -> Optional[Dict[str, Optional[str]]]:
826
+ """webscout translate.
827
+
828
+ Args:
829
+ keywords: string or a list of strings to translate
830
+ from_: translate from (defaults automatically). Defaults to None.
831
+ to: what language to translate. Defaults to "en".
832
+
833
+ Returns:
834
+ dict with translated keywords.
835
+ """
836
+ assert keywords, "keywords is mandatory"
837
+
838
+ vqd = await self._aget_vqd("translate")
839
+
840
+ payload = {
841
+ "vqd": vqd,
842
+ "query": "translate",
843
+ "to": to,
844
+ }
845
+ if from_:
846
+ payload["from"] = from_
847
+
848
+ resp_content = await self._aget_url(
849
+ "POST",
850
+ "https://duckduckgo.com/translation.js",
851
+ params=payload,
852
+ data=keywords.encode(),
853
+ )
854
+ if resp_content is None:
855
+ return None
856
+ try:
857
+ page_data = json.loads(resp_content)
858
+ page_data["original"] = keywords
859
+ except Exception:
860
+ page_data = None
861
+ return page_data