webscout 5.9__py3-none-any.whl → 6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of webscout might be problematic. Click here for more details.
- webscout/Agents/Onlinesearcher.py +22 -10
- webscout/Agents/functioncall.py +2 -2
- webscout/Bard.py +21 -21
- webscout/Local/__init__.py +6 -7
- webscout/Local/formats.py +404 -194
- webscout/Local/model.py +1074 -477
- webscout/Local/samplers.py +108 -144
- webscout/Local/thread.py +251 -410
- webscout/Local/ui.py +401 -0
- webscout/Local/utils.py +308 -131
- webscout/Provider/Amigo.py +5 -3
- webscout/Provider/ChatHub.py +209 -0
- webscout/Provider/Chatify.py +3 -3
- webscout/Provider/Cloudflare.py +3 -3
- webscout/Provider/DARKAI.py +1 -1
- webscout/Provider/Deepinfra.py +95 -389
- webscout/Provider/Deepseek.py +4 -6
- webscout/Provider/DiscordRocks.py +3 -3
- webscout/Provider/Free2GPT.py +3 -3
- webscout/Provider/NinjaChat.py +200 -0
- webscout/Provider/OLLAMA.py +4 -4
- webscout/Provider/RUBIKSAI.py +3 -3
- webscout/Provider/TTI/Nexra.py +3 -3
- webscout/Provider/TTI/__init__.py +2 -1
- webscout/Provider/TTI/aiforce.py +2 -2
- webscout/Provider/TTI/imgninza.py +136 -0
- webscout/Provider/Youchat.py +4 -5
- webscout/Provider/__init__.py +13 -6
- webscout/Provider/ai4chat.py +3 -2
- webscout/Provider/aimathgpt.py +193 -0
- webscout/Provider/bagoodex.py +145 -0
- webscout/Provider/bixin.py +3 -3
- webscout/Provider/cleeai.py +3 -3
- webscout/Provider/elmo.py +2 -5
- webscout/Provider/felo_search.py +1 -1
- webscout/Provider/gaurish.py +168 -0
- webscout/Provider/geminiprorealtime.py +160 -0
- webscout/Provider/julius.py +10 -40
- webscout/Provider/llamatutor.py +2 -2
- webscout/Provider/prefind.py +3 -3
- webscout/Provider/promptrefine.py +3 -3
- webscout/Provider/turboseek.py +1 -1
- webscout/Provider/twitterclone.py +25 -41
- webscout/Provider/upstage.py +3 -3
- webscout/Provider/x0gpt.py +6 -6
- webscout/exceptions.py +5 -1
- webscout/utils.py +3 -0
- webscout/version.py +1 -1
- webscout/webscout_search.py +154 -123
- {webscout-5.9.dist-info → webscout-6.1.dist-info}/METADATA +132 -157
- {webscout-5.9.dist-info → webscout-6.1.dist-info}/RECORD +55 -49
- {webscout-5.9.dist-info → webscout-6.1.dist-info}/WHEEL +1 -1
- webscout/Local/rawdog.py +0 -946
- webscout/Provider/Poe.py +0 -208
- {webscout-5.9.dist-info → webscout-6.1.dist-info}/LICENSE.md +0 -0
- {webscout-5.9.dist-info → webscout-6.1.dist-info}/entry_points.txt +0 -0
- {webscout-5.9.dist-info → webscout-6.1.dist-info}/top_level.txt +0 -0
webscout/webscout_search.py
CHANGED
|
@@ -1,4 +1,6 @@
|
|
|
1
|
-
import
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
# import logging
|
|
2
4
|
import warnings
|
|
3
5
|
from concurrent.futures import ThreadPoolExecutor
|
|
4
6
|
from datetime import datetime, timezone
|
|
@@ -8,11 +10,9 @@ from itertools import cycle, islice
|
|
|
8
10
|
from random import choice
|
|
9
11
|
from threading import Event
|
|
10
12
|
from types import TracebackType
|
|
11
|
-
from typing import
|
|
12
|
-
|
|
13
|
-
import pyreqwest_impersonate as pri
|
|
13
|
+
from typing import cast
|
|
14
14
|
|
|
15
|
-
|
|
15
|
+
import primp # type: ignore
|
|
16
16
|
|
|
17
17
|
try:
|
|
18
18
|
from lxml.etree import _Element
|
|
@@ -23,10 +23,18 @@ try:
|
|
|
23
23
|
except ImportError:
|
|
24
24
|
LXML_AVAILABLE = False
|
|
25
25
|
|
|
26
|
-
from .exceptions import
|
|
27
|
-
|
|
26
|
+
from .exceptions import ConversationLimitException, WebscoutE, RatelimitE, TimeoutE
|
|
27
|
+
from .utils import (
|
|
28
|
+
_calculate_distance,
|
|
29
|
+
_expand_proxy_tb_alias,
|
|
30
|
+
_extract_vqd,
|
|
31
|
+
_normalize,
|
|
32
|
+
_normalize_url,
|
|
33
|
+
_text_extract_json,
|
|
34
|
+
json_loads,
|
|
35
|
+
)
|
|
28
36
|
|
|
29
|
-
logger = logging.getLogger("webscout.WEBS")
|
|
37
|
+
# logger = logging.getLogger("webscout.WEBS")
|
|
30
38
|
|
|
31
39
|
|
|
32
40
|
class WEBS:
|
|
@@ -34,21 +42,22 @@ class WEBS:
|
|
|
34
42
|
|
|
35
43
|
_executor: ThreadPoolExecutor = ThreadPoolExecutor()
|
|
36
44
|
_impersonates = (
|
|
37
|
-
"
|
|
38
|
-
"
|
|
39
|
-
|
|
40
|
-
"
|
|
41
|
-
"
|
|
42
|
-
|
|
43
|
-
"
|
|
45
|
+
"chrome_100", "chrome_101", "chrome_104", "chrome_105", "chrome_106", "chrome_107", "chrome_108",
|
|
46
|
+
"chrome_109", "chrome_114", "chrome_116", "chrome_117", "chrome_118", "chrome_119", "chrome_120",
|
|
47
|
+
#"chrome_123", "chrome_124", "chrome_126",
|
|
48
|
+
"chrome_127", "chrome_128", "chrome_129",
|
|
49
|
+
"safari_ios_16.5", "safari_ios_17.2", "safari_ios_17.4.1", "safari_15.3", "safari_15.5", "safari_15.6.1",
|
|
50
|
+
"safari_16", "safari_16.5", "safari_17.0", "safari_17.2.1", "safari_17.4.1", "safari_17.5", "safari_18",
|
|
51
|
+
"safari_ipad_18",
|
|
52
|
+
"edge_101", "edge_122", "edge_127",
|
|
44
53
|
) # fmt: skip
|
|
45
54
|
|
|
46
55
|
def __init__(
|
|
47
56
|
self,
|
|
48
|
-
headers:
|
|
49
|
-
proxy:
|
|
50
|
-
proxies:
|
|
51
|
-
timeout:
|
|
57
|
+
headers: dict[str, str] | None = None,
|
|
58
|
+
proxy: str | None = None,
|
|
59
|
+
proxies: dict[str, str] | str | None = None, # deprecated
|
|
60
|
+
timeout: int | None = 10,
|
|
52
61
|
) -> None:
|
|
53
62
|
"""Initialize the WEBS object.
|
|
54
63
|
|
|
@@ -58,14 +67,14 @@ class WEBS:
|
|
|
58
67
|
example: "http://user:pass@example.com:3128". Defaults to None.
|
|
59
68
|
timeout (int, optional): Timeout value for the HTTP client. Defaults to 10.
|
|
60
69
|
"""
|
|
61
|
-
self.proxy:
|
|
70
|
+
self.proxy: str | None = _expand_proxy_tb_alias(proxy) # replaces "tb" with "socks5://127.0.0.1:9150"
|
|
62
71
|
assert self.proxy is None or isinstance(self.proxy, str), "proxy must be a str"
|
|
63
72
|
if not proxy and proxies:
|
|
64
73
|
warnings.warn("'proxies' is deprecated, use 'proxy' instead.", stacklevel=1)
|
|
65
74
|
self.proxy = proxies.get("http") or proxies.get("https") if isinstance(proxies, dict) else proxies
|
|
66
75
|
self.headers = headers if headers else {}
|
|
67
76
|
self.headers["Referer"] = "https://duckduckgo.com/"
|
|
68
|
-
self.client =
|
|
77
|
+
self.client = primp.Client(
|
|
69
78
|
headers=self.headers,
|
|
70
79
|
proxy=self.proxy,
|
|
71
80
|
timeout=timeout,
|
|
@@ -76,22 +85,23 @@ class WEBS:
|
|
|
76
85
|
verify=False,
|
|
77
86
|
)
|
|
78
87
|
self._exception_event = Event()
|
|
79
|
-
self._chat_messages:
|
|
88
|
+
self._chat_messages: list[dict[str, str]] = []
|
|
89
|
+
self._chat_tokens_count = 0
|
|
80
90
|
self._chat_vqd: str = ""
|
|
81
91
|
|
|
82
|
-
def __enter__(self) ->
|
|
92
|
+
def __enter__(self) -> WEBS:
|
|
83
93
|
return self
|
|
84
94
|
|
|
85
95
|
def __exit__(
|
|
86
96
|
self,
|
|
87
|
-
exc_type:
|
|
88
|
-
exc_val:
|
|
89
|
-
exc_tb:
|
|
97
|
+
exc_type: type[BaseException] | None = None,
|
|
98
|
+
exc_val: BaseException | None = None,
|
|
99
|
+
exc_tb: TracebackType | None = None,
|
|
90
100
|
) -> None:
|
|
91
101
|
pass
|
|
92
102
|
|
|
93
103
|
@cached_property
|
|
94
|
-
def parser(self) ->
|
|
104
|
+
def parser(self) -> LHTMLParser:
|
|
95
105
|
"""Get HTML parser."""
|
|
96
106
|
return LHTMLParser(remove_blank_text=True, remove_comments=True, remove_pis=True, collect_ids=False)
|
|
97
107
|
|
|
@@ -99,9 +109,9 @@ class WEBS:
|
|
|
99
109
|
self,
|
|
100
110
|
method: str,
|
|
101
111
|
url: str,
|
|
102
|
-
params:
|
|
103
|
-
content:
|
|
104
|
-
data:
|
|
112
|
+
params: dict[str, str] | None = None,
|
|
113
|
+
content: bytes | None = None,
|
|
114
|
+
data: dict[str, str] | bytes | None = None,
|
|
105
115
|
) -> bytes:
|
|
106
116
|
if self._exception_event.is_set():
|
|
107
117
|
raise WebscoutE("Exception occurred in previous call.")
|
|
@@ -112,7 +122,7 @@ class WEBS:
|
|
|
112
122
|
if "time" in str(ex).lower():
|
|
113
123
|
raise TimeoutE(f"{url} {type(ex).__name__}: {ex}") from ex
|
|
114
124
|
raise WebscoutE(f"{url} {type(ex).__name__}: {ex}") from ex
|
|
115
|
-
logger.debug(f"_get_url() {resp.url} {resp.status_code} {len(resp.content)}")
|
|
125
|
+
# logger.debug(f"_get_url() {resp.url} {resp.status_code} {len(resp.content)}")
|
|
116
126
|
if resp.status_code == 200:
|
|
117
127
|
return cast(bytes, resp.content)
|
|
118
128
|
self._exception_event.set()
|
|
@@ -122,27 +132,33 @@ class WEBS:
|
|
|
122
132
|
|
|
123
133
|
def _get_vqd(self, keywords: str) -> str:
|
|
124
134
|
"""Get vqd value for a search query."""
|
|
125
|
-
resp_content = self._get_url("
|
|
135
|
+
resp_content = self._get_url("GET", "https://duckduckgo.com", params={"q": keywords})
|
|
126
136
|
return _extract_vqd(resp_content, keywords)
|
|
127
137
|
|
|
128
|
-
def chat(self, keywords: str, model: str = "gpt-
|
|
129
|
-
"""Initiates a chat session with
|
|
138
|
+
def chat(self, keywords: str, model: str = "gpt-4o-mini", timeout: int = 30) -> str:
|
|
139
|
+
"""Initiates a chat session with webscout AI.
|
|
130
140
|
|
|
131
141
|
Args:
|
|
132
142
|
keywords (str): The initial message or question to send to the AI.
|
|
133
|
-
model (str): The model to use: "gpt-
|
|
134
|
-
Defaults to "gpt-
|
|
143
|
+
model (str): The model to use: "gpt-4o-mini", "claude-3-haiku", "llama-3.1-70b", "mixtral-8x7b".
|
|
144
|
+
Defaults to "gpt-4o-mini".
|
|
135
145
|
timeout (int): Timeout value for the HTTP client. Defaults to 30.
|
|
136
146
|
|
|
137
147
|
Returns:
|
|
138
148
|
str: The response from the AI.
|
|
139
149
|
"""
|
|
150
|
+
models_deprecated = {
|
|
151
|
+
"gpt-3.5": "gpt-4o-mini",
|
|
152
|
+
"llama-3-70b": "llama-3.1-70b",
|
|
153
|
+
}
|
|
154
|
+
if model in models_deprecated:
|
|
155
|
+
# logger.info(f"{model=} is deprecated, using {models_deprecated[model]}")
|
|
156
|
+
model = models_deprecated[model]
|
|
140
157
|
models = {
|
|
141
158
|
"claude-3-haiku": "claude-3-haiku-20240307",
|
|
142
|
-
"gpt-3.5": "gpt-3.5-turbo-0125",
|
|
143
|
-
"llama-3-70b": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
|
|
144
|
-
"mixtral-8x7b": "mistralai/Mixtral-8x7B-Instruct-v0.1",
|
|
145
159
|
"gpt-4o-mini": "gpt-4o-mini",
|
|
160
|
+
"llama-3.1-70b": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
|
|
161
|
+
"mixtral-8x7b": "mistralai/Mixtral-8x7B-Instruct-v0.1",
|
|
146
162
|
}
|
|
147
163
|
# vqd
|
|
148
164
|
if not self._chat_vqd:
|
|
@@ -150,6 +166,7 @@ class WEBS:
|
|
|
150
166
|
self._chat_vqd = resp.headers.get("x-vqd-4", "")
|
|
151
167
|
|
|
152
168
|
self._chat_messages.append({"role": "user", "content": keywords})
|
|
169
|
+
self._chat_tokens_count += len(keywords) // 4 if len(keywords) >= 4 else 1 # approximate number of tokens
|
|
153
170
|
|
|
154
171
|
json_data = {
|
|
155
172
|
"model": models[model],
|
|
@@ -163,10 +180,26 @@ class WEBS:
|
|
|
163
180
|
)
|
|
164
181
|
self._chat_vqd = resp.headers.get("x-vqd-4", "")
|
|
165
182
|
|
|
166
|
-
data = ",".join(x for line in resp.text.rstrip("[DONE]\n").split("data:") if (x := line.strip()))
|
|
167
|
-
|
|
183
|
+
data = ",".join(x for line in resp.text.rstrip("[DONE]LIMT_CVRSA\n").split("data:") if (x := line.strip()))
|
|
184
|
+
data = json_loads("[" + data + "]")
|
|
185
|
+
|
|
186
|
+
results = []
|
|
187
|
+
for x in data:
|
|
188
|
+
if x.get("action") == "error":
|
|
189
|
+
err_message = x.get("type", "")
|
|
190
|
+
if x.get("status") == 429:
|
|
191
|
+
raise (
|
|
192
|
+
ConversationLimitException(err_message)
|
|
193
|
+
if err_message == "ERR_CONVERSATION_LIMIT"
|
|
194
|
+
else RatelimitE(err_message)
|
|
195
|
+
)
|
|
196
|
+
raise WebscoutE(err_message)
|
|
197
|
+
elif message := x.get("message"):
|
|
198
|
+
results.append(message)
|
|
199
|
+
result = "".join(results)
|
|
168
200
|
|
|
169
201
|
self._chat_messages.append({"role": "assistant", "content": result})
|
|
202
|
+
self._chat_tokens_count += len(results)
|
|
170
203
|
return result
|
|
171
204
|
|
|
172
205
|
def text(
|
|
@@ -174,11 +207,11 @@ class WEBS:
|
|
|
174
207
|
keywords: str,
|
|
175
208
|
region: str = "wt-wt",
|
|
176
209
|
safesearch: str = "moderate",
|
|
177
|
-
timelimit:
|
|
210
|
+
timelimit: str | None = None,
|
|
178
211
|
backend: str = "api",
|
|
179
|
-
max_results:
|
|
180
|
-
) ->
|
|
181
|
-
"""
|
|
212
|
+
max_results: int | None = None,
|
|
213
|
+
) -> list[dict[str, str]]:
|
|
214
|
+
"""webscout text search. Query params: https://duckduckgo.com/params.
|
|
182
215
|
|
|
183
216
|
Args:
|
|
184
217
|
keywords: keywords for query.
|
|
@@ -216,10 +249,10 @@ class WEBS:
|
|
|
216
249
|
keywords: str,
|
|
217
250
|
region: str = "wt-wt",
|
|
218
251
|
safesearch: str = "moderate",
|
|
219
|
-
timelimit:
|
|
220
|
-
max_results:
|
|
221
|
-
) ->
|
|
222
|
-
"""
|
|
252
|
+
timelimit: str | None = None,
|
|
253
|
+
max_results: int | None = None,
|
|
254
|
+
) -> list[dict[str, str]]:
|
|
255
|
+
"""webscout text search. Query params: https://duckduckgo.com/params.
|
|
223
256
|
|
|
224
257
|
Args:
|
|
225
258
|
keywords: keywords for query.
|
|
@@ -262,9 +295,9 @@ class WEBS:
|
|
|
262
295
|
payload["df"] = timelimit
|
|
263
296
|
|
|
264
297
|
cache = set()
|
|
265
|
-
results:
|
|
298
|
+
results: list[dict[str, str]] = []
|
|
266
299
|
|
|
267
|
-
def _text_api_page(s: int) ->
|
|
300
|
+
def _text_api_page(s: int) -> list[dict[str, str]]:
|
|
268
301
|
payload["s"] = f"{s}"
|
|
269
302
|
resp_content = self._get_url("GET", "https://links.duckduckgo.com/d.js", params=payload)
|
|
270
303
|
page_data = _text_extract_json(resp_content, keywords)
|
|
@@ -299,10 +332,10 @@ class WEBS:
|
|
|
299
332
|
self,
|
|
300
333
|
keywords: str,
|
|
301
334
|
region: str = "wt-wt",
|
|
302
|
-
timelimit:
|
|
303
|
-
max_results:
|
|
304
|
-
) ->
|
|
305
|
-
"""
|
|
335
|
+
timelimit: str | None = None,
|
|
336
|
+
max_results: int | None = None,
|
|
337
|
+
) -> list[dict[str, str]]:
|
|
338
|
+
"""webscout text search. Query params: https://duckduckgo.com/params.
|
|
306
339
|
|
|
307
340
|
Args:
|
|
308
341
|
keywords: keywords for query.
|
|
@@ -336,9 +369,9 @@ class WEBS:
|
|
|
336
369
|
payload["vqd"] = vqd
|
|
337
370
|
|
|
338
371
|
cache = set()
|
|
339
|
-
results:
|
|
372
|
+
results: list[dict[str, str]] = []
|
|
340
373
|
|
|
341
|
-
def _text_html_page(s: int) ->
|
|
374
|
+
def _text_html_page(s: int) -> list[dict[str, str]]:
|
|
342
375
|
payload["s"] = f"{s}"
|
|
343
376
|
resp_content = self._get_url("POST", "https://html.duckduckgo.com/html", data=payload)
|
|
344
377
|
if b"No results." in resp_content:
|
|
@@ -347,12 +380,12 @@ class WEBS:
|
|
|
347
380
|
page_results = []
|
|
348
381
|
tree = document_fromstring(resp_content, self.parser)
|
|
349
382
|
elements = tree.xpath("//div[h2]")
|
|
350
|
-
if not isinstance(elements,
|
|
383
|
+
if not isinstance(elements, list):
|
|
351
384
|
return []
|
|
352
385
|
for e in elements:
|
|
353
386
|
if isinstance(e, _Element):
|
|
354
387
|
hrefxpath = e.xpath("./a/@href")
|
|
355
|
-
href = str(hrefxpath[0]) if hrefxpath and isinstance(hrefxpath,
|
|
388
|
+
href = str(hrefxpath[0]) if hrefxpath and isinstance(hrefxpath, list) else None
|
|
356
389
|
if (
|
|
357
390
|
href
|
|
358
391
|
and href not in cache
|
|
@@ -362,9 +395,9 @@ class WEBS:
|
|
|
362
395
|
):
|
|
363
396
|
cache.add(href)
|
|
364
397
|
titlexpath = e.xpath("./h2/a/text()")
|
|
365
|
-
title = str(titlexpath[0]) if titlexpath and isinstance(titlexpath,
|
|
398
|
+
title = str(titlexpath[0]) if titlexpath and isinstance(titlexpath, list) else ""
|
|
366
399
|
bodyxpath = e.xpath("./a//text()")
|
|
367
|
-
body = "".join(str(x) for x in bodyxpath) if bodyxpath and isinstance(bodyxpath,
|
|
400
|
+
body = "".join(str(x) for x in bodyxpath) if bodyxpath and isinstance(bodyxpath, list) else ""
|
|
368
401
|
result = {
|
|
369
402
|
"title": _normalize(title),
|
|
370
403
|
"href": _normalize_url(href),
|
|
@@ -389,10 +422,10 @@ class WEBS:
|
|
|
389
422
|
self,
|
|
390
423
|
keywords: str,
|
|
391
424
|
region: str = "wt-wt",
|
|
392
|
-
timelimit:
|
|
393
|
-
max_results:
|
|
394
|
-
) ->
|
|
395
|
-
"""
|
|
425
|
+
timelimit: str | None = None,
|
|
426
|
+
max_results: int | None = None,
|
|
427
|
+
) -> list[dict[str, str]]:
|
|
428
|
+
"""webscout text search. Query params: https://duckduckgo.com/params.
|
|
396
429
|
|
|
397
430
|
Args:
|
|
398
431
|
keywords: keywords for query.
|
|
@@ -423,9 +456,9 @@ class WEBS:
|
|
|
423
456
|
payload["df"] = timelimit
|
|
424
457
|
|
|
425
458
|
cache = set()
|
|
426
|
-
results:
|
|
459
|
+
results: list[dict[str, str]] = []
|
|
427
460
|
|
|
428
|
-
def _text_lite_page(s: int) ->
|
|
461
|
+
def _text_lite_page(s: int) -> list[dict[str, str]]:
|
|
429
462
|
payload["s"] = f"{s}"
|
|
430
463
|
resp_content = self._get_url("POST", "https://lite.duckduckgo.com/lite/", data=payload)
|
|
431
464
|
if b"No more results." in resp_content:
|
|
@@ -434,7 +467,7 @@ class WEBS:
|
|
|
434
467
|
page_results = []
|
|
435
468
|
tree = document_fromstring(resp_content, self.parser)
|
|
436
469
|
elements = tree.xpath("//table[last()]//tr")
|
|
437
|
-
if not isinstance(elements,
|
|
470
|
+
if not isinstance(elements, list):
|
|
438
471
|
return []
|
|
439
472
|
|
|
440
473
|
data = zip(cycle(range(1, 5)), elements)
|
|
@@ -442,7 +475,7 @@ class WEBS:
|
|
|
442
475
|
if isinstance(e, _Element):
|
|
443
476
|
if i == 1:
|
|
444
477
|
hrefxpath = e.xpath(".//a//@href")
|
|
445
|
-
href = str(hrefxpath[0]) if hrefxpath and isinstance(hrefxpath,
|
|
478
|
+
href = str(hrefxpath[0]) if hrefxpath and isinstance(hrefxpath, list) else None
|
|
446
479
|
if (
|
|
447
480
|
href is None
|
|
448
481
|
or href in cache
|
|
@@ -454,12 +487,12 @@ class WEBS:
|
|
|
454
487
|
else:
|
|
455
488
|
cache.add(href)
|
|
456
489
|
titlexpath = e.xpath(".//a//text()")
|
|
457
|
-
title = str(titlexpath[0]) if titlexpath and isinstance(titlexpath,
|
|
490
|
+
title = str(titlexpath[0]) if titlexpath and isinstance(titlexpath, list) else ""
|
|
458
491
|
elif i == 2:
|
|
459
492
|
bodyxpath = e.xpath(".//td[@class='result-snippet']//text()")
|
|
460
493
|
body = (
|
|
461
494
|
"".join(str(x) for x in bodyxpath).strip()
|
|
462
|
-
if bodyxpath and isinstance(bodyxpath,
|
|
495
|
+
if bodyxpath and isinstance(bodyxpath, list)
|
|
463
496
|
else ""
|
|
464
497
|
)
|
|
465
498
|
if href:
|
|
@@ -488,15 +521,15 @@ class WEBS:
|
|
|
488
521
|
keywords: str,
|
|
489
522
|
region: str = "wt-wt",
|
|
490
523
|
safesearch: str = "moderate",
|
|
491
|
-
timelimit:
|
|
492
|
-
size:
|
|
493
|
-
color:
|
|
494
|
-
type_image:
|
|
495
|
-
layout:
|
|
496
|
-
license_image:
|
|
497
|
-
max_results:
|
|
498
|
-
) ->
|
|
499
|
-
"""
|
|
524
|
+
timelimit: str | None = None,
|
|
525
|
+
size: str | None = None,
|
|
526
|
+
color: str | None = None,
|
|
527
|
+
type_image: str | None = None,
|
|
528
|
+
layout: str | None = None,
|
|
529
|
+
license_image: str | None = None,
|
|
530
|
+
max_results: int | None = None,
|
|
531
|
+
) -> list[dict[str, str]]:
|
|
532
|
+
"""webscout images search. Query params: https://duckduckgo.com/params.
|
|
500
533
|
|
|
501
534
|
Args:
|
|
502
535
|
keywords: keywords for query.
|
|
@@ -544,9 +577,9 @@ class WEBS:
|
|
|
544
577
|
}
|
|
545
578
|
|
|
546
579
|
cache = set()
|
|
547
|
-
results:
|
|
580
|
+
results: list[dict[str, str]] = []
|
|
548
581
|
|
|
549
|
-
def _images_page(s: int) ->
|
|
582
|
+
def _images_page(s: int) -> list[dict[str, str]]:
|
|
550
583
|
payload["s"] = f"{s}"
|
|
551
584
|
resp_content = self._get_url("GET", "https://duckduckgo.com/i.js", params=payload)
|
|
552
585
|
resp_json = json_loads(resp_content)
|
|
@@ -586,13 +619,13 @@ class WEBS:
|
|
|
586
619
|
keywords: str,
|
|
587
620
|
region: str = "wt-wt",
|
|
588
621
|
safesearch: str = "moderate",
|
|
589
|
-
timelimit:
|
|
590
|
-
resolution:
|
|
591
|
-
duration:
|
|
592
|
-
license_videos:
|
|
593
|
-
max_results:
|
|
594
|
-
) ->
|
|
595
|
-
"""
|
|
622
|
+
timelimit: str | None = None,
|
|
623
|
+
resolution: str | None = None,
|
|
624
|
+
duration: str | None = None,
|
|
625
|
+
license_videos: str | None = None,
|
|
626
|
+
max_results: int | None = None,
|
|
627
|
+
) -> list[dict[str, str]]:
|
|
628
|
+
"""webscout videos search. Query params: https://duckduckgo.com/params.
|
|
596
629
|
|
|
597
630
|
Args:
|
|
598
631
|
keywords: keywords for query.
|
|
@@ -631,9 +664,9 @@ class WEBS:
|
|
|
631
664
|
}
|
|
632
665
|
|
|
633
666
|
cache = set()
|
|
634
|
-
results:
|
|
667
|
+
results: list[dict[str, str]] = []
|
|
635
668
|
|
|
636
|
-
def _videos_page(s: int) ->
|
|
669
|
+
def _videos_page(s: int) -> list[dict[str, str]]:
|
|
637
670
|
payload["s"] = f"{s}"
|
|
638
671
|
resp_content = self._get_url("GET", "https://duckduckgo.com/v.js", params=payload)
|
|
639
672
|
resp_json = json_loads(resp_content)
|
|
@@ -663,10 +696,10 @@ class WEBS:
|
|
|
663
696
|
keywords: str,
|
|
664
697
|
region: str = "wt-wt",
|
|
665
698
|
safesearch: str = "moderate",
|
|
666
|
-
timelimit:
|
|
667
|
-
max_results:
|
|
668
|
-
) ->
|
|
669
|
-
"""
|
|
699
|
+
timelimit: str | None = None,
|
|
700
|
+
max_results: int | None = None,
|
|
701
|
+
) -> list[dict[str, str]]:
|
|
702
|
+
"""webscout news search. Query params: https://duckduckgo.com/params.
|
|
670
703
|
|
|
671
704
|
Args:
|
|
672
705
|
keywords: keywords for query.
|
|
@@ -700,9 +733,9 @@ class WEBS:
|
|
|
700
733
|
payload["df"] = timelimit
|
|
701
734
|
|
|
702
735
|
cache = set()
|
|
703
|
-
results:
|
|
736
|
+
results: list[dict[str, str]] = []
|
|
704
737
|
|
|
705
|
-
def _news_page(s: int) ->
|
|
738
|
+
def _news_page(s: int) -> list[dict[str, str]]:
|
|
706
739
|
payload["s"] = f"{s}"
|
|
707
740
|
resp_content = self._get_url("GET", "https://duckduckgo.com/news.js", params=payload)
|
|
708
741
|
resp_json = json_loads(resp_content)
|
|
@@ -735,8 +768,8 @@ class WEBS:
|
|
|
735
768
|
|
|
736
769
|
return list(islice(results, max_results))
|
|
737
770
|
|
|
738
|
-
def answers(self, keywords: str) ->
|
|
739
|
-
"""
|
|
771
|
+
def answers(self, keywords: str) -> list[dict[str, str]]:
|
|
772
|
+
"""webscout instant answers. Query params: https://duckduckgo.com/params.
|
|
740
773
|
|
|
741
774
|
Args:
|
|
742
775
|
keywords: keywords for query.
|
|
@@ -806,8 +839,8 @@ class WEBS:
|
|
|
806
839
|
|
|
807
840
|
return results
|
|
808
841
|
|
|
809
|
-
def suggestions(self, keywords: str, region: str = "wt-wt") ->
|
|
810
|
-
"""
|
|
842
|
+
def suggestions(self, keywords: str, region: str = "wt-wt") -> list[dict[str, str]]:
|
|
843
|
+
"""webscout suggestions. Query params: https://duckduckgo.com/params.
|
|
811
844
|
|
|
812
845
|
Args:
|
|
813
846
|
keywords: keywords for query.
|
|
@@ -834,19 +867,19 @@ class WEBS:
|
|
|
834
867
|
def maps(
|
|
835
868
|
self,
|
|
836
869
|
keywords: str,
|
|
837
|
-
place:
|
|
838
|
-
street:
|
|
839
|
-
city:
|
|
840
|
-
county:
|
|
841
|
-
state:
|
|
842
|
-
country:
|
|
843
|
-
postalcode:
|
|
844
|
-
latitude:
|
|
845
|
-
longitude:
|
|
870
|
+
place: str | None = None,
|
|
871
|
+
street: str | None = None,
|
|
872
|
+
city: str | None = None,
|
|
873
|
+
county: str | None = None,
|
|
874
|
+
state: str | None = None,
|
|
875
|
+
country: str | None = None,
|
|
876
|
+
postalcode: str | None = None,
|
|
877
|
+
latitude: str | None = None,
|
|
878
|
+
longitude: str | None = None,
|
|
846
879
|
radius: int = 0,
|
|
847
|
-
max_results:
|
|
848
|
-
) ->
|
|
849
|
-
"""
|
|
880
|
+
max_results: int | None = None,
|
|
881
|
+
) -> list[dict[str, str]]:
|
|
882
|
+
"""webscout maps search. Query params: https://duckduckgo.com/params.
|
|
850
883
|
|
|
851
884
|
Args:
|
|
852
885
|
keywords: keywords for query
|
|
@@ -926,14 +959,14 @@ class WEBS:
|
|
|
926
959
|
lat_b -= Decimal(radius) * Decimal(0.008983)
|
|
927
960
|
lon_l -= Decimal(radius) * Decimal(0.008983)
|
|
928
961
|
lon_r += Decimal(radius) * Decimal(0.008983)
|
|
929
|
-
logger.debug(f"bbox coordinates\n{lat_t} {lon_l}\n{lat_b} {lon_r}")
|
|
962
|
+
# logger.debug(f"bbox coordinates\n{lat_t} {lon_l}\n{lat_b} {lon_r}")
|
|
930
963
|
|
|
931
964
|
cache = set()
|
|
932
|
-
results:
|
|
965
|
+
results: list[dict[str, str]] = []
|
|
933
966
|
|
|
934
967
|
def _maps_page(
|
|
935
|
-
bbox:
|
|
936
|
-
) ->
|
|
968
|
+
bbox: tuple[Decimal, Decimal, Decimal, Decimal],
|
|
969
|
+
) -> list[dict[str, str]] | None:
|
|
937
970
|
if max_results and len(results) >= max_results:
|
|
938
971
|
return None
|
|
939
972
|
lat_t, lon_l, lat_b, lon_r = bbox
|
|
@@ -1020,10 +1053,8 @@ class WEBS:
|
|
|
1020
1053
|
|
|
1021
1054
|
return list(islice(results, max_results))
|
|
1022
1055
|
|
|
1023
|
-
def translate(
|
|
1024
|
-
|
|
1025
|
-
) -> List[Dict[str, str]]:
|
|
1026
|
-
"""DuckDuckGo translate.
|
|
1056
|
+
def translate(self, keywords: list[str] | str, from_: str | None = None, to: str = "en") -> list[dict[str, str]]:
|
|
1057
|
+
"""webscout translate.
|
|
1027
1058
|
|
|
1028
1059
|
Args:
|
|
1029
1060
|
keywords: string or list of strings to translate.
|
|
@@ -1050,14 +1081,14 @@ class WEBS:
|
|
|
1050
1081
|
if from_:
|
|
1051
1082
|
payload["from"] = from_
|
|
1052
1083
|
|
|
1053
|
-
def _translate_keyword(keyword: str) ->
|
|
1084
|
+
def _translate_keyword(keyword: str) -> dict[str, str]:
|
|
1054
1085
|
resp_content = self._get_url(
|
|
1055
1086
|
"POST",
|
|
1056
1087
|
"https://duckduckgo.com/translation.js",
|
|
1057
1088
|
params=payload,
|
|
1058
1089
|
content=keyword.encode(),
|
|
1059
1090
|
)
|
|
1060
|
-
page_data:
|
|
1091
|
+
page_data: dict[str, str] = json_loads(resp_content)
|
|
1061
1092
|
page_data["original"] = keyword
|
|
1062
1093
|
return page_data
|
|
1063
1094
|
|