webscout 8.2.3__py3-none-any.whl → 8.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of webscout might be problematic. Click here for more details.
- inferno/lol.py +589 -0
- webscout/AIutel.py +226 -14
- webscout/Bard.py +579 -206
- webscout/DWEBS.py +78 -35
- webscout/Extra/tempmail/base.py +1 -1
- webscout/Provider/AISEARCH/hika_search.py +4 -0
- webscout/Provider/AllenAI.py +163 -126
- webscout/Provider/ChatGPTClone.py +96 -84
- webscout/Provider/Deepinfra.py +95 -67
- webscout/Provider/ElectronHub.py +55 -0
- webscout/Provider/GPTWeb.py +96 -46
- webscout/Provider/Groq.py +194 -91
- webscout/Provider/HeckAI.py +89 -47
- webscout/Provider/HuggingFaceChat.py +113 -106
- webscout/Provider/Hunyuan.py +94 -83
- webscout/Provider/Jadve.py +107 -75
- webscout/Provider/LambdaChat.py +106 -64
- webscout/Provider/Llama3.py +94 -39
- webscout/Provider/MCPCore.py +318 -0
- webscout/Provider/Marcus.py +85 -36
- webscout/Provider/Netwrck.py +76 -43
- webscout/Provider/OPENAI/__init__.py +4 -1
- webscout/Provider/OPENAI/ai4chat.py +286 -0
- webscout/Provider/OPENAI/chatgptclone.py +35 -14
- webscout/Provider/OPENAI/deepinfra.py +37 -0
- webscout/Provider/OPENAI/groq.py +354 -0
- webscout/Provider/OPENAI/heckai.py +6 -2
- webscout/Provider/OPENAI/mcpcore.py +376 -0
- webscout/Provider/OPENAI/multichat.py +368 -0
- webscout/Provider/OPENAI/netwrck.py +3 -1
- webscout/Provider/OpenGPT.py +48 -38
- webscout/Provider/PI.py +168 -92
- webscout/Provider/PizzaGPT.py +66 -36
- webscout/Provider/TeachAnything.py +85 -51
- webscout/Provider/TextPollinationsAI.py +109 -51
- webscout/Provider/TwoAI.py +109 -60
- webscout/Provider/Venice.py +93 -56
- webscout/Provider/VercelAI.py +2 -2
- webscout/Provider/WiseCat.py +65 -28
- webscout/Provider/Writecream.py +37 -11
- webscout/Provider/WritingMate.py +135 -63
- webscout/Provider/__init__.py +3 -21
- webscout/Provider/ai4chat.py +6 -7
- webscout/Provider/copilot.py +0 -3
- webscout/Provider/elmo.py +101 -58
- webscout/Provider/granite.py +91 -46
- webscout/Provider/hermes.py +87 -47
- webscout/Provider/koala.py +1 -1
- webscout/Provider/learnfastai.py +104 -50
- webscout/Provider/llama3mitril.py +86 -51
- webscout/Provider/llmchat.py +88 -46
- webscout/Provider/llmchatco.py +74 -49
- webscout/Provider/meta.py +41 -37
- webscout/Provider/multichat.py +54 -25
- webscout/Provider/scnet.py +93 -43
- webscout/Provider/searchchat.py +82 -75
- webscout/Provider/sonus.py +103 -51
- webscout/Provider/toolbaz.py +132 -77
- webscout/Provider/turboseek.py +92 -41
- webscout/Provider/tutorai.py +82 -64
- webscout/Provider/typefully.py +75 -33
- webscout/Provider/typegpt.py +96 -35
- webscout/Provider/uncovr.py +112 -62
- webscout/Provider/x0gpt.py +69 -26
- webscout/Provider/yep.py +79 -66
- webscout/conversation.py +35 -21
- webscout/exceptions.py +20 -0
- webscout/prompt_manager.py +56 -42
- webscout/version.py +1 -1
- webscout/webscout_search.py +65 -47
- webscout/webscout_search_async.py +81 -126
- webscout/yep_search.py +93 -43
- {webscout-8.2.3.dist-info → webscout-8.2.4.dist-info}/METADATA +22 -10
- {webscout-8.2.3.dist-info → webscout-8.2.4.dist-info}/RECORD +78 -81
- {webscout-8.2.3.dist-info → webscout-8.2.4.dist-info}/WHEEL +1 -1
- webscout/Provider/C4ai.py +0 -432
- webscout/Provider/ChatGPTES.py +0 -237
- webscout/Provider/DeepSeek.py +0 -196
- webscout/Provider/Llama.py +0 -200
- webscout/Provider/Phind.py +0 -535
- webscout/Provider/WebSim.py +0 -228
- webscout/Provider/labyrinth.py +0 -340
- webscout/Provider/lepton.py +0 -194
- webscout/Provider/llamatutor.py +0 -192
- {webscout-8.2.3.dist-info → webscout-8.2.4.dist-info}/entry_points.txt +0 -0
- {webscout-8.2.3.dist-info → webscout-8.2.4.dist-info/licenses}/LICENSE.md +0 -0
- {webscout-8.2.3.dist-info → webscout-8.2.4.dist-info}/top_level.txt +0 -0
|
@@ -11,7 +11,7 @@ from time import time
|
|
|
11
11
|
from types import TracebackType
|
|
12
12
|
from typing import Any, Dict, List, Optional, Type, Union, cast, AsyncIterator
|
|
13
13
|
|
|
14
|
-
import
|
|
14
|
+
import curl_cffi.requests
|
|
15
15
|
from lxml.etree import _Element
|
|
16
16
|
from lxml.html import HTMLParser as LHTMLParser
|
|
17
17
|
from lxml.html import document_fromstring
|
|
@@ -31,18 +31,14 @@ from .utils import (
|
|
|
31
31
|
class AsyncWEBS:
|
|
32
32
|
"""Asynchronous webscout class to get search results."""
|
|
33
33
|
|
|
34
|
+
# curl_cffi supports different browser versions than httpx
|
|
34
35
|
_impersonates = (
|
|
35
|
-
"
|
|
36
|
-
"
|
|
37
|
-
"
|
|
38
|
-
"
|
|
39
|
-
"
|
|
40
|
-
"
|
|
41
|
-
"safari_17.0", "safari_17.2.1", "safari_17.4.1", "safari_17.5",
|
|
42
|
-
"safari_18", "safari_18.2",
|
|
43
|
-
"safari_ipad_18",
|
|
44
|
-
"edge_101", "edge_122", "edge_127", "edge_131",
|
|
45
|
-
"firefox_109", "firefox_117", "firefox_128", "firefox_133", "firefox_135",
|
|
36
|
+
"chrome99", "chrome100", "chrome101", "chrome104", "chrome107", "chrome110",
|
|
37
|
+
"chrome116", "chrome119", "chrome120", "chrome123", "chrome124", "chrome131", "chrome133a",
|
|
38
|
+
"chrome99_android", "chrome131_android",
|
|
39
|
+
"safari15_3", "safari15_5", "safari17_0", "safari17_2_ios", "safari18_0", "safari18_0_ios",
|
|
40
|
+
"edge99", "edge101",
|
|
41
|
+
"firefox133", "firefox135",
|
|
46
42
|
)
|
|
47
43
|
_impersonates_os = ("android", "ios", "linux", "macos", "windows")
|
|
48
44
|
_chat_models = {
|
|
@@ -94,11 +90,14 @@ class AsyncWEBS:
|
|
|
94
90
|
self.headers = headers if headers else {}
|
|
95
91
|
self.headers.update(default_headers)
|
|
96
92
|
|
|
97
|
-
|
|
93
|
+
# Use curl_cffi AsyncSession instead of httpx.AsyncClient
|
|
94
|
+
impersonate_browser = choice(self._impersonates)
|
|
95
|
+
self.timeout = timeout
|
|
96
|
+
self.client = curl_cffi.requests.AsyncSession(
|
|
98
97
|
headers=self.headers,
|
|
99
|
-
proxies=self.proxy,
|
|
98
|
+
proxies={'http': self.proxy, 'https': self.proxy} if self.proxy else None,
|
|
100
99
|
timeout=timeout,
|
|
101
|
-
|
|
100
|
+
impersonate=impersonate_browser,
|
|
102
101
|
verify=verify,
|
|
103
102
|
)
|
|
104
103
|
self.sleep_timestamp = 0.0
|
|
@@ -147,17 +146,33 @@ class AsyncWEBS:
|
|
|
147
146
|
"""Make HTTP request with proper rate limiting."""
|
|
148
147
|
await self._sleep()
|
|
149
148
|
try:
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
149
|
+
# curl_cffi doesn't accept cookies=True in request methods
|
|
150
|
+
request_kwargs = {
|
|
151
|
+
"params": params,
|
|
152
|
+
"headers": headers,
|
|
153
|
+
"json": json,
|
|
154
|
+
"timeout": timeout or self.timeout,
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
# Add cookies if they're a dict, not a bool
|
|
158
|
+
if isinstance(cookies, dict):
|
|
159
|
+
request_kwargs["cookies"] = cookies
|
|
160
|
+
|
|
161
|
+
if method == "GET":
|
|
162
|
+
# curl_cffi uses data instead of content
|
|
163
|
+
if content:
|
|
164
|
+
request_kwargs["data"] = content
|
|
165
|
+
resp = await self.client.get(url, **request_kwargs)
|
|
166
|
+
elif method == "POST":
|
|
167
|
+
# handle both data and content
|
|
168
|
+
if data or content:
|
|
169
|
+
request_kwargs["data"] = data or content
|
|
170
|
+
resp = await self.client.post(url, **request_kwargs)
|
|
171
|
+
else:
|
|
172
|
+
# handle both data and content
|
|
173
|
+
if data or content:
|
|
174
|
+
request_kwargs["data"] = data or content
|
|
175
|
+
resp = await self.client.request(method, url, **request_kwargs)
|
|
161
176
|
except Exception as ex:
|
|
162
177
|
if "time" in str(ex).lower():
|
|
163
178
|
raise TimeoutE(f"{url} {type(ex).__name__}: {ex}") from ex
|
|
@@ -278,7 +293,8 @@ class AsyncWEBS:
|
|
|
278
293
|
self._chat_vqd_hash = resp.headers.get("x-vqd-hash-1", "")
|
|
279
294
|
chunks = []
|
|
280
295
|
|
|
281
|
-
|
|
296
|
+
# curl_cffi uses aiter_content instead of aiter_bytes
|
|
297
|
+
async for chunk in resp.aiter_content(chunk_size=1024):
|
|
282
298
|
lines = chunk.split(b"data:")
|
|
283
299
|
for line in lines:
|
|
284
300
|
if line := line.strip():
|
|
@@ -286,20 +302,24 @@ class AsyncWEBS:
|
|
|
286
302
|
break
|
|
287
303
|
if line == b"[DONE][LIMIT_CONVERSATION]":
|
|
288
304
|
raise ConversationLimitException("ERR_CONVERSATION_LIMIT")
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
if x
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
305
|
+
try:
|
|
306
|
+
x = json_loads(line)
|
|
307
|
+
if isinstance(x, dict):
|
|
308
|
+
if x.get("action") == "error":
|
|
309
|
+
err_message = x.get("type", "")
|
|
310
|
+
if x.get("status") == 429:
|
|
311
|
+
raise (
|
|
312
|
+
ConversationLimitException(err_message)
|
|
313
|
+
if err_message == "ERR_CONVERSATION_LIMIT"
|
|
314
|
+
else RatelimitE(err_message)
|
|
315
|
+
)
|
|
316
|
+
raise WebscoutE(err_message)
|
|
317
|
+
elif message := x.get("message"):
|
|
318
|
+
chunks.append(message)
|
|
319
|
+
yield message
|
|
320
|
+
except Exception:
|
|
321
|
+
# Skip invalid JSON data
|
|
322
|
+
continue
|
|
303
323
|
|
|
304
324
|
# If we get here, the request was successful
|
|
305
325
|
result = "".join(chunks)
|
|
@@ -421,7 +441,8 @@ class AsyncWEBS:
|
|
|
421
441
|
if b"No results." in resp_content:
|
|
422
442
|
return results
|
|
423
443
|
|
|
424
|
-
|
|
444
|
+
# curl_cffi returns bytes, not a file-like object
|
|
445
|
+
tree = document_fromstring(resp_content)
|
|
425
446
|
elements = tree.xpath("//div[h2]")
|
|
426
447
|
if not isinstance(elements, list):
|
|
427
448
|
return results
|
|
@@ -494,7 +515,8 @@ class AsyncWEBS:
|
|
|
494
515
|
if b"No more results." in resp_content:
|
|
495
516
|
return results
|
|
496
517
|
|
|
497
|
-
|
|
518
|
+
# curl_cffi returns bytes, not a file-like object
|
|
519
|
+
tree = document_fromstring(resp_content)
|
|
498
520
|
elements = tree.xpath("//table[last()]//tr")
|
|
499
521
|
if not isinstance(elements, list):
|
|
500
522
|
return results
|
|
@@ -583,21 +605,8 @@ class AsyncWEBS:
|
|
|
583
605
|
RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
|
|
584
606
|
TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
|
|
585
607
|
"""
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
super().images,
|
|
589
|
-
keywords,
|
|
590
|
-
region,
|
|
591
|
-
safesearch,
|
|
592
|
-
timelimit,
|
|
593
|
-
size,
|
|
594
|
-
color,
|
|
595
|
-
type_image,
|
|
596
|
-
layout,
|
|
597
|
-
license_image,
|
|
598
|
-
max_results,
|
|
599
|
-
)
|
|
600
|
-
return result
|
|
608
|
+
# These methods are not implemented in the async version yet
|
|
609
|
+
raise NotImplementedError("aimages method is not implemented yet")
|
|
601
610
|
|
|
602
611
|
async def avideos(
|
|
603
612
|
self,
|
|
@@ -630,19 +639,8 @@ class AsyncWEBS:
|
|
|
630
639
|
RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
|
|
631
640
|
TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
|
|
632
641
|
"""
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
super().videos,
|
|
636
|
-
keywords,
|
|
637
|
-
region,
|
|
638
|
-
safesearch,
|
|
639
|
-
timelimit,
|
|
640
|
-
resolution,
|
|
641
|
-
duration,
|
|
642
|
-
license_videos,
|
|
643
|
-
max_results,
|
|
644
|
-
)
|
|
645
|
-
return result
|
|
642
|
+
# These methods are not implemented in the async version yet
|
|
643
|
+
raise NotImplementedError("avideos method is not implemented yet")
|
|
646
644
|
|
|
647
645
|
async def anews(
|
|
648
646
|
self,
|
|
@@ -669,16 +667,8 @@ class AsyncWEBS:
|
|
|
669
667
|
RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
|
|
670
668
|
TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
|
|
671
669
|
"""
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
super().news,
|
|
675
|
-
keywords,
|
|
676
|
-
region,
|
|
677
|
-
safesearch,
|
|
678
|
-
timelimit,
|
|
679
|
-
max_results,
|
|
680
|
-
)
|
|
681
|
-
return result
|
|
670
|
+
# These methods are not implemented in the async version yet
|
|
671
|
+
raise NotImplementedError("anews method is not implemented yet")
|
|
682
672
|
|
|
683
673
|
async def aanswers(
|
|
684
674
|
self,
|
|
@@ -697,12 +687,8 @@ class AsyncWEBS:
|
|
|
697
687
|
RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
|
|
698
688
|
TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
|
|
699
689
|
"""
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
super().answers,
|
|
703
|
-
keywords,
|
|
704
|
-
)
|
|
705
|
-
return result
|
|
690
|
+
# These methods are not implemented in the async version yet
|
|
691
|
+
raise NotImplementedError("aanswers method is not implemented yet")
|
|
706
692
|
|
|
707
693
|
async def asuggestions(
|
|
708
694
|
self,
|
|
@@ -723,13 +709,8 @@ class AsyncWEBS:
|
|
|
723
709
|
RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
|
|
724
710
|
TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
|
|
725
711
|
"""
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
super().suggestions,
|
|
729
|
-
keywords,
|
|
730
|
-
region,
|
|
731
|
-
)
|
|
732
|
-
return result
|
|
712
|
+
# These methods are not implemented in the async version yet
|
|
713
|
+
raise NotImplementedError("asuggestions method is not implemented yet")
|
|
733
714
|
|
|
734
715
|
async def amaps(
|
|
735
716
|
self,
|
|
@@ -771,23 +752,8 @@ class AsyncWEBS:
|
|
|
771
752
|
RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
|
|
772
753
|
TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
|
|
773
754
|
"""
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
super().maps,
|
|
777
|
-
keywords,
|
|
778
|
-
place,
|
|
779
|
-
street,
|
|
780
|
-
city,
|
|
781
|
-
county,
|
|
782
|
-
state,
|
|
783
|
-
country,
|
|
784
|
-
postalcode,
|
|
785
|
-
latitude,
|
|
786
|
-
longitude,
|
|
787
|
-
radius,
|
|
788
|
-
max_results,
|
|
789
|
-
)
|
|
790
|
-
return result
|
|
755
|
+
# These methods are not implemented in the async version yet
|
|
756
|
+
raise NotImplementedError("amaps method is not implemented yet")
|
|
791
757
|
|
|
792
758
|
async def atranslate(
|
|
793
759
|
self,
|
|
@@ -810,14 +776,8 @@ class AsyncWEBS:
|
|
|
810
776
|
RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
|
|
811
777
|
TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
|
|
812
778
|
"""
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
super().translate,
|
|
816
|
-
keywords,
|
|
817
|
-
from_,
|
|
818
|
-
to,
|
|
819
|
-
)
|
|
820
|
-
return result
|
|
779
|
+
# These methods are not implemented in the async version yet
|
|
780
|
+
raise NotImplementedError("atranslate method is not implemented yet")
|
|
821
781
|
|
|
822
782
|
async def aweather(
|
|
823
783
|
self,
|
|
@@ -868,10 +828,5 @@ class AsyncWEBS:
|
|
|
868
828
|
RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
|
|
869
829
|
TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
|
|
870
830
|
"""
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
super().weather,
|
|
874
|
-
location,
|
|
875
|
-
language,
|
|
876
|
-
)
|
|
877
|
-
return result
|
|
831
|
+
# These methods are not implemented in the async version yet
|
|
832
|
+
raise NotImplementedError("aweather method is not implemented yet")
|
webscout/yep_search.py
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
|
-
import
|
|
1
|
+
from curl_cffi.requests import Session
|
|
2
2
|
from urllib.parse import urlencode
|
|
3
3
|
from webscout.litagent import LitAgent
|
|
4
4
|
from typing import List, Dict, Optional, Tuple
|
|
5
5
|
from concurrent.futures import ThreadPoolExecutor
|
|
6
6
|
import json
|
|
7
|
+
|
|
7
8
|
class YepSearch:
|
|
8
9
|
"""Yep.com search class to get search results."""
|
|
9
|
-
|
|
10
|
+
|
|
10
11
|
_executor: ThreadPoolExecutor = ThreadPoolExecutor()
|
|
11
12
|
|
|
12
13
|
def __init__(
|
|
@@ -14,47 +15,54 @@ class YepSearch:
|
|
|
14
15
|
timeout: int = 20,
|
|
15
16
|
proxies: Dict[str, str] | None = None,
|
|
16
17
|
verify: bool = True,
|
|
18
|
+
impersonate: str = "chrome110"
|
|
17
19
|
):
|
|
18
20
|
"""Initialize YepSearch.
|
|
19
|
-
|
|
21
|
+
|
|
20
22
|
Args:
|
|
21
23
|
timeout: Timeout value for the HTTP client. Defaults to 20.
|
|
22
24
|
proxies: Proxy configuration for requests. Defaults to None.
|
|
23
25
|
verify: Verify SSL certificates. Defaults to True.
|
|
26
|
+
impersonate: Browser profile to impersonate for curl_cffi. Defaults to "chrome110".
|
|
24
27
|
"""
|
|
25
28
|
self.base_url = "https://api.yep.com/fs/2/search"
|
|
26
29
|
self.timeout = timeout
|
|
27
|
-
|
|
30
|
+
# Initialize curl_cffi session
|
|
31
|
+
self.session = Session(
|
|
32
|
+
proxies=proxies,
|
|
33
|
+
verify=verify,
|
|
34
|
+
impersonate=impersonate,
|
|
35
|
+
timeout=timeout # Set timeout directly in session
|
|
36
|
+
)
|
|
28
37
|
self.session.headers.update({
|
|
29
38
|
"Accept": "*/*",
|
|
30
39
|
"Accept-Language": "en-US,en;q=0.9,en-IN;q=0.8",
|
|
31
40
|
"DNT": "1",
|
|
32
41
|
"Origin": "https://yep.com",
|
|
33
42
|
"Referer": "https://yep.com/",
|
|
43
|
+
# Sec-Ch-Ua headers are often handled by impersonate, but keeping them might be safer
|
|
34
44
|
"Sec-Ch-Ua": '"Not(A:Brand";v="99", "Microsoft Edge";v="133", "Chromium";v="133"',
|
|
35
45
|
"Sec-Ch-Ua-Mobile": "?0",
|
|
36
46
|
"Sec-Ch-Ua-Platform": '"Windows"',
|
|
37
47
|
"Sec-Fetch-Dest": "empty",
|
|
38
48
|
"Sec-Fetch-Mode": "cors",
|
|
39
49
|
"Sec-Fetch-Site": "same-site",
|
|
40
|
-
"User-Agent": LitAgent().random()
|
|
50
|
+
"User-Agent": LitAgent().random() # Keep custom User-Agent or rely on impersonate
|
|
41
51
|
})
|
|
42
|
-
|
|
43
|
-
self.session.proxies.update(proxies)
|
|
44
|
-
self.session.verify = verify
|
|
52
|
+
# Proxies and verify are handled by the Session constructor now
|
|
45
53
|
|
|
46
54
|
def _remove_html_tags(self, text: str) -> str:
|
|
47
55
|
"""Remove HTML tags from text using simple string manipulation.
|
|
48
|
-
|
|
56
|
+
|
|
49
57
|
Args:
|
|
50
58
|
text: String containing HTML tags
|
|
51
|
-
|
|
59
|
+
|
|
52
60
|
Returns:
|
|
53
61
|
Clean text without HTML tags
|
|
54
62
|
"""
|
|
55
63
|
result = ""
|
|
56
64
|
in_tag = False
|
|
57
|
-
|
|
65
|
+
|
|
58
66
|
for char in text:
|
|
59
67
|
if char == '<':
|
|
60
68
|
in_tag = True
|
|
@@ -62,7 +70,7 @@ class YepSearch:
|
|
|
62
70
|
in_tag = False
|
|
63
71
|
elif not in_tag:
|
|
64
72
|
result += char
|
|
65
|
-
|
|
73
|
+
|
|
66
74
|
# Replace common HTML entities
|
|
67
75
|
replacements = {
|
|
68
76
|
'&nbsp;': ' ',
|
|
@@ -72,21 +80,21 @@ class YepSearch:
|
|
|
72
80
|
'&quot;': '"',
|
|
73
81
|
'&apos;': "'",
|
|
74
82
|
}
|
|
75
|
-
|
|
83
|
+
|
|
76
84
|
for entity, replacement in replacements.items():
|
|
77
85
|
result = result.replace(entity, replacement)
|
|
78
|
-
|
|
86
|
+
|
|
79
87
|
return result.strip()
|
|
80
88
|
|
|
81
89
|
def format_results(self, raw_results: dict) -> List[Dict]:
|
|
82
90
|
"""Format raw API results into a consistent structure."""
|
|
83
91
|
formatted_results = []
|
|
84
|
-
|
|
92
|
+
|
|
85
93
|
if not raw_results or len(raw_results) < 2:
|
|
86
94
|
return formatted_results
|
|
87
95
|
|
|
88
96
|
results = raw_results[1].get('results', [])
|
|
89
|
-
|
|
97
|
+
|
|
90
98
|
for result in results:
|
|
91
99
|
formatted_result = {
|
|
92
100
|
"title": self._remove_html_tags(result.get("title", "")),
|
|
@@ -97,7 +105,7 @@ class YepSearch:
|
|
|
97
105
|
"type": result.get("type", "organic"),
|
|
98
106
|
"first_seen": result.get("first_seen", None)
|
|
99
107
|
}
|
|
100
|
-
|
|
108
|
+
|
|
101
109
|
# Add sitelinks if they exist
|
|
102
110
|
if "sitelinks" in result:
|
|
103
111
|
sitelinks = []
|
|
@@ -105,7 +113,7 @@ class YepSearch:
|
|
|
105
113
|
sitelinks.extend(result["sitelinks"]["full"])
|
|
106
114
|
if "short" in result["sitelinks"]:
|
|
107
115
|
sitelinks.extend(result["sitelinks"]["short"])
|
|
108
|
-
|
|
116
|
+
|
|
109
117
|
if sitelinks:
|
|
110
118
|
formatted_result["sitelinks"] = [
|
|
111
119
|
{
|
|
@@ -114,9 +122,9 @@ class YepSearch:
|
|
|
114
122
|
}
|
|
115
123
|
for link in sitelinks
|
|
116
124
|
]
|
|
117
|
-
|
|
125
|
+
|
|
118
126
|
formatted_results.append(formatted_result)
|
|
119
|
-
|
|
127
|
+
|
|
120
128
|
return formatted_results
|
|
121
129
|
|
|
122
130
|
def text(
|
|
@@ -154,20 +162,25 @@ class YepSearch:
|
|
|
154
162
|
"safeSearch": safe_setting,
|
|
155
163
|
"type": "web"
|
|
156
164
|
}
|
|
157
|
-
|
|
165
|
+
|
|
158
166
|
url = f"{self.base_url}?{urlencode(params)}"
|
|
159
167
|
try:
|
|
160
|
-
|
|
168
|
+
# Use the session timeout defined in __init__
|
|
169
|
+
response = self.session.get(url)
|
|
161
170
|
response.raise_for_status()
|
|
162
171
|
raw_results = response.json()
|
|
163
|
-
|
|
172
|
+
|
|
164
173
|
formatted_results = self.format_results(raw_results)
|
|
165
|
-
|
|
174
|
+
|
|
166
175
|
if max_results:
|
|
167
176
|
return formatted_results[:max_results]
|
|
168
177
|
return formatted_results
|
|
169
178
|
except Exception as e:
|
|
170
|
-
|
|
179
|
+
# Provide more specific error context if possible
|
|
180
|
+
if hasattr(e, 'response') and e.response is not None:
|
|
181
|
+
raise Exception(f"Yep search failed with status {e.response.status_code}: {str(e)}")
|
|
182
|
+
else:
|
|
183
|
+
raise Exception(f"Yep search failed: {str(e)}")
|
|
171
184
|
|
|
172
185
|
def images(
|
|
173
186
|
self,
|
|
@@ -210,23 +223,24 @@ class YepSearch:
|
|
|
210
223
|
"safeSearch": safe_setting,
|
|
211
224
|
"type": "images"
|
|
212
225
|
}
|
|
213
|
-
|
|
226
|
+
|
|
214
227
|
url = f"{self.base_url}?{urlencode(params)}"
|
|
215
228
|
try:
|
|
216
|
-
|
|
229
|
+
# Use the session timeout defined in __init__
|
|
230
|
+
response = self.session.get(url)
|
|
217
231
|
response.raise_for_status()
|
|
218
232
|
raw_results = response.json()
|
|
219
|
-
|
|
233
|
+
|
|
220
234
|
if not raw_results or len(raw_results) < 2:
|
|
221
235
|
return []
|
|
222
236
|
|
|
223
237
|
formatted_results = []
|
|
224
238
|
results = raw_results[1].get('results', [])
|
|
225
|
-
|
|
239
|
+
|
|
226
240
|
for result in results:
|
|
227
241
|
if result.get("type") != "Image":
|
|
228
242
|
continue
|
|
229
|
-
|
|
243
|
+
|
|
230
244
|
formatted_result = {
|
|
231
245
|
"title": self._remove_html_tags(result.get("title", "")),
|
|
232
246
|
"image": result.get("image_id", ""),
|
|
@@ -236,19 +250,23 @@ class YepSearch:
|
|
|
236
250
|
"width": result.get("width", 0),
|
|
237
251
|
"source": result.get("visual_url", "")
|
|
238
252
|
}
|
|
239
|
-
|
|
253
|
+
|
|
240
254
|
# Add high-res thumbnail if available
|
|
241
255
|
if "srcset" in result:
|
|
242
256
|
formatted_result["thumbnail_hd"] = result["srcset"].split(",")[1].strip().split(" ")[0]
|
|
243
|
-
|
|
257
|
+
|
|
244
258
|
formatted_results.append(formatted_result)
|
|
245
|
-
|
|
259
|
+
|
|
246
260
|
if max_results:
|
|
247
261
|
return formatted_results[:max_results]
|
|
248
262
|
return formatted_results
|
|
249
|
-
|
|
263
|
+
|
|
250
264
|
except Exception as e:
|
|
251
|
-
|
|
265
|
+
# Provide more specific error context if possible
|
|
266
|
+
if hasattr(e, 'response') and e.response is not None:
|
|
267
|
+
raise Exception(f"Yep image search failed with status {e.response.status_code}: {str(e)}")
|
|
268
|
+
else:
|
|
269
|
+
raise Exception(f"Yep image search failed: {str(e)}")
|
|
252
270
|
|
|
253
271
|
def suggestions(
|
|
254
272
|
self,
|
|
@@ -275,23 +293,55 @@ class YepSearch:
|
|
|
275
293
|
"type": "web",
|
|
276
294
|
"gl": region
|
|
277
295
|
}
|
|
278
|
-
|
|
296
|
+
|
|
279
297
|
url = f"https://api.yep.com/ac/?{urlencode(params)}"
|
|
280
|
-
|
|
298
|
+
|
|
281
299
|
try:
|
|
282
|
-
|
|
300
|
+
# Use the session timeout defined in __init__
|
|
301
|
+
response = self.session.get(url)
|
|
283
302
|
response.raise_for_status()
|
|
284
303
|
data = response.json()
|
|
285
304
|
# Return suggestions list if response format is valid
|
|
286
305
|
if isinstance(data, list) and len(data) > 1 and isinstance(data[1], list):
|
|
287
306
|
return data[1]
|
|
288
307
|
return []
|
|
289
|
-
|
|
308
|
+
|
|
290
309
|
except Exception as e:
|
|
291
|
-
|
|
310
|
+
# Provide more specific error context if possible
|
|
311
|
+
if hasattr(e, 'response') and e.response is not None:
|
|
312
|
+
raise Exception(f"Yep suggestions failed with status {e.response.status_code}: {str(e)}")
|
|
313
|
+
else:
|
|
314
|
+
raise Exception(f"Yep suggestions failed: {str(e)}")
|
|
292
315
|
|
|
293
316
|
|
|
294
317
|
if __name__ == "__main__":
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
318
|
+
from rich import print
|
|
319
|
+
yep = YepSearch(
|
|
320
|
+
timeout=20, # Optional: Set custom timeout
|
|
321
|
+
proxies=None, # Optional: Use proxies
|
|
322
|
+
verify=True # Optional: SSL verification
|
|
323
|
+
)
|
|
324
|
+
|
|
325
|
+
# Text Search
|
|
326
|
+
text_results = yep.text(
|
|
327
|
+
keywords="artificial intelligence",
|
|
328
|
+
region="all", # Optional: Region for results
|
|
329
|
+
safesearch="moderate", # Optional: "on", "moderate", "off"
|
|
330
|
+
max_results=10 # Optional: Limit number of results
|
|
331
|
+
)
|
|
332
|
+
|
|
333
|
+
# Image Search
|
|
334
|
+
image_results = yep.images(
|
|
335
|
+
keywords="nature photography",
|
|
336
|
+
region="all",
|
|
337
|
+
safesearch="moderate",
|
|
338
|
+
max_results=10
|
|
339
|
+
)
|
|
340
|
+
|
|
341
|
+
# Get search suggestions
|
|
342
|
+
suggestions = yep.suggestions("hist")
|
|
343
|
+
print(text_results)
|
|
344
|
+
print("---" * 30)
|
|
345
|
+
print(image_results)
|
|
346
|
+
print("---" * 30)
|
|
347
|
+
print(suggestions)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: webscout
|
|
3
|
-
Version: 8.2.
|
|
3
|
+
Version: 8.2.4
|
|
4
4
|
Summary: Search for anything using Google, DuckDuckGo, phind.com, Contains AI models, can transcribe yt videos, temporary email and phone number generation, has TTS support, webai (terminal gpt and open interpreter) and offline LLMs and more
|
|
5
5
|
Author: OEvortex
|
|
6
6
|
Author-email: helpingai5@gmail.com
|
|
@@ -41,7 +41,7 @@ Requires-Dist: pip
|
|
|
41
41
|
Requires-Dist: nodriver
|
|
42
42
|
Requires-Dist: mistune
|
|
43
43
|
Requires-Dist: tenacity
|
|
44
|
-
Requires-Dist:
|
|
44
|
+
Requires-Dist: curl_cffi
|
|
45
45
|
Requires-Dist: nest-asyncio
|
|
46
46
|
Requires-Dist: websocket-client
|
|
47
47
|
Requires-Dist: colorama
|
|
@@ -61,23 +61,35 @@ Requires-Dist: html5lib
|
|
|
61
61
|
Requires-Dist: aiofiles
|
|
62
62
|
Requires-Dist: openai
|
|
63
63
|
Requires-Dist: prompt-toolkit
|
|
64
|
-
Requires-Dist:
|
|
65
|
-
Requires-Dist:
|
|
66
|
-
Requires-Dist: gradio-client
|
|
64
|
+
Requires-Dist: pyreqwest_impersonate
|
|
65
|
+
Requires-Dist: gradio_client
|
|
67
66
|
Requires-Dist: psutil
|
|
68
67
|
Requires-Dist: aiohttp
|
|
68
|
+
Provides-Extra: dev
|
|
69
|
+
Requires-Dist: ruff>=0.1.6; extra == "dev"
|
|
70
|
+
Requires-Dist: pytest>=7.4.2; extra == "dev"
|
|
69
71
|
Provides-Extra: local
|
|
70
72
|
Requires-Dist: llama-cpp-python; extra == "local"
|
|
71
73
|
Requires-Dist: fastapi; extra == "local"
|
|
72
74
|
Requires-Dist: uvicorn; extra == "local"
|
|
73
75
|
Requires-Dist: rich; extra == "local"
|
|
74
76
|
Requires-Dist: typer; extra == "local"
|
|
75
|
-
Requires-Dist:
|
|
77
|
+
Requires-Dist: huggingface_hub; extra == "local"
|
|
76
78
|
Requires-Dist: pydantic; extra == "local"
|
|
77
79
|
Requires-Dist: requests; extra == "local"
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
80
|
+
Dynamic: author
|
|
81
|
+
Dynamic: author-email
|
|
82
|
+
Dynamic: classifier
|
|
83
|
+
Dynamic: description
|
|
84
|
+
Dynamic: description-content-type
|
|
85
|
+
Dynamic: keywords
|
|
86
|
+
Dynamic: license
|
|
87
|
+
Dynamic: license-file
|
|
88
|
+
Dynamic: project-url
|
|
89
|
+
Dynamic: provides-extra
|
|
90
|
+
Dynamic: requires-dist
|
|
91
|
+
Dynamic: requires-python
|
|
92
|
+
Dynamic: summary
|
|
81
93
|
|
|
82
94
|
<div align="center">
|
|
83
95
|
<a href="https://github.com/OEvortex/Webscout">
|