webscout 8.3.5__py3-none-any.whl → 8.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of webscout might be problematic.
- webscout/AIutel.py +2 -0
- webscout/Bard.py +12 -6
- webscout/DWEBS.py +66 -57
- webscout/Provider/{UNFINISHED → AISEARCH}/PERPLEXED_search.py +34 -74
- webscout/Provider/AISEARCH/__init__.py +18 -11
- webscout/Provider/AISEARCH/scira_search.py +3 -1
- webscout/Provider/Aitopia.py +2 -3
- webscout/Provider/Andi.py +3 -3
- webscout/Provider/ChatGPTClone.py +1 -1
- webscout/Provider/ChatSandbox.py +1 -0
- webscout/Provider/Cloudflare.py +1 -1
- webscout/Provider/Cohere.py +1 -0
- webscout/Provider/Deepinfra.py +13 -10
- webscout/Provider/ExaAI.py +1 -1
- webscout/Provider/ExaChat.py +1 -80
- webscout/Provider/Flowith.py +6 -1
- webscout/Provider/Gemini.py +7 -5
- webscout/Provider/GeminiProxy.py +1 -0
- webscout/Provider/GithubChat.py +4 -1
- webscout/Provider/Groq.py +1 -1
- webscout/Provider/HeckAI.py +8 -4
- webscout/Provider/Jadve.py +23 -38
- webscout/Provider/K2Think.py +308 -0
- webscout/Provider/Koboldai.py +8 -186
- webscout/Provider/LambdaChat.py +2 -4
- webscout/Provider/Nemotron.py +3 -4
- webscout/Provider/Netwrck.py +6 -8
- webscout/Provider/OLLAMA.py +1 -0
- webscout/Provider/OPENAI/Cloudflare.py +6 -7
- webscout/Provider/OPENAI/FalconH1.py +2 -7
- webscout/Provider/OPENAI/FreeGemini.py +6 -8
- webscout/Provider/OPENAI/{monochat.py → K2Think.py} +180 -77
- webscout/Provider/OPENAI/NEMOTRON.py +3 -6
- webscout/Provider/OPENAI/PI.py +5 -4
- webscout/Provider/OPENAI/Qwen3.py +2 -3
- webscout/Provider/OPENAI/README.md +2 -1
- webscout/Provider/OPENAI/TogetherAI.py +52 -57
- webscout/Provider/OPENAI/TwoAI.py +3 -4
- webscout/Provider/OPENAI/__init__.py +17 -56
- webscout/Provider/OPENAI/ai4chat.py +313 -303
- webscout/Provider/OPENAI/base.py +9 -29
- webscout/Provider/OPENAI/chatgpt.py +7 -2
- webscout/Provider/OPENAI/chatgptclone.py +4 -7
- webscout/Provider/OPENAI/chatsandbox.py +84 -59
- webscout/Provider/OPENAI/deepinfra.py +12 -6
- webscout/Provider/OPENAI/e2b.py +60 -8
- webscout/Provider/OPENAI/flowith.py +4 -3
- webscout/Provider/OPENAI/generate_api_key.py +48 -0
- webscout/Provider/OPENAI/heckai.py +4 -1
- webscout/Provider/OPENAI/netwrck.py +9 -12
- webscout/Provider/OPENAI/refact.py +274 -0
- webscout/Provider/OPENAI/scirachat.py +6 -0
- webscout/Provider/OPENAI/textpollinations.py +3 -14
- webscout/Provider/OPENAI/toolbaz.py +14 -10
- webscout/Provider/OpenGPT.py +1 -1
- webscout/Provider/Openai.py +150 -402
- webscout/Provider/PI.py +1 -0
- webscout/Provider/Perplexitylabs.py +1 -2
- webscout/Provider/QwenLM.py +107 -89
- webscout/Provider/STT/__init__.py +17 -2
- webscout/Provider/{Llama3.py → Sambanova.py} +9 -10
- webscout/Provider/StandardInput.py +1 -1
- webscout/Provider/TTI/__init__.py +18 -12
- webscout/Provider/TTI/bing.py +14 -2
- webscout/Provider/TTI/together.py +10 -9
- webscout/Provider/TTS/README.md +0 -1
- webscout/Provider/TTS/__init__.py +18 -11
- webscout/Provider/TTS/base.py +479 -159
- webscout/Provider/TTS/deepgram.py +409 -156
- webscout/Provider/TTS/elevenlabs.py +425 -111
- webscout/Provider/TTS/freetts.py +317 -140
- webscout/Provider/TTS/gesserit.py +192 -128
- webscout/Provider/TTS/murfai.py +248 -113
- webscout/Provider/TTS/openai_fm.py +347 -129
- webscout/Provider/TTS/speechma.py +620 -586
- webscout/Provider/TeachAnything.py +1 -0
- webscout/Provider/TextPollinationsAI.py +5 -15
- webscout/Provider/TogetherAI.py +136 -142
- webscout/Provider/TwoAI.py +53 -309
- webscout/Provider/TypliAI.py +2 -1
- webscout/Provider/{GizAI.py → UNFINISHED/GizAI.py} +1 -1
- webscout/Provider/UNFINISHED/VercelAIGateway.py +339 -0
- webscout/Provider/Venice.py +2 -1
- webscout/Provider/VercelAI.py +1 -0
- webscout/Provider/WiseCat.py +2 -1
- webscout/Provider/WrDoChat.py +2 -1
- webscout/Provider/__init__.py +18 -174
- webscout/Provider/ai4chat.py +1 -1
- webscout/Provider/akashgpt.py +7 -10
- webscout/Provider/cerebras.py +194 -38
- webscout/Provider/chatglm.py +170 -83
- webscout/Provider/cleeai.py +1 -2
- webscout/Provider/deepseek_assistant.py +1 -1
- webscout/Provider/elmo.py +1 -1
- webscout/Provider/geminiapi.py +1 -1
- webscout/Provider/granite.py +1 -1
- webscout/Provider/hermes.py +1 -3
- webscout/Provider/julius.py +1 -0
- webscout/Provider/learnfastai.py +1 -1
- webscout/Provider/llama3mitril.py +1 -1
- webscout/Provider/llmchat.py +1 -1
- webscout/Provider/llmchatco.py +1 -1
- webscout/Provider/meta.py +3 -3
- webscout/Provider/oivscode.py +2 -2
- webscout/Provider/scira_chat.py +51 -124
- webscout/Provider/searchchat.py +1 -0
- webscout/Provider/sonus.py +1 -1
- webscout/Provider/toolbaz.py +15 -11
- webscout/Provider/turboseek.py +31 -22
- webscout/Provider/typefully.py +2 -1
- webscout/Provider/x0gpt.py +1 -0
- webscout/Provider/yep.py +2 -1
- webscout/conversation.py +22 -20
- webscout/sanitize.py +14 -10
- webscout/scout/README.md +20 -23
- webscout/scout/core/crawler.py +125 -38
- webscout/scout/core/scout.py +26 -5
- webscout/tempid.py +6 -0
- webscout/version.py +1 -1
- webscout/webscout_search.py +13 -6
- webscout/webscout_search_async.py +10 -8
- webscout/yep_search.py +13 -5
- {webscout-8.3.5.dist-info → webscout-8.3.7.dist-info}/METADATA +3 -1
- {webscout-8.3.5.dist-info → webscout-8.3.7.dist-info}/RECORD +132 -155
- webscout/Provider/AllenAI.py +0 -440
- webscout/Provider/Blackboxai.py +0 -793
- webscout/Provider/FreeGemini.py +0 -250
- webscout/Provider/Glider.py +0 -225
- webscout/Provider/Hunyuan.py +0 -283
- webscout/Provider/MCPCore.py +0 -322
- webscout/Provider/MiniMax.py +0 -207
- webscout/Provider/OPENAI/BLACKBOXAI.py +0 -1045
- webscout/Provider/OPENAI/MiniMax.py +0 -298
- webscout/Provider/OPENAI/autoproxy.py +0 -1067
- webscout/Provider/OPENAI/c4ai.py +0 -394
- webscout/Provider/OPENAI/copilot.py +0 -305
- webscout/Provider/OPENAI/glider.py +0 -330
- webscout/Provider/OPENAI/mcpcore.py +0 -431
- webscout/Provider/OPENAI/multichat.py +0 -378
- webscout/Provider/Reka.py +0 -214
- webscout/Provider/TTS/sthir.py +0 -94
- webscout/Provider/UNFINISHED/fetch_together_models.py +0 -90
- webscout/Provider/asksteve.py +0 -220
- webscout/Provider/copilot.py +0 -422
- webscout/Provider/freeaichat.py +0 -294
- webscout/Provider/koala.py +0 -182
- webscout/Provider/lmarena.py +0 -198
- webscout/Provider/monochat.py +0 -275
- webscout/Provider/multichat.py +0 -375
- webscout/Provider/scnet.py +0 -244
- webscout/Provider/talkai.py +0 -194
- /webscout/Provider/{Marcus.py → UNFINISHED/Marcus.py} +0 -0
- /webscout/Provider/{Qodo.py → UNFINISHED/Qodo.py} +0 -0
- /webscout/Provider/{XenAI.py → UNFINISHED/XenAI.py} +0 -0
- /webscout/Provider/{samurai.py → UNFINISHED/samurai.py} +0 -0
- {webscout-8.3.5.dist-info → webscout-8.3.7.dist-info}/WHEEL +0 -0
- {webscout-8.3.5.dist-info → webscout-8.3.7.dist-info}/entry_points.txt +0 -0
- {webscout-8.3.5.dist-info → webscout-8.3.7.dist-info}/licenses/LICENSE.md +0 -0
- {webscout-8.3.5.dist-info → webscout-8.3.7.dist-info}/top_level.txt +0 -0
webscout/AIutel.py
CHANGED
@@ -22,6 +22,7 @@ def timeIt(func: Callable):
         start_time = time.time()
         result = func(*args, **kwargs)
         end_time = time.time()
+        print()
         print(f"{GREEN_BOLD}- Execution time for '{func.__name__}' : {end_time - start_time:.6f} Seconds. {RESET}\n")
         return result
 
@@ -30,6 +31,7 @@ def timeIt(func: Callable):
         start_time = time.time()
         result = await func(*args, **kwargs)
         end_time = time.time()
+        print()
         print(f"{GREEN_BOLD}- Execution time for '{func.__name__}' : {end_time - start_time:.6f} Seconds. {RESET}\n")
         return result
 
webscout/Bard.py
CHANGED
@@ -8,24 +8,30 @@ import os
 import random
 import re
 import string
+from datetime import datetime
 from enum import Enum
 from pathlib import Path
-from
-from typing import Dict, List, Tuple, Union, Optional
+from typing import Dict, List, Optional, Tuple, Union
 
 # Use curl_cffi for requests
+# Import trio before curl_cffi to prevent eventlet socket monkey-patching conflicts
+# See: https://github.com/python-trio/trio/issues/3015
+try:
+    import trio  # noqa: F401
+except ImportError:
+    pass  # trio is optional, ignore if not available
 from curl_cffi import CurlError
 from curl_cffi.requests import AsyncSession
-# Import common request exceptions (curl_cffi often wraps these)
-from requests.exceptions import RequestException, Timeout, HTTPError
 
 # For image models using validation. Adjust based on organization internal pydantic.
 # Updated import for Pydantic V2
 from pydantic import BaseModel, field_validator
 
+# Import common request exceptions (curl_cffi often wraps these)
+from requests.exceptions import HTTPError, RequestException, Timeout
+
 # Rich is retained for logging within image methods.
 from rich.console import Console
-from rich.markdown import Markdown
 
 console = Console()
 
@@ -875,7 +881,7 @@ class Image(BaseModel):
         # Generate filename from URL if not provided
         if not filename:
             try:
-                from urllib.parse import
+                from urllib.parse import unquote, urlparse
                 parsed_url = urlparse(self.url)
                 base_filename = os.path.basename(unquote(parsed_url.path))
                 # Remove invalid characters for filenames
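Note on the trio guard added above (and repeated in DWEBS.py below): trio is imported purely for its import-time side effect, and only if it is installed, so that eventlet's socket monkey-patching cannot conflict with curl_cffi (python-trio/trio#3015, as cited in the diff). A minimal standalone sketch of the pattern, assuming nothing beyond the module names already shown:

# Guarded side-effect import: trio, if present, must be imported before curl_cffi.
try:
    import trio  # noqa: F401  # needed only for its import-time side effect
except ImportError:
    pass  # trio is optional; curl_cffi works without it

from curl_cffi.requests import AsyncSession  # safe to import after the guard

The try/except keeps trio a soft dependency: environments without it lose nothing except the workaround.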
webscout/DWEBS.py
CHANGED
@@ -3,16 +3,25 @@ DWEBS - A Google search library with advanced features
 """
 import random
 from time import sleep
+
 from webscout.scout import Scout
-
-
-
+
+# Import trio before curl_cffi to prevent eventlet socket monkey-patching conflicts
+# See: https://github.com/python-trio/trio/issues/3015
+try:
+    import trio  # noqa: F401
+except ImportError:
+    pass  # trio is optional, ignore if not available
 from concurrent.futures import ThreadPoolExecutor
+from typing import Any, Dict, List, Optional
+from urllib.parse import unquote, urlencode
+
+from curl_cffi.requests import Session
 
 
 class SearchResult:
     """Class to represent a search result with metadata."""
-
+
     def __init__(self, url: str, title: str, description: str):
         """
         Initialize a search result.
@@ -96,7 +105,7 @@ class GoogleSearch:
         ssl_mm_version = f"SSL-MM/{random.randint(1, 2)}.{random.randint(3, 5)}"
         openssl_version = f"OpenSSL/{random.randint(1, 3)}.{random.randint(0, 4)}.{random.randint(0, 9)}"
         return f"{lynx_version} {libwww_version} {ssl_mm_version} {openssl_version}"
-
+
     def _make_request(self, term: str, results: int, start: int = 0, search_type: str = None) -> str:
         """
         Make a request to Google search.
@@ -116,11 +125,11 @@ class GoogleSearch:
             "hl": self.lang,
             "start": start,
         }
-
+
         # Add search type if specified
         if search_type:
             params["tbm"] = search_type
-
+
         try:
             # Use the curl_cffi session
             resp = self.session.get(
@@ -137,7 +146,7 @@ class GoogleSearch:
                 raise RuntimeError(f"Search request failed with status {e.response.status_code}: {str(e)}")
             else:
                 raise RuntimeError(f"Search request failed: {str(e)}")
-
+
     def _extract_url(self, raw_link: str) -> Optional[str]:
         """
         Extract actual URL from Google redirect URL.
@@ -150,7 +159,7 @@ class GoogleSearch:
         """
         if not raw_link:
             return None
-
+
         if raw_link.startswith("/url?"):
             try:
                 link = unquote(raw_link.split("&")[0].replace("/url?q=", ""))
@@ -159,9 +168,9 @@ class GoogleSearch:
                 return None
         elif raw_link.startswith("http"):
             return unquote(raw_link)
-
+
         return None
-
+
     def _is_valid_result(self, link: str, fetched_links: set, unique: bool) -> bool:
         """
         Check if search result is valid.
@@ -176,12 +185,12 @@ class GoogleSearch:
         """
         if any(x in link for x in ["google.", "/search?", "webcache."]):
             return False
-
+
         if link in fetched_links and unique:
             return False
-
+
         return True
-
+
     def _parse_search_results(
         self,
         html: str,
@@ -204,11 +213,11 @@ class GoogleSearch:
         results = []
         soup = Scout(html, features="html.parser")
         result_blocks = soup.find_all("div", class_="ezO2md")
-
+
         if not result_blocks:
             # Try alternative class patterns if the main one doesn't match
             result_blocks = soup.find_all("div", attrs={"class": lambda c: c and "g" in c.split()})
-
+
         for result in result_blocks:
             # Find the link - looking for various potential Google result classes
             link_tag = result.find("a", class_=["fuLhoc", "ZWRArf"])
@@ -216,10 +225,10 @@ class GoogleSearch:
                 link_tag = result.find("a")
                 if not link_tag:
                     continue
-
+
             raw_link = link_tag.get("href", "")
             link = self._extract_url(raw_link)
-
+
             if not link:
                 continue
 
@@ -235,32 +244,32 @@ class GoogleSearch:
             description_tag = result.find("span", class_="FrIlee")
             if not description_tag:
                 description_tag = result.find(["div", "span"], class_=lambda c: c and any(x in c for x in ["snippet", "description", "VwiC3b"]))
-
+
             description = description_tag.get_text(strip=True) if description_tag else ""
 
             # Create result object
             search_result = SearchResult(link, title, description)
-
+
             # Add extra metadata if available
             citation = result.find("cite")
             if citation:
                 search_result.metadata["source"] = citation.get_text(strip=True)
-
+
             timestamp = result.find("span", class_=lambda c: c and "ZE5qJf" in c)
             if timestamp:
                 search_result.metadata["date"] = timestamp.get_text(strip=True)
 
             fetched_links.add(link)
             results.append(search_result)
-
+
             if len(results) >= num_results:
                 break
-
+
         return results
-
+
     def text(
-        self,
-        keywords: str,
+        self,
+        keywords: str,
         region: str = None,
         safesearch: str = "moderate",
         max_results: int = 10,
@@ -283,7 +292,7 @@ class GoogleSearch:
         """
         if not keywords:
             raise ValueError("Search keywords cannot be empty")
-
+
         # Map safesearch values to Google's safe parameter
         safe_map = {
             "on": "active",
@@ -291,12 +300,12 @@ class GoogleSearch:
             "off": "off"
         }
         safe = safe_map.get(safesearch.lower(), "moderate")
-
+
         # Keep track of unique results
         fetched_results = []
         fetched_links = set()
         start = start_num
-
+
         while len(fetched_results) < max_results:
             # Add safe search parameter to the request
             # Note: This modifies the session params for this specific request type
@@ -307,33 +316,33 @@ class GoogleSearch:
 
             response_html = self._make_request(
                 term=term_with_safe,  # Pass term with safe search
-                results=max_results - len(fetched_results),
+                results=max_results - len(fetched_results),
                 start=start
             )
-
+
             results = self._parse_search_results(
                 html=response_html,
                 num_results=max_results - len(fetched_results),
                 fetched_links=fetched_links,
                 unique=unique
            )
-
+
            if not results:
                break
-
+
            fetched_results.extend(results)
-
+
            if len(fetched_results) >= max_results:
                break
-
+
            start += 10  # Google typically uses increments of 10
            sleep(self.sleep_interval)
-
+
        return fetched_results[:max_results]
-
+
    def news(
-        self,
-        keywords: str,
+        self,
+        keywords: str,
        region: str = None,
        safesearch: str = "moderate",
        max_results: int = 10
@@ -352,7 +361,7 @@ class GoogleSearch:
        """
        if not keywords:
            raise ValueError("Search keywords cannot be empty")
-
+
        # Map safesearch values to Google's safe parameter
        safe_map = {
            "on": "active",
@@ -360,7 +369,7 @@ class GoogleSearch:
            "off": "off"
        }
        safe = safe_map.get(safesearch.lower(), "moderate")
-
+
        # Keep track of unique results
        fetched_links = set()
 
@@ -374,16 +383,16 @@ class GoogleSearch:
            results=max_results,
            search_type="nws"
        )
-
+
        results = self._parse_search_results(
            html=response_html,
            num_results=max_results,
            fetched_links=fetched_links,
            unique=True  # News results are generally unique per request
        )
-
+
        return results[:max_results]
-
+
    def suggestions(self, query: str, region: str = None) -> List[str]:
        """
        Get search suggestions for a query term.
@@ -397,26 +406,26 @@ class GoogleSearch:
        """
        if not query:
            raise ValueError("Search query cannot be empty")
-
+
        try:
            params = {
                "client": "firefox",
                "q": query,
            }
-
+
            # Add region if specified
            if region and region.lower() != "all":
                params["gl"] = region
-
+
            url = f"https://www.google.com/complete/search?{urlencode(params)}"
-
+
            # Use a simpler header set for the suggestions API
            headers = {
                "User-Agent": self._get_useragent(),
                "Accept": "application/json, text/javascript, */*",
                "Accept-Language": self.lang,
            }
-
+
            # Use session.get but override headers for this specific request
            response = self.session.get(
                url=url,
@@ -425,13 +434,13 @@ class GoogleSearch:
                # timeout and verify are handled by session
            )
            response.raise_for_status()
-
+
            # Response format is typically: ["original query", ["suggestion1", "suggestion2", ...]]
            data = response.json()
            if isinstance(data, list) and len(data) > 1 and isinstance(data[1], list):
                return data[1]
            return []
-
+
        except Exception as e:
            # Provide more specific error context if possible
            if hasattr(e, 'response') and e.response is not None:
@@ -454,7 +463,7 @@ def search(term, num_results=10, lang="en", proxy=None, advanced=False, sleep_in
        sleep_interval=sleep_interval,
        impersonate=impersonate  # Pass impersonate
    )
-
+
    # Map legacy safe values
    safe_search_map = {
        "active": "on",
@@ -471,7 +480,7 @@ def search(term, num_results=10, lang="en", proxy=None, advanced=False, sleep_in
        start_num=start_num,
        unique=unique
    )
-
+
    # Convert to simple URLs if not advanced mode
    if not advanced:
        return [result.url for result in results]
@@ -485,7 +494,7 @@ if __name__ == "__main__":
        proxies=None,  # Optional: Use proxies
        verify=True  # Optional: SSL verification
    )
-
+
    # Text Search
    print("TEXT SEARCH RESULTS:")
    text_results = google.text(
@@ -499,7 +508,7 @@ if __name__ == "__main__":
        print(f"URL: {result.url}")
        print(f"Description: {result.description}")
        print("---")
-
+
    # News Search
    print("\nNEWS SEARCH RESULTS:")
    news_results = google.news(
@@ -513,8 +522,8 @@ if __name__ == "__main__":
        print(f"URL: {result.url}")
        print(f"Description: {result.description}")
        print("---")
-
+
    # Search Suggestions
    print("\nSEARCH SUGGESTIONS:")
    suggestions = google.suggestions("how to")
-    print(suggestions)
+    print(suggestions)
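Apart from the import reshuffle at the top, the DWEBS.py hunks are almost entirely trailing-whitespace normalization. For orientation, the module's own __main__ block (kept in the diff) drives the API as follows; the query strings here are illustrative, not taken from the source:

from webscout.DWEBS import GoogleSearch

google = GoogleSearch(proxies=None, verify=True)

# text() returns SearchResult objects carrying .url, .title and .description
for result in google.text(keywords="web scraping", max_results=5):
    print(result.title, "->", result.url)

print(google.suggestions("how to"))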
webscout/Provider/{UNFINISHED → AISEARCH}/PERPLEXED_search.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Any, Dict, Generator, Optional, Union
 from webscout.AIbase import AISearch, SearchResponse
 from webscout import exceptions
 from webscout.litagent import LitAgent
-from webscout.
+from webscout.sanitize import sanitize_stream
 
 
 class PERPLEXED(AISearch):
@@ -141,27 +141,13 @@ class PERPLEXED(AISearch):
 
         def extract_answer_content(data):
             """Extract answer content from PERPLEXED response."""
-            print(f"[DEBUG] extract_answer_content received: {type(data)}")
             if isinstance(data, dict):
-                print(f"[DEBUG] Dict keys: {list(data.keys())}")
-                print(f"[DEBUG] success: {data.get('success')}")
-                print(f"[DEBUG] stage: {data.get('stage')}")
-                print(f"[DEBUG] answer present: {'answer' in data}")
-                answer_val = data.get('answer', 'NOT_FOUND')
-                print(f"[DEBUG] answer value: {repr(answer_val[:100] if isinstance(answer_val, str) and len(answer_val) > 100 else answer_val)}")
-
                 # Check if this is the final answer - answer field exists and is not empty
                 if data.get("success") and "answer" in data and data["answer"]:
-                    print(f"[DEBUG] Returning answer content (length: {len(data['answer'])})")
                     return data["answer"]
                 # Check if this is a stage update with no answer yet
                 elif data.get("success") and data.get("stage"):
-                    print(f"[DEBUG] Skipping stage update: {data.get('stage')}")
                     return None  # Skip stage updates without answers
-                else:
-                    print(f"[DEBUG] No matching condition, returning None")
-            else:
-                print(f"[DEBUG] Data is not dict, returning None")
             return None
 
         def for_stream():
@@ -178,77 +164,51 @@
                         f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}"
                     )
 
-                #
-
-
-
-
-
-
-
+                # Use sanitize_stream directly with response iterator
+                processed_chunks = sanitize_stream(
+                    data=response.iter_lines(decode_unicode=True),  # Pass iterator directly
+                    intro_value="",  # No prefix to remove
+                    to_json=True,  # Parse each chunk as JSON
+                    content_extractor=lambda chunk: SearchResponse(extract_answer_content(chunk)) if extract_answer_content(chunk) else None,
+                    yield_raw_on_error=False,  # Skip invalid JSON chunks
+                    line_delimiter="[/PERPLEXED-SEPARATOR]",  # Use PERPLEXED separator to split chunks
+                    skip_markers=[],  # No specific markers to skip
+                    raw=raw  # Let sanitize_stream handle raw mode automatically
+                )
 
-
-
-
-
-                        chunk_data = json.loads(chunk_text.strip())
-
-                        if raw:
-                            # For raw mode, yield the entire JSON string
-                            yield {"text": chunk_text.strip()}
-                        else:
-                            # For non-raw mode, extract the answer if available
-                            answer_content = extract_answer_content(chunk_data)
-                            if answer_content:
-                                yield SearchResponse(answer_content)
-
-                    except json.JSONDecodeError:
-                        # Skip invalid JSON chunks
-                        continue
+                # Yield results from sanitize_stream - it handles raw/non-raw automatically
+                for processed_chunk in processed_chunks:
+                    if processed_chunk is not None:
+                        yield processed_chunk
 
             except requests.exceptions.RequestException as e:
                 raise exceptions.APIConnectionError(f"Request failed: {e}")
 
         def for_non_stream():
-
-
-
-
-
-
-
-            for chunk in for_stream():
-                full_response += str(chunk)
-
-            if full_response:
-                self.last_response = SearchResponse(full_response)
-            else:
-                # Return empty response if no content was extracted
-                self.last_response = SearchResponse("")
-
-            return self.last_response
-
+            full_response = ""
+            for chunk in for_stream():
+                full_response += str(chunk)
+
+            self.last_response = SearchResponse(full_response)
+            return self.last_response
+
         if stream:
             return for_stream()
         else:
-
-
-
-
-
-
-                return list(result)[0] if list(result) else SearchResponse("")
-            except:
-                return SearchResponse("")
-            return result
+            if raw:
+                # For raw non-streaming, we need to yield each chunk individually
+                return for_stream()
+            else:
+                # For regular non-streaming, accumulate and return complete response
+                return for_non_stream()
 
 
 if __name__ == "__main__":
-
+
     ai = PERPLEXED()
 
     # Test with raw=False to see debug output
-    print("=== Testing with raw=
-    response = ai.search(input(">>> "), stream=
-
-
+    print("=== Testing with raw=True ===")
+    response = ai.search(input(">>> "), stream=True, raw=True)
+    for chunks in response:
+        print(chunks, end="", flush=True)
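The refactor above swaps a hand-rolled json.loads loop for webscout's sanitize_stream helper. As a rough mental model only, here is a hedged sketch of the non-raw path implied by the parameters used above; it is not the library's actual implementation:

import json
from typing import Callable, Iterable, Iterator, Optional

def sanitize_stream_sketch(
    data: Iterable[str],
    line_delimiter: str,
    content_extractor: Callable[[dict], Optional[object]],
) -> Iterator[object]:
    """Buffer incoming lines, split on the delimiter, JSON-decode each
    chunk, and yield whatever the extractor pulls out (skipping bad
    JSON, as yield_raw_on_error=False requests)."""
    buffer = ""
    for line in data:
        buffer += line
        while line_delimiter in buffer:
            chunk_text, buffer = buffer.split(line_delimiter, 1)
            try:
                chunk = json.loads(chunk_text.strip())
            except json.JSONDecodeError:
                continue  # skip invalid chunks
            extracted = content_extractor(chunk)
            if extracted is not None:
                yield extracted
    # (a real implementation would also flush any trailing buffered chunk)

This also explains why extract_answer_content no longer prints: it now runs inside the extractor callback on every chunk, where debug output would interleave with the streamed answer.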
webscout/Provider/AISEARCH/__init__.py
CHANGED
@@ -1,11 +1,18 @@
-
-
-
-
-from
-
-
-
-
-
-
+# This file marks the directory as a Python package.
+
+import os
+import importlib
+from pathlib import Path
+
+# Get current directory
+current_dir = Path(__file__).parent
+
+# Auto-import all .py files (except __init__.py)
+for file_path in current_dir.glob("*.py"):
+    if file_path.name != "__init__.py":
+        module_name = file_path.stem
+        try:
+            module = importlib.import_module(f".{module_name}", package=__name__)
+            globals().update(vars(module))
+        except ImportError:
+            pass  # Skip files that can't be imported
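The rewritten __init__.py above hoists every name defined by the package's modules into the package namespace. Assuming the module imports cleanly, the PERPLEXED class relocated into this package becomes reachable directly:

# Names from AISEARCH submodules are re-exported at package level.
from webscout.Provider.AISEARCH import PERPLEXED

ai = PERPLEXED()

The except ImportError: pass guard is the notable design choice: a submodule with a missing optional dependency silently drops out of the namespace instead of breaking the whole package import.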
webscout/Provider/AISEARCH/scira_search.py
CHANGED
@@ -56,6 +56,8 @@ class Scira(AISearch):
         "scira-o3": "o3",
         "scira-qwen-32b": "qwen/qwen3-32b",
         "scira-qwen-30b": "qwen3-30b-a3b",
+        "scira-qwen-4b": "qwen3-4b",
+        "scira-qwen-4b-thinking": "qwen3-4b-thinking",
         "scira-deepseek-v3": "deepseek-v3-0324",
         "scira-haiku": "claude-3-5-haiku-20241022",
         "scira-mistral": "mistral-small-latest",
@@ -66,7 +68,7 @@ class Scira(AISearch):
         "scira-anthropic-thinking": "claude-sonnet-4-20250514",
         "scira-opus": "claude-4-opus-20250514",
         "scira-opus-pro": "claude-4-opus-20250514",
-        "scira-llama-4": "
+        "scira-llama-4": "llama-4-maverick",
         "scira-kimi-k2": "kimi-k2-instruct",
         "kimi-k2-instruct": "scira-kimi-k2",
     }
webscout/Provider/Aitopia.py
CHANGED
@@ -1,6 +1,5 @@
 from curl_cffi import CurlError
 from curl_cffi.requests import Session
-import json
 import uuid
 import time
 import hashlib
@@ -9,7 +8,7 @@ from typing import Any, Dict, Optional, Generator, Union
 from webscout.AIutel import Optimizers
 from webscout.AIutel import Conversation, sanitize_stream  # Import sanitize_stream
 from webscout.AIutel import AwesomePrompts
-from webscout.AIbase import Provider
+from webscout.AIbase import Provider
 from webscout import exceptions
 from webscout.litagent import LitAgent
 
@@ -17,7 +16,7 @@ class Aitopia(Provider):
     """
     A class to interact with the Aitopia API with LitAgent user-agent.
     """
-
+    required_auth = True
     AVAILABLE_MODELS = [
         "Claude 3 Haiku",
         "GPT-4o Mini",
webscout/Provider/Andi.py
CHANGED
@@ -3,14 +3,14 @@ import requests
 import json
 from webscout.AIutel import Optimizers
 from webscout.AIutel import Conversation
-from webscout.AIutel import AwesomePrompts
-from webscout.AIbase import Provider
+from webscout.AIutel import AwesomePrompts
+from webscout.AIbase import Provider
 from webscout import exceptions
-from typing import Union, Any, AsyncGenerator, Dict
 from webscout import WEBS
 from webscout.litagent import LitAgent
 
 class AndiSearch(Provider):
+    required_auth = False
     def __init__(
         self,
         is_conversation: bool = True,

webscout/Provider/ChatGPTClone.py
CHANGED
@@ -20,7 +20,7 @@ class ChatGPTClone(Provider):
     ChatGPTClone is a provider class for interacting with the ChatGPT Clone API.
     Supports streaming responses.
     """
-
+    required_auth = False
     url = "https://chatgpt-clone-ten-nu.vercel.app"
     AVAILABLE_MODELS = ["gpt-4", "gpt-3.5-turbo"]
     SUPPORTED_IMPERSONATION = [