webscout 8.3.4__py3-none-any.whl → 8.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of webscout might be problematic.
- webscout/AIutel.py +52 -1016
- webscout/Bard.py +12 -6
- webscout/DWEBS.py +66 -57
- webscout/Provider/AISEARCH/PERPLEXED_search.py +214 -0
- webscout/Provider/AISEARCH/__init__.py +11 -10
- webscout/Provider/AISEARCH/felo_search.py +7 -3
- webscout/Provider/AISEARCH/scira_search.py +2 -0
- webscout/Provider/AISEARCH/stellar_search.py +53 -8
- webscout/Provider/Deepinfra.py +13 -1
- webscout/Provider/Flowith.py +6 -1
- webscout/Provider/GithubChat.py +1 -0
- webscout/Provider/GptOss.py +207 -0
- webscout/Provider/Kimi.py +445 -0
- webscout/Provider/Netwrck.py +3 -6
- webscout/Provider/OPENAI/README.md +2 -1
- webscout/Provider/OPENAI/TogetherAI.py +12 -8
- webscout/Provider/OPENAI/TwoAI.py +94 -1
- webscout/Provider/OPENAI/__init__.py +4 -4
- webscout/Provider/OPENAI/copilot.py +20 -4
- webscout/Provider/OPENAI/deepinfra.py +12 -0
- webscout/Provider/OPENAI/e2b.py +60 -8
- webscout/Provider/OPENAI/flowith.py +4 -3
- webscout/Provider/OPENAI/generate_api_key.py +48 -0
- webscout/Provider/OPENAI/gptoss.py +288 -0
- webscout/Provider/OPENAI/kimi.py +469 -0
- webscout/Provider/OPENAI/netwrck.py +8 -12
- webscout/Provider/OPENAI/refact.py +274 -0
- webscout/Provider/OPENAI/scirachat.py +4 -0
- webscout/Provider/OPENAI/textpollinations.py +11 -10
- webscout/Provider/OPENAI/toolbaz.py +1 -0
- webscout/Provider/OPENAI/venice.py +1 -0
- webscout/Provider/Perplexitylabs.py +163 -147
- webscout/Provider/Qodo.py +30 -6
- webscout/Provider/TTI/__init__.py +1 -0
- webscout/Provider/TTI/bing.py +14 -2
- webscout/Provider/TTI/together.py +11 -9
- webscout/Provider/TTI/venice.py +368 -0
- webscout/Provider/TTS/README.md +0 -1
- webscout/Provider/TTS/__init__.py +0 -1
- webscout/Provider/TTS/base.py +479 -159
- webscout/Provider/TTS/deepgram.py +409 -156
- webscout/Provider/TTS/elevenlabs.py +425 -111
- webscout/Provider/TTS/freetts.py +317 -140
- webscout/Provider/TTS/gesserit.py +192 -128
- webscout/Provider/TTS/murfai.py +248 -113
- webscout/Provider/TTS/openai_fm.py +347 -129
- webscout/Provider/TTS/speechma.py +620 -586
- webscout/Provider/TextPollinationsAI.py +11 -10
- webscout/Provider/TogetherAI.py +12 -4
- webscout/Provider/TwoAI.py +96 -2
- webscout/Provider/TypliAI.py +33 -27
- webscout/Provider/UNFINISHED/VercelAIGateway.py +339 -0
- webscout/Provider/UNFINISHED/fetch_together_models.py +6 -11
- webscout/Provider/Venice.py +1 -0
- webscout/Provider/WiseCat.py +18 -20
- webscout/Provider/__init__.py +2 -96
- webscout/Provider/cerebras.py +83 -33
- webscout/Provider/copilot.py +42 -23
- webscout/Provider/scira_chat.py +4 -0
- webscout/Provider/toolbaz.py +6 -10
- webscout/Provider/typefully.py +1 -11
- webscout/__init__.py +3 -15
- webscout/auth/__init__.py +19 -4
- webscout/auth/api_key_manager.py +189 -189
- webscout/auth/auth_system.py +25 -40
- webscout/auth/config.py +105 -6
- webscout/auth/database.py +377 -22
- webscout/auth/models.py +185 -130
- webscout/auth/request_processing.py +175 -11
- webscout/auth/routes.py +99 -2
- webscout/auth/server.py +9 -2
- webscout/auth/simple_logger.py +236 -0
- webscout/conversation.py +22 -20
- webscout/sanitize.py +1078 -0
- webscout/scout/README.md +20 -23
- webscout/scout/core/crawler.py +125 -38
- webscout/scout/core/scout.py +26 -5
- webscout/version.py +1 -1
- webscout/webscout_search.py +13 -6
- webscout/webscout_search_async.py +10 -8
- webscout/yep_search.py +13 -5
- {webscout-8.3.4.dist-info → webscout-8.3.6.dist-info}/METADATA +10 -149
- {webscout-8.3.4.dist-info → webscout-8.3.6.dist-info}/RECORD +88 -87
- webscout/Provider/Glider.py +0 -225
- webscout/Provider/OPENAI/README_AUTOPROXY.md +0 -238
- webscout/Provider/OPENAI/c4ai.py +0 -394
- webscout/Provider/OPENAI/glider.py +0 -330
- webscout/Provider/OPENAI/typegpt.py +0 -368
- webscout/Provider/OPENAI/uncovrAI.py +0 -477
- webscout/Provider/TTS/sthir.py +0 -94
- webscout/Provider/WritingMate.py +0 -273
- webscout/Provider/typegpt.py +0 -284
- webscout/Provider/uncovr.py +0 -333
- /webscout/Provider/{samurai.py → UNFINISHED/samurai.py} +0 -0
- {webscout-8.3.4.dist-info → webscout-8.3.6.dist-info}/WHEEL +0 -0
- {webscout-8.3.4.dist-info → webscout-8.3.6.dist-info}/entry_points.txt +0 -0
- {webscout-8.3.4.dist-info → webscout-8.3.6.dist-info}/licenses/LICENSE.md +0 -0
- {webscout-8.3.4.dist-info → webscout-8.3.6.dist-info}/top_level.txt +0 -0
webscout/Bard.py
CHANGED

@@ -8,24 +8,30 @@ import os
 import random
 import re
 import string
+from datetime import datetime
 from enum import Enum
 from pathlib import Path
-from
-from typing import Dict, List, Tuple, Union, Optional
+from typing import Dict, List, Optional, Tuple, Union
 
 # Use curl_cffi for requests
+# Import trio before curl_cffi to prevent eventlet socket monkey-patching conflicts
+# See: https://github.com/python-trio/trio/issues/3015
+try:
+    import trio  # noqa: F401
+except ImportError:
+    pass  # trio is optional, ignore if not available
 from curl_cffi import CurlError
 from curl_cffi.requests import AsyncSession
-# Import common request exceptions (curl_cffi often wraps these)
-from requests.exceptions import RequestException, Timeout, HTTPError
 
 # For image models using validation. Adjust based on organization internal pydantic.
 # Updated import for Pydantic V2
 from pydantic import BaseModel, field_validator
 
+# Import common request exceptions (curl_cffi often wraps these)
+from requests.exceptions import HTTPError, RequestException, Timeout
+
 # Rich is retained for logging within image methods.
 from rich.console import Console
-from rich.markdown import Markdown
 
 console = Console()
 
@@ -875,7 +881,7 @@ class Image(BaseModel):
         # Generate filename from URL if not provided
         if not filename:
             try:
-                from urllib.parse import
+                from urllib.parse import unquote, urlparse
                 parsed_url = urlparse(self.url)
                 base_filename = os.path.basename(unquote(parsed_url.path))
                 # Remove invalid characters for filenames
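A note on the new import guard above: webscout/DWEBS.py (next file) adds the identical block, and its only effect is import ordering — when trio is installed, importing it before curl_cffi avoids the eventlet socket monkey-patching conflict tracked in https://github.com/python-trio/trio/issues/3015. A minimal sketch of the pattern in isolation (all imports taken from this diff; nothing beyond them is implied):

    # Import trio (if present) before curl_cffi so that eventlet's socket
    # monkey-patching, if active, is reconciled first.
    # See: https://github.com/python-trio/trio/issues/3015
    try:
        import trio  # noqa: F401
    except ImportError:
        pass  # trio is optional; without it the guard is a no-op

    from curl_cffi import CurlError
    from curl_cffi.requests import AsyncSession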
webscout/DWEBS.py
CHANGED

@@ -3,16 +3,25 @@ DWEBS - A Google search library with advanced features
 """
 import random
 from time import sleep
+
 from webscout.scout import Scout
-
-
-
+
+# Import trio before curl_cffi to prevent eventlet socket monkey-patching conflicts
+# See: https://github.com/python-trio/trio/issues/3015
+try:
+    import trio  # noqa: F401
+except ImportError:
+    pass  # trio is optional, ignore if not available
 from concurrent.futures import ThreadPoolExecutor
+from typing import Any, Dict, List, Optional
+from urllib.parse import unquote, urlencode
+
+from curl_cffi.requests import Session
 
 
 class SearchResult:
     """Class to represent a search result with metadata."""
-
+
     def __init__(self, url: str, title: str, description: str):
         """
         Initialize a search result.
@@ -96,7 +105,7 @@ class GoogleSearch:
         ssl_mm_version = f"SSL-MM/{random.randint(1, 2)}.{random.randint(3, 5)}"
         openssl_version = f"OpenSSL/{random.randint(1, 3)}.{random.randint(0, 4)}.{random.randint(0, 9)}"
         return f"{lynx_version} {libwww_version} {ssl_mm_version} {openssl_version}"
-
+
    def _make_request(self, term: str, results: int, start: int = 0, search_type: str = None) -> str:
        """
        Make a request to Google search.
@@ -116,11 +125,11 @@
             "hl": self.lang,
             "start": start,
         }
-
+
         # Add search type if specified
         if search_type:
             params["tbm"] = search_type
-
+
         try:
             # Use the curl_cffi session
             resp = self.session.get(
@@ -137,7 +146,7 @@
                 raise RuntimeError(f"Search request failed with status {e.response.status_code}: {str(e)}")
             else:
                 raise RuntimeError(f"Search request failed: {str(e)}")
-
+
    def _extract_url(self, raw_link: str) -> Optional[str]:
        """
        Extract actual URL from Google redirect URL.
@@ -150,7 +159,7 @@
        """
        if not raw_link:
            return None
-
+
        if raw_link.startswith("/url?"):
            try:
                link = unquote(raw_link.split("&")[0].replace("/url?q=", ""))
@@ -159,9 +168,9 @@
                return None
        elif raw_link.startswith("http"):
            return unquote(raw_link)
-
+
        return None
-
+
    def _is_valid_result(self, link: str, fetched_links: set, unique: bool) -> bool:
        """
        Check if search result is valid.
@@ -176,12 +185,12 @@
        """
        if any(x in link for x in ["google.", "/search?", "webcache."]):
            return False
-
+
        if link in fetched_links and unique:
            return False
-
+
        return True
-
+
    def _parse_search_results(
        self,
        html: str,
@@ -204,11 +213,11 @@
        results = []
        soup = Scout(html, features="html.parser")
        result_blocks = soup.find_all("div", class_="ezO2md")
-
+
        if not result_blocks:
            # Try alternative class patterns if the main one doesn't match
            result_blocks = soup.find_all("div", attrs={"class": lambda c: c and "g" in c.split()})
-
+
        for result in result_blocks:
            # Find the link - looking for various potential Google result classes
            link_tag = result.find("a", class_=["fuLhoc", "ZWRArf"])
@@ -216,10 +225,10 @@
                link_tag = result.find("a")
            if not link_tag:
                continue
-
+
            raw_link = link_tag.get("href", "")
            link = self._extract_url(raw_link)
-
+
            if not link:
                continue
 
@@ -235,32 +244,32 @@
            description_tag = result.find("span", class_="FrIlee")
            if not description_tag:
                description_tag = result.find(["div", "span"], class_=lambda c: c and any(x in c for x in ["snippet", "description", "VwiC3b"]))
-
+
            description = description_tag.get_text(strip=True) if description_tag else ""
 
            # Create result object
            search_result = SearchResult(link, title, description)
-
+
            # Add extra metadata if available
            citation = result.find("cite")
            if citation:
                search_result.metadata["source"] = citation.get_text(strip=True)
-
+
            timestamp = result.find("span", class_=lambda c: c and "ZE5qJf" in c)
            if timestamp:
                search_result.metadata["date"] = timestamp.get_text(strip=True)
 
            fetched_links.add(link)
            results.append(search_result)
-
+
            if len(results) >= num_results:
                break
-
+
        return results
-
+
    def text(
-        self,
-        keywords: str,
+        self,
+        keywords: str,
        region: str = None,
        safesearch: str = "moderate",
        max_results: int = 10,
@@ -283,7 +292,7 @@ class GoogleSearch:
        """
        if not keywords:
            raise ValueError("Search keywords cannot be empty")
-
+
        # Map safesearch values to Google's safe parameter
        safe_map = {
            "on": "active",
@@ -291,12 +300,12 @@
            "off": "off"
        }
        safe = safe_map.get(safesearch.lower(), "moderate")
-
+
        # Keep track of unique results
        fetched_results = []
        fetched_links = set()
        start = start_num
-
+
        while len(fetched_results) < max_results:
            # Add safe search parameter to the request
            # Note: This modifies the session params for this specific request type
@@ -307,33 +316,33 @@ class GoogleSearch:
 
            response_html = self._make_request(
                term=term_with_safe,  # Pass term with safe search
-                results=max_results - len(fetched_results),
+                results=max_results - len(fetched_results),
                start=start
            )
-
+
            results = self._parse_search_results(
                html=response_html,
                num_results=max_results - len(fetched_results),
                fetched_links=fetched_links,
                unique=unique
            )
-
+
            if not results:
                break
-
+
            fetched_results.extend(results)
-
+
            if len(fetched_results) >= max_results:
                break
-
+
            start += 10  # Google typically uses increments of 10
            sleep(self.sleep_interval)
-
+
        return fetched_results[:max_results]
-
+
    def news(
-        self,
-        keywords: str,
+        self,
+        keywords: str,
        region: str = None,
        safesearch: str = "moderate",
        max_results: int = 10
@@ -352,7 +361,7 @@ class GoogleSearch:
        """
        if not keywords:
            raise ValueError("Search keywords cannot be empty")
-
+
        # Map safesearch values to Google's safe parameter
        safe_map = {
            "on": "active",
@@ -360,7 +369,7 @@
            "off": "off"
        }
        safe = safe_map.get(safesearch.lower(), "moderate")
-
+
        # Keep track of unique results
        fetched_links = set()
 
@@ -374,16 +383,16 @@
            results=max_results,
            search_type="nws"
        )
-
+
        results = self._parse_search_results(
            html=response_html,
            num_results=max_results,
            fetched_links=fetched_links,
            unique=True  # News results are generally unique per request
        )
-
+
        return results[:max_results]
-
+
    def suggestions(self, query: str, region: str = None) -> List[str]:
        """
        Get search suggestions for a query term.
@@ -397,26 +406,26 @@
        """
        if not query:
            raise ValueError("Search query cannot be empty")
-
+
        try:
            params = {
                "client": "firefox",
                "q": query,
            }
-
+
            # Add region if specified
            if region and region.lower() != "all":
                params["gl"] = region
-
+
            url = f"https://www.google.com/complete/search?{urlencode(params)}"
-
+
            # Use a simpler header set for the suggestions API
            headers = {
                "User-Agent": self._get_useragent(),
                "Accept": "application/json, text/javascript, */*",
                "Accept-Language": self.lang,
            }
-
+
            # Use session.get but override headers for this specific request
            response = self.session.get(
                url=url,
@@ -425,13 +434,13 @@
                # timeout and verify are handled by session
            )
            response.raise_for_status()
-
+
            # Response format is typically: ["original query", ["suggestion1", "suggestion2", ...]]
            data = response.json()
            if isinstance(data, list) and len(data) > 1 and isinstance(data[1], list):
                return data[1]
            return []
-
+
        except Exception as e:
            # Provide more specific error context if possible
            if hasattr(e, 'response') and e.response is not None:
@@ -454,7 +463,7 @@ def search(term, num_results=10, lang="en", proxy=None, advanced=False, sleep_in
        sleep_interval=sleep_interval,
        impersonate=impersonate  # Pass impersonate
    )
-
+
    # Map legacy safe values
    safe_search_map = {
        "active": "on",
@@ -471,7 +480,7 @@ def search(term, num_results=10, lang="en", proxy=None, advanced=False, sleep_in
        start_num=start_num,
        unique=unique
    )
-
+
    # Convert to simple URLs if not advanced mode
    if not advanced:
        return [result.url for result in results]
@@ -485,7 +494,7 @@ if __name__ == "__main__":
        proxies=None,  # Optional: Use proxies
        verify=True  # Optional: SSL verification
    )
-
+
    # Text Search
    print("TEXT SEARCH RESULTS:")
    text_results = google.text(
@@ -499,7 +508,7 @@ if __name__ == "__main__":
        print(f"URL: {result.url}")
        print(f"Description: {result.description}")
        print("---")
-
+
    # News Search
    print("\nNEWS SEARCH RESULTS:")
    news_results = google.news(
@@ -513,8 +522,8 @@ if __name__ == "__main__":
        print(f"URL: {result.url}")
        print(f"Description: {result.description}")
        print("---")
-
+
    # Search Suggestions
    print("\nSEARCH SUGGESTIONS:")
    suggestions = google.suggestions("how to")
-    print(suggestions)
+    print(suggestions)
webscout/Provider/AISEARCH/PERPLEXED_search.py
ADDED

@@ -0,0 +1,214 @@
+import requests
+import json
+from typing import Any, Dict, Generator, Optional, Union
+
+from webscout.AIbase import AISearch, SearchResponse
+from webscout import exceptions
+from webscout.litagent import LitAgent
+from webscout.sanitize import sanitize_stream
+
+
+class PERPLEXED(AISearch):
+    """A class to interact with the PERPLEXED stream search API.
+
+    PERPLEXED provides an AI-powered search interface that returns emotionally intelligent
+    responses based on web content. It supports both streaming and non-streaming responses.
+
+    Basic Usage:
+        >>> from webscout import PERPLEXED
+        >>> ai = PERPLEXED()
+        >>> # Non-streaming example
+        >>> response = ai.search("What is Python?")
+        >>> print(response)
+        Python is a high-level programming language...
+
+        >>> # Streaming example
+        >>> for chunk in ai.search("Tell me about AI", stream=True):
+        ...     print(chunk, end="", flush=True)
+        Artificial Intelligence is...
+
+        >>> # Raw response format
+        >>> for chunk in ai.search("Hello", stream=True, raw=True):
+        ...     print(chunk)
+        {'text': 'Hello'}
+        {'text': ' there!'}
+
+    Args:
+        timeout (int, optional): Request timeout in seconds. Defaults to 30.
+        proxies (dict, optional): Proxy configuration for requests. Defaults to None.
+
+    Attributes:
+        api_endpoint (str): The PERPLEXED API endpoint URL.
+        stream_chunk_size (int): Size of chunks when streaming responses.
+        timeout (int): Request timeout in seconds.
+        headers (dict): HTTP headers used in requests.
+    """
+
+    def __init__(
+        self,
+        timeout: int = 30,
+        proxies: Optional[dict] = None,
+    ):
+        """Initialize the PERPLEXED API client.
+
+        Args:
+            timeout (int, optional): Request timeout in seconds. Defaults to 30.
+            proxies (dict, optional): Proxy configuration for requests. Defaults to None.
+
+        Example:
+            >>> ai = PERPLEXED(timeout=60)  # Longer timeout
+            >>> ai = PERPLEXED(proxies={'http': 'http://proxy.com:8080'})  # With proxy
+        """
+        self.session = requests.Session()
+        self.api_endpoint = "https://d21l5c617zttgr.cloudfront.net/stream_search"
+        self.stream_chunk_size = 64
+        self.timeout = timeout
+        self.last_response = {}
+        self.headers = {
+            "accept": "*/*",
+            "accept-encoding": "gzip, deflate, br, zstd",
+            "accept-language": "en-US,en;q=0.9,en-IN;q=0.8",
+            "content-type": "application/json",
+            "dnt": "1",
+            "origin": "https://d37ozmhmvu2kcg.cloudfront.net",
+            "referer": "https://d37ozmhmvu2kcg.cloudfront.net/",
+            "sec-ch-ua": '"Not)A;Brand";v="8", "Chromium";v="138", "Microsoft Edge";v="138"',
+            "sec-ch-ua-mobile": "?0",
+            "sec-ch-ua-platform": '"Windows"',
+            "sec-fetch-dest": "empty",
+            "sec-fetch-mode": "cors",
+            "sec-fetch-site": "cross-site",
+            "sec-gpc": "1",
+            "user-agent": LitAgent().random()
+        }
+        self.session.headers.update(self.headers)
+        self.proxies = proxies
+
+    def search(
+        self,
+        prompt: str,
+        stream: bool = False,
+        raw: bool = False,
+    ) -> Union[SearchResponse, Generator[Union[Dict[str, str], SearchResponse], None, None]]:
+        """Search using the PERPLEXED API and get AI-generated responses.
+
+        This method sends a search query to PERPLEXED and returns the AI-generated response.
+        It supports both streaming and non-streaming modes, as well as raw response format.
+
+        Args:
+            prompt (str): The search query or prompt to send to the API.
+            stream (bool, optional): If True, yields response chunks as they arrive.
+                If False, returns complete response. Defaults to False.
+            raw (bool, optional): If True, returns raw response dictionaries with 'text' key.
+                If False, returns SearchResponse objects that convert to text automatically.
+                Defaults to False.
+
+        Returns:
+            Union[SearchResponse, Generator[Union[Dict[str, str], SearchResponse], None, None]]:
+                - If stream=False: Returns complete response as SearchResponse object
+                - If stream=True: Yields response chunks as either Dict or SearchResponse objects
+
+        Raises:
+            APIConnectionError: If the API request fails
+
+        Examples:
+            Basic search:
+            >>> ai = PERPLEXED()
+            >>> response = ai.search("What is Python?")
+            >>> print(response)
+            Python is a programming language...
+
+            Streaming response:
+            >>> for chunk in ai.search("Tell me about AI", stream=True):
+            ...     print(chunk, end="")
+            Artificial Intelligence...
+
+            Raw response format:
+            >>> for chunk in ai.search("Hello", stream=True, raw=True):
+            ...     print(chunk)
+            {'text': 'Hello'}
+            {'text': ' there!'}
+
+            Error handling:
+            >>> try:
+            ...     response = ai.search("My question")
+            ... except exceptions.APIConnectionError as e:
+            ...     print(f"API error: {e}")
+        """
+        payload = {
+            "user_prompt": prompt
+        }
+
+        def extract_answer_content(data):
+            """Extract answer content from PERPLEXED response."""
+            if isinstance(data, dict):
+                # Check if this is the final answer - answer field exists and is not empty
+                if data.get("success") and "answer" in data and data["answer"]:
+                    return data["answer"]
+                # Check if this is a stage update with no answer yet
+                elif data.get("success") and data.get("stage"):
+                    return None  # Skip stage updates without answers
+            return None
+
+        def for_stream():
+            try:
+                with self.session.post(
+                    self.api_endpoint,
+                    json=payload,
+                    stream=True,
+                    timeout=self.timeout,
+                    proxies=self.proxies,
+                ) as response:
+                    if not response.ok:
+                        raise exceptions.APIConnectionError(
+                            f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}"
+                        )
+
+                    # Use sanitize_stream directly with response iterator
+                    processed_chunks = sanitize_stream(
+                        data=response.iter_lines(decode_unicode=True),  # Pass iterator directly
+                        intro_value="",  # No prefix to remove
+                        to_json=True,  # Parse each chunk as JSON
+                        content_extractor=lambda chunk: SearchResponse(extract_answer_content(chunk)) if extract_answer_content(chunk) else None,
+                        yield_raw_on_error=False,  # Skip invalid JSON chunks
+                        line_delimiter="[/PERPLEXED-SEPARATOR]",  # Use PERPLEXED separator to split chunks
+                        skip_markers=[],  # No specific markers to skip
+                        raw=raw  # Let sanitize_stream handle raw mode automatically
+                    )
+
+                    # Yield results from sanitize_stream - it handles raw/non-raw automatically
+                    for processed_chunk in processed_chunks:
+                        if processed_chunk is not None:
+                            yield processed_chunk
+
+            except requests.exceptions.RequestException as e:
+                raise exceptions.APIConnectionError(f"Request failed: {e}")
+
+        def for_non_stream():
+            full_response = ""
+            for chunk in for_stream():
+                full_response += str(chunk)
+
+            self.last_response = SearchResponse(full_response)
+            return self.last_response
+
+        if stream:
+            return for_stream()
+        else:
+            if raw:
+                # For raw non-streaming, we need to yield each chunk individually
+                return for_stream()
+            else:
+                # For regular non-streaming, accumulate and return complete response
+                return for_non_stream()
+
+
+if __name__ == "__main__":
+
+    ai = PERPLEXED()
+
+    # Test with raw=False to see debug output
+    print("=== Testing with raw=True ===")
+    response = ai.search(input(">>> "), stream=True, raw=True)
+    for chunks in response:
+        print(chunks, end="", flush=True)
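The new provider's streaming path leans on webscout.sanitize.sanitize_stream (webscout/sanitize.py is itself new in this release): the upstream endpoint separates JSON objects with a literal [/PERPLEXED-SEPARATOR] marker rather than newlines, and only chunks whose "answer" field is non-empty carry final content, while "stage" updates are skipped. A library-independent sketch of that split-then-parse step, assuming a fully buffered response body (the separator and payload shape come from the diff; the function name is illustrative):

    import json
    from typing import Iterator

    SEPARATOR = "[/PERPLEXED-SEPARATOR]"  # delimiter used by the PERPLEXED stream

    def iter_perplexed_answers(body: str) -> Iterator[str]:
        """Yield final answer chunks from a separator-delimited JSON stream."""
        for piece in body.split(SEPARATOR):
            piece = piece.strip()
            if not piece:
                continue
            try:
                data = json.loads(piece)
            except json.JSONDecodeError:
                continue  # mirrors yield_raw_on_error=False: drop invalid JSON
            # Stage updates have success=True but no populated "answer"; skip
            # them, as extract_answer_content does above.
            if isinstance(data, dict) and data.get("success") and data.get("answer"):
                yield data["answer"]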
webscout/Provider/AISEARCH/__init__.py
CHANGED

@@ -1,10 +1,11 @@
-from .stellar_search import *
-from .felo_search import *
-from .DeepFind import *
-from .genspark_search import *
-from .monica_search import *
-from .webpilotai_search import *
-from .hika_search import *
-from .scira_search import *
-from .iask_search import *
-from .Perplexity import *
+from .stellar_search import *
+from .felo_search import *
+from .DeepFind import *
+from .genspark_search import *
+from .monica_search import *
+from .webpilotai_search import *
+from .hika_search import *
+from .scira_search import *
+from .iask_search import *
+from .Perplexity import *
+from .PERPLEXED_search import *
webscout/Provider/AISEARCH/felo_search.py
CHANGED

@@ -69,7 +69,7 @@ class Felo(AISearch):
            "accept-encoding": "gzip, deflate, br, zstd",
            "accept-language": "en-US,en;q=0.9,en-IN;q=0.8",
            "content-type": "application/json",
-            "cookie": "_clck=1gifk45%7C2%7Cfoa%7C0%7C1686; _clsk=1g5lv07%7C1723558310439%7C1%7C1%7Cu.clarity.ms%2Fcollect; _ga=GA1.1.877307181.1723558313; _ga_8SZPRV97HV=GS1.1.1723558313.1.1.1723558341.0.0.0; _ga_Q9Q1E734CC=GS1.1.1723558313.1.1.1723558341.0.0.0",
+            # "cookie": "_clck=1gifk45%7C2%7Cfoa%7C0%7C1686; _clsk=1g5lv07%7C1723558310439%7C1%7C1%7Cu.clarity.ms%2Fcollect; _ga=GA1.1.877307181.1723558313; _ga_8SZPRV97HV=GS1.1.1723558313.1.1.1723558341.0.0.0; _ga_Q9Q1E734CC=GS1.1.1723558313.1.1.1723558341.0.0.0",
            "dnt": "1",
            "origin": "https://felo.ai",
            "referer": "https://felo.ai/",
@@ -141,10 +141,14 @@ class Felo(AISearch):
            "lang": "",
            "agent_lang": "en",
            "search_options": {
-                "langcode": "en-US"
+                "langcode": "en-US",
+                "search_image": True,
+                "search_video": True,
            },
            "search_video": True,
-            "
+            "model": "",
+            "contexts_from": "google",
+            "auto_routing": True,
        }
 
        def for_stream():
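Reading the two felo_search.py hunks together: the image/video flags now live inside "search_options", and three new top-level fields are added. A reconstruction of the affected payload region from the context and + lines (old line 147 is truncated in the diff viewer, so any fields outside these hunks, such as the query itself, are omitted here):

    payload = {
        # ...query fields outside these hunks omitted...
        "lang": "",
        "agent_lang": "en",
        "search_options": {
            "langcode": "en-US",
            "search_image": True,
            "search_video": True,
        },
        "search_video": True,  # still present at the top level, per the context line
        "model": "",
        "contexts_from": "google",
        "auto_routing": True,
    }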
webscout/Provider/AISEARCH/scira_search.py
CHANGED

@@ -67,6 +67,8 @@ class Scira(AISearch):
        "scira-opus": "claude-4-opus-20250514",
        "scira-opus-pro": "claude-4-opus-20250514",
        "scira-llama-4": "meta-llama/llama-4-maverick-17b-128e-instruct",
+        "scira-kimi-k2": "kimi-k2-instruct",
+        "kimi-k2-instruct": "scira-kimi-k2",
    }
    def __init__(
        self,