webscout 7.2__py3-none-any.whl → 7.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of webscout might be problematic. Click here for more details.
- webscout/Bard.py +2 -2
- webscout/Litlogger/core/level.py +3 -0
- webscout/Litlogger/core/logger.py +101 -58
- webscout/Litlogger/handlers/console.py +14 -31
- webscout/Litlogger/handlers/network.py +16 -17
- webscout/Litlogger/styles/colors.py +81 -63
- webscout/Litlogger/styles/formats.py +163 -80
- webscout/Provider/AISEARCH/ISou.py +277 -0
- webscout/Provider/AISEARCH/__init__.py +4 -2
- webscout/Provider/AISEARCH/genspark_search.py +208 -0
- webscout/Provider/AllenAI.py +282 -0
- webscout/Provider/Deepinfra.py +52 -37
- webscout/Provider/ElectronHub.py +634 -0
- webscout/Provider/Glider.py +7 -41
- webscout/Provider/HeckAI.py +200 -0
- webscout/Provider/Jadve.py +49 -63
- webscout/Provider/PI.py +106 -93
- webscout/Provider/Perplexitylabs.py +395 -0
- webscout/Provider/QwenLM.py +7 -61
- webscout/Provider/TTI/FreeAIPlayground/__init__.py +9 -0
- webscout/Provider/TTI/FreeAIPlayground/async_freeaiplayground.py +206 -0
- webscout/Provider/TTI/FreeAIPlayground/sync_freeaiplayground.py +192 -0
- webscout/Provider/TTI/__init__.py +3 -1
- webscout/Provider/TTI/piclumen/__init__.py +23 -0
- webscout/Provider/TTI/piclumen/async_piclumen.py +268 -0
- webscout/Provider/TTI/piclumen/sync_piclumen.py +233 -0
- webscout/Provider/TextPollinationsAI.py +28 -6
- webscout/Provider/TwoAI.py +200 -0
- webscout/Provider/Venice.py +200 -0
- webscout/Provider/WiseCat.py +1 -18
- webscout/Provider/__init__.py +14 -0
- webscout/Provider/akashgpt.py +312 -0
- webscout/Provider/chatglm.py +5 -5
- webscout/Provider/freeaichat.py +251 -0
- webscout/Provider/koala.py +9 -1
- webscout/Provider/yep.py +5 -25
- webscout/__init__.py +1 -0
- webscout/version.py +1 -1
- webscout/webscout_search.py +82 -2
- webscout/webscout_search_async.py +58 -1
- webscout/yep_search.py +297 -0
- {webscout-7.2.dist-info → webscout-7.4.dist-info}/METADATA +99 -65
- {webscout-7.2.dist-info → webscout-7.4.dist-info}/RECORD +47 -30
- {webscout-7.2.dist-info → webscout-7.4.dist-info}/WHEEL +1 -1
- {webscout-7.2.dist-info → webscout-7.4.dist-info}/LICENSE.md +0 -0
- {webscout-7.2.dist-info → webscout-7.4.dist-info}/entry_points.txt +0 -0
- {webscout-7.2.dist-info → webscout-7.4.dist-info}/top_level.txt +0 -0
webscout/Provider/yep.py
CHANGED
|
@@ -12,7 +12,6 @@ from webscout.AIutel import Conversation
|
|
|
12
12
|
from webscout.AIutel import AwesomePrompts
|
|
13
13
|
from webscout.AIbase import Provider
|
|
14
14
|
from webscout import WEBS, exceptions
|
|
15
|
-
from webscout.Litlogger import Logger, LogFormat
|
|
16
15
|
from webscout.litagent import LitAgent
|
|
17
16
|
|
|
18
17
|
|
|
@@ -24,7 +23,7 @@ class YEPCHAT(Provider):
|
|
|
24
23
|
AVAILABLE_MODELS (list): List of available models for the provider.
|
|
25
24
|
"""
|
|
26
25
|
|
|
27
|
-
AVAILABLE_MODELS = ["DeepSeek-R1-Distill-Qwen-32B"]
|
|
26
|
+
AVAILABLE_MODELS = ["DeepSeek-R1-Distill-Qwen-32B", "Mixtral-8x7B-Instruct-v0.1"]
|
|
28
27
|
|
|
29
28
|
def __init__(
|
|
30
29
|
self,
|
|
@@ -39,14 +38,13 @@ class YEPCHAT(Provider):
|
|
|
39
38
|
act: str = None,
|
|
40
39
|
model: str = "DeepSeek-R1-Distill-Qwen-32B",
|
|
41
40
|
temperature: float = 0.6,
|
|
42
|
-
top_p: float = 0.7,
|
|
43
|
-
logging: bool = False,
|
|
41
|
+
top_p: float = 0.7
|
|
44
42
|
):
|
|
45
43
|
"""
|
|
46
44
|
Initializes the YEPCHAT provider with the specified parameters.
|
|
47
45
|
|
|
48
46
|
Examples:
|
|
49
|
-
>>> ai = YEPCHAT(
|
|
47
|
+
>>> ai = YEPCHAT()
|
|
50
48
|
>>> ai.ask("What's the weather today?")
|
|
51
49
|
Sends a prompt to the Yep API and returns the response.
|
|
52
50
|
|
|
@@ -85,7 +83,7 @@ class YEPCHAT(Provider):
|
|
|
85
83
|
"Sec-CH-UA-Platform": '"Windows"',
|
|
86
84
|
"User-Agent": self.agent.random(), # Use LitAgent to generate a random user agent
|
|
87
85
|
}
|
|
88
|
-
self.cookies = {"__Host-session": uuid.uuid4().hex}
|
|
86
|
+
self.cookies = {"__Host-session": uuid.uuid4().hex, '__cf_bm': uuid.uuid4().hex}
|
|
89
87
|
|
|
90
88
|
self.__available_optimizers = (
|
|
91
89
|
method
|
|
@@ -106,9 +104,6 @@ class YEPCHAT(Provider):
|
|
|
106
104
|
|
|
107
105
|
self.knowledge_cutoff = "December 2023"
|
|
108
106
|
|
|
109
|
-
# Initialize logger
|
|
110
|
-
self.logger = Logger(name="YEPCHAT", format=LogFormat.MODERN_EMOJI) if logging else None
|
|
111
|
-
|
|
112
107
|
def ask(
|
|
113
108
|
self,
|
|
114
109
|
prompt: str,
|
|
@@ -128,9 +123,6 @@ class YEPCHAT(Provider):
|
|
|
128
123
|
>>> ai.ask("Tell me a joke", stream=True)
|
|
129
124
|
Streams the response from the Yep API.
|
|
130
125
|
"""
|
|
131
|
-
if self.logger:
|
|
132
|
-
self.logger.debug(f"ask() called with prompt: {prompt}")
|
|
133
|
-
|
|
134
126
|
conversation_prompt = self.conversation.gen_complete_prompt(prompt)
|
|
135
127
|
if optimizer:
|
|
136
128
|
if optimizer in self.__available_optimizers:
|
|
@@ -138,8 +130,6 @@ class YEPCHAT(Provider):
|
|
|
138
130
|
conversation_prompt if conversationally else prompt
|
|
139
131
|
)
|
|
140
132
|
else:
|
|
141
|
-
if self.logger:
|
|
142
|
-
self.logger.error(f"Invalid optimizer: {optimizer}")
|
|
143
133
|
raise Exception(
|
|
144
134
|
f"Optimizer is not one of {self.__available_optimizers}"
|
|
145
135
|
)
|
|
@@ -157,8 +147,6 @@ class YEPCHAT(Provider):
|
|
|
157
147
|
try:
|
|
158
148
|
with self.session.post(self.chat_endpoint, headers=self.headers, cookies=self.cookies, json=data, stream=True, timeout=self.timeout) as response:
|
|
159
149
|
if not response.ok:
|
|
160
|
-
if self.logger:
|
|
161
|
-
self.logger.error(f"Failed to generate response: {response.status_code} {response.reason}")
|
|
162
150
|
raise exceptions.FailedToGenerateResponseError(
|
|
163
151
|
f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}"
|
|
164
152
|
)
|
|
@@ -183,13 +171,9 @@ class YEPCHAT(Provider):
|
|
|
183
171
|
resp = dict(text=content)
|
|
184
172
|
yield resp if raw else resp
|
|
185
173
|
except json.JSONDecodeError:
|
|
186
|
-
if self.logger:
|
|
187
|
-
self.logger.warning("JSONDecodeError encountered.")
|
|
188
174
|
pass
|
|
189
175
|
self.conversation.update_chat_history(prompt, streaming_text)
|
|
190
176
|
except Exception as e:
|
|
191
|
-
if self.logger:
|
|
192
|
-
self.logger.error(f"Request failed: {e}")
|
|
193
177
|
raise exceptions.FailedToGenerateResponseError(f"Request failed: {e}")
|
|
194
178
|
|
|
195
179
|
def for_non_stream():
|
|
@@ -217,9 +201,6 @@ class YEPCHAT(Provider):
|
|
|
217
201
|
>>> ai.chat("What's the weather today?", stream=True)
|
|
218
202
|
Streams the chat response from the Yep API.
|
|
219
203
|
"""
|
|
220
|
-
if self.logger:
|
|
221
|
-
self.logger.debug(f"chat() called with prompt: {prompt}")
|
|
222
|
-
|
|
223
204
|
def for_stream():
|
|
224
205
|
for response in self.ask(
|
|
225
206
|
prompt, True, optimizer=optimizer, conversationally=conversationally
|
|
@@ -255,8 +236,7 @@ class YEPCHAT(Provider):
|
|
|
255
236
|
if __name__ == "__main__":
|
|
256
237
|
from rich import print
|
|
257
238
|
|
|
258
|
-
ai = YEPCHAT(
|
|
259
|
-
|
|
239
|
+
ai = YEPCHAT(model="DeepSeek-R1-Distill-Qwen-32B")
|
|
260
240
|
response = ai.chat("how many r in 'strawberry'", stream=True)
|
|
261
241
|
for chunk in response:
|
|
262
242
|
print(chunk, end="", flush=True)
|
webscout/__init__.py
CHANGED
webscout/version.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
__version__ = "7.2"
|
|
1
|
+
__version__ = "7.4"
|
|
2
2
|
__prog__ = "webscout"
|
webscout/webscout_search.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
# import logging
|
|
4
|
+
import json
|
|
5
|
+
from urllib.parse import quote
|
|
4
6
|
import warnings
|
|
5
7
|
from concurrent.futures import ThreadPoolExecutor
|
|
6
8
|
from datetime import datetime, timezone
|
|
@@ -11,7 +13,7 @@ from random import choice, shuffle
|
|
|
11
13
|
from threading import Event
|
|
12
14
|
from time import sleep, time
|
|
13
15
|
from types import TracebackType
|
|
14
|
-
from typing import cast
|
|
16
|
+
from typing import Any, cast
|
|
15
17
|
|
|
16
18
|
import primp # type: ignore
|
|
17
19
|
|
|
@@ -1140,4 +1142,82 @@ class WEBS:
|
|
|
1140
1142
|
except Exception as e:
|
|
1141
1143
|
raise e
|
|
1142
1144
|
|
|
1143
|
-
return results
|
|
1145
|
+
return results
|
|
1146
|
+
|
|
1147
|
+
def weather(
|
|
1148
|
+
self,
|
|
1149
|
+
location: str,
|
|
1150
|
+
language: str = "en",
|
|
1151
|
+
) -> dict[str, Any]:
|
|
1152
|
+
"""Get weather information for a location from DuckDuckGo.
|
|
1153
|
+
|
|
1154
|
+
Args:
|
|
1155
|
+
location: Location to get weather for.
|
|
1156
|
+
language: Language code (e.g. 'en', 'es'). Defaults to "en".
|
|
1157
|
+
|
|
1158
|
+
Returns:
|
|
1159
|
+
Dictionary containing weather data with structure described in docstring.
|
|
1160
|
+
|
|
1161
|
+
Raises:
|
|
1162
|
+
WebscoutE: Base exception for webscout errors.
|
|
1163
|
+
RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
|
|
1164
|
+
TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
|
|
1165
|
+
"""
|
|
1166
|
+
assert location, "location is mandatory"
|
|
1167
|
+
lang = language.split('-')[0]
|
|
1168
|
+
url = f"https://duckduckgo.com/js/spice/forecast/{quote(location)}/{lang}"
|
|
1169
|
+
|
|
1170
|
+
resp = self._get_url("GET", url)
|
|
1171
|
+
resp_text = resp.decode('utf-8')
|
|
1172
|
+
|
|
1173
|
+
if "ddg_spice_forecast(" not in resp_text:
|
|
1174
|
+
raise WebscoutE(f"No weather data found for {location}")
|
|
1175
|
+
|
|
1176
|
+
json_text = resp_text[resp_text.find('(') + 1:resp_text.rfind(')')]
|
|
1177
|
+
try:
|
|
1178
|
+
result = json.loads(json_text)
|
|
1179
|
+
except Exception as e:
|
|
1180
|
+
raise WebscoutE(f"Error parsing weather JSON: {e}")
|
|
1181
|
+
|
|
1182
|
+
if not result or 'currentWeather' not in result or 'forecastDaily' not in result:
|
|
1183
|
+
raise WebscoutE(f"Invalid weather data format for {location}")
|
|
1184
|
+
|
|
1185
|
+
formatted_data = {
|
|
1186
|
+
"location": result["currentWeather"]["metadata"].get("ddg-location", "Unknown"),
|
|
1187
|
+
"current": {
|
|
1188
|
+
"condition": result["currentWeather"].get("conditionCode"),
|
|
1189
|
+
"temperature_c": result["currentWeather"].get("temperature"),
|
|
1190
|
+
"feels_like_c": result["currentWeather"].get("temperatureApparent"),
|
|
1191
|
+
"humidity": result["currentWeather"].get("humidity"),
|
|
1192
|
+
"wind_speed_ms": result["currentWeather"].get("windSpeed"),
|
|
1193
|
+
"wind_direction": result["currentWeather"].get("windDirection"),
|
|
1194
|
+
"visibility_m": result["currentWeather"].get("visibility"),
|
|
1195
|
+
},
|
|
1196
|
+
"daily_forecast": [],
|
|
1197
|
+
"hourly_forecast": []
|
|
1198
|
+
}
|
|
1199
|
+
|
|
1200
|
+
for day in result["forecastDaily"]["days"]:
|
|
1201
|
+
formatted_data["daily_forecast"].append({
|
|
1202
|
+
"date": datetime.fromisoformat(day["forecastStart"].replace("Z", "+00:00")).strftime("%Y-%m-%d"),
|
|
1203
|
+
"condition": day["daytimeForecast"].get("conditionCode"),
|
|
1204
|
+
"max_temp_c": day["temperatureMax"],
|
|
1205
|
+
"min_temp_c": day["temperatureMin"],
|
|
1206
|
+
"sunrise": datetime.fromisoformat(day["sunrise"].replace("Z", "+00:00")).strftime("%H:%M"),
|
|
1207
|
+
"sunset": datetime.fromisoformat(day["sunset"].replace("Z", "+00:00")).strftime("%H:%M"),
|
|
1208
|
+
})
|
|
1209
|
+
|
|
1210
|
+
if 'forecastHourly' in result and 'hours' in result['forecastHourly']:
|
|
1211
|
+
for hour in result['forecastHourly']['hours']:
|
|
1212
|
+
formatted_data["hourly_forecast"].append({
|
|
1213
|
+
"time": datetime.fromisoformat(hour["forecastStart"].replace("Z", "+00:00")).strftime("%H:%M"),
|
|
1214
|
+
"condition": hour.get("conditionCode"),
|
|
1215
|
+
"temperature_c": hour.get("temperature"),
|
|
1216
|
+
"feels_like_c": hour.get("temperatureApparent"),
|
|
1217
|
+
"humidity": hour.get("humidity"),
|
|
1218
|
+
"wind_speed_ms": hour.get("windSpeed"),
|
|
1219
|
+
"wind_direction": hour.get("windDirection"),
|
|
1220
|
+
"visibility_m": hour.get("visibility"),
|
|
1221
|
+
})
|
|
1222
|
+
|
|
1223
|
+
return formatted_data
|
webscout/webscout_search_async.py
CHANGED
|
@@ -633,4 +633,61 @@ class AsyncWEBS:
|
|
|
633
633
|
from_,
|
|
634
634
|
to,
|
|
635
635
|
)
|
|
636
|
-
return result
|
|
636
|
+
return result
|
|
637
|
+
|
|
638
|
+
async def aweather(
|
|
639
|
+
self,
|
|
640
|
+
location: str,
|
|
641
|
+
language: str = "en",
|
|
642
|
+
) -> dict[str, Any]:
|
|
643
|
+
"""Async version of weather information retrieval from DuckDuckGo.
|
|
644
|
+
|
|
645
|
+
Args:
|
|
646
|
+
location: Location to get weather for.
|
|
647
|
+
language: Language code (e.g. 'en', 'es'). Defaults to "en".
|
|
648
|
+
|
|
649
|
+
Returns:
|
|
650
|
+
Dictionary containing weather data with the following structure:
|
|
651
|
+
{
|
|
652
|
+
"location": str,
|
|
653
|
+
"current": {
|
|
654
|
+
"condition": str,
|
|
655
|
+
"temperature_c": float,
|
|
656
|
+
"feels_like_c": float,
|
|
657
|
+
"humidity": float,
|
|
658
|
+
"wind_speed_ms": float,
|
|
659
|
+
"wind_direction": float,
|
|
660
|
+
"visibility_m": float
|
|
661
|
+
},
|
|
662
|
+
"daily_forecast": List[{
|
|
663
|
+
"date": str,
|
|
664
|
+
"condition": str,
|
|
665
|
+
"max_temp_c": float,
|
|
666
|
+
"min_temp_c": float,
|
|
667
|
+
"sunrise": str,
|
|
668
|
+
"sunset": str
|
|
669
|
+
}],
|
|
670
|
+
"hourly_forecast": List[{
|
|
671
|
+
"time": str,
|
|
672
|
+
"condition": str,
|
|
673
|
+
"temperature_c": float,
|
|
674
|
+
"feels_like_c": float,
|
|
675
|
+
"humidity": float,
|
|
676
|
+
"wind_speed_ms": float,
|
|
677
|
+
"wind_direction": float,
|
|
678
|
+
"visibility_m": float
|
|
679
|
+
}]
|
|
680
|
+
}
|
|
681
|
+
|
|
682
|
+
Raises:
|
|
683
|
+
WebscoutE: Base exception for webscout errors.
|
|
684
|
+
RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
|
|
685
|
+
TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
|
|
686
|
+
"""
|
|
687
|
+
result = await self._loop.run_in_executor(
|
|
688
|
+
self._executor,
|
|
689
|
+
super().weather,
|
|
690
|
+
location,
|
|
691
|
+
language,
|
|
692
|
+
)
|
|
693
|
+
return result
|
webscout/yep_search.py
ADDED
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
import cloudscraper
|
|
2
|
+
from urllib.parse import urlencode
|
|
3
|
+
from webscout.litagent import LitAgent
|
|
4
|
+
from typing import List, Dict, Optional, Tuple
|
|
5
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
6
|
+
import json
|
|
7
|
+
class YepSearch:
|
|
8
|
+
"""Yep.com search class to get search results."""
|
|
9
|
+
|
|
10
|
+
_executor: ThreadPoolExecutor = ThreadPoolExecutor()
|
|
11
|
+
|
|
12
|
+
def __init__(
|
|
13
|
+
self,
|
|
14
|
+
timeout: int = 20,
|
|
15
|
+
proxies: Dict[str, str] | None = None,
|
|
16
|
+
verify: bool = True,
|
|
17
|
+
):
|
|
18
|
+
"""Initialize YepSearch.
|
|
19
|
+
|
|
20
|
+
Args:
|
|
21
|
+
timeout: Timeout value for the HTTP client. Defaults to 20.
|
|
22
|
+
proxies: Proxy configuration for requests. Defaults to None.
|
|
23
|
+
verify: Verify SSL certificates. Defaults to True.
|
|
24
|
+
"""
|
|
25
|
+
self.base_url = "https://api.yep.com/fs/2/search"
|
|
26
|
+
self.timeout = timeout
|
|
27
|
+
self.session = cloudscraper.create_scraper()
|
|
28
|
+
self.session.headers.update({
|
|
29
|
+
"Accept": "*/*",
|
|
30
|
+
"Accept-Language": "en-US,en;q=0.9,en-IN;q=0.8",
|
|
31
|
+
"DNT": "1",
|
|
32
|
+
"Origin": "https://yep.com",
|
|
33
|
+
"Referer": "https://yep.com/",
|
|
34
|
+
"Sec-Ch-Ua": '"Not(A:Brand";v="99", "Microsoft Edge";v="133", "Chromium";v="133"',
|
|
35
|
+
"Sec-Ch-Ua-Mobile": "?0",
|
|
36
|
+
"Sec-Ch-Ua-Platform": '"Windows"',
|
|
37
|
+
"Sec-Fetch-Dest": "empty",
|
|
38
|
+
"Sec-Fetch-Mode": "cors",
|
|
39
|
+
"Sec-Fetch-Site": "same-site",
|
|
40
|
+
"User-Agent": LitAgent().random()
|
|
41
|
+
})
|
|
42
|
+
if proxies:
|
|
43
|
+
self.session.proxies.update(proxies)
|
|
44
|
+
self.session.verify = verify
|
|
45
|
+
|
|
46
|
+
def _remove_html_tags(self, text: str) -> str:
|
|
47
|
+
"""Remove HTML tags from text using simple string manipulation.
|
|
48
|
+
|
|
49
|
+
Args:
|
|
50
|
+
text: String containing HTML tags
|
|
51
|
+
|
|
52
|
+
Returns:
|
|
53
|
+
Clean text without HTML tags
|
|
54
|
+
"""
|
|
55
|
+
result = ""
|
|
56
|
+
in_tag = False
|
|
57
|
+
|
|
58
|
+
for char in text:
|
|
59
|
+
if char == '<':
|
|
60
|
+
in_tag = True
|
|
61
|
+
elif char == '>':
|
|
62
|
+
in_tag = False
|
|
63
|
+
elif not in_tag:
|
|
64
|
+
result += char
|
|
65
|
+
|
|
66
|
+
# Replace common HTML entities
|
|
67
|
+
replacements = {
|
|
68
|
+
'&nbsp;': ' ',
|
|
69
|
+
'&amp;': '&',
|
|
70
|
+
'&lt;': '<',
|
|
71
|
+
'&gt;': '>',
|
|
72
|
+
'&quot;': '"',
|
|
73
|
+
'&#39;': "'",
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
for entity, replacement in replacements.items():
|
|
77
|
+
result = result.replace(entity, replacement)
|
|
78
|
+
|
|
79
|
+
return result.strip()
|
|
80
|
+
|
|
81
|
+
def format_results(self, raw_results: dict) -> List[Dict]:
|
|
82
|
+
"""Format raw API results into a consistent structure."""
|
|
83
|
+
formatted_results = []
|
|
84
|
+
|
|
85
|
+
if not raw_results or len(raw_results) < 2:
|
|
86
|
+
return formatted_results
|
|
87
|
+
|
|
88
|
+
results = raw_results[1].get('results', [])
|
|
89
|
+
|
|
90
|
+
for result in results:
|
|
91
|
+
formatted_result = {
|
|
92
|
+
"title": self._remove_html_tags(result.get("title", "")),
|
|
93
|
+
"href": result.get("url", ""),
|
|
94
|
+
"body": self._remove_html_tags(result.get("snippet", "")),
|
|
95
|
+
"source": result.get("visual_url", ""),
|
|
96
|
+
"position": len(formatted_results) + 1,
|
|
97
|
+
"type": result.get("type", "organic"),
|
|
98
|
+
"first_seen": result.get("first_seen", None)
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
# Add sitelinks if they exist
|
|
102
|
+
if "sitelinks" in result:
|
|
103
|
+
sitelinks = []
|
|
104
|
+
if "full" in result["sitelinks"]:
|
|
105
|
+
sitelinks.extend(result["sitelinks"]["full"])
|
|
106
|
+
if "short" in result["sitelinks"]:
|
|
107
|
+
sitelinks.extend(result["sitelinks"]["short"])
|
|
108
|
+
|
|
109
|
+
if sitelinks:
|
|
110
|
+
formatted_result["sitelinks"] = [
|
|
111
|
+
{
|
|
112
|
+
"title": self._remove_html_tags(link.get("title", "")),
|
|
113
|
+
"href": link.get("url", "")
|
|
114
|
+
}
|
|
115
|
+
for link in sitelinks
|
|
116
|
+
]
|
|
117
|
+
|
|
118
|
+
formatted_results.append(formatted_result)
|
|
119
|
+
|
|
120
|
+
return formatted_results
|
|
121
|
+
|
|
122
|
+
def text(
|
|
123
|
+
self,
|
|
124
|
+
keywords: str,
|
|
125
|
+
region: str = "all",
|
|
126
|
+
safesearch: str = "moderate",
|
|
127
|
+
max_results: Optional[int] = None,
|
|
128
|
+
) -> List[Dict[str, str]]:
|
|
129
|
+
"""Yep.com text search.
|
|
130
|
+
|
|
131
|
+
Args:
|
|
132
|
+
keywords: Search query string.
|
|
133
|
+
region: Region for search results. Defaults to "all".
|
|
134
|
+
safesearch: SafeSearch setting ("on", "moderate", "off"). Defaults to "moderate".
|
|
135
|
+
max_results: Maximum number of results to return. Defaults to None.
|
|
136
|
+
|
|
137
|
+
Returns:
|
|
138
|
+
List of dictionaries containing search results.
|
|
139
|
+
"""
|
|
140
|
+
# Convert safesearch parameter
|
|
141
|
+
safe_search_map = {
|
|
142
|
+
"on": "on",
|
|
143
|
+
"moderate": "moderate",
|
|
144
|
+
"off": "off"
|
|
145
|
+
}
|
|
146
|
+
safe_setting = safe_search_map.get(safesearch.lower(), "moderate")
|
|
147
|
+
|
|
148
|
+
params = {
|
|
149
|
+
"client": "web",
|
|
150
|
+
"gl": region,
|
|
151
|
+
"limit": str(max_results) if max_results else "10",
|
|
152
|
+
"no_correct": "false",
|
|
153
|
+
"q": keywords,
|
|
154
|
+
"safeSearch": safe_setting,
|
|
155
|
+
"type": "web"
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
url = f"{self.base_url}?{urlencode(params)}"
|
|
159
|
+
try:
|
|
160
|
+
response = self.session.get(url, timeout=self.timeout)
|
|
161
|
+
response.raise_for_status()
|
|
162
|
+
raw_results = response.json()
|
|
163
|
+
|
|
164
|
+
formatted_results = self.format_results(raw_results)
|
|
165
|
+
|
|
166
|
+
if max_results:
|
|
167
|
+
return formatted_results[:max_results]
|
|
168
|
+
return formatted_results
|
|
169
|
+
except Exception as e:
|
|
170
|
+
raise Exception(f"Yep search failed: {str(e)}")
|
|
171
|
+
|
|
172
|
+
def images(
|
|
173
|
+
self,
|
|
174
|
+
keywords: str,
|
|
175
|
+
region: str = "all",
|
|
176
|
+
safesearch: str = "moderate",
|
|
177
|
+
max_results: Optional[int] = None,
|
|
178
|
+
) -> List[Dict[str, str]]:
|
|
179
|
+
"""Yep.com image search.
|
|
180
|
+
|
|
181
|
+
Args:
|
|
182
|
+
keywords: Search query string.
|
|
183
|
+
region: Region for search results. Defaults to "all".
|
|
184
|
+
safesearch: SafeSearch setting ("on", "moderate", "off"). Defaults to "moderate".
|
|
185
|
+
max_results: Maximum number of results to return. Defaults to None.
|
|
186
|
+
|
|
187
|
+
Returns:
|
|
188
|
+
List of dictionaries containing image search results with keys:
|
|
189
|
+
- title: Image title
|
|
190
|
+
- image: Full resolution image URL
|
|
191
|
+
- thumbnail: Thumbnail image URL
|
|
192
|
+
- url: Source page URL
|
|
193
|
+
- height: Image height
|
|
194
|
+
- width: Image width
|
|
195
|
+
- source: Source website domain
|
|
196
|
+
"""
|
|
197
|
+
safe_search_map = {
|
|
198
|
+
"on": "on",
|
|
199
|
+
"moderate": "moderate",
|
|
200
|
+
"off": "off"
|
|
201
|
+
}
|
|
202
|
+
safe_setting = safe_search_map.get(safesearch.lower(), "moderate")
|
|
203
|
+
|
|
204
|
+
params = {
|
|
205
|
+
"client": "web",
|
|
206
|
+
"gl": region,
|
|
207
|
+
"limit": str(max_results) if max_results else "10",
|
|
208
|
+
"no_correct": "false",
|
|
209
|
+
"q": keywords,
|
|
210
|
+
"safeSearch": safe_setting,
|
|
211
|
+
"type": "images"
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
url = f"{self.base_url}?{urlencode(params)}"
|
|
215
|
+
try:
|
|
216
|
+
response = self.session.get(url, timeout=self.timeout)
|
|
217
|
+
response.raise_for_status()
|
|
218
|
+
raw_results = response.json()
|
|
219
|
+
|
|
220
|
+
if not raw_results or len(raw_results) < 2:
|
|
221
|
+
return []
|
|
222
|
+
|
|
223
|
+
formatted_results = []
|
|
224
|
+
results = raw_results[1].get('results', [])
|
|
225
|
+
|
|
226
|
+
for result in results:
|
|
227
|
+
if result.get("type") != "Image":
|
|
228
|
+
continue
|
|
229
|
+
|
|
230
|
+
formatted_result = {
|
|
231
|
+
"title": self._remove_html_tags(result.get("title", "")),
|
|
232
|
+
"image": result.get("image_id", ""),
|
|
233
|
+
"thumbnail": result.get("src", ""),
|
|
234
|
+
"url": result.get("host_page", ""),
|
|
235
|
+
"height": result.get("height", 0),
|
|
236
|
+
"width": result.get("width", 0),
|
|
237
|
+
"source": result.get("visual_url", "")
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
# Add high-res thumbnail if available
|
|
241
|
+
if "srcset" in result:
|
|
242
|
+
formatted_result["thumbnail_hd"] = result["srcset"].split(",")[1].strip().split(" ")[0]
|
|
243
|
+
|
|
244
|
+
formatted_results.append(formatted_result)
|
|
245
|
+
|
|
246
|
+
if max_results:
|
|
247
|
+
return formatted_results[:max_results]
|
|
248
|
+
return formatted_results
|
|
249
|
+
|
|
250
|
+
except Exception as e:
|
|
251
|
+
raise Exception(f"Yep image search failed: {str(e)}")
|
|
252
|
+
|
|
253
|
+
def suggestions(
|
|
254
|
+
self,
|
|
255
|
+
query: str,
|
|
256
|
+
region: str = "all",
|
|
257
|
+
) -> List[str]:
|
|
258
|
+
"""Get search suggestions from Yep.com autocomplete API.
|
|
259
|
+
|
|
260
|
+
Args:
|
|
261
|
+
query: Search query string to get suggestions for.
|
|
262
|
+
region: Region for suggestions. Defaults to "all".
|
|
263
|
+
|
|
264
|
+
Returns:
|
|
265
|
+
List of suggestion strings.
|
|
266
|
+
|
|
267
|
+
Example:
|
|
268
|
+
>>> yep = YepSearch()
|
|
269
|
+
>>> suggestions = yep.suggestions("ca")
|
|
270
|
+
>>> print(suggestions)
|
|
271
|
+
['capital one', 'car wash', 'carmax', 'cafe', ...]
|
|
272
|
+
"""
|
|
273
|
+
params = {
|
|
274
|
+
"query": query,
|
|
275
|
+
"type": "web",
|
|
276
|
+
"gl": region
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
url = f"https://api.yep.com/ac/?{urlencode(params)}"
|
|
280
|
+
|
|
281
|
+
try:
|
|
282
|
+
response = self.session.get(url, timeout=self.timeout)
|
|
283
|
+
response.raise_for_status()
|
|
284
|
+
data = response.json()
|
|
285
|
+
# Return suggestions list if response format is valid
|
|
286
|
+
if isinstance(data, list) and len(data) > 1 and isinstance(data[1], list):
|
|
287
|
+
return data[1]
|
|
288
|
+
return []
|
|
289
|
+
|
|
290
|
+
except Exception as e:
|
|
291
|
+
raise Exception(f"Yep suggestions failed: {str(e)}")
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
if __name__ == "__main__":
|
|
295
|
+
yep = YepSearch()
|
|
296
|
+
r = yep.suggestions("hi", region="all")
|
|
297
|
+
print(r)
|