webscout 2025.10.15__py3-none-any.whl → 2025.10.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of webscout might be problematic. Click here for more details.
- webscout/Extra/YTToolkit/README.md +1 -1
- webscout/Extra/tempmail/README.md +3 -3
- webscout/Provider/OPENAI/README.md +1 -1
- webscout/Provider/TTI/bing.py +4 -4
- webscout/__init__.py +1 -1
- webscout/client.py +4 -5
- webscout/litprinter/__init__.py +0 -42
- webscout/scout/README.md +59 -8
- webscout/scout/core/scout.py +62 -0
- webscout/scout/element.py +251 -45
- webscout/search/__init__.py +3 -4
- webscout/search/engines/bing/images.py +5 -2
- webscout/search/engines/bing/news.py +6 -4
- webscout/search/engines/bing/text.py +5 -2
- webscout/search/engines/yahoo/__init__.py +41 -0
- webscout/search/engines/yahoo/answers.py +16 -0
- webscout/search/engines/yahoo/base.py +34 -0
- webscout/search/engines/yahoo/images.py +324 -0
- webscout/search/engines/yahoo/maps.py +16 -0
- webscout/search/engines/yahoo/news.py +258 -0
- webscout/search/engines/yahoo/suggestions.py +140 -0
- webscout/search/engines/yahoo/text.py +273 -0
- webscout/search/engines/yahoo/translate.py +16 -0
- webscout/search/engines/yahoo/videos.py +302 -0
- webscout/search/engines/yahoo/weather.py +220 -0
- webscout/search/http_client.py +1 -1
- webscout/search/yahoo_main.py +54 -0
- webscout/{auth → server}/__init__.py +2 -23
- webscout/server/config.py +84 -0
- webscout/{auth → server}/request_processing.py +3 -28
- webscout/{auth → server}/routes.py +6 -148
- webscout/server/schemas.py +23 -0
- webscout/{auth → server}/server.py +11 -43
- webscout/server/simple_logger.py +84 -0
- webscout/version.py +1 -1
- webscout/version.py.bak +1 -1
- webscout/zeroart/README.md +17 -9
- webscout/zeroart/__init__.py +78 -6
- webscout/zeroart/effects.py +51 -1
- webscout/zeroart/fonts.py +559 -1
- {webscout-2025.10.15.dist-info → webscout-2025.10.16.dist-info}/METADATA +10 -52
- {webscout-2025.10.15.dist-info → webscout-2025.10.16.dist-info}/RECORD +49 -45
- {webscout-2025.10.15.dist-info → webscout-2025.10.16.dist-info}/entry_points.txt +1 -1
- webscout/auth/api_key_manager.py +0 -189
- webscout/auth/auth_system.py +0 -85
- webscout/auth/config.py +0 -175
- webscout/auth/database.py +0 -755
- webscout/auth/middleware.py +0 -248
- webscout/auth/models.py +0 -185
- webscout/auth/rate_limiter.py +0 -254
- webscout/auth/schemas.py +0 -103
- webscout/auth/simple_logger.py +0 -236
- webscout/search/engines/yahoo.py +0 -65
- webscout/search/engines/yahoo_news.py +0 -64
- /webscout/{auth → server}/exceptions.py +0 -0
- /webscout/{auth → server}/providers.py +0 -0
- /webscout/{auth → server}/request_models.py +0 -0
- {webscout-2025.10.15.dist-info → webscout-2025.10.16.dist-info}/WHEEL +0 -0
- {webscout-2025.10.15.dist-info → webscout-2025.10.16.dist-info}/licenses/LICENSE.md +0 -0
- {webscout-2025.10.15.dist-info → webscout-2025.10.16.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
"""Yahoo video search engine."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Mapping
|
|
6
|
+
from typing import Any
|
|
7
|
+
from urllib.parse import parse_qs, urlparse
|
|
8
|
+
|
|
9
|
+
from .base import YahooSearchEngine
|
|
10
|
+
from ...results import VideosResult
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class YahooVideos(YahooSearchEngine[VideosResult]):
    """Yahoo video search engine with filters.

    Features:
    - Length filters (short, medium, long)
    - Resolution filters (SD, HD, 4K)
    - Source filters
    - Time filters
    - Pagination support
    """

    name = "yahoo"
    category = "videos"

    search_url = "https://video.search.yahoo.com/search/video"
    search_method = "GET"

    # XPath selectors for video results
    items_xpath = "//div[@id='results']//div[contains(@class, 'dd') or contains(@class, 'vr')]"
    elements_xpath: Mapping[str, str] = {
        "title": ".//h3//a/text() | .//a/@title",
        "url": ".//h3//a/@href | .//a/@href",
        "thumbnail": ".//img/@src",
        "duration": ".//span[contains(@class, 'time') or contains(@class, 'duration')]//text()",
        "views": ".//span[contains(@class, 'views')]//text()",
        "published": ".//span[contains(@class, 'date') or contains(@class, 'age')]//text()",
        "source": ".//span[contains(@class, 'source')]//text()",
    }

    # Filter mappings: accepted keyword value -> query-string value.
    # An empty string means "no filter"; the parameter is then omitted.
    LENGTH_FILTERS = {
        "short": "short",  # < 4 minutes
        "medium": "medium",  # 4-20 minutes
        "long": "long",  # > 20 minutes
        "all": "",
    }

    RESOLUTION_FILTERS = {
        "sd": "sd",
        "hd": "hd",
        "4k": "4k",
        "all": "",
    }

    SOURCE_FILTERS = {
        "youtube": "youtube",
        "dailymotion": "dailymotion",
        "vimeo": "vimeo",
        "metacafe": "metacafe",
        "all": "",
    }

    def build_payload(
        self,
        query: str,
        region: str,
        safesearch: str,
        timelimit: str | None,
        page: int = 1,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Build video search payload.

        Args:
            query: Search query
            region: Region code (accepted for interface parity; not used
                when building the payload)
            safesearch: Safe search level; only "on" and "off" add a
                parameter, any other value leaves it out
            timelimit: Time filter (d, w, m, y); other values are ignored
            page: Page number
            **kwargs: Additional filters:
                - length: Video length filter
                - resolution: Video resolution filter
                - source: Video source filter
                Unknown keys and unknown filter values are ignored.

        Returns:
            Query parameters dictionary
        """
        payload: dict[str, Any] = {
            "p": query,
            "fr": "sfp",
            "fr2": "p:s,v:v,m:sb,rgn:top",
            "ei": "UTF-8",
        }

        # Pagination
        if page > 1:
            # Each page shows ~15-20 videos
            payload["b"] = f"{(page - 1) * 15 + 1}"

        # Safe search
        if safesearch == "on":
            payload["safe"] = "active"
        elif safesearch == "off":
            payload["safe"] = "off"

        # Time filter
        if timelimit:
            time_map = {
                "d": "1d",
                "w": "1w",
                "m": "1m",
                "y": "1y",
            }
            if timelimit in time_map:
                payload["age"] = time_map[timelimit]

        # Length / resolution / source filters share one shape: look the
        # requested value up in its table and emit the parameter only when
        # the mapped value is non-empty.
        for option, table, param in (
            ("length", self.LENGTH_FILTERS, "vidlen"),
            ("resolution", self.RESOLUTION_FILTERS, "vidqual"),
            ("source", self.SOURCE_FILTERS, "site"),
        ):
            mapped = table.get(kwargs.get(option), "")
            if mapped:
                payload[param] = mapped

        return payload

    def extract_video_url(self, href: str) -> str:
        """Extract actual video URL from Yahoo redirect.

        Args:
            href: Yahoo redirect URL

        Returns:
            The percent-decoded target when ``href`` carries a ``/RU=``
            segment, the ``url`` query parameter for other
            ``r.search.yahoo.com`` links, and ``href`` unchanged otherwise
            (including when parsing fails).
        """
        if not href:
            return href

        try:
            from urllib.parse import unquote

            parsed = urlparse(href)

            if "/RU=" in href:
                # Redirect form: .../RU=<percent-encoded target>/RK=...
                start = href.find("/RU=") + 4
                end = href.find("/RK=", start)
                if end == -1:
                    end = len(href)
                return unquote(href[start:end])

            if "r.search.yahoo.com" in parsed.netloc:
                query_params = parse_qs(parsed.query)
                if "url" in query_params:
                    return query_params["url"][0]

            return href
        except Exception:
            # Best effort: an unparseable link is returned as-is.
            return href

    def post_extract_results(self, results: list[VideosResult]) -> list[VideosResult]:
        """Post-process video results.

        Resolves redirect URLs, drops entries lacking a URL or a title, and
        blanks inline ``data:`` thumbnails. Result objects are modified in
        place.

        Args:
            results: Raw extracted results

        Returns:
            Cleaned results
        """
        cleaned_results = []

        for result in results:
            # Extract real URL
            if result.url:
                result.url = self.extract_video_url(result.url)

            # Skip invalid results
            if not result.url or not result.title:
                continue

            # Clean thumbnail URL
            if result.thumbnail and result.thumbnail.startswith("data:"):
                result.thumbnail = ""

            cleaned_results.append(result)

        return cleaned_results

    def search(
        self,
        query: str,
        region: str = "us-en",
        safesearch: str = "moderate",
        timelimit: str | None = None,
        page: int = 1,
        max_results: int | None = None,
        **kwargs: Any,
    ) -> list[VideosResult] | None:
        """Search Yahoo Videos with pagination.

        Args:
            query: Video search query
            region: Region code
            safesearch: Safe search level
            timelimit: Time filter
            page: Starting page
            max_results: Maximum results
            **kwargs: Additional filters (length, resolution, source) and
                ``max_pages`` (last page number to request, default 5)

        Returns:
            List of VideosResult objects, or None when nothing was found
        """
        collected: list[VideosResult] = []
        current_page = page
        max_pages = kwargs.get("max_pages", 5)

        while current_page <= max_pages:
            payload = self.build_payload(
                query=query,
                region=region,
                safesearch=safesearch,
                timelimit=timelimit,
                page=current_page,
                **kwargs,
            )

            html_text = self.request(self.search_method, self.search_url, params=payload)
            if not html_text:
                break

            html_text = self.pre_process_html(html_text)
            page_results = self.extract_results(html_text)

            if not page_results:
                break

            # Clean each page before counting, so entries that are later
            # discarded as invalid do not end pagination early and leave the
            # caller with fewer than max_results items.
            collected.extend(self.post_extract_results(page_results))

            if max_results and len(collected) >= max_results:
                break

            current_page += 1

        if max_results:
            collected = collected[:max_results]

        return collected or None

    def run(
        self,
        keywords: str,
        region: str = "us-en",
        safesearch: str = "moderate",
        timelimit: str | None = None,
        resolution: str | None = None,
        duration: str | None = None,
        license_videos: str | None = None,
        max_results: int | None = None,
    ) -> list[dict[str, str]]:
        """Run video search and return results as dictionaries.

        Args:
            keywords: Search query.
            region: Region code.
            safesearch: Safe search level.
            timelimit: Time filter.
            resolution: Video resolution filter.
            duration: Video duration filter (short, medium, long).
            license_videos: License filter. Forwarded to ``search`` but not
                read by ``build_payload``, so it currently has no effect.
            max_results: Maximum number of results.

        Returns:
            List of video result dictionaries.
        """
        results = self.search(
            query=keywords,
            region=region,
            safesearch=safesearch,
            timelimit=timelimit,
            resolution=resolution,
            # build_payload reads the duration filter under the key "length";
            # without this mapping the caller's duration would be ignored.
            length=duration,
            duration=duration,
            license_videos=license_videos,
            max_results=max_results,
        )
        if results is None:
            return []
        return [result.to_dict() for result in results]
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
"""Yahoo weather search using embedded JSON data."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
import json
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from ...http_client import HttpClient
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class YahooWeather:
    """Yahoo weather search using embedded JSON extraction."""

    # Matches self.__next_f.push([1,"<string literal>"]) and captures the
    # literal's body. Backslash escapes (notably \") are allowed inside the
    # capture; a plain [^"]+ would stop at the first escaped quote and so
    # could never capture a payload that contains JSON keys.
    _NEXT_PUSH_RE = re.compile(r'self\.__next_f\.push\(\[1,"((?:[^"\\]|\\.)*)"\]\)')

    # Result key -> pattern whose first group is an integer reading.
    # Temperatures accept a leading minus sign.
    _INT_FIELD_RES = (
        ("temperature", re.compile(r'"temperature":(-?\d+)')),
        ("high", re.compile(r'"highTemperature":(-?\d+)')),
        ("low", re.compile(r'"lowTemperature":(-?\d+)')),
        ("humidity", re.compile(r'"value":"(\d+)%"[^}]*"category":"Humidity"')),
        ("precipitation_chance", re.compile(r'"probabilityOfPrecipitation":"(\d+)%"')),
    )
    _CONDITION_RE = re.compile(r'"iconLabel":"([^"]+)"')
    _LOCATION_RE = re.compile(r'"name":"([^"]+)","code":null,"woeid":(\d+)')

    def __init__(self, proxy: str | None = None, timeout: int | None = None, verify: bool = True):
        """Initialize weather search engine.

        Args:
            proxy: Proxy URL.
            timeout: Request timeout in seconds.
            verify: Whether to verify SSL certificates.
        """
        self.http_client = HttpClient(proxy=proxy, timeout=timeout, verify=verify)

    def request(self, method: str, url: str, **kwargs: Any) -> str | None:
        """Make a request to the weather service.

        Returns:
            The response text, or None if the request raised any exception.
        """
        try:
            response = self.http_client.request(method, url, **kwargs)
            return response.text
        except Exception:
            # Deliberate best effort: callers treat None as "fetch failed".
            return None

    def run(self, *args, **kwargs) -> list[dict[str, Any]]:
        """Get weather data from Yahoo.

        Args:
            location: Location to get weather for (e.g., "New York", "London",
                "Bengaluru"). Taken from the first positional argument, or
                from the ``location`` / ``keywords`` keyword argument.

        Returns:
            List of weather data dictionaries. On failure the list holds a
            single dictionary with ``location`` and ``error`` keys.

        Raises:
            ValueError: If no location is supplied.
        """
        from urllib.parse import quote_plus

        location = args[0] if args else kwargs.get("location") or kwargs.get("keywords")

        if not location:
            raise ValueError("Location is required for weather search")

        try:
            # Use the search endpoint which redirects to the correct weather page.
            # quote_plus encodes spaces as '+' and also escapes characters such
            # as '&', '#' and non-ASCII letters that would otherwise corrupt
            # the query string.
            search_url = f"https://weather.yahoo.com/search/?q={quote_plus(location)}"

            response = self.request("GET", search_url)
            if not response:
                return [{
                    "location": location,
                    "error": "Failed to fetch weather data from Yahoo"
                }]

            # Extract JSON data from the page
            weather_data = self._extract_json_data(response, location)
            if weather_data:
                return [weather_data]

            # Fallback: try regex parsing
            return self._parse_weather_html(response, location)

        except Exception as e:
            return [{
                "location": location,
                "error": f"Failed to fetch weather data: {str(e)}"
            }]

    def _extract_json_data(self, html: str, location: str) -> dict[str, Any] | None:
        """Extract weather data from embedded JSON in the page.

        Scans every ``self.__next_f.push([1,"..."])`` string in ``html``,
        decodes it, and keeps the first value found for each field.

        Args:
            html: Page source to scan.
            location: Name reported when the page does not provide one.

        Returns:
            A weather dictionary, or None when no temperature was found.
        """
        if not isinstance(html, str):
            return None

        weather_info: dict[str, Any] = {}

        for raw_chunk in self._NEXT_PUSH_RE.findall(html):
            try:
                # The capture is the body of a JSON-style string literal.
                # json.loads unescapes it without the mojibake that
                # str.encode().decode('unicode_escape') causes for non-ASCII
                # text; strict=False tolerates raw control characters.
                decoded = json.loads(f'"{raw_chunk}"', strict=False)
            except ValueError:
                continue

            # Membership tests (not truthiness) so a legitimate reading of 0
            # is kept instead of being treated as missing.
            for key, pattern in self._INT_FIELD_RES:
                if key not in weather_info:
                    found = pattern.search(decoded)
                    if found:
                        weather_info[key] = int(found.group(1))

            if "condition" not in weather_info:
                found = self._CONDITION_RE.search(decoded)
                if found:
                    weather_info["condition"] = found.group(1)

            if "location_name" not in weather_info:
                found = self._LOCATION_RE.search(decoded)
                if found:
                    weather_info["location_name"] = found.group(1)
                    weather_info["woeid"] = int(found.group(2))

        if "temperature" not in weather_info:
            return None

        return {
            "location": weather_info.get("location_name", location),
            "woeid": weather_info.get("woeid"),
            "temperature_f": weather_info.get("temperature"),
            "condition": weather_info.get("condition"),
            "high_f": weather_info.get("high"),
            "low_f": weather_info.get("low"),
            "humidity_percent": weather_info.get("humidity"),
            "precipitation_chance": weather_info.get("precipitation_chance"),
            "source": "Yahoo Weather",
            "units": "Fahrenheit"
        }

    def _parse_weather_html(self, html_content: str, location: str) -> list[dict[str, Any]]:
        """Fallback: Parse weather data from HTML content using regex.

        Args:
            html_content: HTML content of weather page
            location: Location name

        Returns:
            List of weather data dictionaries; a single ``error`` dictionary
            when nothing could be extracted or parsing raised.
        """
        try:
            weather_data: dict[str, Any] = {"location": location}

            # Extract current temperature (first pattern that matches wins).
            temp_patterns = [
                r'<p[^>]*class="[^"]*font-title1[^"]*"[^>]*>(-?\d+)°</p>',
                r'>(-?\d+)°<',
                r'"temperature":(-?\d+)',
            ]

            for pattern in temp_patterns:
                match = re.search(pattern, html_content)
                if match:
                    weather_data["temperature_f"] = int(match.group(1))
                    break

            # Extract condition
            condition_patterns = [
                r'"iconLabel":"([^"]+)"',
                r'aria-label="([^"]*(?:Cloudy|Sunny|Rain|Clear|Thunder|Shower|Fog)[^"]*)"',
            ]

            for pattern in condition_patterns:
                match = re.search(pattern, html_content, re.IGNORECASE)
                if match:
                    weather_data["condition"] = match.group(1)
                    break

            # Extract high/low
            high_match = re.search(r'"highTemperature":(-?\d+)', html_content)
            if high_match:
                weather_data["high_f"] = int(high_match.group(1))

            low_match = re.search(r'"lowTemperature":(-?\d+)', html_content)
            if low_match:
                weather_data["low_f"] = int(low_match.group(1))

            # Extract humidity; exactly one of the two groups participates.
            humidity_match = re.search(r'Humidity[^>]*>(\d+)%|"value":"(\d+)%"[^}]*"Humidity"', html_content, re.IGNORECASE)
            if humidity_match:
                weather_data["humidity_percent"] = int(humidity_match.group(1) or humidity_match.group(2))

            weather_data["source"] = "Yahoo Weather"
            weather_data["units"] = "Fahrenheit"

            # Remove None values
            weather_data = {k: v for k, v in weather_data.items() if v is not None}

            if len(weather_data) > 3:  # Has more than just location, source, and units
                return [weather_data]

            return [{
                "location": location,
                "error": "Could not extract weather data from page"
            }]

        except Exception as e:
            return [{
                "location": location,
                "error": f"Failed to parse weather data: {str(e)}"
            }]
|
webscout/search/http_client.py
CHANGED
|
@@ -120,7 +120,7 @@ class HttpClient:
|
|
|
120
120
|
else:
|
|
121
121
|
raise WebscoutE(f"{resp.url} returned {resp.status_code}")
|
|
122
122
|
|
|
123
|
-
except
|
|
123
|
+
except Exception as ex:
|
|
124
124
|
if "time" in str(ex).lower() or "timeout" in str(ex).lower():
|
|
125
125
|
raise TimeoutE(f"{url} {type(ex).__name__}: {ex}") from ex
|
|
126
126
|
raise WebscoutE(f"{url} {type(ex).__name__}: {ex}") from ex
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""Yahoo unified search interface."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
from typing import Dict, List, Optional
|
|
5
|
+
from .base import BaseSearch
|
|
6
|
+
from .engines.yahoo.text import YahooText
|
|
7
|
+
from .engines.yahoo.images import YahooImages
|
|
8
|
+
from .engines.yahoo.videos import YahooVideos
|
|
9
|
+
from .engines.yahoo.news import YahooNews
|
|
10
|
+
from .engines.yahoo.suggestions import YahooSuggestions
|
|
11
|
+
from .engines.yahoo.answers import YahooAnswers
|
|
12
|
+
from .engines.yahoo.maps import YahooMaps
|
|
13
|
+
from .engines.yahoo.translate import YahooTranslate
|
|
14
|
+
from .engines.yahoo.weather import YahooWeather
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class YahooSearch(BaseSearch):
    """Unified Yahoo search interface.

    Each method creates a fresh engine instance with default settings and
    delegates to that engine's ``run`` method.
    """

    # NOTE(review): text/images/news forward their arguments positionally.
    # The ``run`` signatures of YahooText, YahooImages and YahooNews are not
    # visible here; confirm that their fourth positional parameter really is
    # ``max_results`` (for YahooVideos it is ``timelimit`` -- see videos()).

    def text(self, keywords: str, region: str = "us", safesearch: str = "moderate", max_results: Optional[int] = None) -> List[Dict[str, str]]:
        """Run a Yahoo text search and return result dictionaries."""
        search = YahooText()
        return search.run(keywords, region, safesearch, max_results)

    def images(self, keywords: str, region: str = "us", safesearch: str = "moderate", max_results: Optional[int] = None) -> List[Dict[str, str]]:
        """Run a Yahoo image search and return result dictionaries."""
        search = YahooImages()
        return search.run(keywords, region, safesearch, max_results)

    def videos(self, keywords: str, region: str = "us", safesearch: str = "moderate", max_results: Optional[int] = None) -> List[Dict[str, str]]:
        """Run a Yahoo video search and return result dictionaries."""
        search = YahooVideos()
        # YahooVideos.run's fourth positional parameter is ``timelimit``, so
        # max_results must be passed by keyword; passed positionally it would
        # be taken as the time filter and the result limit would be lost.
        return search.run(keywords, region, safesearch, max_results=max_results)

    def news(self, keywords: str, region: str = "us", safesearch: str = "moderate", max_results: Optional[int] = None) -> List[Dict[str, str]]:
        """Run a Yahoo news search and return result dictionaries."""
        search = YahooNews()
        return search.run(keywords, region, safesearch, max_results)

    def suggestions(self, keywords: str, region: str = "us") -> List[str]:
        """Return Yahoo query suggestions for ``keywords``."""
        search = YahooSuggestions()
        return search.run(keywords, region)

    def answers(self, keywords: str) -> List[Dict[str, str]]:
        """Delegate an answers query to YahooAnswers."""
        search = YahooAnswers()
        return search.run(keywords)

    def maps(self, keywords: str, place: Optional[str] = None, street: Optional[str] = None, city: Optional[str] = None, county: Optional[str] = None, state: Optional[str] = None, country: Optional[str] = None, postalcode: Optional[str] = None, latitude: Optional[str] = None, longitude: Optional[str] = None, radius: int = 0, max_results: Optional[int] = None) -> List[Dict[str, str]]:
        """Delegate a maps query, with all location refinements, to YahooMaps."""
        search = YahooMaps()
        return search.run(keywords, place, street, city, county, state, country, postalcode, latitude, longitude, radius, max_results)

    def translate(self, keywords: str, from_lang: Optional[str] = None, to_lang: str = "en") -> List[Dict[str, str]]:
        """Delegate a translation request to YahooTranslate."""
        search = YahooTranslate()
        return search.run(keywords, from_lang, to_lang)

    def weather(self, keywords: str) -> List[Dict[str, str]]:
        """Look up weather for the location named by ``keywords``."""
        search = YahooWeather()
        return search.run(keywords)
|
|
@@ -1,16 +1,6 @@
|
|
|
1
|
-
# webscout/
|
|
1
|
+
# webscout/server/__init__.py
|
|
2
2
|
|
|
3
|
-
from .models import User, APIKey
|
|
4
|
-
from .database import DatabaseManager
|
|
5
|
-
from .api_key_manager import APIKeyManager
|
|
6
|
-
from .rate_limiter import RateLimiter
|
|
7
|
-
from .middleware import AuthMiddleware
|
|
8
3
|
from .schemas import (
|
|
9
|
-
APIKeyCreateRequest,
|
|
10
|
-
APIKeyCreateResponse,
|
|
11
|
-
APIKeyValidationResponse,
|
|
12
|
-
UserCreateRequest,
|
|
13
|
-
UserResponse,
|
|
14
4
|
HealthCheckResponse
|
|
15
5
|
)
|
|
16
6
|
# Import server functions lazily to avoid module execution issues
|
|
@@ -46,17 +36,6 @@ def initialize_tti_provider_map():
|
|
|
46
36
|
return _init_tti_provider_map()
|
|
47
37
|
|
|
48
38
|
__all__ = [
|
|
49
|
-
"User",
|
|
50
|
-
"APIKey",
|
|
51
|
-
"DatabaseManager",
|
|
52
|
-
"APIKeyManager",
|
|
53
|
-
"RateLimiter",
|
|
54
|
-
"AuthMiddleware",
|
|
55
|
-
"APIKeyCreateRequest",
|
|
56
|
-
"APIKeyCreateResponse",
|
|
57
|
-
"APIKeyValidationResponse",
|
|
58
|
-
"UserCreateRequest",
|
|
59
|
-
"UserResponse",
|
|
60
39
|
"HealthCheckResponse",
|
|
61
40
|
"create_app",
|
|
62
41
|
"run_api",
|
|
@@ -67,4 +46,4 @@ __all__ = [
|
|
|
67
46
|
"APIError",
|
|
68
47
|
"initialize_provider_map",
|
|
69
48
|
"initialize_tti_provider_map"
|
|
70
|
-
]
|
|
49
|
+
]
|