webscout 2025.10.15__py3-none-any.whl → 2025.10.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of webscout might be problematic.
Files changed (63)
  1. webscout/Extra/YTToolkit/README.md +1 -1
  2. webscout/Extra/tempmail/README.md +3 -3
  3. webscout/Provider/ClaudeOnline.py +350 -0
  4. webscout/Provider/OPENAI/README.md +1 -1
  5. webscout/Provider/TTI/bing.py +4 -4
  6. webscout/Provider/TTI/claudeonline.py +315 -0
  7. webscout/__init__.py +1 -1
  8. webscout/client.py +4 -5
  9. webscout/litprinter/__init__.py +0 -42
  10. webscout/scout/README.md +59 -8
  11. webscout/scout/core/scout.py +62 -0
  12. webscout/scout/element.py +251 -45
  13. webscout/search/__init__.py +3 -4
  14. webscout/search/engines/bing/images.py +5 -2
  15. webscout/search/engines/bing/news.py +6 -4
  16. webscout/search/engines/bing/text.py +5 -2
  17. webscout/search/engines/yahoo/__init__.py +41 -0
  18. webscout/search/engines/yahoo/answers.py +16 -0
  19. webscout/search/engines/yahoo/base.py +34 -0
  20. webscout/search/engines/yahoo/images.py +324 -0
  21. webscout/search/engines/yahoo/maps.py +16 -0
  22. webscout/search/engines/yahoo/news.py +258 -0
  23. webscout/search/engines/yahoo/suggestions.py +140 -0
  24. webscout/search/engines/yahoo/text.py +273 -0
  25. webscout/search/engines/yahoo/translate.py +16 -0
  26. webscout/search/engines/yahoo/videos.py +302 -0
  27. webscout/search/engines/yahoo/weather.py +220 -0
  28. webscout/search/http_client.py +1 -1
  29. webscout/search/yahoo_main.py +54 -0
  30. webscout/{auth → server}/__init__.py +2 -23
  31. webscout/server/config.py +84 -0
  32. webscout/{auth → server}/request_processing.py +3 -28
  33. webscout/{auth → server}/routes.py +6 -148
  34. webscout/server/schemas.py +23 -0
  35. webscout/{auth → server}/server.py +11 -43
  36. webscout/server/simple_logger.py +84 -0
  37. webscout/version.py +1 -1
  38. webscout/version.py.bak +1 -1
  39. webscout/zeroart/README.md +17 -9
  40. webscout/zeroart/__init__.py +78 -6
  41. webscout/zeroart/effects.py +51 -1
  42. webscout/zeroart/fonts.py +559 -1
  43. {webscout-2025.10.15.dist-info → webscout-2025.10.17.dist-info}/METADATA +11 -54
  44. {webscout-2025.10.15.dist-info → webscout-2025.10.17.dist-info}/RECORD +51 -46
  45. {webscout-2025.10.15.dist-info → webscout-2025.10.17.dist-info}/entry_points.txt +1 -1
  46. webscout/Extra/weather.md +0 -281
  47. webscout/auth/api_key_manager.py +0 -189
  48. webscout/auth/auth_system.py +0 -85
  49. webscout/auth/config.py +0 -175
  50. webscout/auth/database.py +0 -755
  51. webscout/auth/middleware.py +0 -248
  52. webscout/auth/models.py +0 -185
  53. webscout/auth/rate_limiter.py +0 -254
  54. webscout/auth/schemas.py +0 -103
  55. webscout/auth/simple_logger.py +0 -236
  56. webscout/search/engines/yahoo.py +0 -65
  57. webscout/search/engines/yahoo_news.py +0 -64
  58. /webscout/{auth → server}/exceptions.py +0 -0
  59. /webscout/{auth → server}/providers.py +0 -0
  60. /webscout/{auth → server}/request_models.py +0 -0
  61. {webscout-2025.10.15.dist-info → webscout-2025.10.17.dist-info}/WHEEL +0 -0
  62. {webscout-2025.10.15.dist-info → webscout-2025.10.17.dist-info}/licenses/LICENSE.md +0 -0
  63. {webscout-2025.10.15.dist-info → webscout-2025.10.17.dist-info}/top_level.txt +0 -0
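The {auth → server} entries above amount to a package rename; items 58-60 move unchanged (+0 -0), so for those modules only the import path shifts. A minimal sketch of the corresponding downstream change, assuming module-level imports only (symbol names inside the moved modules are not shown in this diff):

# Hypothetical import update for the auth -> server package rename.
# Old layout (webscout 2025.10.15):
#     from webscout.auth import providers, exceptions, request_models
# New layout (webscout 2025.10.17):
from webscout.server import providers, exceptions, request_models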
webscout/search/engines/yahoo/images.py (new file)
@@ -0,0 +1,324 @@
+"""Yahoo image search engine with advanced filters."""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import Any
+from urllib.parse import urljoin
+
+from .base import YahooSearchEngine
+from ...results import ImagesResult
+
+
+class YahooImages(YahooSearchEngine[ImagesResult]):
+    """Yahoo image search engine with filter support.
+
+    Features:
+    - Size filters (small, medium, large, wallpaper)
+    - Color filters (color, bw, red, orange, yellow, etc.)
+    - Type filters (photo, clipart, lineart, transparent)
+    - Layout filters (square, wide, tall)
+    - Time filters
+    - Pagination support
+
+    Note: Yahoo does not support reverse image search (searching by image upload/URL).
+    For reverse image search functionality, use Google Images or Bing Images instead.
+    """
+
+    name = "yahoo"
+    category = "images"
+
+    search_url = "https://images.search.yahoo.com/search/images"
+    search_method = "GET"
+    search_headers = {
+        "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1"
+    }
+
+    # XPath selectors
+    items_xpath = "//li[contains(@class, 'ld')]"
+    elements_xpath: Mapping[str, str] = {
+        "title": "@data",
+        "image": "@data",
+        "thumbnail": "@data",
+        "url": "@data",
+        "source": "@data",
+        "width": "@data",
+        "height": "@data",
+    }
+
+    # Filter mappings
+    SIZE_FILTERS = {
+        "small": "small",
+        "medium": "medium",
+        "large": "large",
+        "wallpaper": "wallpaper",
+        "all": "",
+    }
+
+    COLOR_FILTERS = {
+        "color": "color",
+        "bw": "bw",
+        "black": "black",
+        "white": "white",
+        "red": "red",
+        "orange": "orange",
+        "yellow": "yellow",
+        "green": "green",
+        "teal": "teal",
+        "blue": "blue",
+        "purple": "purple",
+        "pink": "pink",
+        "brown": "brown",
+        "gray": "gray",
+        "all": "",
+    }
+
+    TYPE_FILTERS = {
+        "photo": "photo",
+        "clipart": "clipart",
+        "lineart": "linedrawing",
+        "transparent": "transparent",
+        "gif": "animatedgif",
+        "all": "",
+    }
+
+    LAYOUT_FILTERS = {
+        "square": "square",
+        "wide": "wide",
+        "tall": "tall",
+        "all": "",
+    }
+
+    def build_payload(
+        self,
+        query: str,
+        region: str,
+        safesearch: str,
+        timelimit: str | None,
+        page: int = 1,
+        **kwargs: Any,
+    ) -> dict[str, Any]:
+        """Build image search payload with filters.
+
+        Args:
+            query: Search query
+            region: Region code
+            safesearch: Safe search level (on/moderate/off)
+            timelimit: Time filter (d, w, m)
+            page: Page number
+            **kwargs: Additional filters including:
+                - size: Image size filter
+                - color: Color filter
+                - type: Image type filter
+                - layout: Layout/aspect ratio filter
+                - license: Usage rights filter
+
+        Returns:
+            Query parameters dictionary
+        """
+        payload = {
+            "p": query,
+        }
+
+        # Pagination - Yahoo images use 'b' parameter
+        if page > 1:
+            # Each page shows approximately 40 images
+            payload["b"] = f"{(page - 1) * 40 + 1}"
+
+        # Safe search
+        if safesearch == "on":
+            payload["safe"] = "active"
+        elif safesearch == "off":
+            payload["safe"] = "off"
+
+        # Time filter
+        if timelimit:
+            time_map = {
+                "d": "1d",  # Past 24 hours
+                "w": "1w",  # Past week
+                "m": "1m",  # Past month
+            }
+            if timelimit in time_map:
+                payload["age"] = time_map[timelimit]
+
+        # Size filter
+        if "size" in kwargs and kwargs["size"] in self.SIZE_FILTERS:
+            size_val = self.SIZE_FILTERS[kwargs["size"]]
+            if size_val:
+                payload["imgsz"] = size_val
+
+        # Color filter
+        if "color" in kwargs and kwargs["color"] in self.COLOR_FILTERS:
+            color_val = self.COLOR_FILTERS[kwargs["color"]]
+            if color_val:
+                payload["imgc"] = color_val
+
+        # Type filter
+        if "type" in kwargs and kwargs["type"] in self.TYPE_FILTERS:
+            type_val = self.TYPE_FILTERS[kwargs["type"]]
+            if type_val:
+                payload["imgt"] = type_val
+
+        # Layout filter
+        if "layout" in kwargs and kwargs["layout"] in self.LAYOUT_FILTERS:
+            layout_val = self.LAYOUT_FILTERS[kwargs["layout"]]
+            if layout_val:
+                payload["imgsp"] = layout_val
+
+        return payload
+
+    def post_extract_results(self, results: list[ImagesResult]) -> list[ImagesResult]:
+        """Post-process image results to parse JSON data.
+
+        Args:
+            results: Raw extracted results
+
+        Returns:
+            Cleaned results with proper URLs and metadata
+        """
+        import json
+        from urllib.parse import unquote
+
+        cleaned_results = []
+
+        for result in results:
+            # Parse JSON data from the data attribute
+            if result.title and result.title.startswith('{'):
+                try:
+                    data = json.loads(result.title)
+
+                    # Extract title
+                    result.title = data.get('desc', '') or data.get('tit', '')
+
+                    # Extract URLs
+                    result.url = data.get('rurl', '')
+                    result.thumbnail = data.get('turl', '')
+                    result.image = data.get('turlL', '') or data.get('turl', '')
+
+                    # Extract dimensions
+                    result.width = int(data.get('imgW', 0))
+                    result.height = int(data.get('imgH', 0))
+
+                except (json.JSONDecodeError, KeyError, ValueError):
+                    # If JSON parsing fails, keep original data
+                    pass
+
+            # Clean URLs if they exist
+            if result.url:
+                result.url = unquote(result.url)
+            if result.image:
+                result.image = unquote(result.image)
+            if result.thumbnail:
+                result.thumbnail = unquote(result.thumbnail)
+
+            cleaned_results.append(result)
+
+        return cleaned_results
+
+    def search(
+        self,
+        query: str,
+        region: str = "us-en",
+        safesearch: str = "moderate",
+        timelimit: str | None = None,
+        page: int = 1,
+        max_results: int | None = None,
+        **kwargs: Any,
+    ) -> list[ImagesResult] | None:
+        """Search Yahoo Images with pagination.
+
+        Args:
+            query: Image search query
+            region: Region code
+            safesearch: Safe search level
+            timelimit: Time filter
+            page: Starting page
+            max_results: Maximum results to return
+            **kwargs: Additional filters (size, color, type, layout)
+
+        Returns:
+            List of ImageResult objects
+        """
+        results = []
+        current_page = page
+        max_pages = kwargs.get("max_pages", 5)
+
+        while current_page <= max_pages:
+            payload = self.build_payload(
+                query=query,
+                region=region,
+                safesearch=safesearch,
+                timelimit=timelimit,
+                page=current_page,
+                **kwargs
+            )
+
+            html_text = self.request(self.search_method, self.search_url, params=payload)
+            if not html_text:
+                break
+
+            html_text = self.pre_process_html(html_text)
+            page_results = self.extract_results(html_text)
+
+            if not page_results:
+                break
+
+            results.extend(page_results)
+
+            if max_results and len(results) >= max_results:
+                break
+
+            current_page += 1
+
+        results = self.post_extract_results(results)
+
+        if max_results:
+            results = results[:max_results]
+
+        return results if results else None
+
+    def run(
+        self,
+        keywords: str,
+        region: str = "us-en",
+        safesearch: str = "moderate",
+        timelimit: str | None = None,
+        size: str | None = None,
+        color: str | None = None,
+        type_image: str | None = None,
+        layout: str | None = None,
+        license_image: str | None = None,
+        max_results: int | None = None,
+    ) -> list[dict[str, str]]:
+        """Run image search and return results as dictionaries.
+
+        Args:
+            keywords: Search query.
+            region: Region code.
+            safesearch: Safe search level.
+            timelimit: Time filter.
+            size: Image size filter.
+            color: Color filter.
+            type_image: Image type filter.
+            layout: Layout filter.
+            license_image: License filter.
+            max_results: Maximum number of results.
+
+        Returns:
+            List of image result dictionaries.
+        """
+        results = self.search(
+            query=keywords,
+            region=region,
+            safesearch=safesearch,
+            timelimit=timelimit,
+            size=size,
+            color=color,
+            type_image=type_image,
+            layout=layout,
+            license_image=license_image,
+            max_results=max_results,
+        )
+        if results is None:
+            return []
+        return [result.to_dict() for result in results]
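For reference, a minimal usage sketch of the engine added above, based only on the run() signature in this hunk. The import path and the no-argument constructor are assumptions not confirmed by the diff, and the result dict keys are assumed to mirror the ImagesResult fields shown here.

# Hypothetical usage sketch (not from the package docs).
from webscout.search.engines.yahoo.images import YahooImages  # assumed import path

engine = YahooImages()  # assumes the base class needs no constructor arguments
images = engine.run(
    keywords="aurora borealis",
    size="large",        # accepted values are the SIZE_FILTERS keys above
    color="bw",          # COLOR_FILTERS keys
    type_image="photo",  # TYPE_FILTERS keys
    layout="wide",       # LAYOUT_FILTERS keys
    max_results=20,
)
for image in images:     # each entry is a dict from ImagesResult.to_dict(); key names assumed
    print(image.get("title"), image.get("url"))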
webscout/search/engines/yahoo/maps.py (new file)
@@ -0,0 +1,16 @@
+"""Yahoo maps search."""
+
+from __future__ import annotations
+
+from .base import YahooSearchEngine
+
+
+class YahooMaps(YahooSearchEngine):
+    """Yahoo maps search."""
+
+    def run(self, *args, **kwargs) -> list[dict[str, str]]:
+        """Get maps results from Yahoo.
+
+        Not supported.
+        """
+        raise NotImplementedError("Yahoo does not support maps search")
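A short sketch of what callers should expect from this stub, with the same import-path assumption as above:

# Hypothetical: per the stub above, YahooMaps.run() always raises.
from webscout.search.engines.yahoo.maps import YahooMaps  # assumed import path

try:
    YahooMaps().run("coffee shops near Berlin")
except NotImplementedError as exc:
    print(exc)  # "Yahoo does not support maps search"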
webscout/search/engines/yahoo/news.py (new file)
@@ -0,0 +1,258 @@
+"""Yahoo news search engine with comprehensive features."""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+from secrets import token_urlsafe
+from typing import Any
+
+from .base import YahooSearchEngine
+from ...results import NewsResult
+
+
+def extract_image(u: str) -> str:
+    """Sanitize image URL.
+
+    Args:
+        u: Image URL
+
+    Returns:
+        Cleaned URL or empty string
+    """
+    if not u:
+        return ""
+
+    # Skip data URIs
+    if u.startswith("data:image"):
+        return ""
+
+    return u
+
+
+def extract_source(s: str) -> str:
+    """Remove ' via Yahoo' from source string.
+
+    Args:
+        s: Source string
+
+    Returns:
+        Cleaned source name
+    """
+    if not s:
+        return s
+
+    return s.replace(" via Yahoo", "").replace(" - Yahoo", "").strip()
+
+
+class YahooNews(YahooSearchEngine[NewsResult]):
+    """Yahoo news search engine with advanced filtering.
+
+    Features:
+    - Time-based filtering
+    - Category filtering
+    - Source filtering
+    - Pagination support
+    - Rich metadata extraction
+    """
+
+    name = "yahoo"
+    category = "news"
+
+    search_url = "https://news.search.yahoo.com/search"
+    search_method = "GET"
+
+    # XPath selectors for news articles
+    items_xpath = "//div[contains(@class, 'NewsArticle') or contains(@class, 'dd') and contains(@class, 'algo')]"
+    elements_xpath: Mapping[str, str] = {
+        "date": ".//span[contains(@class, 'fc-2nd') or contains(@class, 'age') or contains(@class, 's-time')]//text()",
+        "title": ".//h4//a//text() | .//h3//a//text()",
+        "url": ".//h4//a/@href | .//h3//a/@href",
+        "body": ".//p//text() | .//div[contains(@class, 'compText')]//text()",
+        "image": ".//img/@src",
+        "source": ".//span[contains(@class, 's-source') or contains(@class, 'source')]//text()",
+    }
+
+    def build_payload(
+        self,
+        query: str,
+        region: str,
+        safesearch: str,
+        timelimit: str | None,
+        page: int = 1,
+        **kwargs: Any,
+    ) -> dict[str, Any]:
+        """Build news search payload.
+
+        Args:
+            query: Search query
+            region: Region code
+            safesearch: Safe search level
+            timelimit: Time filter (d, w, m)
+            page: Page number
+            **kwargs: Additional parameters
+
+        Returns:
+            Query parameters dictionary
+        """
+        # Generate dynamic URL tokens for tracking
+        self.search_url = (
+            f"https://news.search.yahoo.com/search"
+            f";_ylt={token_urlsafe(24 * 3 // 4)}"
+            f";_ylu={token_urlsafe(47 * 3 // 4)}"
+        )
+
+        payload = {
+            "p": query,
+            "ei": "UTF-8",
+        }
+
+        # Pagination - Yahoo news uses 'b' parameter
+        if page > 1:
+            # Each page shows approximately 10 articles
+            payload["b"] = f"{(page - 1) * 10 + 1}"
+
+        # Time filter
+        if timelimit:
+            time_map = {
+                "d": "1d",  # Past 24 hours
+                "w": "1w",  # Past week
+                "m": "1m",  # Past month
+            }
+            if timelimit in time_map:
+                payload["btf"] = time_map[timelimit]
+
+        # Additional filters
+        if "category" in kwargs:
+            payload["category"] = kwargs["category"]
+
+        if "sort" in kwargs:
+            # Sort by relevance or date
+            payload["sort"] = kwargs["sort"]
+
+        return payload
+
+    def post_extract_results(self, results: list[NewsResult]) -> list[NewsResult]:
+        """Post-process news results.
+
+        Args:
+            results: Raw extracted results
+
+        Returns:
+            Cleaned news results
+        """
+        cleaned_results = []
+
+        for result in results:
+            # Clean image URL
+            result.image = extract_image(result.image)
+
+            # Clean source name
+            result.source = extract_source(result.source)
+
+            # Extract URL from redirect
+            if result.url and "/RU=" in result.url:
+                from urllib.parse import unquote
+                start = result.url.find("/RU=") + 4
+                end = result.url.find("/RK=", start)
+                if end == -1:
+                    end = len(result.url)
+                result.url = unquote(result.url[start:end])
+
+            # Filter out results without essential fields
+            if result.title and result.url:
+                cleaned_results.append(result)
+
+        return cleaned_results
+
+    def search(
+        self,
+        query: str,
+        region: str = "us-en",
+        safesearch: str = "moderate",
+        timelimit: str | None = None,
+        page: int = 1,
+        max_results: int | None = None,
+        **kwargs: Any,
+    ) -> list[NewsResult] | None:
+        """Search Yahoo News with pagination.
+
+        Args:
+            query: News search query
+            region: Region code
+            safesearch: Safe search level
+            timelimit: Time filter (d, w, m)
+            page: Starting page
+            max_results: Maximum results to return
+            **kwargs: Additional parameters (category, sort)
+
+        Returns:
+            List of NewsResult objects
+        """
+        results = []
+        current_page = page
+        max_pages = kwargs.get("max_pages", 10)
+
+        while current_page <= max_pages:
+            payload = self.build_payload(
+                query=query,
+                region=region,
+                safesearch=safesearch,
+                timelimit=timelimit,
+                page=current_page,
+                **kwargs
+            )
+
+            html_text = self.request(self.search_method, self.search_url, params=payload)
+            if not html_text:
+                break
+
+            html_text = self.pre_process_html(html_text)
+            page_results = self.extract_results(html_text)
+
+            if not page_results:
+                break
+
+            results.extend(page_results)
+
+            if max_results and len(results) >= max_results:
+                break
+
+            current_page += 1
+
+        results = self.post_extract_results(results)
+
+        if max_results:
+            results = results[:max_results]
+
+        return results if results else None
+
+    def run(
+        self,
+        keywords: str,
+        region: str = "us-en",
+        safesearch: str = "moderate",
+        timelimit: str | None = None,
+        max_results: int | None = None,
+    ) -> list[dict[str, str]]:
+        """Run news search and return results as dictionaries.
+
+        Args:
+            keywords: Search query.
+            region: Region code.
+            safesearch: Safe search level.
+            timelimit: Time filter.
+            max_results: Maximum number of results.
+
+        Returns:
+            List of news result dictionaries.
+        """
+        results = self.search(
+            query=keywords,
+            region=region,
+            safesearch=safesearch,
+            timelimit=timelimit,
+            max_results=max_results,
+        )
+        if results is None:
+            return []
+        return [result.to_dict() for result in results]
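A matching sketch for the news engine, again assuming the import path and a no-argument constructor; timelimit accepts "d", "w", or "m" as mapped in build_payload above, and the result dict keys are assumed to mirror the NewsResult fields.

# Hypothetical usage sketch for the new YahooNews engine.
from webscout.search.engines.yahoo.news import YahooNews  # assumed import path

news = YahooNews().run(
    keywords="python packaging",
    timelimit="w",       # past week, per the time_map in build_payload
    max_results=10,
)
for article in news:     # dicts from NewsResult.to_dict(); key names assumed
    print(article.get("source"), "-", article.get("title"))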