webscout-2025.10.15-py3-none-any.whl → webscout-2025.10.16-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of webscout might be problematic. Click here for more details.

Files changed (60)
  1. webscout/Extra/YTToolkit/README.md +1 -1
  2. webscout/Extra/tempmail/README.md +3 -3
  3. webscout/Provider/OPENAI/README.md +1 -1
  4. webscout/Provider/TTI/bing.py +4 -4
  5. webscout/__init__.py +1 -1
  6. webscout/client.py +4 -5
  7. webscout/litprinter/__init__.py +0 -42
  8. webscout/scout/README.md +59 -8
  9. webscout/scout/core/scout.py +62 -0
  10. webscout/scout/element.py +251 -45
  11. webscout/search/__init__.py +3 -4
  12. webscout/search/engines/bing/images.py +5 -2
  13. webscout/search/engines/bing/news.py +6 -4
  14. webscout/search/engines/bing/text.py +5 -2
  15. webscout/search/engines/yahoo/__init__.py +41 -0
  16. webscout/search/engines/yahoo/answers.py +16 -0
  17. webscout/search/engines/yahoo/base.py +34 -0
  18. webscout/search/engines/yahoo/images.py +324 -0
  19. webscout/search/engines/yahoo/maps.py +16 -0
  20. webscout/search/engines/yahoo/news.py +258 -0
  21. webscout/search/engines/yahoo/suggestions.py +140 -0
  22. webscout/search/engines/yahoo/text.py +273 -0
  23. webscout/search/engines/yahoo/translate.py +16 -0
  24. webscout/search/engines/yahoo/videos.py +302 -0
  25. webscout/search/engines/yahoo/weather.py +220 -0
  26. webscout/search/http_client.py +1 -1
  27. webscout/search/yahoo_main.py +54 -0
  28. webscout/{auth → server}/__init__.py +2 -23
  29. webscout/server/config.py +84 -0
  30. webscout/{auth → server}/request_processing.py +3 -28
  31. webscout/{auth → server}/routes.py +6 -148
  32. webscout/server/schemas.py +23 -0
  33. webscout/{auth → server}/server.py +11 -43
  34. webscout/server/simple_logger.py +84 -0
  35. webscout/version.py +1 -1
  36. webscout/version.py.bak +1 -1
  37. webscout/zeroart/README.md +17 -9
  38. webscout/zeroart/__init__.py +78 -6
  39. webscout/zeroart/effects.py +51 -1
  40. webscout/zeroart/fonts.py +559 -1
  41. {webscout-2025.10.15.dist-info → webscout-2025.10.16.dist-info}/METADATA +10 -52
  42. {webscout-2025.10.15.dist-info → webscout-2025.10.16.dist-info}/RECORD +49 -45
  43. {webscout-2025.10.15.dist-info → webscout-2025.10.16.dist-info}/entry_points.txt +1 -1
  44. webscout/auth/api_key_manager.py +0 -189
  45. webscout/auth/auth_system.py +0 -85
  46. webscout/auth/config.py +0 -175
  47. webscout/auth/database.py +0 -755
  48. webscout/auth/middleware.py +0 -248
  49. webscout/auth/models.py +0 -185
  50. webscout/auth/rate_limiter.py +0 -254
  51. webscout/auth/schemas.py +0 -103
  52. webscout/auth/simple_logger.py +0 -236
  53. webscout/search/engines/yahoo.py +0 -65
  54. webscout/search/engines/yahoo_news.py +0 -64
  55. webscout/{auth → server}/exceptions.py +0 -0
  56. webscout/{auth → server}/providers.py +0 -0
  57. webscout/{auth → server}/request_models.py +0 -0
  58. {webscout-2025.10.15.dist-info → webscout-2025.10.16.dist-info}/WHEEL +0 -0
  59. {webscout-2025.10.15.dist-info → webscout-2025.10.16.dist-info}/licenses/LICENSE.md +0 -0
  60. {webscout-2025.10.15.dist-info → webscout-2025.10.16.dist-info}/top_level.txt +0 -0
webscout/search/engines/bing/images.py +5 -2
@@ -4,10 +4,10 @@ from __future__ import annotations
4
4
 
5
5
  from typing import Dict, List
6
6
  from urllib.parse import urlencode
7
- from bs4 import BeautifulSoup
8
7
  from time import sleep
9
8
 
10
9
  from .base import BingBase
10
+ from webscout.scout import Scout
11
11
 
12
12
 
13
13
  class BingImagesSearch(BingBase):
@@ -17,6 +17,9 @@ class BingImagesSearch(BingBase):
17
17
  safesearch = args[2] if len(args) > 2 else kwargs.get("safesearch", "moderate")
18
18
  max_results = args[3] if len(args) > 3 else kwargs.get("max_results", 10)
19
19
 
20
+ if max_results is None:
21
+ max_results = 10
22
+
20
23
  if not keywords:
21
24
  raise ValueError("Keywords are mandatory")
22
25
 
@@ -59,7 +62,7 @@ class BingImagesSearch(BingBase):
59
62
  except Exception as e:
60
63
  raise Exception(f"Failed to fetch images: {str(e)}")
61
64
 
62
- soup = BeautifulSoup(html, 'html.parser')
65
+ soup = Scout(html)
63
66
  img_tags = soup.select('a.iusc img')
64
67
 
65
68
  for img in img_tags:
webscout/search/engines/bing/news.py +6 -4
@@ -4,10 +4,10 @@ from __future__ import annotations
4
4
 
5
5
  from typing import Dict, List
6
6
  from urllib.parse import urlencode
7
- from bs4 import BeautifulSoup
8
7
  from time import sleep
9
8
 
10
9
  from .base import BingBase
10
+ from webscout.scout import Scout
11
11
 
12
12
 
13
13
  class BingNewsSearch(BingBase):
@@ -17,6 +17,9 @@ class BingNewsSearch(BingBase):
17
17
  safesearch = args[2] if len(args) > 2 else kwargs.get("safesearch", "moderate")
18
18
  max_results = args[3] if len(args) > 3 else kwargs.get("max_results", 10)
19
19
 
20
+ if max_results is None:
21
+ max_results = 10
22
+
20
23
  if not keywords:
21
24
  raise ValueError("Keywords are mandatory")
22
25
 
@@ -50,15 +53,14 @@ class BingNewsSearch(BingBase):
50
53
  try:
51
54
  response = self.session.get(full_url, timeout=self.timeout)
52
55
  response.raise_for_status()
53
- data = response.json()
56
+ html = response.text
54
57
  except Exception as e:
55
58
  raise Exception(f"Failed to fetch news: {str(e)}")
56
59
 
57
- html = data.get('html', '')
58
60
  if not html:
59
61
  break
60
62
 
61
- soup = BeautifulSoup(html, 'html.parser')
63
+ soup = Scout(html)
62
64
  news_items = soup.select('div.newsitem')
63
65
 
64
66
  for item in news_items:
webscout/search/engines/bing/text.py +5 -2
@@ -4,10 +4,10 @@ from __future__ import annotations
4
4
 
5
5
  from typing import Dict, List
6
6
  from urllib.parse import urlencode
7
- from bs4 import BeautifulSoup
8
7
  from time import sleep
9
8
 
10
9
  from .base import BingBase
10
+ from webscout.scout import Scout
11
11
 
12
12
 
13
13
  class BingTextSearch(BingBase):
@@ -18,6 +18,9 @@ class BingTextSearch(BingBase):
18
18
  max_results = args[3] if len(args) > 3 else kwargs.get("max_results", 10)
19
19
  unique = kwargs.get("unique", True)
20
20
 
21
+ if max_results is None:
22
+ max_results = 10
23
+
21
24
  if not keywords:
22
25
  raise ValueError("Keywords are mandatory")
23
26
 
@@ -46,7 +49,7 @@ class BingTextSearch(BingBase):
46
49
  while len(fetched_results) < max_results and urls_to_fetch:
47
50
  current_url = urls_to_fetch.pop(0)
48
51
  html = fetch_page(current_url)
49
- soup = BeautifulSoup(html, 'html.parser')
52
+ soup = Scout(html)
50
53
 
51
54
  links = soup.select('ol#b_results > li.b_algo')
52
55
  for link in links:
webscout/search/engines/yahoo/__init__.py +41 -0
@@ -0,0 +1,41 @@
1
+ """Yahoo search engines package.
2
+
3
+ This package provides comprehensive Yahoo search functionality including:
4
+ - Text search with multi-page pagination
5
+ - Image search with advanced filters
6
+ - Video search with quality and length filters
7
+ - News search with time filtering
8
+ - Search suggestions/autocomplete
9
+
10
+ All engines support:
11
+ - Human-like browsing through multiple pages
12
+ - Rich metadata extraction
13
+ - Filter support
14
+ - Clean result formatting
15
+
16
+ Example:
17
+ >>> from webscout.search.engines.yahoo import YahooText
18
+ >>>
19
+ >>> # Search with automatic pagination
20
+ >>> searcher = YahooText()
21
+ >>> results = searcher.search("python programming", max_results=50)
22
+ >>>
23
+ >>> for result in results:
24
+ ... print(f"{result.title}: {result.url}")
25
+ """
26
+
27
+ from .base import YahooSearchEngine
28
+ from .images import YahooImages
29
+ from .news import YahooNews
30
+ from .suggestions import YahooSuggestions
31
+ from .text import YahooText
32
+ from .videos import YahooVideos
33
+
34
+ __all__ = [
35
+ "YahooSearchEngine",
36
+ "YahooText",
37
+ "YahooImages",
38
+ "YahooVideos",
39
+ "YahooNews",
40
+ "YahooSuggestions",
41
+ ]
webscout/search/engines/yahoo/answers.py +16 -0
@@ -0,0 +1,16 @@
1
+ """Yahoo answers search."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from .base import YahooSearchEngine
6
+
7
+
8
+ class YahooAnswers(YahooSearchEngine):
9
+ """Yahoo instant answers."""
10
+
11
+ def run(self, *args, **kwargs) -> list[dict[str, str]]:
12
+ """Get instant answers from Yahoo.
13
+
14
+ Not supported.
15
+ """
16
+ raise NotImplementedError("Yahoo does not support instant answers")
webscout/search/engines/yahoo/base.py +34 -0
@@ -0,0 +1,34 @@
1
+ """Base class for Yahoo search engines."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from secrets import token_urlsafe
6
+ from typing import Any, Generic, TypeVar
7
+
8
+ from ...base import BaseSearchEngine
9
+
10
+ T = TypeVar("T")
11
+
12
+ class YahooSearchEngine(BaseSearchEngine[T], Generic[T]):
13
+ """Base class for Yahoo search engines.
14
+
15
+ Yahoo search is powered by Bing but has its own interface.
16
+ All Yahoo searches use dynamic URLs with tokens for tracking.
17
+ """
18
+
19
+ provider = "yahoo"
20
+ _base_url = "https://search.yahoo.com"
21
+
22
+ def generate_ylt_token(self) -> str:
23
+ """Generate Yahoo _ylt tracking token."""
24
+ return token_urlsafe(24 * 3 // 4)
25
+
26
+ def generate_ylu_token(self) -> str:
27
+ """Generate Yahoo _ylu tracking token."""
28
+ return token_urlsafe(47 * 3 // 4)
29
+
30
+ def build_search_url(self, base_path: str) -> str:
31
+ """Build search URL with tracking tokens."""
32
+ ylt = self.generate_ylt_token()
33
+ ylu = self.generate_ylu_token()
34
+ return f"{self._base_url}/{base_path};_ylt={ylt};_ylu={ylu}"
webscout/search/engines/yahoo/images.py +324 -0
@@ -0,0 +1,324 @@
1
+ """Yahoo image search engine with advanced filters."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Mapping
6
+ from typing import Any
7
+ from urllib.parse import urljoin
8
+
9
+ from .base import YahooSearchEngine
10
+ from ...results import ImagesResult
11
+
12
+
13
+ class YahooImages(YahooSearchEngine[ImagesResult]):
14
+ """Yahoo image search engine with filter support.
15
+
16
+ Features:
17
+ - Size filters (small, medium, large, wallpaper)
18
+ - Color filters (color, bw, red, orange, yellow, etc.)
19
+ - Type filters (photo, clipart, lineart, transparent)
20
+ - Layout filters (square, wide, tall)
21
+ - Time filters
22
+ - Pagination support
23
+
24
+ Note: Yahoo does not support reverse image search (searching by image upload/URL).
25
+ For reverse image search functionality, use Google Images or Bing Images instead.
26
+ """
27
+
28
+ name = "yahoo"
29
+ category = "images"
30
+
31
+ search_url = "https://images.search.yahoo.com/search/images"
32
+ search_method = "GET"
33
+ search_headers = {
34
+ "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1"
35
+ }
36
+
37
+ # XPath selectors
38
+ items_xpath = "//li[contains(@class, 'ld')]"
39
+ elements_xpath: Mapping[str, str] = {
40
+ "title": "@data",
41
+ "image": "@data",
42
+ "thumbnail": "@data",
43
+ "url": "@data",
44
+ "source": "@data",
45
+ "width": "@data",
46
+ "height": "@data",
47
+ }
48
+
49
+ # Filter mappings
50
+ SIZE_FILTERS = {
51
+ "small": "small",
52
+ "medium": "medium",
53
+ "large": "large",
54
+ "wallpaper": "wallpaper",
55
+ "all": "",
56
+ }
57
+
58
+ COLOR_FILTERS = {
59
+ "color": "color",
60
+ "bw": "bw",
61
+ "black": "black",
62
+ "white": "white",
63
+ "red": "red",
64
+ "orange": "orange",
65
+ "yellow": "yellow",
66
+ "green": "green",
67
+ "teal": "teal",
68
+ "blue": "blue",
69
+ "purple": "purple",
70
+ "pink": "pink",
71
+ "brown": "brown",
72
+ "gray": "gray",
73
+ "all": "",
74
+ }
75
+
76
+ TYPE_FILTERS = {
77
+ "photo": "photo",
78
+ "clipart": "clipart",
79
+ "lineart": "linedrawing",
80
+ "transparent": "transparent",
81
+ "gif": "animatedgif",
82
+ "all": "",
83
+ }
84
+
85
+ LAYOUT_FILTERS = {
86
+ "square": "square",
87
+ "wide": "wide",
88
+ "tall": "tall",
89
+ "all": "",
90
+ }
91
+
92
+ def build_payload(
93
+ self,
94
+ query: str,
95
+ region: str,
96
+ safesearch: str,
97
+ timelimit: str | None,
98
+ page: int = 1,
99
+ **kwargs: Any,
100
+ ) -> dict[str, Any]:
101
+ """Build image search payload with filters.
102
+
103
+ Args:
104
+ query: Search query
105
+ region: Region code
106
+ safesearch: Safe search level (on/moderate/off)
107
+ timelimit: Time filter (d, w, m)
108
+ page: Page number
109
+ **kwargs: Additional filters including:
110
+ - size: Image size filter
111
+ - color: Color filter
112
+ - type: Image type filter
113
+ - layout: Layout/aspect ratio filter
114
+ - license: Usage rights filter
115
+
116
+ Returns:
117
+ Query parameters dictionary
118
+ """
119
+ payload = {
120
+ "p": query,
121
+ }
122
+
123
+ # Pagination - Yahoo images use 'b' parameter
124
+ if page > 1:
125
+ # Each page shows approximately 40 images
126
+ payload["b"] = f"{(page - 1) * 40 + 1}"
127
+
128
+ # Safe search
129
+ if safesearch == "on":
130
+ payload["safe"] = "active"
131
+ elif safesearch == "off":
132
+ payload["safe"] = "off"
133
+
134
+ # Time filter
135
+ if timelimit:
136
+ time_map = {
137
+ "d": "1d", # Past 24 hours
138
+ "w": "1w", # Past week
139
+ "m": "1m", # Past month
140
+ }
141
+ if timelimit in time_map:
142
+ payload["age"] = time_map[timelimit]
143
+
144
+ # Size filter
145
+ if "size" in kwargs and kwargs["size"] in self.SIZE_FILTERS:
146
+ size_val = self.SIZE_FILTERS[kwargs["size"]]
147
+ if size_val:
148
+ payload["imgsz"] = size_val
149
+
150
+ # Color filter
151
+ if "color" in kwargs and kwargs["color"] in self.COLOR_FILTERS:
152
+ color_val = self.COLOR_FILTERS[kwargs["color"]]
153
+ if color_val:
154
+ payload["imgc"] = color_val
155
+
156
+ # Type filter
157
+ if "type" in kwargs and kwargs["type"] in self.TYPE_FILTERS:
158
+ type_val = self.TYPE_FILTERS[kwargs["type"]]
159
+ if type_val:
160
+ payload["imgt"] = type_val
161
+
162
+ # Layout filter
163
+ if "layout" in kwargs and kwargs["layout"] in self.LAYOUT_FILTERS:
164
+ layout_val = self.LAYOUT_FILTERS[kwargs["layout"]]
165
+ if layout_val:
166
+ payload["imgsp"] = layout_val
167
+
168
+ return payload
169
+
170
+ def post_extract_results(self, results: list[ImagesResult]) -> list[ImagesResult]:
171
+ """Post-process image results to parse JSON data.
172
+
173
+ Args:
174
+ results: Raw extracted results
175
+
176
+ Returns:
177
+ Cleaned results with proper URLs and metadata
178
+ """
179
+ import json
180
+ from urllib.parse import unquote
181
+
182
+ cleaned_results = []
183
+
184
+ for result in results:
185
+ # Parse JSON data from the data attribute
186
+ if result.title and result.title.startswith('{'):
187
+ try:
188
+ data = json.loads(result.title)
189
+
190
+ # Extract title
191
+ result.title = data.get('desc', '') or data.get('tit', '')
192
+
193
+ # Extract URLs
194
+ result.url = data.get('rurl', '')
195
+ result.thumbnail = data.get('turl', '')
196
+ result.image = data.get('turlL', '') or data.get('turl', '')
197
+
198
+ # Extract dimensions
199
+ result.width = int(data.get('imgW', 0))
200
+ result.height = int(data.get('imgH', 0))
201
+
202
+ except (json.JSONDecodeError, KeyError, ValueError):
203
+ # If JSON parsing fails, keep original data
204
+ pass
205
+
206
+ # Clean URLs if they exist
207
+ if result.url:
208
+ result.url = unquote(result.url)
209
+ if result.image:
210
+ result.image = unquote(result.image)
211
+ if result.thumbnail:
212
+ result.thumbnail = unquote(result.thumbnail)
213
+
214
+ cleaned_results.append(result)
215
+
216
+ return cleaned_results
217
+
218
+ def search(
219
+ self,
220
+ query: str,
221
+ region: str = "us-en",
222
+ safesearch: str = "moderate",
223
+ timelimit: str | None = None,
224
+ page: int = 1,
225
+ max_results: int | None = None,
226
+ **kwargs: Any,
227
+ ) -> list[ImagesResult] | None:
228
+ """Search Yahoo Images with pagination.
229
+
230
+ Args:
231
+ query: Image search query
232
+ region: Region code
233
+ safesearch: Safe search level
234
+ timelimit: Time filter
235
+ page: Starting page
236
+ max_results: Maximum results to return
237
+ **kwargs: Additional filters (size, color, type, layout)
238
+
239
+ Returns:
240
+ List of ImageResult objects
241
+ """
242
+ results = []
243
+ current_page = page
244
+ max_pages = kwargs.get("max_pages", 5)
245
+
246
+ while current_page <= max_pages:
247
+ payload = self.build_payload(
248
+ query=query,
249
+ region=region,
250
+ safesearch=safesearch,
251
+ timelimit=timelimit,
252
+ page=current_page,
253
+ **kwargs
254
+ )
255
+
256
+ html_text = self.request(self.search_method, self.search_url, params=payload)
257
+ if not html_text:
258
+ break
259
+
260
+ html_text = self.pre_process_html(html_text)
261
+ page_results = self.extract_results(html_text)
262
+
263
+ if not page_results:
264
+ break
265
+
266
+ results.extend(page_results)
267
+
268
+ if max_results and len(results) >= max_results:
269
+ break
270
+
271
+ current_page += 1
272
+
273
+ results = self.post_extract_results(results)
274
+
275
+ if max_results:
276
+ results = results[:max_results]
277
+
278
+ return results if results else None
279
+
280
+ def run(
281
+ self,
282
+ keywords: str,
283
+ region: str = "us-en",
284
+ safesearch: str = "moderate",
285
+ timelimit: str | None = None,
286
+ size: str | None = None,
287
+ color: str | None = None,
288
+ type_image: str | None = None,
289
+ layout: str | None = None,
290
+ license_image: str | None = None,
291
+ max_results: int | None = None,
292
+ ) -> list[dict[str, str]]:
293
+ """Run image search and return results as dictionaries.
294
+
295
+ Args:
296
+ keywords: Search query.
297
+ region: Region code.
298
+ safesearch: Safe search level.
299
+ timelimit: Time filter.
300
+ size: Image size filter.
301
+ color: Color filter.
302
+ type_image: Image type filter.
303
+ layout: Layout filter.
304
+ license_image: License filter.
305
+ max_results: Maximum number of results.
306
+
307
+ Returns:
308
+ List of image result dictionaries.
309
+ """
310
+ results = self.search(
311
+ query=keywords,
312
+ region=region,
313
+ safesearch=safesearch,
314
+ timelimit=timelimit,
315
+ size=size,
316
+ color=color,
317
+ type_image=type_image,
318
+ layout=layout,
319
+ license_image=license_image,
320
+ max_results=max_results,
321
+ )
322
+ if results is None:
323
+ return []
324
+ return [result.to_dict() for result in results]
webscout/search/engines/yahoo/maps.py +16 -0
@@ -0,0 +1,16 @@
1
+ """Yahoo maps search."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from .base import YahooSearchEngine
6
+
7
+
8
+ class YahooMaps(YahooSearchEngine):
9
+ """Yahoo maps search."""
10
+
11
+ def run(self, *args, **kwargs) -> list[dict[str, str]]:
12
+ """Get maps results from Yahoo.
13
+
14
+ Not supported.
15
+ """
16
+ raise NotImplementedError("Yahoo does not support maps search")