webscout 2025.10.14.1__py3-none-any.whl → 2025.10.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of webscout might be problematic. Click here for more details.

Files changed (70)
  1. webscout/Extra/YTToolkit/README.md +1 -1
  2. webscout/Extra/tempmail/README.md +3 -3
  3. webscout/Provider/OPENAI/README.md +1 -1
  4. webscout/Provider/TTI/bing.py +4 -4
  5. webscout/__init__.py +1 -1
  6. webscout/cli.py +0 -147
  7. webscout/client.py +4 -5
  8. webscout/litprinter/__init__.py +0 -42
  9. webscout/scout/README.md +59 -8
  10. webscout/scout/core/scout.py +62 -0
  11. webscout/scout/element.py +251 -45
  12. webscout/search/__init__.py +5 -8
  13. webscout/search/bing_main.py +42 -0
  14. webscout/search/engines/bing/__init__.py +1 -0
  15. webscout/search/engines/bing/base.py +33 -0
  16. webscout/search/engines/bing/images.py +108 -0
  17. webscout/search/engines/bing/news.py +91 -0
  18. webscout/search/engines/bing/suggestions.py +34 -0
  19. webscout/search/engines/bing/text.py +106 -0
  20. webscout/search/engines/duckduckgo/maps.py +13 -0
  21. webscout/search/engines/yahoo/__init__.py +41 -0
  22. webscout/search/engines/yahoo/answers.py +16 -0
  23. webscout/search/engines/yahoo/base.py +34 -0
  24. webscout/search/engines/yahoo/images.py +324 -0
  25. webscout/search/engines/yahoo/maps.py +16 -0
  26. webscout/search/engines/yahoo/news.py +258 -0
  27. webscout/search/engines/yahoo/suggestions.py +140 -0
  28. webscout/search/engines/yahoo/text.py +273 -0
  29. webscout/search/engines/yahoo/translate.py +16 -0
  30. webscout/search/engines/yahoo/videos.py +302 -0
  31. webscout/search/engines/yahoo/weather.py +220 -0
  32. webscout/search/http_client.py +1 -1
  33. webscout/search/yahoo_main.py +54 -0
  34. webscout/{auth → server}/__init__.py +2 -23
  35. webscout/server/config.py +84 -0
  36. webscout/{auth → server}/request_processing.py +3 -28
  37. webscout/{auth → server}/routes.py +14 -170
  38. webscout/server/schemas.py +23 -0
  39. webscout/{auth → server}/server.py +11 -43
  40. webscout/server/simple_logger.py +84 -0
  41. webscout/version.py +1 -1
  42. webscout/version.py.bak +1 -1
  43. webscout/zeroart/README.md +17 -9
  44. webscout/zeroart/__init__.py +78 -6
  45. webscout/zeroart/effects.py +51 -1
  46. webscout/zeroart/fonts.py +559 -1
  47. {webscout-2025.10.14.1.dist-info → webscout-2025.10.16.dist-info}/METADATA +15 -332
  48. {webscout-2025.10.14.1.dist-info → webscout-2025.10.16.dist-info}/RECORD +55 -48
  49. {webscout-2025.10.14.1.dist-info → webscout-2025.10.16.dist-info}/entry_points.txt +1 -1
  50. webscout/Bing_search.py +0 -417
  51. webscout/DWEBS.py +0 -529
  52. webscout/auth/api_key_manager.py +0 -189
  53. webscout/auth/auth_system.py +0 -85
  54. webscout/auth/config.py +0 -175
  55. webscout/auth/database.py +0 -755
  56. webscout/auth/middleware.py +0 -248
  57. webscout/auth/models.py +0 -185
  58. webscout/auth/rate_limiter.py +0 -254
  59. webscout/auth/schemas.py +0 -103
  60. webscout/auth/simple_logger.py +0 -236
  61. webscout/search/engines/bing.py +0 -84
  62. webscout/search/engines/bing_news.py +0 -52
  63. webscout/search/engines/yahoo.py +0 -65
  64. webscout/search/engines/yahoo_news.py +0 -64
  65. /webscout/{auth → server}/exceptions.py +0 -0
  66. /webscout/{auth → server}/providers.py +0 -0
  67. /webscout/{auth → server}/request_models.py +0 -0
  68. {webscout-2025.10.14.1.dist-info → webscout-2025.10.16.dist-info}/WHEEL +0 -0
  69. {webscout-2025.10.14.1.dist-info → webscout-2025.10.16.dist-info}/licenses/LICENSE.md +0 -0
  70. {webscout-2025.10.14.1.dist-info → webscout-2025.10.16.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,106 @@
1
+ """Bing text search."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Dict, List
6
+ from urllib.parse import urlencode
7
+ from time import sleep
8
+
9
+ from .base import BingBase
10
+ from webscout.scout import Scout
11
+
12
+
13
class BingTextSearch(BingBase):
    """Bing web (text) search.

    Scrapes the standard Bing results page (``/search``) and follows the
    "next page" link until ``max_results`` results have been collected or
    no further pages are available.
    """

    # Maps the public safesearch levels to Bing's own naming.
    # NOTE(review): the mapped value is currently not sent with the request
    # (Bing controls safe search via cookies/headers), so the ``safesearch``
    # argument has no effect yet — kept for interface compatibility.
    _SAFE_MAP = {
        "on": "Strict",
        "moderate": "Moderate",
        "off": "Off"
    }

    def run(self, *args, **kwargs) -> List[Dict[str, str]]:
        """Run a Bing text search.

        Args:
            keywords: Search query (positional arg 0 or keyword). Mandatory.
            region: Region code (positional arg 1, default "us"). Currently
                not applied to the request — accepted for compatibility.
            safesearch: "on" / "moderate" / "off" (positional arg 2).
            max_results: Maximum number of results (positional arg 3,
                default 10; ``None`` also means 10).
            unique: Skip results whose href was already seen (default True).

        Returns:
            List of dicts with keys ``title``, ``href`` and ``body``.

        Raises:
            ValueError: If no keywords were given.
            Exception: If a results page could not be fetched.
        """
        keywords = args[0] if args else kwargs.get("keywords")
        region = args[1] if len(args) > 1 else kwargs.get("region", "us")
        safesearch = args[2] if len(args) > 2 else kwargs.get("safesearch", "moderate")
        max_results = args[3] if len(args) > 3 else kwargs.get("max_results", 10)
        unique = kwargs.get("unique", True)

        if max_results is None:
            max_results = 10

        if not keywords:
            raise ValueError("Keywords are mandatory")

        safe = self._SAFE_MAP.get(safesearch.lower(), "Moderate")

        fetched_results: List[Dict[str, str]] = []
        fetched_links = set()

        def fetch_page(url):
            # Fetch one results page and return its HTML text.
            try:
                response = self.session.get(url, timeout=self.timeout)
                response.raise_for_status()
                return response.text
            except Exception as e:
                raise Exception(f"Failed to fetch page: {str(e)}")

        # Properly encode the query instead of interpolating it raw into the
        # URL (the previous f-string broke for queries containing '&', '#',
        # '+', spaces, or non-ASCII characters).
        params = urlencode({"q": keywords, "search": "", "form": "QBLH"})
        urls_to_fetch = [f"{self.base_url}/search?{params}"]

        while len(fetched_results) < max_results and urls_to_fetch:
            html = fetch_page(urls_to_fetch.pop(0))
            soup = Scout(html)

            for link in soup.select('ol#b_results > li.b_algo'):
                if len(fetched_results) >= max_results:
                    break
                title_tag = link.select_one('h2')
                url_tag = link.select_one('h2 a')
                text_tag = link.select_one('p')
                if not (title_tag and url_tag and text_tag):
                    continue

                title = title_tag.get_text(strip=True)
                href = url_tag.get('href', '')
                body = text_tag.get_text(strip=True)

                # Bing wraps outbound links in a redirect; unwrap to the
                # real destination URL when possible.
                if href.startswith('/ck/a?'):
                    href = self._unwrap_bing_url(href)

                if unique and href in fetched_links:
                    continue
                fetched_links.add(href)

                fetched_results.append({
                    'title': title,
                    'href': href,
                    'body': body
                })

            # Queue the next page if a pagination link is present.
            next_page_tag = soup.select_one('div#b_content nav[role="navigation"] a.sb_pagN')
            if next_page_tag and next_page_tag.get('href'):
                urls_to_fetch.append(self.base_url + next_page_tag['href'])

            if self.sleep_interval:
                sleep(self.sleep_interval)

        return fetched_results[:max_results]

    @staticmethod
    def _unwrap_bing_url(href: str) -> str:
        """Decode a Bing ``/ck/a?...&u=a1<base64>`` redirect into the real URL.

        Returns the input unchanged when decoding fails (best-effort).
        """
        import base64
        from urllib.parse import parse_qs, urlparse

        try:
            query_params = parse_qs(urlparse(href).query)
            if 'u' in query_params:
                encoded_url = query_params['u'][0]
                # Bing prefixes the urlsafe-base64 payload with 'a1'.
                if encoded_url.startswith('a1'):
                    encoded_url = encoded_url[2:]
                padding = len(encoded_url) % 4
                if padding:
                    encoded_url += '=' * (4 - padding)
                return base64.urlsafe_b64decode(encoded_url).decode()
        except Exception:  # narrowed from a bare except; unwrap is best-effort
            pass
        return href
@@ -1,12 +1,25 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from decimal import Decimal
4
+ from math import sqrt
4
5
 
5
6
  from ....exceptions import WebscoutE
6
7
  from .base import DuckDuckGoBase
7
8
 
8
9
 
9
10
  class DuckDuckGoMaps(DuckDuckGoBase):
11
+ def _calculate_distance(self, lat_t: Decimal, lon_l: Decimal, lat_b: Decimal, lon_r: Decimal) -> float:
12
+ """Calculate the Euclidean distance between top-left and bottom-right corners of bounding box."""
13
+ # Convert to float for math operations
14
+ lat_t_f = float(lat_t)
15
+ lon_l_f = float(lon_l)
16
+ lat_b_f = float(lat_b)
17
+ lon_r_f = float(lon_r)
18
+
19
+ # Calculate Euclidean distance
20
+ distance = sqrt((lat_t_f - lat_b_f) ** 2 + (lon_r_f - lon_l_f) ** 2)
21
+ return distance
22
+
10
23
  def run(self, *args, **kwargs) -> list[dict[str, str]]:
11
24
  keywords = args[0] if args else kwargs.get("keywords")
12
25
  place = args[1] if len(args) > 1 else kwargs.get("place")
@@ -0,0 +1,41 @@
1
+ """Yahoo search engines package.
2
+
3
+ This package provides comprehensive Yahoo search functionality including:
4
+ - Text search with multi-page pagination
5
+ - Image search with advanced filters
6
+ - Video search with quality and length filters
7
+ - News search with time filtering
8
+ - Search suggestions/autocomplete
9
+
10
+ All engines support:
11
+ - Human-like browsing through multiple pages
12
+ - Rich metadata extraction
13
+ - Filter support
14
+ - Clean result formatting
15
+
16
+ Example:
17
+ >>> from webscout.search.engines.yahoo import YahooText
18
+ >>>
19
+ >>> # Search with automatic pagination
20
+ >>> searcher = YahooText()
21
+ >>> results = searcher.search("python programming", max_results=50)
22
+ >>>
23
+ >>> for result in results:
24
+ ... print(f"{result.title}: {result.url}")
25
+ """
26
+
27
+ from .base import YahooSearchEngine
28
+ from .images import YahooImages
29
+ from .news import YahooNews
30
+ from .suggestions import YahooSuggestions
31
+ from .text import YahooText
32
+ from .videos import YahooVideos
33
+
34
+ __all__ = [
35
+ "YahooSearchEngine",
36
+ "YahooText",
37
+ "YahooImages",
38
+ "YahooVideos",
39
+ "YahooNews",
40
+ "YahooSuggestions",
41
+ ]
@@ -0,0 +1,16 @@
1
+ """Yahoo answers search."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from .base import YahooSearchEngine
6
+
7
+
8
class YahooAnswers(YahooSearchEngine):
    """Placeholder engine for Yahoo instant answers.

    Yahoo exposes no instant-answers endpoint, so every invocation fails.
    """

    def run(self, *args, **kwargs) -> list[dict[str, str]]:
        """Always raise: this provider has no instant-answers API."""
        raise NotImplementedError("Yahoo does not support instant answers")
@@ -0,0 +1,34 @@
1
+ """Base class for Yahoo search engines."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from secrets import token_urlsafe
6
+ from typing import Any, Generic, TypeVar
7
+
8
+ from ...base import BaseSearchEngine
9
+
10
+ T = TypeVar("T")
11
+
12
class YahooSearchEngine(BaseSearchEngine[T], Generic[T]):
    """Shared plumbing for the Yahoo search engines.

    Yahoo search is powered by Bing but has its own interface; every
    search URL embeds ``_ylt``/``_ylu`` tracking tokens.
    """

    provider = "yahoo"
    _base_url = "https://search.yahoo.com"

    def generate_ylt_token(self) -> str:
        """Generate Yahoo _ylt tracking token."""
        # token_urlsafe yields roughly 4 characters per 3 bytes of entropy.
        nbytes = 24 * 3 // 4
        return token_urlsafe(nbytes)

    def generate_ylu_token(self) -> str:
        """Generate Yahoo _ylu tracking token."""
        nbytes = 47 * 3 // 4
        return token_urlsafe(nbytes)

    def build_search_url(self, base_path: str) -> str:
        """Build a search URL carrying freshly generated tracking tokens."""
        suffix = f";_ylt={self.generate_ylt_token()};_ylu={self.generate_ylu_token()}"
        return f"{self._base_url}/{base_path}" + suffix
@@ -0,0 +1,324 @@
1
+ """Yahoo image search engine with advanced filters."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Mapping
6
+ from typing import Any
7
+ from urllib.parse import urljoin
8
+
9
+ from .base import YahooSearchEngine
10
+ from ...results import ImagesResult
11
+
12
+
13
class YahooImages(YahooSearchEngine[ImagesResult]):
    """Yahoo image search engine with filter support.

    Features:
    - Size filters (small, medium, large, wallpaper)
    - Color filters (color, bw, red, orange, yellow, etc.)
    - Type filters (photo, clipart, lineart, transparent, gif)
    - Layout filters (square, wide, tall)
    - Time filters
    - Pagination support

    Note: Yahoo does not support reverse image search (searching by image
    upload/URL). For reverse image search functionality, use Google Images
    or Bing Images instead.
    """

    name = "yahoo"
    category = "images"

    search_url = "https://images.search.yahoo.com/search/images"
    search_method = "GET"
    search_headers = {
        "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1"
    }

    # XPath selectors: every field is read from the JSON blob that Yahoo
    # stores in each result <li>'s "data" attribute; the blob is decoded
    # in post_extract_results().
    items_xpath = "//li[contains(@class, 'ld')]"
    elements_xpath: Mapping[str, str] = {
        "title": "@data",
        "image": "@data",
        "thumbnail": "@data",
        "url": "@data",
        "source": "@data",
        "width": "@data",
        "height": "@data",
    }

    # Filter mappings (public filter name -> Yahoo query-param value;
    # "all" maps to "" and means "do not send the parameter").
    SIZE_FILTERS = {
        "small": "small",
        "medium": "medium",
        "large": "large",
        "wallpaper": "wallpaper",
        "all": "",
    }

    COLOR_FILTERS = {
        "color": "color",
        "bw": "bw",
        "black": "black",
        "white": "white",
        "red": "red",
        "orange": "orange",
        "yellow": "yellow",
        "green": "green",
        "teal": "teal",
        "blue": "blue",
        "purple": "purple",
        "pink": "pink",
        "brown": "brown",
        "gray": "gray",
        "all": "",
    }

    TYPE_FILTERS = {
        "photo": "photo",
        "clipart": "clipart",
        "lineart": "linedrawing",
        "transparent": "transparent",
        "gif": "animatedgif",
        "all": "",
    }

    LAYOUT_FILTERS = {
        "square": "square",
        "wide": "wide",
        "tall": "tall",
        "all": "",
    }

    def build_payload(
        self,
        query: str,
        region: str,
        safesearch: str,
        timelimit: str | None,
        page: int = 1,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Build image search payload with filters.

        Args:
            query: Search query
            region: Region code (currently not sent as a parameter)
            safesearch: Safe search level (on/moderate/off)
            timelimit: Time filter (d, w, m)
            page: Page number
            **kwargs: Additional filters read under these keys:
                - size: Image size filter
                - color: Color filter
                - type: Image type filter
                - layout: Layout/aspect ratio filter
                (NOTE(review): a license filter is not implemented yet)

        Returns:
            Query parameters dictionary
        """
        payload: dict[str, Any] = {
            "p": query,
        }

        # Pagination — Yahoo images use the 'b' (begin-offset) parameter;
        # each page shows approximately 40 images.
        if page > 1:
            payload["b"] = f"{(page - 1) * 40 + 1}"

        # Safe search ("moderate" is the site default, so nothing is sent).
        if safesearch == "on":
            payload["safe"] = "active"
        elif safesearch == "off":
            payload["safe"] = "off"

        # Time filter
        if timelimit:
            time_map = {
                "d": "1d",  # Past 24 hours
                "w": "1w",  # Past week
                "m": "1m",  # Past month
            }
            if timelimit in time_map:
                payload["age"] = time_map[timelimit]

        # Size filter
        if "size" in kwargs and kwargs["size"] in self.SIZE_FILTERS:
            size_val = self.SIZE_FILTERS[kwargs["size"]]
            if size_val:
                payload["imgsz"] = size_val

        # Color filter
        if "color" in kwargs and kwargs["color"] in self.COLOR_FILTERS:
            color_val = self.COLOR_FILTERS[kwargs["color"]]
            if color_val:
                payload["imgc"] = color_val

        # Type filter (read under the key "type"; run() maps its
        # type_image argument onto this key).
        if "type" in kwargs and kwargs["type"] in self.TYPE_FILTERS:
            type_val = self.TYPE_FILTERS[kwargs["type"]]
            if type_val:
                payload["imgt"] = type_val

        # Layout filter
        if "layout" in kwargs and kwargs["layout"] in self.LAYOUT_FILTERS:
            layout_val = self.LAYOUT_FILTERS[kwargs["layout"]]
            if layout_val:
                payload["imgsp"] = layout_val

        return payload

    def post_extract_results(self, results: list[ImagesResult]) -> list[ImagesResult]:
        """Post-process image results to parse the embedded JSON data.

        Each raw result's ``title`` holds the JSON blob from the <li>'s
        "data" attribute; decode it into title/url/thumbnail/image and the
        pixel dimensions. A result whose blob cannot be fully parsed is
        kept untouched.

        Args:
            results: Raw extracted results

        Returns:
            Cleaned results with proper URLs and metadata
        """
        import json
        from urllib.parse import unquote

        cleaned_results = []

        for result in results:
            if result.title and result.title.startswith('{'):
                try:
                    data = json.loads(result.title)
                    # Parse everything into locals first so a failure in a
                    # later field (e.g. int('') below) cannot leave the
                    # result half-overwritten.
                    title = data.get('desc', '') or data.get('tit', '')
                    url = data.get('rurl', '')
                    thumbnail = data.get('turl', '')
                    image = data.get('turlL', '') or data.get('turl', '')
                    width = int(data.get('imgW', 0) or 0)
                    height = int(data.get('imgH', 0) or 0)
                except (json.JSONDecodeError, KeyError, ValueError, TypeError):
                    # If parsing fails, keep the original data.
                    pass
                else:
                    result.title = title
                    result.url = url
                    result.thumbnail = thumbnail
                    result.image = image
                    result.width = width
                    result.height = height

            # Clean URLs if they exist
            if result.url:
                result.url = unquote(result.url)
            if result.image:
                result.image = unquote(result.image)
            if result.thumbnail:
                result.thumbnail = unquote(result.thumbnail)

            cleaned_results.append(result)

        return cleaned_results

    def search(
        self,
        query: str,
        region: str = "us-en",
        safesearch: str = "moderate",
        timelimit: str | None = None,
        page: int = 1,
        max_results: int | None = None,
        **kwargs: Any,
    ) -> list[ImagesResult] | None:
        """Search Yahoo Images with pagination.

        Args:
            query: Image search query
            region: Region code
            safesearch: Safe search level
            timelimit: Time filter
            page: Starting page
            max_results: Maximum results to return
            **kwargs: Additional filters (size, color, type, layout) and
                max_pages (page-fetch cap, default 5)

        Returns:
            List of ImagesResult objects, or None when nothing was found.
        """
        results: list[ImagesResult] = []
        current_page = page
        max_pages = kwargs.get("max_pages", 5)

        while current_page <= max_pages:
            payload = self.build_payload(
                query=query,
                region=region,
                safesearch=safesearch,
                timelimit=timelimit,
                page=current_page,
                **kwargs
            )

            html_text = self.request(self.search_method, self.search_url, params=payload)
            if not html_text:
                break

            html_text = self.pre_process_html(html_text)
            page_results = self.extract_results(html_text)

            # An empty page means pagination is exhausted.
            if not page_results:
                break

            results.extend(page_results)

            if max_results and len(results) >= max_results:
                break

            current_page += 1

        results = self.post_extract_results(results)

        if max_results:
            results = results[:max_results]

        return results if results else None

    def run(
        self,
        keywords: str,
        region: str = "us-en",
        safesearch: str = "moderate",
        timelimit: str | None = None,
        size: str | None = None,
        color: str | None = None,
        type_image: str | None = None,
        layout: str | None = None,
        license_image: str | None = None,
        max_results: int | None = None,
    ) -> list[dict[str, str]]:
        """Run image search and return results as dictionaries.

        Args:
            keywords: Search query.
            region: Region code.
            safesearch: Safe search level.
            timelimit: Time filter.
            size: Image size filter.
            color: Color filter.
            type_image: Image type filter.
            layout: Layout filter.
            license_image: License filter (accepted for interface
                compatibility; no license filter is implemented yet).
            max_results: Maximum number of results.

        Returns:
            List of image result dictionaries.
        """
        filters: dict[str, Any] = {}
        if size is not None:
            filters["size"] = size
        if color is not None:
            filters["color"] = color
        if type_image is not None:
            # build_payload() reads this filter under the key "type";
            # previously the value was forwarded as "type_image" and the
            # type filter was silently ignored.
            filters["type"] = type_image
        if layout is not None:
            filters["layout"] = layout

        results = self.search(
            query=keywords,
            region=region,
            safesearch=safesearch,
            timelimit=timelimit,
            max_results=max_results,
            **filters,
        )
        if results is None:
            return []
        return [result.to_dict() for result in results]
@@ -0,0 +1,16 @@
1
+ """Yahoo maps search."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from .base import YahooSearchEngine
6
+
7
+
8
class YahooMaps(YahooSearchEngine):
    """Placeholder engine for Yahoo maps.

    Yahoo exposes no maps-search endpoint, so every invocation fails.
    """

    def run(self, *args, **kwargs) -> list[dict[str, str]]:
        """Always raise: this provider has no maps-search API."""
        raise NotImplementedError("Yahoo does not support maps search")