webscout 2025.10.15__py3-none-any.whl → 2025.10.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of webscout might be problematic. Click here for more details.

Files changed (63)
  1. webscout/Extra/YTToolkit/README.md +1 -1
  2. webscout/Extra/tempmail/README.md +3 -3
  3. webscout/Provider/ClaudeOnline.py +350 -0
  4. webscout/Provider/OPENAI/README.md +1 -1
  5. webscout/Provider/TTI/bing.py +4 -4
  6. webscout/Provider/TTI/claudeonline.py +315 -0
  7. webscout/__init__.py +1 -1
  8. webscout/client.py +4 -5
  9. webscout/litprinter/__init__.py +0 -42
  10. webscout/scout/README.md +59 -8
  11. webscout/scout/core/scout.py +62 -0
  12. webscout/scout/element.py +251 -45
  13. webscout/search/__init__.py +3 -4
  14. webscout/search/engines/bing/images.py +5 -2
  15. webscout/search/engines/bing/news.py +6 -4
  16. webscout/search/engines/bing/text.py +5 -2
  17. webscout/search/engines/yahoo/__init__.py +41 -0
  18. webscout/search/engines/yahoo/answers.py +16 -0
  19. webscout/search/engines/yahoo/base.py +34 -0
  20. webscout/search/engines/yahoo/images.py +324 -0
  21. webscout/search/engines/yahoo/maps.py +16 -0
  22. webscout/search/engines/yahoo/news.py +258 -0
  23. webscout/search/engines/yahoo/suggestions.py +140 -0
  24. webscout/search/engines/yahoo/text.py +273 -0
  25. webscout/search/engines/yahoo/translate.py +16 -0
  26. webscout/search/engines/yahoo/videos.py +302 -0
  27. webscout/search/engines/yahoo/weather.py +220 -0
  28. webscout/search/http_client.py +1 -1
  29. webscout/search/yahoo_main.py +54 -0
  30. webscout/{auth → server}/__init__.py +2 -23
  31. webscout/server/config.py +84 -0
  32. webscout/{auth → server}/request_processing.py +3 -28
  33. webscout/{auth → server}/routes.py +6 -148
  34. webscout/server/schemas.py +23 -0
  35. webscout/{auth → server}/server.py +11 -43
  36. webscout/server/simple_logger.py +84 -0
  37. webscout/version.py +1 -1
  38. webscout/version.py.bak +1 -1
  39. webscout/zeroart/README.md +17 -9
  40. webscout/zeroart/__init__.py +78 -6
  41. webscout/zeroart/effects.py +51 -1
  42. webscout/zeroart/fonts.py +559 -1
  43. {webscout-2025.10.15.dist-info → webscout-2025.10.17.dist-info}/METADATA +11 -54
  44. {webscout-2025.10.15.dist-info → webscout-2025.10.17.dist-info}/RECORD +51 -46
  45. {webscout-2025.10.15.dist-info → webscout-2025.10.17.dist-info}/entry_points.txt +1 -1
  46. webscout/Extra/weather.md +0 -281
  47. webscout/auth/api_key_manager.py +0 -189
  48. webscout/auth/auth_system.py +0 -85
  49. webscout/auth/config.py +0 -175
  50. webscout/auth/database.py +0 -755
  51. webscout/auth/middleware.py +0 -248
  52. webscout/auth/models.py +0 -185
  53. webscout/auth/rate_limiter.py +0 -254
  54. webscout/auth/schemas.py +0 -103
  55. webscout/auth/simple_logger.py +0 -236
  56. webscout/search/engines/yahoo.py +0 -65
  57. webscout/search/engines/yahoo_news.py +0 -64
  58. /webscout/{auth → server}/exceptions.py +0 -0
  59. /webscout/{auth → server}/providers.py +0 -0
  60. /webscout/{auth → server}/request_models.py +0 -0
  61. {webscout-2025.10.15.dist-info → webscout-2025.10.17.dist-info}/WHEEL +0 -0
  62. {webscout-2025.10.15.dist-info → webscout-2025.10.17.dist-info}/licenses/LICENSE.md +0 -0
  63. {webscout-2025.10.15.dist-info → webscout-2025.10.17.dist-info}/top_level.txt +0 -0
webscout/scout/element.py CHANGED
@@ -267,7 +267,14 @@ class Tag:
267
267
  def select(self, selector: str) -> List['Tag']:
268
268
  """
269
269
  Select elements using CSS selector.
270
- Enhanced to support more complex selectors.
270
+ Enhanced to support more complex selectors including:
271
+ - Tag selectors: 'p', 'div'
272
+ - Class selectors: '.class', 'p.class'
273
+ - ID selectors: '#id', 'div#id'
274
+ - Attribute selectors: '[attr]', '[attr=value]'
275
+ - Descendant selectors: 'div p'
276
+ - Child selectors: 'div > p'
277
+ - Multiple classes: '.class1.class2'
271
278
 
272
279
  Args:
273
280
  selector (str): CSS selector string
@@ -275,54 +282,248 @@ class Tag:
275
282
  Returns:
276
283
  List[Tag]: List of matching elements
277
284
  """
278
- # More advanced CSS selector parsing
279
- # This is a simplified implementation and might need more robust parsing
280
- parts = re.split(r'\s+', selector.strip())
281
285
  results = []
282
-
283
- def _match_selector(tag, selector_part):
284
- # Support more complex selectors
285
- if selector_part.startswith('.'):
286
- # Class selector
287
- return selector_part[1:] in tag.get('class', [])
288
- elif selector_part.startswith('#'):
289
- # ID selector
290
- return tag.get('id') == selector_part[1:]
291
- elif '[' in selector_part and ']' in selector_part:
292
- # Attribute selector
293
- attr_match = re.match(r'(\w+)\[([^=]+)(?:=(.+))?\]', selector_part)
294
- if attr_match:
295
- tag_name, attr, value = attr_match.groups()
296
- if tag_name and tag.name != tag_name:
286
+
287
+ def _parse_simple_selector(simple_sel: str) -> dict:
288
+ """Parse a simple selector like 'p.class#id[attr=value]' into components."""
289
+ components = {
290
+ 'tag': None,
291
+ 'id': None,
292
+ 'classes': [],
293
+ 'attrs': {}
294
+ }
295
+
296
+ # Extract tag name (at the start)
297
+ tag_match = re.match(r'^([a-zA-Z][\w-]*)', simple_sel)
298
+ if tag_match:
299
+ components['tag'] = tag_match.group(1)
300
+ simple_sel = simple_sel[len(tag_match.group(1)):]
301
+
302
+ # Extract ID
303
+ id_matches = re.findall(r'#([\w-]+)', simple_sel)
304
+ if id_matches:
305
+ components['id'] = id_matches[0]
306
+
307
+ # Extract classes
308
+ class_matches = re.findall(r'\.([\w-]+)', simple_sel)
309
+ components['classes'] = class_matches
310
+
311
+ # Extract attributes
312
+ attr_matches = re.findall(r'\[([^\]]+)\]', simple_sel)
313
+ for attr_expr in attr_matches:
314
+ if '=' in attr_expr:
315
+ attr_name, attr_value = attr_expr.split('=', 1)
316
+ components['attrs'][attr_name.strip()] = attr_value.strip('\'"')
317
+ else:
318
+ components['attrs'][attr_expr.strip()] = None
319
+
320
+ return components
321
+
322
+ def _match_simple_selector(tag: 'Tag', components: dict) -> bool:
323
+ """Check if a tag matches the parsed selector components."""
324
+ # Check tag name
325
+ if components['tag'] and tag.name != components['tag']:
326
+ return False
327
+
328
+ # Check ID
329
+ if components['id'] and tag.get('id') != components['id']:
330
+ return False
331
+
332
+ # Check classes
333
+ tag_classes = tag.get('class', '')
334
+ if isinstance(tag_classes, str):
335
+ tag_classes = tag_classes.split()
336
+ elif not isinstance(tag_classes, list):
337
+ tag_classes = [str(tag_classes)] if tag_classes else []
338
+
339
+ for cls in components['classes']:
340
+ if cls not in tag_classes:
341
+ return False
342
+
343
+ # Check attributes
344
+ for attr_name, attr_value in components['attrs'].items():
345
+ if attr_value is None:
346
+ # Just check attribute exists
347
+ if attr_name not in tag.attrs:
297
348
  return False
298
- if value:
299
- return tag.get(attr) == value.strip("'\"")
300
- return attr in tag.attrs
301
- else:
302
- # Tag selector
303
- return tag.name == selector_part
304
-
305
- def _recursive_select(element, selector_parts):
306
- if not selector_parts:
307
- results.append(element)
308
- return
309
-
310
- current_selector = selector_parts[0]
311
- remaining_selectors = selector_parts[1:]
312
-
313
- if _match_selector(element, current_selector):
314
- if not remaining_selectors:
315
- results.append(element)
316
349
  else:
317
- for child in element.contents:
318
- if isinstance(child, Tag):
319
- _recursive_select(child, remaining_selectors)
320
-
321
- for child in self.contents:
322
- if isinstance(child, Tag):
323
- _recursive_select(child, parts)
324
-
350
+ # Check attribute value
351
+ if tag.get(attr_name) != attr_value:
352
+ return False
353
+
354
+ return True
355
+
356
+ def _find_all_matching(element: 'Tag', components: dict) -> List['Tag']:
357
+ """Recursively find all elements matching the selector components."""
358
+ matches = []
359
+
360
+ # Check current element
361
+ if _match_simple_selector(element, components):
362
+ matches.append(element)
363
+
364
+ # Check children recursively
365
+ for child in element.contents:
366
+ if isinstance(child, Tag):
367
+ matches.extend(_find_all_matching(child, components))
368
+
369
+ return matches
370
+
371
+ # Handle combinators (descendant ' ' and child '>')
372
+ if ' > ' in selector:
373
+ # Child combinator
374
+ parts = [p.strip() for p in selector.split(' > ')]
375
+ return self._select_with_child_combinator(parts)
376
+ elif ' ' in selector.strip():
377
+ # Descendant combinator
378
+ parts = [p.strip() for p in selector.split()]
379
+ return self._select_with_descendant_combinator(parts)
380
+ else:
381
+ # Simple selector
382
+ components = _parse_simple_selector(selector)
383
+ return _find_all_matching(self, components)
384
+
385
+ def _select_with_descendant_combinator(self, parts: List[str]) -> List['Tag']:
386
+ """Handle descendant combinator (space)."""
387
+ if not parts:
388
+ return []
389
+
390
+ if len(parts) == 1:
391
+ components = self._parse_selector_components(parts[0])
392
+ return self._find_all_matching_in_tree(self, components)
393
+
394
+ # Find elements matching the first part
395
+ first_components = self._parse_selector_components(parts[0])
396
+ first_matches = self._find_all_matching_in_tree(self, first_components)
397
+
398
+ # For each match, find descendants matching remaining parts
399
+ results = []
400
+ remaining_selector = ' '.join(parts[1:])
401
+ for match in first_matches:
402
+ descendants = match.select(remaining_selector)
403
+ results.extend(descendants)
404
+
325
405
  return results
406
+
407
+ def _select_with_child_combinator(self, parts: List[str]) -> List['Tag']:
408
+ """Handle child combinator (>)."""
409
+ if not parts:
410
+ return []
411
+
412
+ if len(parts) == 1:
413
+ components = self._parse_selector_components(parts[0])
414
+ return self._find_all_matching_in_tree(self, components)
415
+
416
+ # Find elements matching the first part
417
+ first_components = self._parse_selector_components(parts[0])
418
+ first_matches = self._find_all_matching_in_tree(self, first_components)
419
+
420
+ # For each match, find direct children matching the next part
421
+ if len(parts) == 2:
422
+ # Last part, just check direct children
423
+ next_components = self._parse_selector_components(parts[1])
424
+ results = []
425
+ for match in first_matches:
426
+ for child in match.contents:
427
+ if isinstance(child, Tag) and self._match_selector_components(child, next_components):
428
+ results.append(child)
429
+ return results
430
+ else:
431
+ # More parts, need to continue recursively
432
+ results = []
433
+ next_components = self._parse_selector_components(parts[1])
434
+ remaining_parts = parts[2:]
435
+ for match in first_matches:
436
+ for child in match.contents:
437
+ if isinstance(child, Tag) and self._match_selector_components(child, next_components):
438
+ # Continue with remaining parts
439
+ remaining_selector = ' > '.join(remaining_parts)
440
+ descendants = child.select(remaining_selector)
441
+ results.extend(descendants)
442
+ return results
443
+
444
+ def _parse_selector_components(self, simple_sel: str) -> dict:
445
+ """Parse a simple selector like 'p.class#id[attr=value]' into components."""
446
+ components = {
447
+ 'tag': None,
448
+ 'id': None,
449
+ 'classes': [],
450
+ 'attrs': {}
451
+ }
452
+
453
+ # Extract tag name (at the start)
454
+ tag_match = re.match(r'^([a-zA-Z][\w-]*)', simple_sel)
455
+ if tag_match:
456
+ components['tag'] = tag_match.group(1)
457
+ simple_sel = simple_sel[len(tag_match.group(1)):]
458
+
459
+ # Extract ID
460
+ id_matches = re.findall(r'#([\w-]+)', simple_sel)
461
+ if id_matches:
462
+ components['id'] = id_matches[0]
463
+
464
+ # Extract classes
465
+ class_matches = re.findall(r'\.([\w-]+)', simple_sel)
466
+ components['classes'] = class_matches
467
+
468
+ # Extract attributes
469
+ attr_matches = re.findall(r'\[([^\]]+)\]', simple_sel)
470
+ for attr_expr in attr_matches:
471
+ if '=' in attr_expr:
472
+ attr_name, attr_value = attr_expr.split('=', 1)
473
+ components['attrs'][attr_name.strip()] = attr_value.strip('\'"')
474
+ else:
475
+ components['attrs'][attr_expr.strip()] = None
476
+
477
+ return components
478
+
479
+ def _match_selector_components(self, tag: 'Tag', components: dict) -> bool:
480
+ """Check if a tag matches the parsed selector components."""
481
+ # Check tag name
482
+ if components['tag'] and tag.name != components['tag']:
483
+ return False
484
+
485
+ # Check ID
486
+ if components['id'] and tag.get('id') != components['id']:
487
+ return False
488
+
489
+ # Check classes
490
+ tag_classes = tag.get('class', '')
491
+ if isinstance(tag_classes, str):
492
+ tag_classes = tag_classes.split()
493
+ elif not isinstance(tag_classes, list):
494
+ tag_classes = [str(tag_classes)] if tag_classes else []
495
+
496
+ for cls in components['classes']:
497
+ if cls not in tag_classes:
498
+ return False
499
+
500
+ # Check attributes
501
+ for attr_name, attr_value in components['attrs'].items():
502
+ if attr_value is None:
503
+ # Just check attribute exists
504
+ if attr_name not in tag.attrs:
505
+ return False
506
+ else:
507
+ # Check attribute value
508
+ if tag.get(attr_name) != attr_value:
509
+ return False
510
+
511
+ return True
512
+
513
+ def _find_all_matching_in_tree(self, element: 'Tag', components: dict) -> List['Tag']:
514
+ """Recursively find all elements matching the selector components."""
515
+ matches = []
516
+
517
+ # Check current element
518
+ if self._match_selector_components(element, components):
519
+ matches.append(element)
520
+
521
+ # Check children recursively
522
+ for child in element.contents:
523
+ if isinstance(child, Tag):
524
+ matches.extend(self._find_all_matching_in_tree(child, components))
525
+
526
+ return matches
326
527
 
327
528
  def select_one(self, selector: str) -> Optional['Tag']:
328
529
  """
@@ -462,6 +663,11 @@ class Tag:
462
663
  new_child.parent = self
463
664
  self.contents.append(new_child)
464
665
 
666
+ def extend(self, new_children: List[Union['Tag', NavigableString, str]]) -> None:
667
+ """Extend the contents of this tag with a list of new children."""
668
+ for child in new_children:
669
+ self.append(child)
670
+
465
671
  def insert(self, index: int, new_child: Union['Tag', NavigableString, str]) -> None:
466
672
  """Insert a new child at the given index with error handling."""
467
673
  if isinstance(new_child, str):
webscout/search/__init__.py CHANGED
@@ -4,14 +4,14 @@ from .base import BaseSearch, BaseSearchEngine
4
4
  from .duckduckgo_main import DuckDuckGoSearch
5
5
  from .yep_main import YepSearch
6
6
  from .bing_main import BingSearch
7
+ from .yahoo_main import YahooSearch
7
8
 
8
9
  # Import new search engines
9
10
  from .engines.brave import Brave
10
11
  from .engines.mojeek import Mojeek
11
- from .engines.yahoo import Yahoo
12
+
12
13
  from .engines.yandex import Yandex
13
14
  from .engines.wikipedia import Wikipedia
14
- from .engines.yahoo_news import YahooNews
15
15
 
16
16
  # Import result models
17
17
  from .results import (
@@ -31,14 +31,13 @@ __all__ = [
31
31
  "DuckDuckGoSearch",
32
32
  "YepSearch",
33
33
  "BingSearch",
34
+ "YahooSearch",
34
35
 
35
36
  # Individual engines
36
37
  "Brave",
37
38
  "Mojeek",
38
- "Yahoo",
39
39
  "Yandex",
40
40
  "Wikipedia",
41
- "YahooNews",
42
41
 
43
42
  # Result models
44
43
  "TextResult",
webscout/search/engines/bing/images.py CHANGED
@@ -4,10 +4,10 @@ from __future__ import annotations
4
4
 
5
5
  from typing import Dict, List
6
6
  from urllib.parse import urlencode
7
- from bs4 import BeautifulSoup
8
7
  from time import sleep
9
8
 
10
9
  from .base import BingBase
10
+ from webscout.scout import Scout
11
11
 
12
12
 
13
13
  class BingImagesSearch(BingBase):
@@ -17,6 +17,9 @@ class BingImagesSearch(BingBase):
17
17
  safesearch = args[2] if len(args) > 2 else kwargs.get("safesearch", "moderate")
18
18
  max_results = args[3] if len(args) > 3 else kwargs.get("max_results", 10)
19
19
 
20
+ if max_results is None:
21
+ max_results = 10
22
+
20
23
  if not keywords:
21
24
  raise ValueError("Keywords are mandatory")
22
25
 
@@ -59,7 +62,7 @@ class BingImagesSearch(BingBase):
59
62
  except Exception as e:
60
63
  raise Exception(f"Failed to fetch images: {str(e)}")
61
64
 
62
- soup = BeautifulSoup(html, 'html.parser')
65
+ soup = Scout(html)
63
66
  img_tags = soup.select('a.iusc img')
64
67
 
65
68
  for img in img_tags:
webscout/search/engines/bing/news.py CHANGED
@@ -4,10 +4,10 @@ from __future__ import annotations
4
4
 
5
5
  from typing import Dict, List
6
6
  from urllib.parse import urlencode
7
- from bs4 import BeautifulSoup
8
7
  from time import sleep
9
8
 
10
9
  from .base import BingBase
10
+ from webscout.scout import Scout
11
11
 
12
12
 
13
13
  class BingNewsSearch(BingBase):
@@ -17,6 +17,9 @@ class BingNewsSearch(BingBase):
17
17
  safesearch = args[2] if len(args) > 2 else kwargs.get("safesearch", "moderate")
18
18
  max_results = args[3] if len(args) > 3 else kwargs.get("max_results", 10)
19
19
 
20
+ if max_results is None:
21
+ max_results = 10
22
+
20
23
  if not keywords:
21
24
  raise ValueError("Keywords are mandatory")
22
25
 
@@ -50,15 +53,14 @@ class BingNewsSearch(BingBase):
50
53
  try:
51
54
  response = self.session.get(full_url, timeout=self.timeout)
52
55
  response.raise_for_status()
53
- data = response.json()
56
+ html = response.text
54
57
  except Exception as e:
55
58
  raise Exception(f"Failed to fetch news: {str(e)}")
56
59
 
57
- html = data.get('html', '')
58
60
  if not html:
59
61
  break
60
62
 
61
- soup = BeautifulSoup(html, 'html.parser')
63
+ soup = Scout(html)
62
64
  news_items = soup.select('div.newsitem')
63
65
 
64
66
  for item in news_items:
webscout/search/engines/bing/text.py CHANGED
@@ -4,10 +4,10 @@ from __future__ import annotations
4
4
 
5
5
  from typing import Dict, List
6
6
  from urllib.parse import urlencode
7
- from bs4 import BeautifulSoup
8
7
  from time import sleep
9
8
 
10
9
  from .base import BingBase
10
+ from webscout.scout import Scout
11
11
 
12
12
 
13
13
  class BingTextSearch(BingBase):
@@ -18,6 +18,9 @@ class BingTextSearch(BingBase):
18
18
  max_results = args[3] if len(args) > 3 else kwargs.get("max_results", 10)
19
19
  unique = kwargs.get("unique", True)
20
20
 
21
+ if max_results is None:
22
+ max_results = 10
23
+
21
24
  if not keywords:
22
25
  raise ValueError("Keywords are mandatory")
23
26
 
@@ -46,7 +49,7 @@ class BingTextSearch(BingBase):
46
49
  while len(fetched_results) < max_results and urls_to_fetch:
47
50
  current_url = urls_to_fetch.pop(0)
48
51
  html = fetch_page(current_url)
49
- soup = BeautifulSoup(html, 'html.parser')
52
+ soup = Scout(html)
50
53
 
51
54
  links = soup.select('ol#b_results > li.b_algo')
52
55
  for link in links:
webscout/search/engines/yahoo/__init__.py ADDED
@@ -0,0 +1,41 @@
1
+ """Yahoo search engines package.
2
+
3
+ This package provides comprehensive Yahoo search functionality including:
4
+ - Text search with multi-page pagination
5
+ - Image search with advanced filters
6
+ - Video search with quality and length filters
7
+ - News search with time filtering
8
+ - Search suggestions/autocomplete
9
+
10
+ All engines support:
11
+ - Human-like browsing through multiple pages
12
+ - Rich metadata extraction
13
+ - Filter support
14
+ - Clean result formatting
15
+
16
+ Example:
17
+ >>> from webscout.search.engines.yahoo import YahooText
18
+ >>>
19
+ >>> # Search with automatic pagination
20
+ >>> searcher = YahooText()
21
+ >>> results = searcher.search("python programming", max_results=50)
22
+ >>>
23
+ >>> for result in results:
24
+ ... print(f"{result.title}: {result.url}")
25
+ """
26
+
27
+ from .base import YahooSearchEngine
28
+ from .images import YahooImages
29
+ from .news import YahooNews
30
+ from .suggestions import YahooSuggestions
31
+ from .text import YahooText
32
+ from .videos import YahooVideos
33
+
34
+ __all__ = [
35
+ "YahooSearchEngine",
36
+ "YahooText",
37
+ "YahooImages",
38
+ "YahooVideos",
39
+ "YahooNews",
40
+ "YahooSuggestions",
41
+ ]
webscout/search/engines/yahoo/answers.py ADDED
@@ -0,0 +1,16 @@
1
+ """Yahoo answers search."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from .base import YahooSearchEngine
6
+
7
+
8
+ class YahooAnswers(YahooSearchEngine):
9
+ """Yahoo instant answers."""
10
+
11
+ def run(self, *args, **kwargs) -> list[dict[str, str]]:
12
+ """Get instant answers from Yahoo.
13
+
14
+ Not supported.
15
+ """
16
+ raise NotImplementedError("Yahoo does not support instant answers")
webscout/search/engines/yahoo/base.py ADDED
@@ -0,0 +1,34 @@
1
+ """Base class for Yahoo search engines."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from secrets import token_urlsafe
6
+ from typing import Any, Generic, TypeVar
7
+
8
+ from ...base import BaseSearchEngine
9
+
10
+ T = TypeVar("T")
11
+
12
+ class YahooSearchEngine(BaseSearchEngine[T], Generic[T]):
13
+ """Base class for Yahoo search engines.
14
+
15
+ Yahoo search is powered by Bing but has its own interface.
16
+ All Yahoo searches use dynamic URLs with tokens for tracking.
17
+ """
18
+
19
+ provider = "yahoo"
20
+ _base_url = "https://search.yahoo.com"
21
+
22
+ def generate_ylt_token(self) -> str:
23
+ """Generate Yahoo _ylt tracking token."""
24
+ return token_urlsafe(24 * 3 // 4)
25
+
26
+ def generate_ylu_token(self) -> str:
27
+ """Generate Yahoo _ylu tracking token."""
28
+ return token_urlsafe(47 * 3 // 4)
29
+
30
+ def build_search_url(self, base_path: str) -> str:
31
+ """Build search URL with tracking tokens."""
32
+ ylt = self.generate_ylt_token()
33
+ ylu = self.generate_ylu_token()
34
+ return f"{self._base_url}/{base_path};_ylt={ylt};_ylu={ylu}"