PyPI - webscout - Versions diffs - 2025.10.15__py3-none-any.whl → 2025.10.17__py3-none-any.whl - Mend

webscout 2025.10.15__py3-none-any.whl → 2025.10.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of webscout might be problematic. Click here for more details.

Files changed (63) hide show

webscout/Extra/YTToolkit/README.md +1 -1
webscout/Extra/tempmail/README.md +3 -3
webscout/Provider/ClaudeOnline.py +350 -0
webscout/Provider/OPENAI/README.md +1 -1
webscout/Provider/TTI/bing.py +4 -4
webscout/Provider/TTI/claudeonline.py +315 -0
webscout/__init__.py +1 -1
webscout/client.py +4 -5
webscout/litprinter/__init__.py +0 -42
webscout/scout/README.md +59 -8
webscout/scout/core/scout.py +62 -0
webscout/scout/element.py +251 -45
webscout/search/__init__.py +3 -4
webscout/search/engines/bing/images.py +5 -2
webscout/search/engines/bing/news.py +6 -4
webscout/search/engines/bing/text.py +5 -2
webscout/search/engines/yahoo/__init__.py +41 -0
webscout/search/engines/yahoo/answers.py +16 -0
webscout/search/engines/yahoo/base.py +34 -0
webscout/search/engines/yahoo/images.py +324 -0
webscout/search/engines/yahoo/maps.py +16 -0
webscout/search/engines/yahoo/news.py +258 -0
webscout/search/engines/yahoo/suggestions.py +140 -0
webscout/search/engines/yahoo/text.py +273 -0
webscout/search/engines/yahoo/translate.py +16 -0
webscout/search/engines/yahoo/videos.py +302 -0
webscout/search/engines/yahoo/weather.py +220 -0
webscout/search/http_client.py +1 -1
webscout/search/yahoo_main.py +54 -0
webscout/{auth → server}/__init__.py +2 -23
webscout/server/config.py +84 -0
webscout/{auth → server}/request_processing.py +3 -28
webscout/{auth → server}/routes.py +6 -148
webscout/server/schemas.py +23 -0
webscout/{auth → server}/server.py +11 -43
webscout/server/simple_logger.py +84 -0
webscout/version.py +1 -1
webscout/version.py.bak +1 -1
webscout/zeroart/README.md +17 -9
webscout/zeroart/__init__.py +78 -6
webscout/zeroart/effects.py +51 -1
webscout/zeroart/fonts.py +559 -1
{webscout-2025.10.15.dist-info → webscout-2025.10.17.dist-info}/METADATA +11 -54
{webscout-2025.10.15.dist-info → webscout-2025.10.17.dist-info}/RECORD +51 -46
{webscout-2025.10.15.dist-info → webscout-2025.10.17.dist-info}/entry_points.txt +1 -1
webscout/Extra/weather.md +0 -281
webscout/auth/api_key_manager.py +0 -189
webscout/auth/auth_system.py +0 -85
webscout/auth/config.py +0 -175
webscout/auth/database.py +0 -755
webscout/auth/middleware.py +0 -248
webscout/auth/models.py +0 -185
webscout/auth/rate_limiter.py +0 -254
webscout/auth/schemas.py +0 -103
webscout/auth/simple_logger.py +0 -236
webscout/search/engines/yahoo.py +0 -65
webscout/search/engines/yahoo_news.py +0 -64
/webscout/{auth → server}/exceptions.py +0 -0
/webscout/{auth → server}/providers.py +0 -0
/webscout/{auth → server}/request_models.py +0 -0
{webscout-2025.10.15.dist-info → webscout-2025.10.17.dist-info}/WHEEL +0 -0
{webscout-2025.10.15.dist-info → webscout-2025.10.17.dist-info}/licenses/LICENSE.md +0 -0
{webscout-2025.10.15.dist-info → webscout-2025.10.17.dist-info}/top_level.txt +0 -0

webscout/scout/element.py CHANGED Viewed

@@ -267,7 +267,14 @@ class Tag:
     def select(self, selector: str) -> List['Tag']:
         """
         Select elements using CSS selector.
-        Enhanced to support more complex selectors.
+        Enhanced to support more complex selectors including:
+        - Tag selectors: 'p', 'div'
+        - Class selectors: '.class', 'p.class'
+        - ID selectors: '#id', 'div#id'
+        - Attribute selectors: '[attr]', '[attr=value]'
+        - Descendant selectors: 'div p'
+        - Child selectors: 'div > p'
+        - Multiple classes: '.class1.class2'
         Args:
             selector (str): CSS selector string
@@ -275,54 +282,248 @@ class Tag:
         Returns:
             List[Tag]: List of matching elements
         """
-        # More advanced CSS selector parsing
-        # This is a simplified implementation and might need more robust parsing
-        parts = re.split(r'\s+', selector.strip())
         results = []
-        def _match_selector(tag, selector_part):
-            # Support more complex selectors
-            if selector_part.startswith('.'):
-                # Class selector
-                return selector_part[1:] in tag.get('class', [])
-            elif selector_part.startswith('#'):
-                # ID selector
-                return tag.get('id') == selector_part[1:]
-            elif '[' in selector_part and ']' in selector_part:
-                # Attribute selector
-                attr_match = re.match(r'(\w+)\[([^=]+)(?:=(.+))?\]', selector_part)
-                if attr_match:
-                    tag_name, attr, value = attr_match.groups()
-                    if tag_name and tag.name != tag_name:
+        def _parse_simple_selector(simple_sel: str) -> dict:
+            """Parse a simple selector like 'p.class#id[attr=value]' into components."""
+            components = {
+                'tag': None,
+                'id': None,
+                'classes': [],
+                'attrs': {}
+            }
+            # Extract tag name (at the start)
+            tag_match = re.match(r'^([a-zA-Z][\w-]*)', simple_sel)
+            if tag_match:
+                components['tag'] = tag_match.group(1)
+                simple_sel = simple_sel[len(tag_match.group(1)):]
+            # Extract ID
+            id_matches = re.findall(r'#([\w-]+)', simple_sel)
+            if id_matches:
+                components['id'] = id_matches[0]
+            # Extract classes
+            class_matches = re.findall(r'\.([\w-]+)', simple_sel)
+            components['classes'] = class_matches
+            # Extract attributes
+            attr_matches = re.findall(r'\[([^\]]+)\]', simple_sel)
+            for attr_expr in attr_matches:
+                if '=' in attr_expr:
+                    attr_name, attr_value = attr_expr.split('=', 1)
+                    components['attrs'][attr_name.strip()] = attr_value.strip('\'"')
+                else:
+                    components['attrs'][attr_expr.strip()] = None
+            return components
+        def _match_simple_selector(tag: 'Tag', components: dict) -> bool:
+            """Check if a tag matches the parsed selector components."""
+            # Check tag name
+            if components['tag'] and tag.name != components['tag']:
+                return False
+            # Check ID
+            if components['id'] and tag.get('id') != components['id']:
+                return False
+            # Check classes
+            tag_classes = tag.get('class', '')
+            if isinstance(tag_classes, str):
+                tag_classes = tag_classes.split()
+            elif not isinstance(tag_classes, list):
+                tag_classes = [str(tag_classes)] if tag_classes else []
+            for cls in components['classes']:
+                if cls not in tag_classes:
+                    return False
+            # Check attributes
+            for attr_name, attr_value in components['attrs'].items():
+                if attr_value is None:
+                    # Just check attribute exists
+                    if attr_name not in tag.attrs:
                         return False
-                    if value:
-                        return tag.get(attr) == value.strip("'\"")
-                    return attr in tag.attrs
-            else:
-                # Tag selector
-                return tag.name == selector_part
-        def _recursive_select(element, selector_parts):
-            if not selector_parts:
-                results.append(element)
-                return
-            current_selector = selector_parts[0]
-            remaining_selectors = selector_parts[1:]
-            if _match_selector(element, current_selector):
-                if not remaining_selectors:
-                    results.append(element)
                 else:
-                    for child in element.contents:
-                        if isinstance(child, Tag):
-                            _recursive_select(child, remaining_selectors)
-        for child in self.contents:
-            if isinstance(child, Tag):
-                _recursive_select(child, parts)
+                    # Check attribute value
+                    if tag.get(attr_name) != attr_value:
+                        return False
+            return True
+        def _find_all_matching(element: 'Tag', components: dict) -> List['Tag']:
+            """Recursively find all elements matching the selector components."""
+            matches = []
+            # Check current element
+            if _match_simple_selector(element, components):
+                matches.append(element)
+            # Check children recursively
+            for child in element.contents:
+                if isinstance(child, Tag):
+                    matches.extend(_find_all_matching(child, components))
+            return matches
+        # Handle combinators (descendant ' ' and child '>')
+        if ' > ' in selector:
+            # Child combinator
+            parts = [p.strip() for p in selector.split(' > ')]
+            return self._select_with_child_combinator(parts)
+        elif ' ' in selector.strip():
+            # Descendant combinator
+            parts = [p.strip() for p in selector.split()]
+            return self._select_with_descendant_combinator(parts)
+        else:
+            # Simple selector
+            components = _parse_simple_selector(selector)
+            return _find_all_matching(self, components)
+    def _select_with_descendant_combinator(self, parts: List[str]) -> List['Tag']:
+        """Handle descendant combinator (space)."""
+        if not parts:
+            return []
+        if len(parts) == 1:
+            components = self._parse_selector_components(parts[0])
+            return self._find_all_matching_in_tree(self, components)
+        # Find elements matching the first part
+        first_components = self._parse_selector_components(parts[0])
+        first_matches = self._find_all_matching_in_tree(self, first_components)
+        # For each match, find descendants matching remaining parts
+        results = []
+        remaining_selector = ' '.join(parts[1:])
+        for match in first_matches:
+            descendants = match.select(remaining_selector)
+            results.extend(descendants)
         return results
+    def _select_with_child_combinator(self, parts: List[str]) -> List['Tag']:
+        """Handle child combinator (>)."""
+        if not parts:
+            return []
+        if len(parts) == 1:
+            components = self._parse_selector_components(parts[0])
+            return self._find_all_matching_in_tree(self, components)
+        # Find elements matching the first part
+        first_components = self._parse_selector_components(parts[0])
+        first_matches = self._find_all_matching_in_tree(self, first_components)
+        # For each match, find direct children matching the next part
+        if len(parts) == 2:
+            # Last part, just check direct children
+            next_components = self._parse_selector_components(parts[1])
+            results = []
+            for match in first_matches:
+                for child in match.contents:
+                    if isinstance(child, Tag) and self._match_selector_components(child, next_components):
+                        results.append(child)
+            return results
+        else:
+            # More parts, need to continue recursively
+            results = []
+            next_components = self._parse_selector_components(parts[1])
+            remaining_parts = parts[2:]
+            for match in first_matches:
+                for child in match.contents:
+                    if isinstance(child, Tag) and self._match_selector_components(child, next_components):
+                        # Continue with remaining parts
+                        remaining_selector = ' > '.join(remaining_parts)
+                        descendants = child.select(remaining_selector)
+                        results.extend(descendants)
+            return results
+    def _parse_selector_components(self, simple_sel: str) -> dict:
+        """Parse a simple selector like 'p.class#id[attr=value]' into components."""
+        components = {
+            'tag': None,
+            'id': None,
+            'classes': [],
+            'attrs': {}
+        }
+        # Extract tag name (at the start)
+        tag_match = re.match(r'^([a-zA-Z][\w-]*)', simple_sel)
+        if tag_match:
+            components['tag'] = tag_match.group(1)
+            simple_sel = simple_sel[len(tag_match.group(1)):]
+        # Extract ID
+        id_matches = re.findall(r'#([\w-]+)', simple_sel)
+        if id_matches:
+            components['id'] = id_matches[0]
+        # Extract classes
+        class_matches = re.findall(r'\.([\w-]+)', simple_sel)
+        components['classes'] = class_matches
+        # Extract attributes
+        attr_matches = re.findall(r'\[([^\]]+)\]', simple_sel)
+        for attr_expr in attr_matches:
+            if '=' in attr_expr:
+                attr_name, attr_value = attr_expr.split('=', 1)
+                components['attrs'][attr_name.strip()] = attr_value.strip('\'"')
+            else:
+                components['attrs'][attr_expr.strip()] = None
+        return components
+    def _match_selector_components(self, tag: 'Tag', components: dict) -> bool:
+        """Check if a tag matches the parsed selector components."""
+        # Check tag name
+        if components['tag'] and tag.name != components['tag']:
+            return False
+        # Check ID
+        if components['id'] and tag.get('id') != components['id']:
+            return False
+        # Check classes
+        tag_classes = tag.get('class', '')
+        if isinstance(tag_classes, str):
+            tag_classes = tag_classes.split()
+        elif not isinstance(tag_classes, list):
+            tag_classes = [str(tag_classes)] if tag_classes else []
+        for cls in components['classes']:
+            if cls not in tag_classes:
+                return False
+        # Check attributes
+        for attr_name, attr_value in components['attrs'].items():
+            if attr_value is None:
+                # Just check attribute exists
+                if attr_name not in tag.attrs:
+                    return False
+            else:
+                # Check attribute value
+                if tag.get(attr_name) != attr_value:
+                    return False
+        return True
+    def _find_all_matching_in_tree(self, element: 'Tag', components: dict) -> List['Tag']:
+        """Recursively find all elements matching the selector components."""
+        matches = []
+        # Check current element
+        if self._match_selector_components(element, components):
+            matches.append(element)
+        # Check children recursively
+        for child in element.contents:
+            if isinstance(child, Tag):
+                matches.extend(self._find_all_matching_in_tree(child, components))
+        return matches
     def select_one(self, selector: str) -> Optional['Tag']:
         """
@@ -462,6 +663,11 @@ class Tag:
             new_child.parent = self
         self.contents.append(new_child)
+    def extend(self, new_children: List[Union['Tag', NavigableString, str]]) -> None:
+        """Extend the contents of this tag with a list of new children."""
+        for child in new_children:
+            self.append(child)
     def insert(self, index: int, new_child: Union['Tag', NavigableString, str]) -> None:
         """Insert a new child at the given index with error handling."""
         if isinstance(new_child, str):

webscout/search/__init__.py CHANGED Viewed

@@ -4,14 +4,14 @@ from .base import BaseSearch, BaseSearchEngine
 from .duckduckgo_main import DuckDuckGoSearch
 from .yep_main import YepSearch
 from .bing_main import BingSearch
+from .yahoo_main import YahooSearch
 # Import new search engines
 from .engines.brave import Brave
 from .engines.mojeek import Mojeek
-from .engines.yahoo import Yahoo
 from .engines.yandex import Yandex
 from .engines.wikipedia import Wikipedia
-from .engines.yahoo_news import YahooNews
 # Import result models
 from .results import (
@@ -31,14 +31,13 @@ __all__ = [
     "DuckDuckGoSearch",
     "YepSearch",
     "BingSearch",
+    "YahooSearch",
     # Individual engines
     "Brave",
     "Mojeek",
-    "Yahoo",
     "Yandex",
     "Wikipedia",
-    "YahooNews",
     # Result models
     "TextResult",

webscout/search/engines/bing/images.py CHANGED Viewed

@@ -4,10 +4,10 @@ from __future__ import annotations
 from typing import Dict, List
 from urllib.parse import urlencode
-from bs4 import BeautifulSoup
 from time import sleep
 from .base import BingBase
+from webscout.scout import Scout
 class BingImagesSearch(BingBase):
@@ -17,6 +17,9 @@ class BingImagesSearch(BingBase):
         safesearch = args[2] if len(args) > 2 else kwargs.get("safesearch", "moderate")
         max_results = args[3] if len(args) > 3 else kwargs.get("max_results", 10)
+        if max_results is None:
+            max_results = 10
         if not keywords:
             raise ValueError("Keywords are mandatory")
@@ -59,7 +62,7 @@ class BingImagesSearch(BingBase):
             except Exception as e:
                 raise Exception(f"Failed to fetch images: {str(e)}")
-            soup = BeautifulSoup(html, 'html.parser')
+            soup = Scout(html)
             img_tags = soup.select('a.iusc img')
             for img in img_tags:

webscout/search/engines/bing/news.py CHANGED Viewed

@@ -4,10 +4,10 @@ from __future__ import annotations
 from typing import Dict, List
 from urllib.parse import urlencode
-from bs4 import BeautifulSoup
 from time import sleep
 from .base import BingBase
+from webscout.scout import Scout
 class BingNewsSearch(BingBase):
@@ -17,6 +17,9 @@ class BingNewsSearch(BingBase):
         safesearch = args[2] if len(args) > 2 else kwargs.get("safesearch", "moderate")
         max_results = args[3] if len(args) > 3 else kwargs.get("max_results", 10)
+        if max_results is None:
+            max_results = 10
         if not keywords:
             raise ValueError("Keywords are mandatory")
@@ -50,15 +53,14 @@ class BingNewsSearch(BingBase):
             try:
                 response = self.session.get(full_url, timeout=self.timeout)
                 response.raise_for_status()
-                data = response.json()
+                html = response.text
             except Exception as e:
                 raise Exception(f"Failed to fetch news: {str(e)}")
-            html = data.get('html', '')
             if not html:
                 break
-            soup = BeautifulSoup(html, 'html.parser')
+            soup = Scout(html)
             news_items = soup.select('div.newsitem')
             for item in news_items:

webscout/search/engines/bing/text.py CHANGED Viewed

@@ -4,10 +4,10 @@ from __future__ import annotations
 from typing import Dict, List
 from urllib.parse import urlencode
-from bs4 import BeautifulSoup
 from time import sleep
 from .base import BingBase
+from webscout.scout import Scout
 class BingTextSearch(BingBase):
@@ -18,6 +18,9 @@ class BingTextSearch(BingBase):
         max_results = args[3] if len(args) > 3 else kwargs.get("max_results", 10)
         unique = kwargs.get("unique", True)
+        if max_results is None:
+            max_results = 10
         if not keywords:
             raise ValueError("Keywords are mandatory")
@@ -46,7 +49,7 @@ class BingTextSearch(BingBase):
         while len(fetched_results) < max_results and urls_to_fetch:
             current_url = urls_to_fetch.pop(0)
             html = fetch_page(current_url)
-            soup = BeautifulSoup(html, 'html.parser')
+            soup = Scout(html)
             links = soup.select('ol#b_results > li.b_algo')
             for link in links:

webscout/search/engines/yahoo/__init__.py ADDED Viewed

@@ -0,0 +1,41 @@
+"""Yahoo search engines package.
+This package provides comprehensive Yahoo search functionality including:
+- Text search with multi-page pagination
+- Image search with advanced filters
+- Video search with quality and length filters
+- News search with time filtering
+- Search suggestions/autocomplete
+All engines support:
+- Human-like browsing through multiple pages
+- Rich metadata extraction
+- Filter support
+- Clean result formatting
+Example:
+    >>> from webscout.search.engines.yahoo import YahooText
+    >>>
+    >>> # Search with automatic pagination
+    >>> searcher = YahooText()
+    >>> results = searcher.search("python programming", max_results=50)
+    >>>
+    >>> for result in results:
+    ...     print(f"{result.title}: {result.url}")
+"""
+from .base import YahooSearchEngine
+from .images import YahooImages
+from .news import YahooNews
+from .suggestions import YahooSuggestions
+from .text import YahooText
+from .videos import YahooVideos
+__all__ = [
+    "YahooSearchEngine",
+    "YahooText",
+    "YahooImages",
+    "YahooVideos",
+    "YahooNews",
+    "YahooSuggestions",
+]

webscout/search/engines/yahoo/answers.py ADDED Viewed

@@ -0,0 +1,16 @@
+"""Yahoo answers search."""
+from __future__ import annotations
+from .base import YahooSearchEngine
+class YahooAnswers(YahooSearchEngine):
+    """Yahoo instant answers."""
+    def run(self, *args, **kwargs) -> list[dict[str, str]]:
+        """Get instant answers from Yahoo.
+        Not supported.
+        """
+        raise NotImplementedError("Yahoo does not support instant answers")

webscout/search/engines/yahoo/base.py ADDED Viewed

@@ -0,0 +1,34 @@
+"""Base class for Yahoo search engines."""
+from __future__ import annotations
+from secrets import token_urlsafe
+from typing import Any, Generic, TypeVar
+from ...base import BaseSearchEngine
+T = TypeVar("T")
+class YahooSearchEngine(BaseSearchEngine[T], Generic[T]):
+    """Base class for Yahoo search engines.
+    Yahoo search is powered by Bing but has its own interface.
+    All Yahoo searches use dynamic URLs with tokens for tracking.
+    """
+    provider = "yahoo"
+    _base_url = "https://search.yahoo.com"
+    def generate_ylt_token(self) -> str:
+        """Generate Yahoo _ylt tracking token."""
+        return token_urlsafe(24 * 3 // 4)
+    def generate_ylu_token(self) -> str:
+        """Generate Yahoo _ylu tracking token."""
+        return token_urlsafe(47 * 3 // 4)
+    def build_search_url(self, base_path: str) -> str:
+        """Build search URL with tracking tokens."""
+        ylt = self.generate_ylt_token()
+        ylu = self.generate_ylu_token()
+        return f"{self._base_url}/{base_path};_ylt={ylt};_ylu={ylu}"

webscout 2025.10.15__py3-none-any.whl → 2025.10.17__py3-none-any.whl

Potentially problematic release.

webscout 2025.10.15__py3-none-any.whl → 2025.10.17__py3-none-any.whl