webscout 2025.10.15__py3-none-any.whl → 2025.10.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of webscout might be problematic.
- webscout/Extra/YTToolkit/README.md +1 -1
- webscout/Extra/tempmail/README.md +3 -3
- webscout/Provider/OPENAI/README.md +1 -1
- webscout/Provider/TTI/bing.py +4 -4
- webscout/__init__.py +1 -1
- webscout/client.py +4 -5
- webscout/litprinter/__init__.py +0 -42
- webscout/scout/README.md +59 -8
- webscout/scout/core/scout.py +62 -0
- webscout/scout/element.py +251 -45
- webscout/search/__init__.py +3 -4
- webscout/search/engines/bing/images.py +5 -2
- webscout/search/engines/bing/news.py +6 -4
- webscout/search/engines/bing/text.py +5 -2
- webscout/search/engines/yahoo/__init__.py +41 -0
- webscout/search/engines/yahoo/answers.py +16 -0
- webscout/search/engines/yahoo/base.py +34 -0
- webscout/search/engines/yahoo/images.py +324 -0
- webscout/search/engines/yahoo/maps.py +16 -0
- webscout/search/engines/yahoo/news.py +258 -0
- webscout/search/engines/yahoo/suggestions.py +140 -0
- webscout/search/engines/yahoo/text.py +273 -0
- webscout/search/engines/yahoo/translate.py +16 -0
- webscout/search/engines/yahoo/videos.py +302 -0
- webscout/search/engines/yahoo/weather.py +220 -0
- webscout/search/http_client.py +1 -1
- webscout/search/yahoo_main.py +54 -0
- webscout/{auth → server}/__init__.py +2 -23
- webscout/server/config.py +84 -0
- webscout/{auth → server}/request_processing.py +3 -28
- webscout/{auth → server}/routes.py +6 -148
- webscout/server/schemas.py +23 -0
- webscout/{auth → server}/server.py +11 -43
- webscout/server/simple_logger.py +84 -0
- webscout/version.py +1 -1
- webscout/version.py.bak +1 -1
- webscout/zeroart/README.md +17 -9
- webscout/zeroart/__init__.py +78 -6
- webscout/zeroart/effects.py +51 -1
- webscout/zeroart/fonts.py +559 -1
- {webscout-2025.10.15.dist-info → webscout-2025.10.16.dist-info}/METADATA +10 -52
- {webscout-2025.10.15.dist-info → webscout-2025.10.16.dist-info}/RECORD +49 -45
- {webscout-2025.10.15.dist-info → webscout-2025.10.16.dist-info}/entry_points.txt +1 -1
- webscout/auth/api_key_manager.py +0 -189
- webscout/auth/auth_system.py +0 -85
- webscout/auth/config.py +0 -175
- webscout/auth/database.py +0 -755
- webscout/auth/middleware.py +0 -248
- webscout/auth/models.py +0 -185
- webscout/auth/rate_limiter.py +0 -254
- webscout/auth/schemas.py +0 -103
- webscout/auth/simple_logger.py +0 -236
- webscout/search/engines/yahoo.py +0 -65
- webscout/search/engines/yahoo_news.py +0 -64
- /webscout/{auth → server}/exceptions.py +0 -0
- /webscout/{auth → server}/providers.py +0 -0
- /webscout/{auth → server}/request_models.py +0 -0
- {webscout-2025.10.15.dist-info → webscout-2025.10.16.dist-info}/WHEEL +0 -0
- {webscout-2025.10.15.dist-info → webscout-2025.10.16.dist-info}/licenses/LICENSE.md +0 -0
- {webscout-2025.10.15.dist-info → webscout-2025.10.16.dist-info}/top_level.txt +0 -0
webscout/Extra/YTToolkit/README.md
CHANGED

@@ -1,5 +1,5 @@
 <div align="center">
-  <a href="https://github.com/
+  <a href="https://github.com/pyscout/Webscout">
     <img src="https://img.shields.io/badge/YTToolkit-YouTube%20Toolkit-red?style=for-the-badge&logo=youtube&logoColor=white" alt="YTToolkit Logo">
   </a>
   <h1>YTToolkit</h1>
webscout/Extra/tempmail/README.md
CHANGED

@@ -1,6 +1,6 @@
 <div align="center">
-  <a href="https://github.com/
-  <img src="https://img.shields.io/badge/WebScout-TempMail%
+  <a href="https://github.com/pyscout/Webscout">
+  <img src="https://img.shields.io/badge/WebScout-TempMail%20Toolkit-blue?style=for-the-badge&logo=maildotru&logoColor=white" alt="WebScout TempMail Toolkit">
   </a>

   <h1>📧 TempMail</h1>

@@ -484,5 +484,5 @@ Please refer to the main Webscout project's contributing guidelines if you plan
   <a href="https://buymeacoffee.com/oevortex"><img alt="Buy Me A Coffee" src="https://img.shields.io/badge/Buy%20Me%20A%20Coffee-FFDD00?style=for-the-badge&logo=buymeacoffee&logoColor=black"></a>
 </div>
 <p>📧 TempMail - Part of the Webscout Toolkit</p>
-<a href="https://github.com/
+<a href="https://github.com/pyscout/Webscout">Back to Main Webscout Project</a>
 </div>
webscout/Provider/OPENAI/README.md
CHANGED

@@ -1,5 +1,5 @@
 <div align="center">
-  <a href="https://github.com/
+  <a href="https://github.com/pyscout/Webscout">
     <img src="https://img.shields.io/badge/WebScout-OpenAI%20Compatible%20Providers-4285F4?style=for-the-badge&logo=openai&logoColor=white" alt="WebScout OpenAI Compatible Providers">
   </a>
   <br/>
webscout/Provider/TTI/bing.py
CHANGED

@@ -7,6 +7,7 @@ from webscout.Provider.TTI.utils import ImageData, ImageResponse
 from webscout.Provider.TTI.base import TTICompatibleProvider, BaseImages
 from io import BytesIO
 from webscout.litagent import LitAgent
+from webscout.scout import Scout

 try:
     from PIL import Image

@@ -81,9 +82,8 @@ class Images(BaseImages):
             time.sleep(3)
             try:
                 poll_resp = session.get(polling_url, headers=headers, timeout=timeout)
-
-
-                imgs = [img["src"].split("?")[0] for img in soup.select(".img_cont .mimg") if img.get("src")]
+                scout = Scout(poll_resp.text, features='html.parser')
+                imgs = [img["src"].split("?")[0] for img in scout.select(".img_cont .mimg") if img.attrs.get("src")]
                 if imgs:
                     img_url = imgs[0]
                     break

@@ -232,7 +232,7 @@ class BingImageAI(TTICompatibleProvider):

 if __name__ == "__main__":
     from rich import print
-    client = BingImageAI(cookie="
+    client = BingImageAI(cookie="1Fw9daLSZzVBJXgevTDuc0jHZ60l4m5IiQEwjRCFOwEkpEBDmw3b8CEAALFSwZ1QBu-rATNkfD0i0gfJmVHeFlogqIriGwxNwT9T6fVREgAQD4_qn0VnQYP681NN4K80t6o-eJXnK1MBhdjxTIaok8173LGmLkEWLqHC0k3dYnF7m2kHRhf1dxjEH3WDI56hxiSPZtnggdzrfnuFAmOgCQQ")
     response = client.images.create(
         model="gpt4o",
         prompt="A cat riding a bicycle",
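The poll-parsing change above replaces the previous BeautifulSoup-style `soup.select(...)` call with webscout's own `Scout` parser. A minimal, self-contained sketch of that extraction step, using made-up HTML and only the calls visible in the diff (`Scout(html, features=...)`, `.select(css)`, `img["src"]`, `img.attrs.get(...)`):

```python
from webscout.scout import Scout

# Made-up markup standing in for Bing's polling response.
html = """
<div class="img_cont"><img class="mimg" src="https://example.invalid/a.jpg?w=270"></div>
<div class="img_cont"><img class="mimg" src="https://example.invalid/b.jpg?w=270"></div>
"""

scout = Scout(html, features='html.parser')
# Same expression as the diff: drop query strings, skip images without a src.
imgs = [img["src"].split("?")[0]
        for img in scout.select(".img_cont .mimg")
        if img.attrs.get("src")]
print(imgs)  # expected: ['https://example.invalid/a.jpg', 'https://example.invalid/b.jpg']
```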
webscout/__init__.py
CHANGED
webscout/client.py
CHANGED

@@ -34,20 +34,19 @@ from webscout.Provider.OPENAI import *
 try:
     # Use lazy import to avoid module execution issues
     def run_api(*args, **kwargs):
-
-        from webscout.auth.server import run_api as _run_api
+        from webscout.server.server import run_api as _run_api
         return _run_api(*args, **kwargs)

     def start_server(**kwargs):
         """Start the Webscout OpenAI-compatible API server (FastAPI backend)."""
-        from webscout.
+        from webscout.server.server import run_api as _run_api
         return _run_api(**kwargs)
 except ImportError:
     # Fallback for environments where the backend is not available
     def run_api(*args, **kwargs):
-        raise ImportError("webscout.
+        raise ImportError("webscout.server.server.run_api is not available in this environment.")
     def start_server(*args, **kwargs):
-        raise ImportError("webscout.
+        raise ImportError("webscout.server.server.start_server is not available in this environment.")

 # ---
 # API Documentation
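Functionally the wrappers behave as before; only the backing module moved from `webscout.auth` to `webscout.server`. A hedged usage sketch (no arguments are passed, since the accepted keyword arguments are not shown in this hunk):

```python
from webscout.client import start_server

try:
    # As of 2025.10.16 this resolves webscout.server.server.run_api internally.
    start_server()
except ImportError as exc:
    # Raised by the fallback stub above when the FastAPI backend is unavailable.
    print(f"Webscout server backend not available: {exc}")
```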
webscout/litprinter/__init__.py
CHANGED

@@ -1,45 +1,3 @@
-"""
->>> from litprinter import litprint
->>> from litprinter import lit
->>> from litprinter import install, uninstall
->>>
->>> litprint("Hello, world!")
-LIT -> [__main__.py:1] in () >>> Hello, world!
->>>
->>> def my_function():
-... lit(1, 2, 3)
->>> my_function()
-LIT -> [__main__.py:4] in my_function() >>> 1, 2, 3
->>> install()
->>> ic("This is now the builtins.ic()")
-LIT -> [__main__.py:7] in () >>> This is now the builtins.ic()
->>> uninstall()
-
-This module provides enhanced print and logging functionalities for Python,
-allowing developers to debug their code with style and precision. It
-includes the litprint and lit functions for debugging, log for logging, and
-install/uninstall functions for integration into the builtins module.
-It also handles colorizing output and provides different styles and customizable
-options.
-
-LITPRINTER is inspired by the icecream package and provides similar functionality
-with additional features:
-- Variable inspection with expression display
-- Return value handling for inline usage
-- Support for custom formatters for specific data types
-- Execution context tracking
-- Rich-like colorized output with multiple themes (JARVIS, RICH, MODERN, NEON, CYBERPUNK)
-- Better JSON formatting with indent=2 by default
-- Advanced pretty printing for complex data structures with smart truncation
-- Clickable file paths in supported terminals and editors (VSCode compatible)
-- Enhanced visual formatting with better spacing and separators
-- Special formatters for common types (Exception, bytes, set, frozenset, etc.)
-- Smart object introspection for custom classes
-- Logging capabilities with timestamp and log levels
-"""
-
-# Try to import from the standalone litprinter package first
-# If it's not installed
 try:
     import litprinter
     # If standalone package is found, re-export all its components
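The remaining lines keep only the import logic: prefer the standalone `litprinter` package and fall back otherwise. A minimal sketch of that try/except re-export pattern, assuming a wildcard re-export (the actual re-export and fallback bodies are outside this hunk):

```python
try:
    import litprinter
    # Standalone package found: re-export its public API from this module.
    from litprinter import *  # noqa: F401,F403
except ImportError:
    # Fallback branch (bundled implementation) is not shown in this diff.
    pass
```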
webscout/scout/README.md
CHANGED

@@ -43,7 +43,7 @@ pip install webscout
 Or install the latest version from GitHub:

 ```bash
-pip install git+https://github.com/
+pip install git+https://github.com/pyscout/Webscout.git
 ```

 ## 🚀 Quick Start

@@ -147,10 +147,57 @@ Scout provides powerful tools for navigating and manipulating HTML/XML documents
 - **Document Manipulation**: Modify, replace, or remove elements
 - **Dynamic Building**: Easily append or insert new nodes

+#### CSS Selector Support
+
+Scout includes a comprehensive CSS selector engine that supports all common selector types:
+
 ```python
-#
-
+# Tag selectors
+paragraphs = scout.select('p')
+divs = scout.select('div')
+
+# Class selectors
+items = scout.select('.item')  # Single class
+cards = scout.select('div.card')  # Tag + class
+special = scout.select('.card.special')  # Multiple classes
+
+# ID selectors
+header = scout.select_one('#header')  # Single element by ID
+menu = scout.select('nav#main-menu')  # Tag + ID
+
+# Attribute selectors
+links = scout.select('a[href]')  # Has attribute
+external = scout.select('a[rel="nofollow"]')  # Attribute value
+images = scout.select('img[alt]')  # Has alt attribute
+
+# Descendant selectors (space)
+nested = scout.select('div p')  # Any p inside div
+deep = scout.select('article section p')  # Deeply nested
+
+# Child selectors (>)
+direct = scout.select('ul > li')  # Direct children only
+menu_items = scout.select('nav#menu > ul > li')  # Multiple levels
+
+# Combined selectors
+complex = scout.select('div.container > p.text[lang="en"]')
+links = scout.select('ol#results > li.item a[href]')
+
+# Get first match only
+first = scout.select_one('p.intro')
+```

+**Supported Selector Types:**
+- **Tag**: `p`, `div`, `a`
+- **Class**: `.class`, `div.class`, `.class1.class2`
+- **ID**: `#id`, `div#id`
+- **Attribute**: `[attr]`, `[attr="value"]`
+- **Descendant**: `div p`, `article section p`
+- **Child**: `div > p`, `ul > li`
+- **Combined**: `p.class#id[attr="value"]`
+
+#### Element Navigation
+
+```python
 # Advanced find with attribute matching
 results = scout.find_all('a', attrs={'class': 'external', 'rel': 'nofollow'})

@@ -340,6 +387,10 @@ cached_data = scout.cache('parsed_data')
 - `__init__(markup, features='html.parser', from_encoding=None)`: Initialize with HTML content
 - `find(name, attrs={}, recursive=True, text=None)`: Find first matching element
 - `find_all(name, attrs={}, recursive=True, text=None, limit=None)`: Find all matching elements
+- `find_next(name, attrs={}, text=None)`: Find next element in document order
+- `find_all_next(name, attrs={}, text=None, limit=None)`: Find all next elements in document order
+- `find_previous(name, attrs={}, text=None)`: Find previous element in document order
+- `find_all_previous(name, attrs={}, text=None, limit=None)`: Find all previous elements in document order
 - `select(selector)`: Find elements using CSS selector
 - `get_text(separator=' ', strip=False)`: Extract text from document
 - `analyze_text()`: Perform text analysis

@@ -358,7 +409,7 @@ cached_data = scout.cache('parsed_data')
 - `_crawl_page(url, depth=0)`: Crawl a single page (internal method)
 - `_is_valid_url(url)`: Check if a URL is valid (internal method)

-For detailed API documentation, please refer to the [documentation](https://github.com/
+For detailed API documentation, please refer to the [documentation](https://github.com/pyscout/Webscout/wiki).

 ## 🔧 Dependencies

@@ -393,9 +444,9 @@ This project is licensed under the MIT License - see the LICENSE file for detail
 <div align="center">
   <p>Made with ❤️ by the Webscout team</p>
   <p>
-    <a href="https://github.com/
-    <a href="https://github.com/
-    <a href="https://github.com/
-    <a href="https://github.com/
+    <a href="https://github.com/pyscout/Webscout">GitHub</a> •
+    <a href="https://github.com/pyscout/Webscout/wiki">Documentation</a> •
+    <a href="https://github.com/pyscout/Webscout/issues">Report Bug</a> •
+    <a href="https://github.com/pyscout/Webscout/issues">Request Feature</a>
   </p>
 </div>
webscout/scout/core/scout.py
CHANGED

@@ -454,6 +454,68 @@
                 pass
         return siblings

+    def find_next(self, name=None, attrs={}, text=None, **kwargs) -> Optional[Tag]:
+        """
+        Find the next element in document order.
+
+        Args:
+            name: Tag name to search for
+            attrs: Attributes to match
+            text: Text content to match
+            **kwargs: Additional attributes
+
+        Returns:
+            Optional[Tag]: Next matching element or None
+        """
+        return self._soup.find_next(name, attrs, text, **kwargs)
+
+    def find_all_next(self, name=None, attrs={}, text=None, limit=None, **kwargs) -> List[Tag]:
+        """
+        Find all next elements in document order.
+
+        Args:
+            name: Tag name to search for
+            attrs: Attributes to match
+            text: Text content to match
+            limit: Maximum number of results
+            **kwargs: Additional attributes
+
+        Returns:
+            List[Tag]: List of matching elements
+        """
+        return self._soup.find_all_next(name, attrs, text, limit, **kwargs)
+
+    def find_previous(self, name=None, attrs={}, text=None, **kwargs) -> Optional[Tag]:
+        """
+        Find the previous element in document order.
+
+        Args:
+            name: Tag name to search for
+            attrs: Attributes to match
+            text: Text content to match
+            **kwargs: Additional attributes
+
+        Returns:
+            Optional[Tag]: Previous matching element or None
+        """
+        return self._soup.find_previous(name, attrs, text, **kwargs)
+
+    def find_all_previous(self, name=None, attrs={}, text=None, limit=None, **kwargs) -> List[Tag]:
+        """
+        Find all previous elements in document order.
+
+        Args:
+            name: Tag name to search for
+            attrs: Attributes to match
+            text: Text content to match
+            limit: Maximum number of results
+            **kwargs: Additional attributes
+
+        Returns:
+            List[Tag]: List of matching elements
+        """
+        return self._soup.find_all_previous(name, attrs, text, limit, **kwargs)
+
     def select(self, selector: str) -> List[Tag]:
         """
         Select elements using CSS selector.
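A short usage sketch for the new document-order navigation helpers; the HTML is made up for illustration, and the exact return values depend on the underlying parser object that `Scout` wraps:

```python
from webscout.scout import Scout

html = "<h1>Title</h1><p>intro</p><p>body</p><div>footer</div>"
scout = Scout(html, features='html.parser')

# New in 2025.10.16: walk the parsed document in order from the root.
first_p = scout.find_next('p')                # first <p> encountered
next_two = scout.find_all_next('p', limit=2)  # up to two <p> elements
heading = scout.find_previous('h1')           # nearest earlier <h1>, if any
```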
webscout/scout/element.py
CHANGED

@@ -267,7 +267,14 @@ class Tag:
     def select(self, selector: str) -> List['Tag']:
         """
         Select elements using CSS selector.
-        Enhanced to support more complex selectors
+        Enhanced to support more complex selectors including:
+        - Tag selectors: 'p', 'div'
+        - Class selectors: '.class', 'p.class'
+        - ID selectors: '#id', 'div#id'
+        - Attribute selectors: '[attr]', '[attr=value]'
+        - Descendant selectors: 'div p'
+        - Child selectors: 'div > p'
+        - Multiple classes: '.class1.class2'

         Args:
             selector (str): CSS selector string

@@ -275,54 +282,248 @@
         Returns:
             List[Tag]: List of matching elements
         """
-        # More advanced CSS selector parsing
-        # This is a simplified implementation and might need more robust parsing
-        parts = re.split(r'\s+', selector.strip())
         results = []
-
-        def
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+        def _parse_simple_selector(simple_sel: str) -> dict:
+            """Parse a simple selector like 'p.class#id[attr=value]' into components."""
+            components = {
+                'tag': None,
+                'id': None,
+                'classes': [],
+                'attrs': {}
+            }
+
+            # Extract tag name (at the start)
+            tag_match = re.match(r'^([a-zA-Z][\w-]*)', simple_sel)
+            if tag_match:
+                components['tag'] = tag_match.group(1)
+                simple_sel = simple_sel[len(tag_match.group(1)):]
+
+            # Extract ID
+            id_matches = re.findall(r'#([\w-]+)', simple_sel)
+            if id_matches:
+                components['id'] = id_matches[0]
+
+            # Extract classes
+            class_matches = re.findall(r'\.([\w-]+)', simple_sel)
+            components['classes'] = class_matches
+
+            # Extract attributes
+            attr_matches = re.findall(r'\[([^\]]+)\]', simple_sel)
+            for attr_expr in attr_matches:
+                if '=' in attr_expr:
+                    attr_name, attr_value = attr_expr.split('=', 1)
+                    components['attrs'][attr_name.strip()] = attr_value.strip('\'"')
+                else:
+                    components['attrs'][attr_expr.strip()] = None
+
+            return components
+
+        def _match_simple_selector(tag: 'Tag', components: dict) -> bool:
+            """Check if a tag matches the parsed selector components."""
+            # Check tag name
+            if components['tag'] and tag.name != components['tag']:
+                return False
+
+            # Check ID
+            if components['id'] and tag.get('id') != components['id']:
+                return False
+
+            # Check classes
+            tag_classes = tag.get('class', '')
+            if isinstance(tag_classes, str):
+                tag_classes = tag_classes.split()
+            elif not isinstance(tag_classes, list):
+                tag_classes = [str(tag_classes)] if tag_classes else []
+
+            for cls in components['classes']:
+                if cls not in tag_classes:
+                    return False
+
+            # Check attributes
+            for attr_name, attr_value in components['attrs'].items():
+                if attr_value is None:
+                    # Just check attribute exists
+                    if attr_name not in tag.attrs:
                         return False
-            if value:
-                return tag.get(attr) == value.strip("'\"")
-            return attr in tag.attrs
-        else:
-            # Tag selector
-            return tag.name == selector_part
-
-        def _recursive_select(element, selector_parts):
-            if not selector_parts:
-                results.append(element)
-                return
-
-            current_selector = selector_parts[0]
-            remaining_selectors = selector_parts[1:]
-
-            if _match_selector(element, current_selector):
-                if not remaining_selectors:
-                    results.append(element)
                 else:
-
-
-
-
-
-
-
-
+                    # Check attribute value
+                    if tag.get(attr_name) != attr_value:
+                        return False
+
+            return True
+
+        def _find_all_matching(element: 'Tag', components: dict) -> List['Tag']:
+            """Recursively find all elements matching the selector components."""
+            matches = []
+
+            # Check current element
+            if _match_simple_selector(element, components):
+                matches.append(element)
+
+            # Check children recursively
+            for child in element.contents:
+                if isinstance(child, Tag):
+                    matches.extend(_find_all_matching(child, components))
+
+            return matches
+
+        # Handle combinators (descendant ' ' and child '>')
+        if ' > ' in selector:
+            # Child combinator
+            parts = [p.strip() for p in selector.split(' > ')]
+            return self._select_with_child_combinator(parts)
+        elif ' ' in selector.strip():
+            # Descendant combinator
+            parts = [p.strip() for p in selector.split()]
+            return self._select_with_descendant_combinator(parts)
+        else:
+            # Simple selector
+            components = _parse_simple_selector(selector)
+            return _find_all_matching(self, components)
+
+    def _select_with_descendant_combinator(self, parts: List[str]) -> List['Tag']:
+        """Handle descendant combinator (space)."""
+        if not parts:
+            return []
+
+        if len(parts) == 1:
+            components = self._parse_selector_components(parts[0])
+            return self._find_all_matching_in_tree(self, components)
+
+        # Find elements matching the first part
+        first_components = self._parse_selector_components(parts[0])
+        first_matches = self._find_all_matching_in_tree(self, first_components)
+
+        # For each match, find descendants matching remaining parts
+        results = []
+        remaining_selector = ' '.join(parts[1:])
+        for match in first_matches:
+            descendants = match.select(remaining_selector)
+            results.extend(descendants)
+
         return results
+
+    def _select_with_child_combinator(self, parts: List[str]) -> List['Tag']:
+        """Handle child combinator (>)."""
+        if not parts:
+            return []
+
+        if len(parts) == 1:
+            components = self._parse_selector_components(parts[0])
+            return self._find_all_matching_in_tree(self, components)
+
+        # Find elements matching the first part
+        first_components = self._parse_selector_components(parts[0])
+        first_matches = self._find_all_matching_in_tree(self, first_components)
+
+        # For each match, find direct children matching the next part
+        if len(parts) == 2:
+            # Last part, just check direct children
+            next_components = self._parse_selector_components(parts[1])
+            results = []
+            for match in first_matches:
+                for child in match.contents:
+                    if isinstance(child, Tag) and self._match_selector_components(child, next_components):
+                        results.append(child)
+            return results
+        else:
+            # More parts, need to continue recursively
+            results = []
+            next_components = self._parse_selector_components(parts[1])
+            remaining_parts = parts[2:]
+            for match in first_matches:
+                for child in match.contents:
+                    if isinstance(child, Tag) and self._match_selector_components(child, next_components):
+                        # Continue with remaining parts
+                        remaining_selector = ' > '.join(remaining_parts)
+                        descendants = child.select(remaining_selector)
+                        results.extend(descendants)
+            return results
+
+    def _parse_selector_components(self, simple_sel: str) -> dict:
+        """Parse a simple selector like 'p.class#id[attr=value]' into components."""
+        components = {
+            'tag': None,
+            'id': None,
+            'classes': [],
+            'attrs': {}
+        }
+
+        # Extract tag name (at the start)
+        tag_match = re.match(r'^([a-zA-Z][\w-]*)', simple_sel)
+        if tag_match:
+            components['tag'] = tag_match.group(1)
+            simple_sel = simple_sel[len(tag_match.group(1)):]
+
+        # Extract ID
+        id_matches = re.findall(r'#([\w-]+)', simple_sel)
+        if id_matches:
+            components['id'] = id_matches[0]
+
+        # Extract classes
+        class_matches = re.findall(r'\.([\w-]+)', simple_sel)
+        components['classes'] = class_matches
+
+        # Extract attributes
+        attr_matches = re.findall(r'\[([^\]]+)\]', simple_sel)
+        for attr_expr in attr_matches:
+            if '=' in attr_expr:
+                attr_name, attr_value = attr_expr.split('=', 1)
+                components['attrs'][attr_name.strip()] = attr_value.strip('\'"')
+            else:
+                components['attrs'][attr_expr.strip()] = None
+
+        return components
+
+    def _match_selector_components(self, tag: 'Tag', components: dict) -> bool:
+        """Check if a tag matches the parsed selector components."""
+        # Check tag name
+        if components['tag'] and tag.name != components['tag']:
+            return False
+
+        # Check ID
+        if components['id'] and tag.get('id') != components['id']:
+            return False
+
+        # Check classes
+        tag_classes = tag.get('class', '')
+        if isinstance(tag_classes, str):
+            tag_classes = tag_classes.split()
+        elif not isinstance(tag_classes, list):
+            tag_classes = [str(tag_classes)] if tag_classes else []
+
+        for cls in components['classes']:
+            if cls not in tag_classes:
+                return False
+
+        # Check attributes
+        for attr_name, attr_value in components['attrs'].items():
+            if attr_value is None:
+                # Just check attribute exists
+                if attr_name not in tag.attrs:
+                    return False
+            else:
+                # Check attribute value
+                if tag.get(attr_name) != attr_value:
+                    return False
+
+        return True
+
+    def _find_all_matching_in_tree(self, element: 'Tag', components: dict) -> List['Tag']:
+        """Recursively find all elements matching the selector components."""
+        matches = []
+
+        # Check current element
+        if self._match_selector_components(element, components):
+            matches.append(element)
+
+        # Check children recursively
+        for child in element.contents:
+            if isinstance(child, Tag):
+                matches.extend(self._find_all_matching_in_tree(child, components))
+
+        return matches

     def select_one(self, selector: str) -> Optional['Tag']:
         """

@@ -462,6 +663,11 @@ class Tag:
         new_child.parent = self
         self.contents.append(new_child)

+    def extend(self, new_children: List[Union['Tag', NavigableString, str]]) -> None:
+        """Extend the contents of this tag with a list of new children."""
+        for child in new_children:
+            self.append(child)
+
     def insert(self, index: int, new_child: Union['Tag', NavigableString, str]) -> None:
         """Insert a new child at the given index with error handling."""
         if isinstance(new_child, str):
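Taken together, the new helpers give `Tag.select()` support for tag, class, ID, attribute, descendant, and child selectors, while `extend()` is a thin loop over `append()`. A hedged usage sketch against made-up HTML, assuming `Scout` exposes the parsed tree as `Tag` objects carrying this `select()` implementation:

```python
from webscout.scout import Scout

# Illustrative markup only; element names and classes are invented.
html = """
<div class="container">
  <ul id="menu">
    <li class="item special"><a href="/a" rel="nofollow">A</a></li>
    <li class="item"><a href="/b">B</a></li>
  </ul>
</div>
"""
scout = Scout(html, features='html.parser')

scout.select('li.item')             # class selector
scout.select('ul#menu > li')        # child combinator
scout.select('div li a[href]')      # descendant chain + attribute presence
scout.select('a[rel="nofollow"]')   # attribute value selector
first = scout.select_one('li.item.special')  # multiple classes, first match only
```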
webscout/search/__init__.py
CHANGED

@@ -4,14 +4,14 @@ from .base import BaseSearch, BaseSearchEngine
 from .duckduckgo_main import DuckDuckGoSearch
 from .yep_main import YepSearch
 from .bing_main import BingSearch
+from .yahoo_main import YahooSearch

 # Import new search engines
 from .engines.brave import Brave
 from .engines.mojeek import Mojeek
-
+
 from .engines.yandex import Yandex
 from .engines.wikipedia import Wikipedia
-from .engines.yahoo_news import YahooNews

 # Import result models
 from .results import (

@@ -31,14 +31,13 @@ __all__ = [
     "DuckDuckGoSearch",
     "YepSearch",
     "BingSearch",
+    "YahooSearch",

     # Individual engines
     "Brave",
     "Mojeek",
-    "Yahoo",
     "Yandex",
     "Wikipedia",
-    "YahooNews",

     # Result models
     "TextResult",
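The net effect of this change is a single Yahoo entry point. Only the import paths below are confirmed by the diff; how `YahooSearch` is constructed and queried is not shown in this hunk:

```python
# New in 2025.10.16
from webscout.search import YahooSearch

# Removed in 2025.10.16 (these imports no longer work):
# from webscout.search import Yahoo
# from webscout.search import YahooNews
```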