webscout 8.3.5__py3-none-any.whl → 8.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
This release of webscout has been marked as potentially problematic.
- webscout/Bard.py +12 -6
- webscout/DWEBS.py +66 -57
- webscout/Provider/{UNFINISHED → AISEARCH}/PERPLEXED_search.py +34 -74
- webscout/Provider/AISEARCH/__init__.py +1 -1
- webscout/Provider/Deepinfra.py +6 -0
- webscout/Provider/Flowith.py +6 -1
- webscout/Provider/GithubChat.py +1 -0
- webscout/Provider/GptOss.py +207 -0
- webscout/Provider/Kimi.py +445 -0
- webscout/Provider/Netwrck.py +3 -6
- webscout/Provider/OPENAI/README.md +2 -1
- webscout/Provider/OPENAI/TogetherAI.py +50 -55
- webscout/Provider/OPENAI/__init__.py +4 -2
- webscout/Provider/OPENAI/copilot.py +20 -4
- webscout/Provider/OPENAI/deepinfra.py +6 -0
- webscout/Provider/OPENAI/e2b.py +60 -8
- webscout/Provider/OPENAI/flowith.py +4 -3
- webscout/Provider/OPENAI/generate_api_key.py +48 -0
- webscout/Provider/OPENAI/gptoss.py +288 -0
- webscout/Provider/OPENAI/kimi.py +469 -0
- webscout/Provider/OPENAI/netwrck.py +8 -12
- webscout/Provider/OPENAI/refact.py +274 -0
- webscout/Provider/OPENAI/textpollinations.py +3 -6
- webscout/Provider/OPENAI/toolbaz.py +1 -0
- webscout/Provider/TTI/bing.py +14 -2
- webscout/Provider/TTI/together.py +10 -9
- webscout/Provider/TTS/README.md +0 -1
- webscout/Provider/TTS/__init__.py +0 -1
- webscout/Provider/TTS/base.py +479 -159
- webscout/Provider/TTS/deepgram.py +409 -156
- webscout/Provider/TTS/elevenlabs.py +425 -111
- webscout/Provider/TTS/freetts.py +317 -140
- webscout/Provider/TTS/gesserit.py +192 -128
- webscout/Provider/TTS/murfai.py +248 -113
- webscout/Provider/TTS/openai_fm.py +347 -129
- webscout/Provider/TTS/speechma.py +620 -586
- webscout/Provider/TextPollinationsAI.py +3 -6
- webscout/Provider/TogetherAI.py +50 -55
- webscout/Provider/UNFINISHED/VercelAIGateway.py +339 -0
- webscout/Provider/__init__.py +2 -90
- webscout/Provider/cerebras.py +83 -33
- webscout/Provider/copilot.py +42 -23
- webscout/Provider/toolbaz.py +1 -0
- webscout/conversation.py +22 -20
- webscout/sanitize.py +14 -10
- webscout/scout/README.md +20 -23
- webscout/scout/core/crawler.py +125 -38
- webscout/scout/core/scout.py +26 -5
- webscout/version.py +1 -1
- webscout/webscout_search.py +13 -6
- webscout/webscout_search_async.py +10 -8
- webscout/yep_search.py +13 -5
- {webscout-8.3.5.dist-info → webscout-8.3.6.dist-info}/METADATA +2 -1
- {webscout-8.3.5.dist-info → webscout-8.3.6.dist-info}/RECORD +59 -56
- webscout/Provider/Glider.py +0 -225
- webscout/Provider/OPENAI/c4ai.py +0 -394
- webscout/Provider/OPENAI/glider.py +0 -330
- webscout/Provider/TTS/sthir.py +0 -94
- /webscout/Provider/{samurai.py → UNFINISHED/samurai.py} +0 -0
- {webscout-8.3.5.dist-info → webscout-8.3.6.dist-info}/WHEEL +0 -0
- {webscout-8.3.5.dist-info → webscout-8.3.6.dist-info}/entry_points.txt +0 -0
- {webscout-8.3.5.dist-info → webscout-8.3.6.dist-info}/licenses/LICENSE.md +0 -0
- {webscout-8.3.5.dist-info → webscout-8.3.6.dist-info}/top_level.txt +0 -0
webscout/Provider/cerebras.py
CHANGED

```diff
@@ -1,34 +1,48 @@
 
 import re
+
+# Import trio before curl_cffi to prevent eventlet socket monkey-patching conflicts
+# See: https://github.com/python-trio/trio/issues/3015
+try:
+    import trio  # noqa: F401
+except ImportError:
+    pass  # trio is optional, ignore if not available
+import json
+from typing import Any, Dict, Generator, List, Optional, Union
+
 import curl_cffi
 from curl_cffi.requests import Session
-
-import os
-from typing import Any, Dict, Optional, Generator, List, Union
-from webscout.AIutel import Optimizers, Conversation, AwesomePrompts, sanitize_stream # Import sanitize_stream
-from webscout.AIbase import Provider
+
 from webscout import exceptions
+from webscout.AIbase import Provider
+from webscout.AIutel import ( # Import sanitize_stream
+    AwesomePrompts,
+    Conversation,
+    Optimizers,
+    sanitize_stream,
+)
 from webscout.litagent import LitAgent as UserAgent
 
+
 class Cerebras(Provider):
     """
     A class to interact with the Cerebras API using a cookie for authentication.
     """
-
+
     AVAILABLE_MODELS = [
-        "
-        "
-        "
-        "llama-4-scout-17b-16e-instruct",
+        "qwen-3-coder-480b",
+        "qwen-3-235b-a22b-instruct-2507",
+        "qwen-3-235b-a22b-thinking-2507",
         "qwen-3-32b",
-
-
+        "llama-3.3-70b",
+        "llama-4-maverick-17b-128e-instruct"
    ]
 
     def __init__(
         self,
+        cookie_path: str = None,
         is_conversation: bool = True,
-        max_tokens: int =
+        max_tokens: int = 40000,
         timeout: int = 30,
         intro: str = None,
         filepath: str = None,
@@ -36,9 +50,11 @@ class Cerebras(Provider):
         proxies: dict = {},
         history_offset: int = 10250,
         act: str = None,
-
-        model: str = "
+        api_key: str = None,
+        model: str = "qwen-3-coder-480b",
         system_prompt: str = "You are a helpful assistant.",
+        temperature: float = 0.7,
+        top_p: float = 0.8,
     ):
         # Validate model choice
         if model not in self.AVAILABLE_MODELS:
@@ -52,15 +68,26 @@ class Cerebras(Provider):
         self.system_prompt = system_prompt
         self.is_conversation = is_conversation
         self.max_tokens_to_sample = max_tokens
+        self.temperature = temperature
+        self.top_p = top_p
         self.last_response = {}
 
         self.session = Session() # Initialize curl_cffi session
 
-        #
-
-        self.api_key =
-
-
+        # Handle API key - either provided directly or retrieved from cookies
+        if api_key:
+            self.api_key = api_key.strip()
+            # Basic validation for API key format
+            if not self.api_key or len(self.api_key) < 10:
+                raise ValueError("Invalid API key format. API key must be at least 10 characters long.")
+        elif cookie_path:
+            # Get API key from cookies
+            try:
+                self.api_key = self.get_demo_api_key(cookie_path)
+            except Exception as e:
+                raise exceptions.APIConnectionError(f"Failed to initialize Cerebras client: {e}")
+        else:
+            raise ValueError("Either api_key must be provided or cookie_path must be specified")
 
         # Initialize optimizers
         self.__available_optimizers = (
@@ -72,16 +99,16 @@ class Cerebras(Provider):
         # Initialize conversation settings
         Conversation.intro = (
             AwesomePrompts().get_act(
-                act, raise_not_found=True, default=
+                act, raise_not_found=True, default="You are a helpful assistant.", case_insensitive=True
             )
             if act
-            else
+            else "You are a helpful assistant."
         )
         self.conversation = Conversation(
             is_conversation, self.max_tokens_to_sample, filepath, update_file
         )
         self.conversation.history_offset = history_offset
-
+
         # Apply proxies to the session
         self.session.proxies = proxies
 
@@ -105,8 +132,10 @@ class Cerebras(Provider):
             return chunk.get("choices", [{}])[0].get("delta", {}).get("content")
         return None
 
-    def get_demo_api_key(self, cookie_path: str) -> str: # Keep this using requests or switch to curl_cffi
+    def get_demo_api_key(self, cookie_path: str = None) -> str: # Keep this using requests or switch to curl_cffi
         """Retrieves the demo API key using the provided cookie."""
+        if not cookie_path:
+            raise ValueError("cookie_path must be provided when using cookie-based authentication")
         try:
             with open(cookie_path, "r") as file:
                 cookies = {item["name"]: item["value"] for item in json.load(file)}
@@ -159,7 +188,10 @@ class Cerebras(Provider):
         payload = {
             "model": self.model,
             "messages": messages,
-            "stream": stream
+            "stream": stream,
+            "max_tokens": self.max_tokens_to_sample,
+            "temperature": self.temperature,
+            "top_p": self.top_p
         }
 
         try:
@@ -197,8 +229,26 @@ class Cerebras(Provider):
 
         except curl_cffi.CurlError as e:
             raise exceptions.APIConnectionError(f"Request failed (CurlError): {e}") from e
-        except Exception as e:
-
+        except Exception as e:
+            # Check if it's an HTTP error with status code
+            if hasattr(e, 'response') and hasattr(e.response, 'status_code'):
+                status_code = e.response.status_code
+                if status_code == 401:
+                    raise exceptions.APIConnectionError(
+                        "Authentication failed (401): Invalid API key. Please check your API key and try again."
+                    ) from e
+                elif status_code == 403:
+                    raise exceptions.APIConnectionError(
+                        "Access forbidden (403): Your API key may not have permission to access this resource."
+                    ) from e
+                elif status_code == 429:
+                    raise exceptions.APIConnectionError(
+                        "Rate limit exceeded (429): Too many requests. Please wait and try again."
+                    ) from e
+                else:
+                    raise exceptions.APIConnectionError(f"HTTP {status_code} error: {e}") from e
+            else:
+                raise exceptions.APIConnectionError(f"Request failed: {e}") from e
 
     def ask(
         self,
@@ -225,7 +275,7 @@ class Cerebras(Provider):
 
         try:
             response = self._make_request(messages, stream)
-
+
             if stream:
                 # Wrap the generator to yield dicts or raw strings
                 def stream_wrapper():
@@ -256,7 +306,7 @@ class Cerebras(Provider):
         """Chat with the model."""
         # Ask returns a generator for stream=True, dict/str for stream=False
         response_gen_or_dict = self.ask(prompt, stream, raw=False, optimizer=optimizer, conversationally=conversationally)
-
+
         if stream:
             # Wrap the generator from ask() to get message text
             def stream_wrapper():
@@ -276,14 +326,14 @@ class Cerebras(Provider):
 
 if __name__ == "__main__":
     from rich import print
-
+
     # Example usage
     cerebras = Cerebras(
-
-        model='
+        api_key='csk-**********************', # Replace with your actual API key
+        model='qwen-3-235b-a22b-instruct-2507',
         system_prompt="You are a helpful AI assistant."
     )
-
+
     # Test with streaming
    response = cerebras.chat("Hello!", stream=True)
     for chunk in response:
```
webscout/Provider/copilot.py
CHANGED

```diff
@@ -1,17 +1,21 @@
-import os
-import json
-import base64
 import asyncio
+import base64
+import json
+import os
+from typing import Any, Dict, Generator, Union
 from urllib.parse import quote
-from typing import Optional, Dict, Any, List, Union, Generator
 
-
+# Import trio before curl_cffi to prevent eventlet socket monkey-patching conflicts
+# See: https://github.com/python-trio/trio/issues/3015
+try:
+    import trio  # noqa: F401
+except ImportError:
+    pass  # trio is optional, ignore if not available
+from curl_cffi.requests import CurlWsFlag, Session
 
-from webscout.AIutel import Optimizers
-from webscout.AIutel import Conversation
-from webscout.AIutel import AwesomePrompts, sanitize_stream
-from webscout.AIbase import Provider, AsyncProvider
 from webscout import exceptions
+from webscout.AIbase import Provider
+from webscout.AIutel import AwesomePrompts, Conversation, Optimizers
 from webscout.litagent import LitAgent
 
 try:
@@ -41,12 +45,17 @@ class Copilot(Provider):
     """
     A class to interact with the Microsoft Copilot API.
     """
-
+
     label = "Microsoft Copilot"
     url = "https://copilot.microsoft.com"
     websocket_url = "wss://copilot.microsoft.com/c/api/chat?api-version=2"
     conversation_url = f"{url}/c/api/conversations"
-    AVAILABLE_MODELS = ["Copilot", "Think Deeper"]
+    AVAILABLE_MODELS = ["Copilot", "Think Deeper", "Smart"]
+    MODEL_ALIASES = {
+        "gpt-4o": "Copilot",
+        "o4-mini": "Think Deeper",
+        "gpt-5": "Smart",
+    }
     _access_token: str = None
     _cookies: dict = None
 
@@ -64,9 +73,12 @@ class Copilot(Provider):
         model: str = "Copilot"
     ):
         """Initializes the Copilot API client."""
-
+        # Map alias to real model name if needed
+        real_model = self.MODEL_ALIASES.get(model, model)
+        if real_model not in self.AVAILABLE_MODELS:
             raise ValueError(f"Invalid model: {model}. Choose from: {self.AVAILABLE_MODELS}")
-
+        self.model = real_model
+
         # Use LitAgent for user-agent
         self.headers = {
             'User-Agent': LitAgent().random(),
@@ -79,7 +91,7 @@ class Copilot(Provider):
             'Sec-Fetch-Mode': 'cors',
             'Sec-Fetch-Site': 'same-origin',
         }
-
+
         self.is_conversation = is_conversation
         self.max_tokens_to_sample = max_tokens
         self.timeout = timeout
@@ -253,6 +265,12 @@ class Copilot(Provider):
         # WebSocket connection
         wss = session.ws_connect(websocket_url)
         wss.send(json.dumps({"event": "setOptions", "supportedCards": ["weather", "local", "image", "sports", "video", "ads", "finance"], "ads": {"supportedTypes": ["multimedia", "product", "tourActivity", "propertyPromotion", "text"]}}))
+        if self.model == "Smart":
+            mode_value = "smart"
+        elif "Think" in self.model:
+            mode_value = "reasoning"
+        else:
+            mode_value = "chat"
         wss.send(json.dumps({
             "event": "send",
             "conversationId": conversation_id,
@@ -260,7 +278,8 @@ class Copilot(Provider):
                 "type": "text",
                 "text": conversation_prompt,
             }],
-            "mode":
+            "mode": mode_value,
+            "model": self.model
         }).encode(), CurlWsFlag.TEXT)
 
         # Event-driven response loop
@@ -307,8 +326,8 @@ class Copilot(Provider):
         **kwargs
     ) -> Union[str, Generator]:
         def for_stream():
-            for response in self.ask(prompt, True, optimizer=optimizer,
-                                     conversationally=conversationally,
+            for response in self.ask(prompt, True, optimizer=optimizer,
+                                     conversationally=conversationally,
                                      images=images, api_key=api_key, **kwargs):
                 if isinstance(response, dict):
                     if "text" in response:
@@ -320,13 +339,13 @@ class Copilot(Provider):
                         yield "\nSuggested follow-up questions:\n"
                         for suggestion in response["suggestions"]:
                             yield f"- {suggestion}\n"
-
+
         def for_non_stream():
-            response = self.ask(prompt, False, optimizer=optimizer,
+            response = self.ask(prompt, False, optimizer=optimizer,
                                 conversationally=conversationally,
                                 images=images, api_key=api_key, **kwargs)
             return self.get_message(response)
-
+
         return for_stream() if stream else for_non_stream()
 
     def get_message(self, response: dict) -> str:
@@ -379,7 +398,7 @@ def readHAR(url: str):
     for file in os.listdir(path):
         if file.endswith(".har"):
             har_files.append(os.path.join(path, file))
-
+
     for path in har_files:
         with open(path, 'rb') as file:
             try:
@@ -416,7 +435,7 @@ async def get_nodriver(proxy=None, user_data_dir=None):
 
 if __name__ == "__main__":
     from rich import print
-    ai = Copilot(timeout=900, model="
+    ai = Copilot(timeout=900, model="gpt-5")
     response = ai.chat(input("> "), stream=True)
     for chunk in response:
-        print(chunk, end="", flush=True)
+        print(chunk, end="", flush=True)
```
webscout/Provider/toolbaz.py
CHANGED
webscout/conversation.py
CHANGED

```diff
@@ -165,29 +165,19 @@ class Conversation:
         ))
 
     def _compress_history(self) -> None:
-        """
+        """Delete old history when it exceeds threshold."""
         if len(self.messages) > self.compression_threshold:
-            #
-
-
-
-                self.messages[-keep_recent:]
-            )
-
-    def _summarize_messages(self, messages: List[Message]) -> Message:
-        """Create a summary message from older messages."""
-        return Message(
-            role="system",
-            content="[History Summary] Previous conversation summarized for context",
-            metadata={"summarized_count": len(messages)}
-        )
+            # Remove oldest messages, keep only the most recent ones
+            self.messages = self.messages[-self.compression_threshold:]
+
+    # _summarize_messages removed
 
     def gen_complete_prompt(self, prompt: str, intro: Optional[str] = None) -> str:
         """Generate complete prompt with enhanced context management."""
         if not self.status:
             return prompt
 
-        intro = intro or self.intro
+        intro = intro or self.intro or ""
 
         # Add tool information if available
         tools_description = self.get_tools_description()
@@ -260,6 +250,7 @@ Your goal is to assist the user effectively. Analyze each query and choose one o
 
     def _trim_chat_history(self, chat_history: str, intro: str) -> str:
         """Trim chat history with improved token management."""
+        intro = intro or ""
         total_length = len(intro) + len(chat_history)
 
         if total_length > self.history_offset:
@@ -273,20 +264,31 @@ Your goal is to assist the user effectively. Analyze each query and choose one o
         return chat_history
 
     def add_message(self, role: str, content: str, metadata: Optional[Dict[str, Any]] = None) -> None:
-        """Add a message with enhanced validation and metadata support."""
+        """Add a message with enhanced validation and metadata support. Deletes oldest messages if total word count exceeds max_tokens_to_sample."""
         try:
             role = role.lower() # Normalize role to lowercase
             if not self.validate_message(role, content):
                 raise MessageValidationError("Invalid message role or content")
 
+            # Calculate total word count in history
+            def total_word_count(messages):
+                return sum(len(msg.content.split()) for msg in messages)
+
+            # Remove oldest messages until total word count is below limit
+            temp_messages = self.messages.copy()
+            while temp_messages and (total_word_count(temp_messages) + len(content.split()) > self.max_tokens_to_sample):
+                temp_messages.pop(0)
+
+            self.messages = temp_messages
+
             message = Message(role=role, content=content, metadata=metadata or {})
             self.messages.append(message)
-
+
             if self.file and self.update_file:
                 self._append_to_file(message)
-
+
             self._compress_history()
-
+
         except Exception as e:
             raise ConversationError(f"Failed to add message: {str(e)}") from e
 
```
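
History management is now purely size-based: the old summarization stub is gone, `_compress_history` simply keeps the most recent `compression_threshold` messages, and `add_message` pre-trims the history by word count against `max_tokens_to_sample` before appending. The trimming loop, extracted as a standalone sketch — `total_word_count` mirrors the diff, while `Message` and `trim_history` are hypothetical stand-ins, and word count is only a rough token proxy:

```python
from dataclasses import dataclass

@dataclass
class Message:
    # Minimal stand-in for webscout's Message; only .content matters here.
    role: str
    content: str

def total_word_count(messages) -> int:
    # Whitespace-delimited words across all messages — a rough token proxy.
    return sum(len(msg.content.split()) for msg in messages)

def trim_history(messages: list, new_content: str, max_tokens_to_sample: int) -> list:
    # Hypothetical wrapper around the loop the diff inlines into add_message:
    # drop the oldest messages until the incoming message fits under the cap.
    trimmed = messages.copy()
    while trimmed and total_word_count(trimmed) + len(new_content.split()) > max_tokens_to_sample:
        trimmed.pop(0)
    return trimmed

# e.g. with a cap of 5 words, the oldest message is dropped first:
history = [Message("user", "one two three"), Message("assistant", "four five")]
print(trim_history(history, "six seven", 5))  # keeps only the most recent message
```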
webscout/sanitize.py
CHANGED

```diff
@@ -143,7 +143,7 @@ def _process_chunk(
     if to_json:
         try:
             # Only strip before JSON parsing if both boundaries are incorrect
-            if sanitized_chunk[0] not in '{[' and sanitized_chunk[-1] not in '}]':
+            if len(sanitized_chunk) >= 2 and sanitized_chunk[0] not in '{[' and sanitized_chunk[-1] not in '}]':
                 sanitized_chunk = sanitized_chunk.strip()
             return json.loads(sanitized_chunk)
         except (json.JSONDecodeError, Exception) as e:
@@ -646,13 +646,14 @@ async def _sanitize_stream_async(
             f"Stream must yield strings or bytes, not {type(first_item).__name__}"
         )
 
-
-
-
-
-
-
-
+    try:
+        async for line in line_iterator:
+            if not line:
+                continue
+            buffer += line
+            while True:
+                if not found_start and start_marker:
+                    idx = buffer.find(start_marker)
                     if idx != -1:
                         found_start = True
                         buffer = buffer[idx + len(start_marker) :]
@@ -735,6 +736,9 @@ async def _sanitize_stream_async(
                     break
                 else:
                     break
+    except Exception as e:
+        import sys
+        print(f"Async stream processing error: {str(e)}", file=sys.stderr)
 
 
 def sanitize_stream(
@@ -937,7 +941,7 @@ def sanitize_stream(
         payload, intro_value, to_json, skip_markers, strip_chars,
         start_marker, end_marker, content_extractor, yield_raw_on_error,
         encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
-        skip_regexes, extract_regexes,
+        skip_regexes, extract_regexes, raw,
     )
 
     # Handle async iterables
@@ -966,6 +970,7 @@ def sanitize_stream(
 
 # --- Decorator version of sanitize_stream ---
 import functools
+import asyncio
 from typing import overload
 
 def _sanitize_stream_decorator(
@@ -1057,7 +1062,6 @@ sanitize_stream_decorator = _sanitize_stream_decorator
 lit_streamer = _sanitize_stream_decorator
 
 # Allow @sanitize_stream and @lit_streamer as decorators
-import asyncio
 sanitize_stream.__decorator__ = _sanitize_stream_decorator
 LITSTREAM.__decorator__ = _sanitize_stream_decorator
 lit_streamer.__decorator__ = _sanitize_stream_decorator
```
webscout/scout/README.md
CHANGED

````diff
@@ -1,27 +1,24 @@
-
+**🚀 The Most Advanced HTML Parser & Web Crawler for AI/LLM Data Collection**
 
-
-
-[](https://www.python.org/)
-[](https://opensource.org/licenses/MIT)
-[](https://github.com/OE-LUCIFER/Webscout)
-[](https://github.com/OE-LUCIFER/Webscout/wiki)
-[](https://github.com/OE-LUCIFER/Webscout/pulls)
+**🌟 Built for the Future • Powered by Intelligence • Trusted by Developers**
 
-</div>
 
 ## 📋 Overview
 
-Scout is
+Scout is an ultra-powerful, enterprise-grade HTML parsing and web crawling library designed for the AI era. Built with LLM data collection in mind, Scout provides unparalleled capabilities for extracting, analyzing, and processing web content at scale. With its BeautifulSoup-compatible API enhanced with modern features, Scout is the go-to solution for serious web scraping projects.
 
 <details open>
-<summary><b
-
--
--
--
--
--
+<summary><b>🌟 Why Scout is the Ultimate Choice</b></summary>
+
+- **🧠 LLM-Optimized Crawling**: Purpose-built for collecting high-quality training data for Large Language Models
+- **🌐 Subdomain Intelligence**: Automatically discovers and crawls subdomains (e.g., blog.example.com, docs.example.com)
+- **⚡ Lightning-Fast Performance**: Multi-threaded concurrent crawling with intelligent rate limiting
+- **🎯 Surgical Precision**: Advanced content extraction that preserves structure while removing noise
+- **🔍 Deep Analysis**: Built-in NLP capabilities for entity extraction, text analysis, and semantic understanding
+- **🛡️ Enterprise-Ready**: Robust error handling, retry mechanisms, and respect for robots.txt
+- **📊 Rich Data Extraction**: Captures metadata, structured data, semantic content, and more
+- **🔄 Format Flexibility**: Export to JSON, Markdown, CSV, or custom formats
+- **🎨 BeautifulSoup++ API**: Familiar interface with 10x more features
 
 </details>
 
@@ -46,7 +43,7 @@ pip install webscout
 Or install the latest version from GitHub:
 
 ```bash
-pip install git+https://github.com/
+pip install git+https://github.com/OEvortex/Webscout.git
 ```
 
 ## 🚀 Quick Start
@@ -361,7 +358,7 @@ cached_data = scout.cache('parsed_data')
 - `_crawl_page(url, depth=0)`: Crawl a single page (internal method)
 - `_is_valid_url(url)`: Check if a URL is valid (internal method)
 
-For detailed API documentation, please refer to the [documentation](https://github.com/
+For detailed API documentation, please refer to the [documentation](https://github.com/OEvortex/Webscout/wiki).
 
 ## 🔧 Dependencies
 
@@ -396,9 +393,9 @@ This project is licensed under the MIT License - see the LICENSE file for detail
 <div align="center">
   <p>Made with ❤️ by the Webscout team</p>
   <p>
-    <a href="https://github.com/
-    <a href="https://github.com/
-    <a href="https://github.com/
-    <a href="https://github.com/
+    <a href="https://github.com/OEvortex/Webscout">GitHub</a> •
+    <a href="https://github.com/OEvortex/Webscout/wiki">Documentation</a> •
+    <a href="https://github.com/OEvortex/Webscout/issues">Report Bug</a> •
+    <a href="https://github.com/OEvortex/Webscout/issues">Request Feature</a>
   </p>
 </div>
````