PyPI - webscout - Versions diffs - 5.1__py3-none-any.whl → 5.2__py3-none-any.whl - Mend

webscout 5.1__py3-none-any.whl → 5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of webscout might be problematic. Click here for more details.

Files changed (30)

webscout/AIauto.py +83 -277
webscout/AIbase.py +106 -4
webscout/AIutel.py +31 -0
webscout/Agents/Onlinesearcher.py +91 -104
webscout/Agents/__init__.py +2 -1
webscout/Agents/ai.py +186 -0
webscout/Agents/functioncall.py +57 -27
webscout/Bing_search.py +73 -43
webscout/Local/_version.py +1 -1
webscout/Provider/AI21.py +177 -0
webscout/Provider/Cloudflare.py +0 -4
webscout/Provider/EDITEE.py +215 -0
webscout/Provider/NetFly.py +256 -0
webscout/Provider/TTI/PollinationsAI.py +138 -0
webscout/Provider/TTI/__init__.py +2 -0
webscout/Provider/TTI/deepinfra.py +148 -0
webscout/Provider/TTS/__init__.py +2 -0
webscout/Provider/TTS/streamElements.py +296 -0
webscout/Provider/TTS/voicepod.py +114 -0
webscout/Provider/TeachAnything.py +177 -0
webscout/Provider/__init__.py +8 -0
webscout/__init__.py +2 -0
webscout/version.py +1 -1
{webscout-5.1.dist-info → webscout-5.2.dist-info}/METADATA +32 -12
{webscout-5.1.dist-info → webscout-5.2.dist-info}/RECORD +29 -19
webscout/async_providers.py +0 -21
{webscout-5.1.dist-info → webscout-5.2.dist-info}/LICENSE.md +0 -0
{webscout-5.1.dist-info → webscout-5.2.dist-info}/WHEEL +0 -0
{webscout-5.1.dist-info → webscout-5.2.dist-info}/entry_points.txt +0 -0
{webscout-5.1.dist-info → webscout-5.2.dist-info}/top_level.txt +0 -0

webscout/Agents/Onlinesearcher.py CHANGED Viewed

@@ -1,100 +1,81 @@
 import json
+import colorlog
 from webscout import WEBS
+from webscout.Agents.ai import LLAMA3
 import httpx
 from bs4 import BeautifulSoup
 from typing import List, Dict
-class DeepInfra:
-    def __init__(
-        self,
-        model: str = "meta-llama/Meta-Llama-3.1-70B-Instruct",
-        max_tokens: int = 8000,
-        timeout: int = 120,
-        system_prompt: str = "You are a helpful AI assistant.",
-        proxies: dict = {}
-    ):
-        self.model = model
-        self.max_tokens = max_tokens
-        self.timeout = timeout
-        self.system_prompt = system_prompt
-        self.chat_endpoint = "https://api.deepinfra.com/v1/openai/chat/completions"
-        self.headers = {
-            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
-            'Accept-Language': 'en,fr-FR;q=0.9,fr;q=0.8,es-ES;q=0.7,es;q=0.6,en-US;q=0.5,am;q=0.4,de;q=0.3',
-            'Cache-Control': 'no-cache',
-            'Connection': 'keep-alive',
-            'Content-Type': 'application/json',
-            'Origin': 'https://deepinfra.com',
-            'Pragma': 'no-cache',
-            'Referer': 'https://deepinfra.com/',
-            'Sec-Fetch-Dest': 'empty',
-            'Sec-Fetch-Mode': 'cors',
-            'Sec-Fetch-Site': 'same-site',
-            'X-Deepinfra-Source': 'web-embed',
-            'accept': 'text/event-stream',
-            'sec-ch-ua': '"Google Chrome";v="119", "Chromium";v="119", "Not?A_Brand";v="24"',
-            'sec-ch-ua-mobile': '?0',
-            'sec-ch-ua-platform': '"macOS"'
-        }
-        self.client = httpx.Client(proxies=proxies, headers=self.headers)
-    def ask(self, prompt: str, system_prompt: str = None) -> str:
-        payload = {
-            'model': self.model,
-            'messages': [
-                {"role": "system", "content": system_prompt or self.system_prompt},
-                {"role": "user", "content": prompt},
-            ],
-            'temperature': 0.7,
-            'max_tokens': self.max_tokens,
-            'stop': []
-        }
-        response = self.client.post(self.chat_endpoint, json=payload, timeout=self.timeout)
-        if response.status_code != 200:
-            raise Exception(f"Failed to generate response - ({response.status_code}, {response.reason_phrase}) - {response.text}")
-        resp = response.json()
-        return resp["choices"][0]["message"]["content"]
+import logging
 class WebSearchAgent:
-    def __init__(self, model="Qwen/Qwen2-72B-Instruct"):
+    def __init__(self):
         self.webs = WEBS()
-        self.deepinfra = DeepInfra(model=model)
+        self.ai = LLAMA3(system="You are an advanced AI assistant specialized in generating optimal search queries and providing comprehensive answers based on web search results.")
-    def generate_search_query(self, information):
+    def generate_search_queries(self, information, num_queries=3):
         prompt = f"""
+        Task: Generate {num_queries} optimal search queries based on the given information.
         Instructions:
-        You are a smart online searcher for a large language model.
-        Given information, you must create a search query to search the internet for relevant information.
-        Your search query must be in the form of a json response.
-        Exact json response format must be as follows:
+        1. Analyze the provided information carefully.
+        2. Identify key concepts, entities, and relationships.
+        3. Formulate {num_queries} concise and specific search queries that will yield relevant results.
+        4. Each query should focus on a different aspect or angle of the information.
+        5. The queries should be in natural language, not in the form of keywords.
+        6. Avoid unnecessary words or phrases that might limit the search results.
+        Your response must be in the following JSON format:
         {{
-            "search_query": "your search query"
+            "search_queries": [
+                "Your first search query here",
+                "Your second search query here",
+                "Your third search query here"
+            ]
         }}
-        - You must only provide ONE search query
-        - You must provide the BEST search query for the given information
-        - The search query must be normal text.
-        Information: {information}
+        Ensure that:
+        - You provide exactly {num_queries} search queries.
+        - Each query is unique and focuses on a different aspect of the information.
+        - The queries are in plain text, suitable for a web search engine.
+        Information to base the search queries on:
+        {information}
+        Now, generate the optimal search queries:
         """
-        response = self.deepinfra.ask(prompt)
-        return json.loads(response)["search_query"]
+        response = ""
+        for chunk in self.ai.chat(prompt):
+            response += chunk
+        try:
+            json_response = json.loads(response)
+            return json_response["search_queries"]
+        except json.JSONDecodeError:
+            print(f"Warning: Failed to parse JSON. Raw response: {response}")
+            # Fallback: try to extract queries manually
+            queries = []
+            for line in response.split('\n'):
+                if line.strip().startswith('"') and line.strip().endswith('"'):
+                    queries.append(line.strip(' "'))
+            if queries:
+                return queries[:num_queries]
+            else:
+                print(f"Warning: Using original information as search query.")
+                return [information]
     def search(self, information, region='wt-wt', safesearch='off', timelimit='y', max_results=5):
-        search_query = self.generate_search_query(information)
+        search_queries = self.generate_search_queries(information)
+        all_results = []
-        results = []
-        with self.webs as webs:
-            for result in webs.text(search_query, region=region, safesearch=safesearch, timelimit=timelimit, max_results=max_results):
-                results.append(result)
+        for query in search_queries:
+            results = []
+            with self.webs as webs:
+                for result in webs.text(query, region=region, safesearch=safesearch, timelimit=timelimit, max_results=max_results):
+                    results.append(result)
+            all_results.extend(results)
-        return results
+        return all_results
     def extract_urls(self, results):
         urls = []
@@ -102,7 +83,7 @@ class WebSearchAgent:
             url = result.get('href')
             if url:
                 urls.append(url)
-        return list(set(urls))  # Remove duplicates
+        return list(set(urls))
     def fetch_webpage(self, url: str) -> str:
         try:
@@ -110,16 +91,11 @@ class WebSearchAgent:
             if response.status_code == 200:
                 html = response.text
                 soup = BeautifulSoup(html, 'html.parser')
-                # Extract text from <p> tags
                 paragraphs = soup.find_all('p')
                 text = ' '.join([p.get_text() for p in paragraphs])
-                # Limit the text to around 4000 words
                 words = text.split()
-                if len(words) > 4000:
-                    text = ' '.join(words[:4000]) + '...'
+                if len(words) > 150:
+                    text = ' '.join(words[:150]) + '...'
                 return text
             else:
                 return f"Failed to fetch {url}: HTTP {response.status}"
@@ -134,34 +110,44 @@ class WebSearchAgent:
         return contents
 class OnlineSearcher:
-    def __init__(self, model="meta-llama/Meta-Llama-3.1-405B-Instruct"):
-        self.agent = WebSearchAgent(model)
-        self.deepinfra = DeepInfra(model="model")
+    def __init__(self):
+        self.agent = WebSearchAgent()
+        self.ai = LLAMA3(system="You are an advanced AI assistant specialized in providing comprehensive and accurate answers based on web search results and your general knowledge.")
-    def answer_question(self, question: str) -> str:
-        # Perform web search
+    def answer_question(self, question: str):
         search_results = self.agent.search(question)
-        # Extract URLs
         urls = self.agent.extract_urls(search_results)
-        # Fetch webpage contents
         webpage_contents = self.agent.fetch_all_webpages(urls)
-        # Prepare context for AI
-        context = "Based on the following search results and webpage contents:\n\n"
+        context = "Web search results and extracted content:\n\n"
         for i, result in enumerate(search_results, 1):
             context += f"{i}. Title: {result['title']}\n   URL: {result['href']}\n   Snippet: {result['body']}\n\n"
         context += "Extracted webpage contents:\n"
         for i, webpage in enumerate(webpage_contents):
-            context += f"{i}. URL: {webpage['url']}\n   Content: {webpage['content'][:4000]}...\n\n"
+            context += f"{i}. URL: {webpage['url']}\n   Content: {webpage['content'][:150]}...\n\n"
+        prompt = f"""
+        Task: Provide a comprehensive and accurate answer to the given question based on the provided web search results and your general knowledge.
-        # Generate answer using AI
-        prompt = f"{context}\n\nQuestion: {question}\n\nPlease provide a comprehensive answer to the question based on the search results and webpage contents above. Include relevant webpage URLs in your answer when appropriate. If the search results and webpage contents don't contain relevant information, please state that and provide the best answer you can based on your general knowledge. [YOUR RESPONSE WITH SOURCE LINKS ([➊](URL))"
+        Question: {question}
+        Context:
+        {context}
+        Instructions:
+        1. Carefully analyze the provided web search results and extracted content.
+        2. Synthesize the information to form a coherent and comprehensive answer.
+        3. If the search results contain relevant information, incorporate it into your answer.
+        4. Don't provide irrelevant information, and don't say "according to web page".
+        5. If the search results don't contain sufficient information, clearly state this and provide the best answer based on your general knowledge.
+        6. Ensure your answer is well-structured, factual, and directly addresses the question.
+        7. If appropriate, provide additional context or related information that might be helpful.
+        Your response should be informative, accurate, and properly sourced when possible. Begin your answer now:
+        """
-        answer = self.deepinfra.ask(prompt)
-        return answer
+        return self.ai.chat(prompt)
 # Usage example
 if __name__ == "__main__":
@@ -170,6 +156,7 @@ if __name__ == "__main__":
         question = input(">>> ")
         if question.lower() == 'quit':
             break
-        answer = assistant.answer_question(question)
-        print(answer)
-        print("\n" + "-"*50 + "\n")
+        print("\n" + "="*50)
+        for chunk in assistant.answer_question(question):
+            print(chunk, end="", flush=True)
+        print("\n" + "="*50)

webscout/Agents/__init__.py CHANGED Viewed

@@ -1,2 +1,3 @@
 from .Onlinesearcher import *
-from .functioncall import *
+from .functioncall import *
+from .ai import *

webscout/Agents/ai.py ADDED Viewed

@@ -0,0 +1,186 @@
+import requests
+import json
+from webscout.AIutel import Optimizers
+from webscout.AIutel import Conversation
+from webscout.AIutel import AwesomePrompts
+from webscout.AIbase import Provider
+class LLAMA3(Provider):
+    AVAILABLE_MODELS = ["llama3-70b", "llama3-8b", "llama3-405b"]
+    def __init__(
+        self,
+        is_conversation: bool = True,
+        max_tokens: int = 600,
+        timeout: int = 30,
+        intro: str = None,
+        filepath: str = None,
+        update_file: bool = True,
+        proxies: dict = {},
+        history_offset: int = 10250,
+        act: str = None,
+        model: str = "llama3-8b",
+        system: str = "GPT syle",
+    ):
+        """Instantiates Snova
+        Args:
+            is_conversation (bool, optional): Flag for chatting conversationally. Defaults to True.
+            max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600.
+            timeout (int, optional): Http request timeout. Defaults to 30.
+            intro (str, optional): Conversation introductory prompt. Defaults to None.
+            filepath (str, optional): Path to file containing conversation history. Defaults to None.
+            update_file (bool, optional): Add new prompts and responses to the file. Defaults to True.
+            proxies (dict, optional): Http request proxies. Defaults to {}.
+            history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250.
+            act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None.
+            model (str, optional): Snova model name. Defaults to "llama3-70b".
+            system (str, optional): System prompt for Snova. Defaults to "Answer as concisely as possible.".
+        """
+        if model not in self.AVAILABLE_MODELS:
+            raise ValueError(f"Invalid model: {model}. Choose from: {self.AVAILABLE_MODELS}")
+        self.session = requests.Session()
+        self.is_conversation = is_conversation
+        self.max_tokens_to_sample = max_tokens
+        self.timeout = timeout
+        self.model = model
+        self.system = system
+        self.last_response = {}
+        self.env_type = "tp16405b" if "405b" in model else "tp16"
+        self.headers = {'content-type': 'application/json'}
+        self.__available_optimizers = (
+            method
+            for method in dir(Optimizers)
+            if callable(getattr(Optimizers, method)) and not method.startswith("__")
+        )
+        self.session.headers.update(self.headers)
+        Conversation.intro = (
+            AwesomePrompts().get_act(
+                act, raise_not_found=True, default=None, case_insensitive=True
+            )
+            if act
+            else intro or Conversation.intro
+        )
+        self.conversation = Conversation(
+            is_conversation, self.max_tokens_to_sample, filepath, update_file
+        )
+        self.conversation.history_offset = history_offset
+        self.session.proxies = proxies
+    def ask(
+        self,
+        prompt: str,
+        stream: bool = False,
+        raw: bool = False,
+        optimizer: str = None,
+        conversationally: bool = False,
+    ) -> dict:
+        """Chat with AI
+        Args:
+            prompt (str): Prompt to be send.
+            stream (bool, optional): Flag for streaming response. Defaults to False.
+            raw (bool, optional): Stream back raw response as received. Defaults to False.
+            optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None.
+            conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False.
+        Returns:
+           dict : {}
+        ```json
+        {
+           "text" : "How may I assist you today?"
+        }
+        ```
+        """
+        conversation_prompt = self.conversation.gen_complete_prompt(prompt)
+        if optimizer:
+            if optimizer in self.__available_optimizers:
+                conversation_prompt = getattr(Optimizers, optimizer)(
+                    conversation_prompt if conversationally else prompt
+                )
+            else:
+                raise Exception(
+                    f"Optimizer is not one of {self.__available_optimizers}"
+                )
+        data = {'body': {'messages': [{'role': 'system', 'content': self.system}, {'role': 'user', 'content': conversation_prompt}], 'stream': True, 'model': self.model}, 'env_type': self.env_type}
+        def for_stream(data=data):  # Pass data as a default argument
+            response = self.session.post('https://fast.snova.ai/api/completion', headers=self.headers, json=data, stream=True, timeout=self.timeout)
+            output = ''
+            for line in response.iter_lines(decode_unicode=True):
+                if line.startswith('data:'):
+                    try:
+                        data = json.loads(line[len('data: '):])
+                        output += data.get("choices", [{}])[0].get("delta", {}).get("content", '')
+                        self.last_response.update(dict(text=output))
+                        yield data if raw else dict(text=output)
+                    except json.JSONDecodeError:
+                        if line[len('data: '):] == '[DONE]':
+                            break
+            self.conversation.update_chat_history(
+                prompt, self.get_message(self.last_response)
+            )
+        def for_non_stream():
+            for _ in for_stream():
+                pass
+            return self.last_response
+        return for_stream() if stream else for_non_stream()
+    def chat(
+        self,
+        prompt: str,
+        stream: bool = False,
+        optimizer: str = None,
+        conversationally: bool = False,
+    ) -> str:
+        """Generate response `str`
+        Args:
+            prompt (str): Prompt to be send.
+            stream (bool, optional): Flag for streaming response. Defaults to False.
+            optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None.
+            conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False.
+        Returns:
+            str: Response generated
+        """
+        def for_stream():
+            for response in self.ask(
+                prompt, True, optimizer=optimizer, conversationally=conversationally
+            ):
+                yield self.get_message(response)
+        def for_non_stream():
+            return self.get_message(
+                self.ask(
+                    prompt,
+                    False,
+                    optimizer=optimizer,
+                    conversationally=conversationally,
+                )
+            )
+        return for_stream() if stream else for_non_stream()
+    def get_message(self, response: dict) -> str:
+        """Retrieves message only from response
+        Args:
+            response (dict): Response generated by `self.ask`
+        Returns:
+            str: Message extracted
+        """
+        assert isinstance(response, dict), "Response should be of dict data-type only"
+        return response["text"]
+if __name__ == "__main__":
+    from rich import print
+    ai = LLAMA3()
+    # Stream the response
+    response = ai.chat(input(">>> "))
+    for chunk in response:
+        print(chunk, end="", flush=True)

webscout/Agents/functioncall.py CHANGED Viewed

@@ -1,6 +1,62 @@
 import json
 import logging
-from webscout import LLAMA3, WEBS
+import time
+from typing import Any, Dict, Optional
+import requests
+from webscout import WEBS  # Import only WEBS from webscout
+class LLAMA3:
+    AVAILABLE_MODELS = ["llama3-70b", "llama3-8b", "llama3-405b"]
+    def __init__(
+        self,
+        is_conversation: bool = True,
+        max_tokens: int = 600,
+        timeout: int = 30,
+        model: str = "llama3-8b",
+        system: str = "GPT syle",
+        proxies: dict = {},  # Add proxies parameter
+    ):
+        """Instantiates Snova
+        Args:
+            is_conversation (bool, optional): Flag for chatting conversationally. Defaults to True.
+            max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600.
+            timeout (int, optional): Http request timeout. Defaults to 30.
+            model (str, optional): Snova model name. Defaults to "llama3-70b".
+            system (str, optional): System prompt for Snova. Defaults to "Answer as concisely as possible.".
+            proxies (dict, optional): Proxy settings for requests. Defaults to an empty dictionary.
+        """
+        if model not in self.AVAILABLE_MODELS:
+            raise ValueError(f"Invalid model: {model}. Choose from: {self.AVAILABLE_MODELS}")
+        self.session = requests.Session()
+        self.is_conversation = is_conversation
+        self.max_tokens_to_sample = max_tokens
+        self.timeout = timeout
+        self.model = model
+        self.system = system
+        self.last_response = {}
+        self.env_type = "tp16405b" if "405b" in model else "tp16"
+        self.headers = {'content-type': 'application/json'}
+        self.session.headers.update(self.headers)
+        self.session.proxies = proxies
+    def chat(self, prompt: str) -> str:
+        data = {'body': {'messages': [{'role': 'system', 'content': self.system}, {'role': 'user', 'content': prompt}], 'stream': True, 'model': self.model}, 'env_type': self.env_type}
+        response = self.session.post('https://fast.snova.ai/api/completion', headers=self.headers, json=data, stream=True, timeout=self.timeout)
+        output = ''
+        for line in response.iter_lines(decode_unicode=True):
+            if line.startswith('data:'):
+                try:
+                    data = json.loads(line[len('data: '):])
+                    output += data.get("choices", [{}])[0].get("delta", {}).get("content", '')
+                except json.JSONDecodeError:
+                    if line[len('data: '):] == '[DONE]':
+                        break
+        return output
 class FunctionCallingAgent:
     def __init__(self, model: str = "llama3-8b",
@@ -66,32 +122,6 @@ class FunctionCallingAgent:
         logging.info(f"Executing function: {function_name} with arguments: {arguments}")
-    #     if function_name == "web_search":
-    #         return self._handle_web_search(arguments)
-    #     elif function_name == "general_ai":
-    #         return self._handle_general_ai(arguments)
-    #     else:
-    #         return f"Function '{function_name}' is not implemented."
-    # def _handle_web_search(self, arguments: dict) -> str:
-    #     query = arguments.get("query")
-    #     if not query:
-    #         return "Please provide a search query."
-    #     search_results = self.webs.text(query, max_results=3)
-    #     formatted_results = "\n\n".join(
-    #         f"{i+1}. {result['title']}\n{result['body']}\nURL: {result['href']}"
-    #         for i, result in enumerate(search_results)
-    #     )
-    #     return f"Here's what I found:\n\n{formatted_results}"
-    # def _handle_general_ai(self, arguments: dict) -> str:
-    #     question = arguments.get("question")
-    #     if not question:
-    #         return "Please provide a question for the AI to answer."
-    #     response = self.LLAMA3.chat(question)
-    #     return response
 # Example usage
 if __name__ == "__main__":

webscout 5.1__py3-none-any.whl → 5.2__py3-none-any.whl

Potentially problematic release.

webscout 5.1__py3-none-any.whl → 5.2__py3-none-any.whl