webscout 1.2.2__tar.gz → 1.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of webscout might be problematic.
- webscout-1.2.4/DeepWEBS/documents/query_results_extractor.py +99 -0
- webscout-1.2.4/DeepWEBS/networks/google_searcher.py +52 -0
- webscout-1.2.4/DeepWEBS/networks/webpage_fetcher.py +97 -0
- {webscout-1.2.2 → webscout-1.2.4}/PKG-INFO +57 -10
- {webscout-1.2.2 → webscout-1.2.4}/README.md +54 -9
- {webscout-1.2.2 → webscout-1.2.4}/setup.py +3 -1
- {webscout-1.2.2 → webscout-1.2.4}/webscout/DWEBS.py +197 -179
- {webscout-1.2.2 → webscout-1.2.4}/webscout/__init__.py +1 -0
- webscout-1.2.4/webscout/offlineAI.py +206 -0
- webscout-1.2.4/webscout/version.py +2 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout.egg-info/PKG-INFO +57 -10
- {webscout-1.2.2 → webscout-1.2.4}/webscout.egg-info/SOURCES.txt +1 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout.egg-info/requires.txt +2 -0
- webscout-1.2.2/DeepWEBS/documents/query_results_extractor.py +0 -78
- webscout-1.2.2/DeepWEBS/networks/google_searcher.py +0 -48
- webscout-1.2.2/DeepWEBS/networks/webpage_fetcher.py +0 -107
- webscout-1.2.2/webscout/version.py +0 -2
- {webscout-1.2.2 → webscout-1.2.4}/DeepWEBS/__init__.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/DeepWEBS/documents/__init__.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/DeepWEBS/documents/webpage_content_extractor.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/DeepWEBS/networks/__init__.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/DeepWEBS/networks/filepath_converter.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/DeepWEBS/networks/network_configs.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/DeepWEBS/utilsdw/__init__.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/DeepWEBS/utilsdw/enver.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/DeepWEBS/utilsdw/logger.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/LICENSE.md +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/setup.cfg +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout/AI.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout/AIbase.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout/AIutel.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout/HelpingAI.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout/LLM.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout/__main__.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout/cli.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout/exceptions.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout/models.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout/utils.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout/webscout_search.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout/webscout_search_async.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout.egg-info/dependency_links.txt +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout.egg-info/entry_points.txt +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout.egg-info/top_level.txt +0 -0
webscout-1.2.4/DeepWEBS/documents/query_results_extractor.py
@@ -0,0 +1,99 @@
+from bs4 import BeautifulSoup
+from pathlib import Path
+from DeepWEBS.utilsdw.logger import logger
+
+class QueryResultsExtractor:
+    def __init__(self) -> None:
+        self.query_results = []
+        self.related_questions = []
+
+    def load_html(self, html_path):
+        try:
+            with open(html_path, "r", encoding="utf-8") as f:
+                html = f.read()
+            self.soup = BeautifulSoup(html, "html.parser")
+        except FileNotFoundError:
+            logger.error(f"File not found: {html_path}")
+        except Exception as e:
+            logger.error(f"Error loading HTML: {e}")
+
+    def extract_query_results(self):
+        try:
+            self.query = self.soup.find("textarea").text.strip()
+            query_result_elements = self.soup.find_all("div", class_="g")
+            for idx, result in enumerate(query_result_elements):
+                try:
+                    site = result.find("cite").find_previous("span").text.strip()
+                    url = result.find("a")["href"]
+                    title = result.find("h3").text.strip()
+                    abstract_element_conditions = [
+                        {"data-sncf": "1"},
+                        {"class_": "ITZIwc"},
+                    ]
+                    for condition in abstract_element_conditions:
+                        abstract_element = result.find("div", condition)
+                        if abstract_element is not None:
+                            abstract = abstract_element.text.strip()
+                            break
+                    else:
+                        abstract = ""
+                    logger.mesg(
+                        f"{title}\n"
+                        f" - {site}\n"
+                        f" - {url}\n"
+                        f" - {abstract}\n"
+                        f"\n"
+                    )
+                    self.query_results.append(
+                        {
+                            "title": title,
+                            "site": site,
+                            "url": url,
+                            "abstract": abstract,
+                            "index": idx,
+                            "type": "web",
+                        }
+                    )
+                except Exception as e:
+                    logger.error(f"Error extracting query result: {e}")
+            logger.success(f"- {len(query_result_elements)} query results")
+        except Exception as e:
+            logger.error(f"Error extracting query results: {e}")
+
+    def extract_related_questions(self):
+        try:
+            related_question_elements = self.soup.find_all(
+                "div", class_="related-question-pair"
+            )
+            for question_element in related_question_elements:
+                try:
+                    question = question_element.find("span").text.strip()
+                    print(question)
+                    self.related_questions.append(question)
+                except Exception as e:
+                    logger.error(f"Error extracting related question: {e}")
+            logger.success(f"- {len(self.related_questions)} related questions")
+        except Exception as e:
+            logger.error(f"Error extracting related questions: {e}")
+
+    def extract(self, html_path):
+        self.load_html(html_path)
+        self.extract_query_results()
+        self.extract_related_questions()
+        self.search_results = {
+            "query": self.query,
+            "query_results": self.query_results,
+            "related_questions": self.related_questions,
+        }
+        return self.search_results
+
+
+if __name__ == "__main__":
+    html_path_root = Path(__file__).parents[1] / "files"
+    html_filename = "python_tutorials"
+    html_path = html_path_root / f"{html_filename}.html"
+    extractor = QueryResultsExtractor()
+    try:
+        extractor.extract(html_path)
+    except Exception as e:
+        logger.error(f"Error in main function: {e}")
webscout-1.2.4/DeepWEBS/networks/google_searcher.py
@@ -0,0 +1,52 @@
+import requests
+from pathlib import Path
+from typing import Optional
+import random
+from DeepWEBS.utilsdw.enver import enver
+from DeepWEBS.utilsdw.logger import logger
+from DeepWEBS.networks.filepath_converter import QueryToFilepathConverter
+from DeepWEBS.networks.network_configs import REQUESTS_HEADERS
+
+class GoogleSearcher:
+    def __init__(self):
+        self.url = "https://www.google.com/search"
+        self.enver = enver
+        self.enver.set_envs(proxies=True)
+        self.filepath_converter = QueryToFilepathConverter()
+
+    def send_request(self, query: str, result_num: int = 10, safe: bool = False) -> requests.Response:
+        params = {
+            "q": query,
+            "num": result_num,
+        }
+        response = requests.get(
+            self.url,
+            headers=REQUESTS_HEADERS,
+            params=params,
+            proxies=self.enver.requests_proxies,
+        )
+        response.raise_for_status()  # Raise an exception for non-2xx status codes
+        return response
+
+    def save_response(self, response: requests.Response, html_path: Path) -> None:
+        html_path.parent.mkdir(parents=True, exist_ok=True)
+        logger.note(f"Saving to: [{html_path}]")
+        with html_path.open("wb") as wf:
+            wf.write(response.content)
+
+    def search(self, query: str, result_num: int = 10, safe: bool = False, overwrite: bool = False) -> Path:
+        html_path = self.filepath_converter.convert(query)
+        logger.note(f"Searching: [{query}]")
+
+        if html_path.exists() and not overwrite:
+            logger.success(f"HTML existed: {html_path}")
+        else:
+            response = self.send_request(query, result_num, safe)
+            self.save_response(response, html_path)
+
+        return html_path
+
+if __name__ == "__main__":
+    searcher = GoogleSearcher()
+    html_path = searcher.search("python tutorials")
+    print(f"HTML file saved at: {html_path}")
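Taken together, the two new modules above form a small pipeline: `GoogleSearcher.search()` downloads a results page and returns the path of the saved HTML file, and `QueryResultsExtractor.extract()` parses that file into a dict of query results and related questions. A minimal sketch of how they could be chained (not part of the diff; the import paths are assumed from the package layout shown in the file list):

```python
from DeepWEBS.networks.google_searcher import GoogleSearcher
from DeepWEBS.documents.query_results_extractor import QueryResultsExtractor

# Download the Google results page for a query; search() returns the Path of the saved HTML.
searcher = GoogleSearcher()
html_path = searcher.search("python tutorials", result_num=10)

# Parse the saved HTML into {"query": ..., "query_results": [...], "related_questions": [...]}.
extractor = QueryResultsExtractor()
results = extractor.extract(html_path)

for item in results["query_results"]:
    print(item["index"], item["title"], item["url"])
```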
webscout-1.2.4/DeepWEBS/networks/webpage_fetcher.py
@@ -0,0 +1,97 @@
+import concurrent.futures
+import random
+import requests
+import tldextract
+from pathlib import Path
+from typing import List, Tuple, Dict
+
+from DeepWEBS.utilsdw.enver import enver
+from DeepWEBS.utilsdw.logger import logger
+from DeepWEBS.networks.filepath_converter import UrlToFilepathConverter
+from DeepWEBS.networks.network_configs import IGNORE_HOSTS, REQUESTS_HEADERS
+
+class WebpageFetcher:
+    def __init__(self):
+        self.enver = enver
+        self.enver.set_envs(proxies=True)
+        self.filepath_converter = UrlToFilepathConverter()
+
+    def is_ignored_host(self, url: str) -> bool:
+        host = tldextract.extract(url).registered_domain
+        return host in IGNORE_HOSTS
+
+    def send_request(self, url: str) -> requests.Response:
+        try:
+            user_agent = random.choice(REQUESTS_HEADERS["User-Agent"])
+            response = requests.get(
+                url=url,
+                headers={"User-Agent": user_agent},
+                proxies=self.enver.requests_proxies,
+                timeout=15,
+            )
+            response.raise_for_status()
+            return response
+        except requests.exceptions.RequestException as e:
+            logger.warn(f"Failed to fetch: [{url}] | {e}")
+            return None
+
+    def save_response(self, response: requests.Response, html_path: Path) -> None:
+        if response is None:
+            return
+
+        html_path.parent.mkdir(parents=True, exist_ok=True)
+        logger.success(f"Saving to: [{html_path}]")
+        with html_path.open("wb") as wf:
+            wf.write(response.content)
+
+    def fetch(self, url: str, overwrite: bool = False, output_parent: str = None) -> Path:
+        logger.note(f"Fetching: [{url}]")
+        html_path = self.filepath_converter.convert(url, parent=output_parent)
+
+        if self.is_ignored_host(url):
+            logger.warn(f"Ignored host: [{tldextract.extract(url).registered_domain}]")
+            return html_path
+
+        if html_path.exists() and not overwrite:
+            logger.success(f"HTML existed: [{html_path}]")
+        else:
+            response = self.send_request(url)
+            self.save_response(response, html_path)
+
+        return html_path
+
+class BatchWebpageFetcher:
+    def __init__(self):
+        self.done_count = 0
+        self.total_count = 0
+        self.url_and_html_path_list: List[Dict[str, str]] = []
+
+    def fetch_single_webpage(self, url: str, overwrite: bool = False, output_parent: str = None) -> Tuple[str, Path]:
+        webpage_fetcher = WebpageFetcher()
+        html_path = webpage_fetcher.fetch(url, overwrite, output_parent)
+        self.url_and_html_path_list.append({"url": url, "html_path": str(html_path)})
+        self.done_count += 1
+        logger.success(f"> [{self.done_count}/{self.total_count}] Fetched: {url}")
+        return url, html_path
+
+    def fetch(self, urls: List[str], overwrite: bool = False, output_parent: str = None) -> List[Dict[str, str]]:
+        self.urls = urls
+        self.total_count = len(self.urls)
+
+        with concurrent.futures.ThreadPoolExecutor() as executor:
+            futures = [
+                executor.submit(self.fetch_single_webpage, url, overwrite, output_parent)
+                for url in urls
+            ]
+            concurrent.futures.wait(futures)
+
+        return self.url_and_html_path_list
+
+if __name__ == "__main__":
+    urls = [
+        "https://stackoverflow.com/questions/295135/turn-a-string-into-a-valid-filename",
+        "https://www.liaoxuefeng.com/wiki/1016959663602400/1017495723838528",
+        "https://docs.python.org/zh-cn/3/tutorial/interpreter.html",
+    ]
+    batch_webpage_fetcher = BatchWebpageFetcher()
+    batch_webpage_fetcher.fetch(urls=urls, overwrite=True, output_parent="python tutorials")
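`BatchWebpageFetcher` wraps `WebpageFetcher` in a `ThreadPoolExecutor`, so a list of URLs is downloaded concurrently and `fetch()` returns one `{"url", "html_path"}` mapping per page. A rough usage sketch (not part of the diff; the URLs here are taken from the module's own `__main__` block and could equally be the `url` fields produced by `QueryResultsExtractor`):

```python
from DeepWEBS.networks.webpage_fetcher import BatchWebpageFetcher

# Any list of URLs works here, e.g. the "url" fields returned by QueryResultsExtractor.
urls = [
    "https://stackoverflow.com/questions/295135/turn-a-string-into-a-valid-filename",
    "https://docs.python.org/zh-cn/3/tutorial/interpreter.html",
]

fetcher = BatchWebpageFetcher()
url_and_paths = fetcher.fetch(urls=urls, overwrite=False, output_parent="python tutorials")

for entry in url_and_paths:
    print(entry["url"], "->", entry["html_path"])
```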
{webscout-1.2.2 → webscout-1.2.4}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: webscout
-Version: 1.2.2
+Version: 1.2.4
 Summary: Search for words, documents, images, videos, news, maps and text translation using the Google, DuckDuckGo.com, yep.com, phind.com, you.com, etc Also containes AI models
 Author: OEvortex
 Author-email: helpingai5@gmail.com
@@ -45,6 +45,8 @@ Requires-Dist: sse_starlette
 Requires-Dist: termcolor
 Requires-Dist: tiktoken
 Requires-Dist: tldextract
+Requires-Dist: gpt4all
+Requires-Dist: orjson
 Provides-Extra: dev
 Requires-Dist: ruff>=0.1.6; extra == "dev"
 Requires-Dist: pytest>=7.4.2; extra == "dev"
@@ -69,6 +71,7 @@ Also containes AI models that you can use
 - [Regions](#regions)
 - [DeepWEBS: Advanced Web Searches](#deepwebs-advanced-web-searches)
 - [Activating DeepWEBS](#activating-deepwebs)
+- [Point to remember before using `DeepWEBS`](#point-to-remember-before-using-deepwebs)
 - [Usage Example](#usage-example)
 - [WEBS and AsyncWEBS classes](#webs-and-asyncwebs-classes)
 - [Exceptions](#exceptions)
@@ -91,6 +94,7 @@ Also containes AI models that you can use
 - [6. `BlackBox` - Search/chat With BlackBox](#6-blackbox---searchchat-with-blackbox)
 - [7. `PERPLEXITY` - Search With PERPLEXITY](#7-perplexity---search-with-perplexity)
 - [8. `OpenGPT` - chat With OPENGPT](#8-opengpt---chat-with-opengpt)
+- [9. `GPT4ALL` - chat offline with Language models using gpt4all from webscout](#9-gpt4all---chat-offline-with-language-models-using-gpt4all-from-webscout)
 - [usage of special .LLM file from webscout (webscout.LLM)](#usage-of-special-llm-file-from-webscout-webscoutllm)
 - [`LLM`](#llm)

@@ -224,26 +228,33 @@ ___

 To utilize the `DeepWEBS` feature, you must first create an instance of the `DeepWEBS` . This is designed to be used independently of the `WEBS` , offering a focused approach to web searches.

+### Point to remember before using `DeepWEBS`
+As `DeepWEBS` is designed to extract relevant information directly from webpages and Search engine, It extracts html from webpages and saves them to folder named files in `DeepWEBS` that can be found at `C:\Users\Username\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\DeepWEBS`
+
 ### Usage Example

 Here's a basic example of how to use the `DeepWEBS` :
 ```python
 from webscout import DeepWEBS

-
 def perform_web_search(query):
-
-
-
-
-
-
+    # Initialize the DeepWEBS class
+    D = DeepWEBS()
+
+    # Set up the search parameters
+    search_params = D.DeepSearch(
+        queries=[query], # Query to search
+        result_num=5, # Number of search results
+        safe=True, # Enable SafeSearch
+        types=["web"], # Search type: web
         extract_webpage=True, # True for extracting webpages
         overwrite_query_html=False,
         overwrite_webpage_html=False,
     )
-
-
+
+    # Execute the search and retrieve results
+    results = D.queries_to_search_results(search_params)
+
     return results

 def print_search_results(results):
@@ -260,8 +271,13 @@ def print_search_results(results):
         print("No search results found.")

 def main():
+    # Prompt the user for a search query
     query = input("Enter your search query: ")
+
+    # Perform the web search
     results = perform_web_search(query)
+
+    # Print the search results
     print_search_results(results)

 if __name__ == "__main__":
@@ -592,6 +608,37 @@ prompt = "tell me about india"
 response_str = opengpt.chat(prompt)
 print(response_str)
 ```
+### 9. `GPT4ALL` - chat offline with Language models using gpt4all from webscout
+```python
+from webscout import GPT4ALL
+
+# Initialize the GPT4ALL class with your model path and other optional parameters
+gpt4all_instance = GPT4ALL(
+    model="path/to/your/model/file", # Replace with the actual path to your model file
+    is_conversation=True,
+    max_tokens=800,
+    temperature=0.7,
+    presence_penalty=0,
+    frequency_penalty=1.18,
+    top_p=0.4,
+    intro="Hello, how can I assist you today?",
+    filepath="path/to/conversation/history/file", # Optional, for conversation history
+    update_file=True,
+    history_offset=10250,
+    act=None # Optional, for using an awesome prompt as intro
+)
+
+# Generate a response from the AI model
+response = gpt4all_instance.chat(
+    prompt="What is the weather like today?",
+    stream=False, # Set to True if you want to stream the response
+    optimizer=None, # Optional, specify an optimizer if needed
+    conversationally=False # Set to True for conversationally generated responses
+)
+
+# Print the generated response
+print(response)
+```

 ## usage of special .LLM file from webscout (webscout.LLM)

{webscout-1.2.2 → webscout-1.2.4}/README.md
@@ -18,6 +18,7 @@ Also containes AI models that you can use
 - [Regions](#regions)
 - [DeepWEBS: Advanced Web Searches](#deepwebs-advanced-web-searches)
 - [Activating DeepWEBS](#activating-deepwebs)
+- [Point to remember before using `DeepWEBS`](#point-to-remember-before-using-deepwebs)
 - [Usage Example](#usage-example)
 - [WEBS and AsyncWEBS classes](#webs-and-asyncwebs-classes)
 - [Exceptions](#exceptions)
@@ -40,6 +41,7 @@ Also containes AI models that you can use
 - [6. `BlackBox` - Search/chat With BlackBox](#6-blackbox---searchchat-with-blackbox)
 - [7. `PERPLEXITY` - Search With PERPLEXITY](#7-perplexity---search-with-perplexity)
 - [8. `OpenGPT` - chat With OPENGPT](#8-opengpt---chat-with-opengpt)
+- [9. `GPT4ALL` - chat offline with Language models using gpt4all from webscout](#9-gpt4all---chat-offline-with-language-models-using-gpt4all-from-webscout)
 - [usage of special .LLM file from webscout (webscout.LLM)](#usage-of-special-llm-file-from-webscout-webscoutllm)
 - [`LLM`](#llm)

@@ -173,26 +175,33 @@ ___

 To utilize the `DeepWEBS` feature, you must first create an instance of the `DeepWEBS` . This is designed to be used independently of the `WEBS` , offering a focused approach to web searches.

+### Point to remember before using `DeepWEBS`
+As `DeepWEBS` is designed to extract relevant information directly from webpages and Search engine, It extracts html from webpages and saves them to folder named files in `DeepWEBS` that can be found at `C:\Users\Username\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\DeepWEBS`
+
 ### Usage Example

 Here's a basic example of how to use the `DeepWEBS` :
 ```python
 from webscout import DeepWEBS

-
 def perform_web_search(query):
-
-
-
-
-
-
+    # Initialize the DeepWEBS class
+    D = DeepWEBS()
+
+    # Set up the search parameters
+    search_params = D.DeepSearch(
+        queries=[query], # Query to search
+        result_num=5, # Number of search results
+        safe=True, # Enable SafeSearch
+        types=["web"], # Search type: web
         extract_webpage=True, # True for extracting webpages
         overwrite_query_html=False,
         overwrite_webpage_html=False,
     )
-
-
+
+    # Execute the search and retrieve results
+    results = D.queries_to_search_results(search_params)
+
     return results

 def print_search_results(results):
@@ -209,8 +218,13 @@ def print_search_results(results):
         print("No search results found.")

 def main():
+    # Prompt the user for a search query
     query = input("Enter your search query: ")
+
+    # Perform the web search
     results = perform_web_search(query)
+
+    # Print the search results
     print_search_results(results)

 if __name__ == "__main__":
@@ -541,6 +555,37 @@ prompt = "tell me about india"
 response_str = opengpt.chat(prompt)
 print(response_str)
 ```
+### 9. `GPT4ALL` - chat offline with Language models using gpt4all from webscout
+```python
+from webscout import GPT4ALL
+
+# Initialize the GPT4ALL class with your model path and other optional parameters
+gpt4all_instance = GPT4ALL(
+    model="path/to/your/model/file", # Replace with the actual path to your model file
+    is_conversation=True,
+    max_tokens=800,
+    temperature=0.7,
+    presence_penalty=0,
+    frequency_penalty=1.18,
+    top_p=0.4,
+    intro="Hello, how can I assist you today?",
+    filepath="path/to/conversation/history/file", # Optional, for conversation history
+    update_file=True,
+    history_offset=10250,
+    act=None # Optional, for using an awesome prompt as intro
+)
+
+# Generate a response from the AI model
+response = gpt4all_instance.chat(
+    prompt="What is the weather like today?",
+    stream=False, # Set to True if you want to stream the response
+    optimizer=None, # Optional, specify an optimizer if needed
+    conversationally=False # Set to True for conversationally generated responses
+)
+
+# Print the generated response
+print(response)
+```

 ## usage of special .LLM file from webscout (webscout.LLM)

{webscout-1.2.2 → webscout-1.2.4}/setup.py
@@ -9,7 +9,7 @@ with open("README.md", encoding="utf-8") as f:

 setup(
     name="webscout",
-    version="1.2.2",
+    version="1.2.4", # Use the version variable from the version.py file
     description="Search for words, documents, images, videos, news, maps and text translation using the Google, DuckDuckGo.com, yep.com, phind.com, you.com, etc Also containes AI models",
     long_description=README,
     long_description_content_type="text/markdown",
@@ -53,6 +53,8 @@ setup(
         "termcolor",
         "tiktoken",
         "tldextract",
+        "gpt4all",
+        "orjson",
     ],
     entry_points={
         "console_scripts": [