webscout 5.0__py3-none-any.whl → 5.2__py3-none-any.whl

This diff shows the contents of publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.


This version of webscout might be problematic.

webscout/Bing_search.py CHANGED
@@ -2,10 +2,12 @@ from bs4 import BeautifulSoup
  import requests
  from typing import Dict, List, Optional, Union
  from concurrent.futures import ThreadPoolExecutor, as_completed
- from urllib.parse import urlparse
- from termcolor import colored
- import time
- import random
+ from urllib.parse import quote, urlparse, parse_qs
+ import base64
+ import urllib3
+
+ # Disable SSL warnings
+ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

  class BingS:
      """Bing search class to get search results from bing.com."""
@@ -21,7 +23,7 @@ class BingS:
          """Initialize the BingS object."""
          self.proxy: Optional[str] = proxy
          self.headers = headers if headers else {
-             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.62"
+             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
          }
          self.headers["Referer"] = "https://www.bing.com/"
          self.client = requests.Session()
@@ -43,82 +45,110 @@ class BingS:
          data: Optional[Union[Dict[str, str], bytes]] = None,
      ) -> bytes:
          try:
-             resp = self.client.request(method, url, params=params, data=data, timeout=self.timeout)
+             resp = self.client.request(method, url, params=params, data=data, timeout=self.timeout, verify=False)
          except Exception as ex:
              raise Exception(f"{url} {type(ex).__name__}: {ex}") from ex
          if resp.status_code == 200:
              return resp.content
          raise Exception(f"{resp.url} returned status code {resp.status_code}. {params=} {data=}")

+     def extract_text_from_webpage(self, html_content, max_characters=None):
+         """Extracts visible text from HTML content using BeautifulSoup."""
+         soup = BeautifulSoup(html_content, "html.parser")
+         # Remove unwanted tags
+         for tag in soup(["script", "style", "header", "footer", "nav"]):
+             tag.extract()
+         # Get the remaining visible text
+         visible_text = soup.get_text(separator=' ', strip=True)
+         if max_characters:
+             visible_text = visible_text[:max_characters]
+         return visible_text
+
      def search(
          self,
          keywords: str,
-         region: str = "us-EN",  # Bing uses us-EN
-         lang: str = "en",
-         safe: str = "off",
-         timelimit: Optional[str] = None,  # Not directly supported by Bing
-         max_results: Optional[int] = None,
+         max_results: Optional[int] = 10,
+         extract_webpage_text: bool = False,
+         max_extract_characters: Optional[int] = 100,
      ) -> List[Dict[str, str]]:
          """Bing text search."""
          assert keywords, "keywords is mandatory"

          results = []
-         start = 1  # Bing uses 1-based indexing for pages
-         while len(results) < (max_results or float('inf')):
+         futures = []
+         start = 1
+         while len(results) < max_results:
              params = {
                  "q": keywords,
-                 "count": 10,  # Number of results per page
-                 "mkt": region,
-                 "setlang": lang,
-                 "safeSearch": safe,
-                 "first": start,  # Bing uses 'first' for pagination
+                 "first": start
              }
+             futures.append(self._executor.submit(self._get_url, "GET", "https://www.bing.com/search", params=params))
+             start += 10

-             try:
-                 resp_content = self._get_url("GET", "https://www.bing.com/search", params=params)
-                 soup = BeautifulSoup(resp_content, "html.parser")
-                 result_block = soup.find_all("li", class_="b_algo")
-
-                 if not result_block:
-                     break
+             for future in as_completed(futures):
+                 try:
+                     resp_content = future.result()
+                     soup = BeautifulSoup(resp_content, "html.parser")
+                     result_block = soup.select('li.b_algo')

-                 for result in result_block:
-                     try:
-                         link = result.find("a", href=True)
-                         if link:
-                             initial_url = link["href"]
+                     if not result_block:
+                         break

-                             title = result.find("h2").text if result.find("h2") else ""
-                             description = result.find("p").text.strip() if result.find("p") else ""  # Strip whitespace
+                     for result in result_block:
+                         try:
+                             link = result.select_one('h2 a')
+                             title = link.text if link else ""
+                             url = link['href'] if link else ""
+                             abstract = result.select_one('.b_caption p')
+                             description = abstract.text if abstract else ""

-                             # Remove 'WEB' prefix if present
+                             # Remove "WEB" from the beginning of the description if it exists
                              if description.startswith("WEB"):
-                                 description = description[4:]  # Skip the first 4 characters ('WEB ')
+                                 description = description[3:].strip()
+
+                             visible_text = ""
+                             if extract_webpage_text:
+                                 try:
+                                     actual_url = self._decode_bing_url(url)
+                                     page_content = self._get_url("GET", actual_url)
+                                     visible_text = self.extract_text_from_webpage(
+                                         page_content, max_characters=max_extract_characters
+                                     )
+                                 except Exception as e:
+                                     print(f"Error extracting text from {url}: {e}")

                              results.append({
                                  "title": title,
-                                 "href": initial_url,
+                                 "href": url,
                                  "abstract": description,
                                  "index": len(results),
                                  "type": "web",
+                                 "visible_text": visible_text,
                              })

                              if len(results) >= max_results:
                                  return results

-                     except Exception as e:
-                         print(f"Error extracting result: {e}")
+                         except Exception as e:
+                             print(f"Error extracting result: {e}")

-             except Exception as e:
-                 print(f"Error fetching URL: {e}")
-
-             start += 10
+                 except Exception as e:
+                     print(f"Error fetching URL: {e}")

          return results

+     def _decode_bing_url(self, url):
+         if 'bing.com/ck/a' in url:
+             parsed_url = urlparse(url)
+             query_params = parse_qs(parsed_url.query)
+             if 'u' in query_params:
+                 encoded_url = query_params['u'][0]
+                 return base64.b64decode(encoded_url).decode('utf-8')
+         return url
+
  if __name__ == "__main__":
      from rich import print
      searcher = BingS()
-     results = searcher.search("Python development tools", max_results=30)
+     results = searcher.search("Python development tools", max_results=5, extract_webpage_text=True, max_extract_characters=2000)
      for result in results:
-         print(result)
+         print(result)
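
Version 5.2 routes every request through `verify=False` (with `urllib3` warnings silenced), fans page fetches out through the class's thread pool, and unwraps Bing's `bing.com/ck/a` click-tracking redirects via the new `_decode_bing_url`, which base64-decodes the redirect's `u` query parameter. Below is a minimal standalone sketch of that decoding step; the function name `decode_bing_url` and the sample redirect are fabricated for illustration, and real Bing `u` values may carry a prefix or use the URL-safe alphabet that plain `b64decode` would reject.

import base64
from urllib.parse import urlparse, parse_qs

def decode_bing_url(url: str) -> str:
    # Same logic as the diff's _decode_bing_url: unwrap ck/a redirects,
    # pass every other URL through unchanged.
    if 'bing.com/ck/a' in url:
        query_params = parse_qs(urlparse(url).query)
        if 'u' in query_params:
            return base64.b64decode(query_params['u'][0]).decode('utf-8')
    return url

# Fabricated redirect whose 'u' payload decodes to https://example.com/
payload = base64.b64encode(b"https://example.com/").decode()
print(decode_bing_url(f"https://www.bing.com/ck/a?u={payload}"))  # https://example.com/
print(decode_bing_url("https://example.org/direct"))              # returned as-is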
@@ -1,209 +1,179 @@
- import subprocess
- import argparse
+ import warnings
+ from datetime import time
  import os
- from rich.console import Console
- from rich.panel import Panel
- from rich.progress import track
- from yaspin import yaspin
- from pyfiglet import figlet_format
- import time
-
- console = Console()
-
- def autollama(model_path, gguf_file):
-     """Manages models with Ollama using the autollama.sh script.
-
-     Args:
-         model_path (str): The path to the Hugging Face model.
-         gguf_file (str): The name of the GGUF file.
-     """
-     console.print(f"[bold green]{figlet_format('Autollama')}[/]\n", justify="center")
-
-     # Check if autollama.sh exists in the current working directory
-     script_path = os.path.join(os.getcwd(), "autollama.sh")
-     if not os.path.exists(script_path):
-         # Create autollama.sh with the content provided
-         with open(script_path, "w") as f:
-             f.write("""
- function show_art() {
-     cat << "EOF"
- Made with love in India
- EOF
- }
-
- show_art
-
- # Initialize default values
- MODEL_PATH=""
- GGUF_FILE=""
-
- # Display help/usage information
- usage() {
-     echo "Usage: $0 -m <model_path> -g <gguf_file>"
-     echo
-     echo "Options:"
-     echo "  -m <model_path>  Set the path to the model"
-     echo "  -g <gguf_file>   Set the GGUF file name"
-     echo "  -h               Display this help and exit"
-     echo
- }
-
- # Parse command-line options
- while getopts ":m:g:h" opt; do
-     case ${opt} in
-         m )
-             MODEL_PATH=$OPTARG
-             ;;
-         g )
-             GGUF_FILE=$OPTARG
-             ;;
-         h )
-             usage
-             exit 0
-             ;;
-         \? )
-             echo "Invalid Option: -$OPTARG" 1>&2
-             usage
-             exit 1
-             ;;
-         : )
-             echo "Invalid Option: -$OPTARG requires an argument" 1>&2
-             usage
-             exit 1
-             ;;
-     esac
- done
-
- # Check required parameters
- if [ -z "$MODEL_PATH" ] || [ -z "$GGUF_FILE" ]; then
-     echo "Error: -m (model_path) and -g (gguf_file) are required."
-     usage
-     exit 1
- fi
-
- # Derive MODEL_NAME
- MODEL_NAME=$(echo $GGUF_FILE | sed 's/\(.*\)\.Q4.*/\\1/')
-
- # Log file where downloaded models are recorded
- DOWNLOAD_LOG="downloaded_models.log"
-
- # Composite logging name
- LOGGING_NAME="${MODEL_PATH}_${MODEL_NAME}"
-
- # Check if the model has been downloaded
- function is_model_downloaded {
-     grep -qxF "$LOGGING_NAME" "$DOWNLOAD_LOG" && return 0 || return 1
- }
-
- # Log the downloaded model
- function log_downloaded_model {
-     echo "$LOGGING_NAME" >> "$DOWNLOAD_LOG"
- }
-
- # Function to check if the model has already been created
- function is_model_created {
-     # 'ollama list' lists all models
-     ollama list | grep -q "$MODEL_NAME" && return 0 || return 1
- }
-
- # Check if huggingface-hub is installed, and install it if not
- if ! pip show huggingface-hub > /dev/null; then
-     echo "Installing huggingface-hub..."
-     pip install -U "huggingface_hub[cli]"
- else
-     echo "huggingface-hub is already installed."
- fi
-
- # Check if the model has already been downloaded
- if is_model_downloaded; then
-     echo "Model $LOGGING_NAME has already been downloaded. Skipping download."
- else
-     echo "Downloading model $LOGGING_NAME..."
-     # Download the model
-     huggingface-cli download $MODEL_PATH $GGUF_FILE --local-dir downloads --local-dir-use-symlinks False
-
-     # Log the downloaded model
-     log_downloaded_model
-     echo "Model $LOGGING_NAME downloaded and logged."
- fi
-
- # Check if Ollama is installed, and install it if not
- if ! command -v ollama &> /dev/null; then
-     echo "Installing Ollama..."
-     curl -fsSL https://ollama.com/install.sh | sh
- else
-     echo "Ollama is already installed."
- fi
-
- # Check if Ollama is already running
- if pgrep -f 'ollama serve' > /dev/null; then
-     echo "Ollama is already running. Skipping the start."
- else
-     echo "Starting Ollama..."
-     # Start Ollama in the background
-     ollama serve &
-
-     # Wait for Ollama to start
-     while true; do
-         if pgrep -f 'ollama serve' > /dev/null; then
-             echo "Ollama has started."
-             sleep 60
-             break
-         else
-             echo "Waiting for Ollama to start..."
-             sleep 1 # Wait for 1 second before checking again
-         fi
-     done
- fi
-
- # Check if the model has already been created
- if is_model_created; then
-     echo "Model $MODEL_NAME is already created. Skipping creation."
- else
-     echo "Creating model $MODEL_NAME..."
-     # Create the model in Ollama
-     # Prepare Modelfile with the downloaded path
-     echo "FROM ./downloads/$GGUF_FILE" > Modelfile
-     ollama create $MODEL_NAME -f Modelfile
-     echo "Model $MODEL_NAME created."
- fi
-
-
- echo "model name is > $MODEL_NAME"
- echo "Use Ollama run $MODEL_NAME"
- """)
-         # Make autollama.sh executable (using chmod)
-         os.chmod(script_path, 0o755)
-
-     # Initialize command list
-     command = ["bash", script_path, "-m", model_path, "-g", gguf_file]
-
-     # Execute the command
-     process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
-
-     for line in iter(process.stdout.readline, ''):
-         console.print(Panel(line.strip(), title="Autollama Output", expand=False))
-
-     for line in iter(process.stderr.readline, ''):
-         console.print(Panel(line.strip(), title="Autollama Errors (if any)", expand=False))
-
-     process.wait()
-     console.print("[green]Model is ready![/]")
-
- def main():
-     parser = argparse.ArgumentParser(description='Automatically create and run an Ollama model in Ollama')
-     parser.add_argument('-m', '--model_path', required=True, help='Set the huggingface model id to the Hugging Face model')
-     parser.add_argument('-g', '--gguf_file', required=True, help='Set the GGUF file name')
+ import sys
+ import subprocess
+ import logging
+ import psutil
+ from huggingface_hub import hf_hub_url, cached_download
+ import colorlog
+ import ollama  # Import ollama for interactive chat
+ import argparse  # Import argparse for command-line arguments
+
+ # Suppress specific warnings
+ warnings.filterwarnings(
+     "ignore", category=FutureWarning, module="huggingface_hub.file_download"
+ )
+
+ # Configure logging with colors
+ handler = colorlog.StreamHandler()
+ handler.setFormatter(
+     colorlog.ColoredFormatter(
+         "%(log_color)s%(asctime)s - %(levelname)s - %(message)s",
+         datefmt="%Y-%m-%d %H:%M:%S",
+         log_colors={
+             "DEBUG": "cyan",
+             "INFO": "green",
+             "WARNING": "yellow",
+             "ERROR": "red",
+             "CRITICAL": "red,bg_white",
+         },
+     )
+ )
+
+ logger = colorlog.getLogger(__name__)
+ if not logger.hasHandlers():
+     logger.addHandler(handler)
+ logger.setLevel(logging.INFO)
+
+ # Redirect warnings to the logger but avoid duplication
+ logging.captureWarnings(True)
+ py_warnings_logger = logging.getLogger("py.warnings")
+ if not py_warnings_logger.hasHandlers():
+     py_warnings_logger.addHandler(handler)
+
+
+ def show_art():
+     logger.info("Made with love in India")
+
+
+ def usage():
+     logger.info("Usage: python script.py -m <model_path> -g <gguf_file>")
+     logger.info("Options:")
+     logger.info("  -m <model_path>  Set the path to the model")
+     logger.info("  -g <gguf_file>   Set the GGUF file name")
+     logger.info("  -h               Display this help and exit")
+
+
+ def is_model_downloaded(logging_name, download_log):
+     if not os.path.exists(download_log):
+         return False
+     with open(download_log, "r") as f:
+         for line in f:
+             if line.strip() == logging_name:
+                 return True
+     return False
+
+
+ def log_downloaded_model(logging_name, download_log):
+     with open(download_log, "a") as f:
+         f.write(logging_name + "\n")
+
+
+ def is_model_created(model_name):
+     result = subprocess.run(["ollama", "list"], stdout=subprocess.PIPE)
+     return model_name in result.stdout.decode("utf-8")
+
+
+ def download_model(repo_id, filename, token, cache_dir="downloads"):
+     url = hf_hub_url(repo_id, filename)
+     filepath = cached_download(
+         url, cache_dir=cache_dir, force_filename=filename, use_auth_token=token
+     )
+     return filepath
+
+
+ def is_ollama_running():
+     for proc in psutil.process_iter(["name"]):
+         if proc.info["name"] in ["ollama", "ollama.exe"]:
+             return True
+     return False
+
+
+ def main(model_path=None, gguf_file=None):  # Modified to handle both CLI and non-CLI
+     show_art()
+
+     # Parse command-line arguments if provided
+     parser = argparse.ArgumentParser(description="Download and create an Ollama model")
+     parser.add_argument("-m", "--model_path", help="Path to the model on Hugging Face Hub")
+     parser.add_argument("-g", "--gguf_file", help="Name of the GGUF file")
      args = parser.parse_args()

+     # Use arguments from command line or function parameters
+     model_path = args.model_path if args.model_path else model_path
+     gguf_file = args.gguf_file if args.gguf_file else gguf_file
+
+     if not model_path or not gguf_file:
+         logger.error("Error: model_path and gguf_file are required.")
+         usage()
+         sys.exit(2)
+
+     model_name = gguf_file.split(".Q4")[0]
+     download_log = "downloaded_models.log"
+     logging_name = f"{model_path}_{model_name}"
+
+     # Ensure the log file exists
+     if not os.path.exists(download_log):
+         with open(download_log, 'w') as f:
+             pass
+
+     # Check if huggingface-hub is installed, and install it if not
      try:
-         with yaspin(text="Processing...") as spinner:
-             autollama(args.model_path, args.gguf_file)
-         spinner.ok("Done!")
-     except Exception as e:
-         console.print(f"[red]Error: {e}[/]")
-         exit(1)
+         subprocess.check_output(['pip', 'show', 'huggingface-hub'])
+     except subprocess.CalledProcessError:
+         logger.info("Installing huggingface-hub...")
+         subprocess.check_call(['pip', 'install', '-U', 'huggingface_hub[cli]'])
+     else:
+         logger.info("huggingface-hub is already installed.")
+
+     # Check if the model has already been downloaded
+     if is_model_downloaded(logging_name, download_log):
+         logger.info(f"Model {logging_name} has already been downloaded. Skipping download.")
+     else:
+         logger.info(f"Downloading model {logging_name}...")
+         token = os.getenv('HUGGINGFACE_TOKEN', None)
+         if not token:
+             logger.warning("Warning: HUGGINGFACE_TOKEN environment variable is not set. Using None.")
+             token = None
+
+         filepath = download_model(model_path, gguf_file, token)
+         log_downloaded_model(logging_name, download_log)
+         logger.info(f"Model {logging_name} downloaded and logged.")
+
+     # Check if Ollama is installed, and install it if not
+     try:
+         subprocess.check_output(['ollama', '--version'])
+     except subprocess.CalledProcessError:
+         logger.info("Installing Ollama...")
+         subprocess.check_call(['curl', '-fsSL', 'https://ollama.com/install.sh', '|', 'sh'])
+     else:
+         logger.info("Ollama is already installed.")
+
+     # Check if Ollama is already running
+     if is_ollama_running():
+         logger.info("Ollama is already running. Skipping the start.")
+     else:
+         logger.info("Starting Ollama...")
+         subprocess.Popen(['ollama', 'serve'])
+
+         while not is_ollama_running():
+             logger.info("Waiting for Ollama to start...")
+             time.sleep(1)
+
+         logger.info("Ollama has started.")
+
+     # Check if the model has already been created
+     if is_model_created(model_name):
+         logger.info(f"Model {model_name} is already created. Skipping creation.")
+     else:
+         logger.info(f"Creating model {model_name}...")
+         with open('Modelfile', 'w') as f:
+             f.write(f"FROM ./downloads/{gguf_file}")
+         subprocess.check_call(['ollama', 'create', model_name, '-f', 'Modelfile'])
+         logger.info(f"Model {model_name} created.")
+
+     logger.info(f"model name is > {model_name}")
+     logger.info(f"Use Ollama run {model_name}")

  if __name__ == "__main__":
-     main()
-
+     main()
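
The rewrite replaces the generated `autollama.sh` with pure Python: dependency checks via `subprocess`, downloads via `huggingface_hub`, process detection via `psutil`. `main(model_path=None, gguf_file=None)` accepts either argparse flags or direct arguments. A hypothetical invocation, assuming the module is importable as shown (the import path, repo id, and file name below are placeholders, since the diff does not show the file's name):

# Placeholder import path and model names; adjust to the actual module location.
from webscout.autollama import main

# Equivalent to: python autollama.py -m <repo_id> -g <gguf_file>
main(
    model_path="SomeOrg/Some-Model-GGUF",  # placeholder Hugging Face repo id
    gguf_file="some-model.Q4_K_M.gguf",    # placeholder GGUF file in that repo
)

Two details in the released code are worth noting: `from datetime import time` shadows the stdlib `time` module, so the `time.sleep(1)` in the startup wait loop would raise `AttributeError`, and passing `'|'` to `subprocess.check_call` hands it to `curl` as a literal argument rather than forming a shell pipeline. `cached_download` has also been deprecated in recent `huggingface_hub` releases.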
@@ -1,3 +1,3 @@
  from llama_cpp import __version__ as __llama_cpp_version__

- __version__ = '4.8'
+ __version__ = '5.2'