webscout 5.1__py3-none-any.whl → 5.2__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.

Potentially problematic release.


This version of webscout (5.2) might be problematic. More details are available through the link on the original diff page.

webscout/Bing_search.py CHANGED
@@ -2,10 +2,12 @@ from bs4 import BeautifulSoup
2
2
  import requests
3
3
  from typing import Dict, List, Optional, Union
4
4
  from concurrent.futures import ThreadPoolExecutor, as_completed
5
- from urllib.parse import urlparse
6
- from termcolor import colored
7
- import time
8
- import random
5
+ from urllib.parse import quote, urlparse, parse_qs
6
+ import base64
7
+ import urllib3
8
+
9
+ # Disable SSL warnings
10
+ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
9
11
 
10
12
  class BingS:
11
13
  """Bing search class to get search results from bing.com."""
@@ -21,7 +23,7 @@ class BingS:
21
23
  """Initialize the BingS object."""
22
24
  self.proxy: Optional[str] = proxy
23
25
  self.headers = headers if headers else {
24
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.62"
26
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
25
27
  }
26
28
  self.headers["Referer"] = "https://www.bing.com/"
27
29
  self.client = requests.Session()
@@ -43,82 +45,110 @@ class BingS:
43
45
  data: Optional[Union[Dict[str, str], bytes]] = None,
44
46
  ) -> bytes:
45
47
  try:
46
- resp = self.client.request(method, url, params=params, data=data, timeout=self.timeout)
48
+ resp = self.client.request(method, url, params=params, data=data, timeout=self.timeout, verify=False)
47
49
  except Exception as ex:
48
50
  raise Exception(f"{url} {type(ex).__name__}: {ex}") from ex
49
51
  if resp.status_code == 200:
50
52
  return resp.content
51
53
  raise Exception(f"{resp.url} returned status code {resp.status_code}. {params=} {data=}")
52
54
 
55
+ def extract_text_from_webpage(self, html_content, max_characters=None):
56
+ """Extracts visible text from HTML content using BeautifulSoup."""
57
+ soup = BeautifulSoup(html_content, "html.parser")
58
+ # Remove unwanted tags
59
+ for tag in soup(["script", "style", "header", "footer", "nav"]):
60
+ tag.extract()
61
+ # Get the remaining visible text
62
+ visible_text = soup.get_text(separator=' ', strip=True)
63
+ if max_characters:
64
+ visible_text = visible_text[:max_characters]
65
+ return visible_text
66
+
53
67
  def search(
54
68
  self,
55
69
  keywords: str,
56
- region: str = "us-EN", # Bing uses us-EN
57
- lang: str = "en",
58
- safe: str = "off",
59
- timelimit: Optional[str] = None, # Not directly supported by Bing
60
- max_results: Optional[int] = None,
70
+ max_results: Optional[int] = 10,
71
+ extract_webpage_text: bool = False,
72
+ max_extract_characters: Optional[int] = 100,
61
73
  ) -> List[Dict[str, str]]:
62
74
  """Bing text search."""
63
75
  assert keywords, "keywords is mandatory"
64
76
 
65
77
  results = []
66
- start = 1 # Bing uses 1-based indexing for pages
67
- while len(results) < (max_results or float('inf')):
78
+ futures = []
79
+ start = 1
80
+ while len(results) < max_results:
68
81
  params = {
69
82
  "q": keywords,
70
- "count": 10, # Number of results per page
71
- "mkt": region,
72
- "setlang": lang,
73
- "safeSearch": safe,
74
- "first": start, # Bing uses 'first' for pagination
83
+ "first": start
75
84
  }
85
+ futures.append(self._executor.submit(self._get_url, "GET", "https://www.bing.com/search", params=params))
86
+ start += 10
76
87
 
77
- try:
78
- resp_content = self._get_url("GET", "https://www.bing.com/search", params=params)
79
- soup = BeautifulSoup(resp_content, "html.parser")
80
- result_block = soup.find_all("li", class_="b_algo")
81
-
82
- if not result_block:
83
- break
88
+ for future in as_completed(futures):
89
+ try:
90
+ resp_content = future.result()
91
+ soup = BeautifulSoup(resp_content, "html.parser")
92
+ result_block = soup.select('li.b_algo')
84
93
 
85
- for result in result_block:
86
- try:
87
- link = result.find("a", href=True)
88
- if link:
89
- initial_url = link["href"]
94
+ if not result_block:
95
+ break
90
96
 
91
- title = result.find("h2").text if result.find("h2") else ""
92
- description = result.find("p").text.strip() if result.find("p") else "" # Strip whitespace
97
+ for result in result_block:
98
+ try:
99
+ link = result.select_one('h2 a')
100
+ title = link.text if link else ""
101
+ url = link['href'] if link else ""
102
+ abstract = result.select_one('.b_caption p')
103
+ description = abstract.text if abstract else ""
93
104
 
94
- # Remove 'WEB' prefix if present
105
+ # Remove "WEB" from the beginning of the description if it exists
95
106
  if description.startswith("WEB"):
96
- description = description[4:] # Skip the first 4 characters ('WEB ')
107
+ description = description[3:].strip()
108
+
109
+ visible_text = ""
110
+ if extract_webpage_text:
111
+ try:
112
+ actual_url = self._decode_bing_url(url)
113
+ page_content = self._get_url("GET", actual_url)
114
+ visible_text = self.extract_text_from_webpage(
115
+ page_content, max_characters=max_extract_characters
116
+ )
117
+ except Exception as e:
118
+ print(f"Error extracting text from {url}: {e}")
97
119
 
98
120
  results.append({
99
121
  "title": title,
100
- "href": initial_url,
122
+ "href": url,
101
123
  "abstract": description,
102
124
  "index": len(results),
103
125
  "type": "web",
126
+ "visible_text": visible_text,
104
127
  })
105
128
 
106
129
  if len(results) >= max_results:
107
130
  return results
108
131
 
109
- except Exception as e:
110
- print(f"Error extracting result: {e}")
132
+ except Exception as e:
133
+ print(f"Error extracting result: {e}")
111
134
 
112
- except Exception as e:
113
- print(f"Error fetching URL: {e}")
114
-
115
- start += 10
135
+ except Exception as e:
136
+ print(f"Error fetching URL: {e}")
116
137
 
117
138
  return results
118
139
 
140
+ def _decode_bing_url(self, url):
141
+ if 'bing.com/ck/a' in url:
142
+ parsed_url = urlparse(url)
143
+ query_params = parse_qs(parsed_url.query)
144
+ if 'u' in query_params:
145
+ encoded_url = query_params['u'][0]
146
+ return base64.b64decode(encoded_url).decode('utf-8')
147
+ return url
148
+
119
149
  if __name__ == "__main__":
120
150
  from rich import print
121
151
  searcher = BingS()
122
- results = searcher.search("Python development tools", max_results=30)
152
+ results = searcher.search("Python development tools", max_results=5, extract_webpage_text=True, max_extract_characters=2000)
123
153
  for result in results:
124
- print(result)
154
+ print(result)
@@ -1,3 +1,3 @@
1
1
  from llama_cpp import __version__ as __llama_cpp_version__
2
2
 
3
- __version__ = '4.8'
3
+ __version__ = '5.2'
@@ -0,0 +1,177 @@
1
+ import requests
2
+ import json
3
+ from typing import Dict, Any
4
+
5
+ from webscout.AIutel import Optimizers
6
+ from webscout.AIutel import Conversation
7
+ from webscout.AIutel import AwesomePrompts
8
+ from webscout.AIbase import Provider
9
+ from webscout import exceptions
10
+
11
+ class AI21(Provider):
12
+ """
13
+ A class to interact with the AI21 Studio API.
14
+ """
15
+
16
+ def __init__(
17
+ self,
18
+ api_key: str,
19
+ model: str = "jamba-1.5-large",
20
+ max_tokens: int = 1024,
21
+ temperature: float = 0.4,
22
+ top_p: float = 1,
23
+ is_conversation: bool = True,
24
+ timeout: int = 30,
25
+ intro: str = None,
26
+ filepath: str = None,
27
+ update_file: bool = True,
28
+ proxies: dict = {},
29
+ history_offset: int = 10250,
30
+ act: str = None,
31
+ system_prompt: str = "You are a helpful and informative AI assistant."
32
+ ):
33
+ """
34
+ Initializes the AI21 Studio API with given parameters.
35
+ """
36
+ self.api_key = api_key
37
+ self.api_endpoint = "https://api.ai21.com/studio/v1/chat/completions"
38
+ self.model = model
39
+ self.max_tokens = max_tokens
40
+ self.temperature = temperature
41
+ self.top_p = top_p
42
+ self.system_prompt = system_prompt
43
+ self.session = requests.Session()
44
+ self.is_conversation = is_conversation
45
+ self.max_tokens_to_sample = max_tokens
46
+ self.timeout = timeout
47
+ self.last_response = {}
48
+ self.headers = {
49
+ 'Accept': 'application/json, text/plain, */*',
50
+ 'Accept-Encoding': 'gzip, deflate, br, zstd',
51
+ 'Accept-Language': 'en-US,en;q=0.9,en-IN;q=0.8',
52
+ 'Authorization': f"Bearer {self.api_key}",
53
+ 'Content-Type': 'application/json',
54
+ 'DNT': '1',
55
+ 'Origin': 'https://studio.ai21.com',
56
+ 'Referer': 'https://studio.ai21.com/',
57
+ 'Sec-CH-UA': '"Chromium";v="128", "Not;A=Brand";v="24", "Microsoft Edge";v="128"',
58
+ 'Sec-CH-UA-Mobile': '?0',
59
+ 'Sec-CH-UA-Platform': '"Windows"',
60
+ 'Sec-Fetch-Dest': 'empty',
61
+ 'Sec-Fetch-Mode': 'cors',
62
+ 'Sec-Fetch-Site': 'same-site',
63
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0',
64
+ }
65
+
66
+ self.__available_optimizers = (
67
+ method
68
+ for method in dir(Optimizers)
69
+ if callable(getattr(Optimizers, method)) and not method.startswith("__")
70
+ )
71
+ self.session.headers.update(self.headers)
72
+ Conversation.intro = (
73
+ AwesomePrompts().get_act(
74
+ act, raise_not_found=True, default=None, case_insensitive=True
75
+ )
76
+ if act
77
+ else intro or Conversation.intro
78
+ )
79
+ self.conversation = Conversation(
80
+ is_conversation, self.max_tokens_to_sample, filepath, update_file
81
+ )
82
+ self.conversation.history_offset = history_offset
83
+ self.session.proxies = proxies
84
+
85
+ def ask(
86
+ self,
87
+ prompt: str,
88
+ stream: bool = False,
89
+ raw: bool = False,
90
+ optimizer: str = None,
91
+ conversationally: bool = False,
92
+ ) -> Dict[str, Any]:
93
+ """
94
+ Sends a prompt to the AI21 Studio API and returns the response.
95
+ """
96
+ conversation_prompt = self.conversation.gen_complete_prompt(prompt)
97
+ if optimizer:
98
+ if optimizer in self.__available_optimizers:
99
+ conversation_prompt = getattr(Optimizers, optimizer)(
100
+ conversation_prompt if conversationally else prompt
101
+ )
102
+ else:
103
+ raise Exception(
104
+ f"Optimizer is not one of {self.__available_optimizers}"
105
+ )
106
+
107
+ payload = {
108
+ "messages": [
109
+ {"role": "system", "content": self.system_prompt},
110
+ {"role": "user", "content": conversation_prompt}
111
+ ],
112
+ "n": 1,
113
+ "max_tokens": self.max_tokens,
114
+ "model": self.model,
115
+ "stop": [],
116
+ "temperature": self.temperature,
117
+ "top_p": self.top_p,
118
+ "documents": [],
119
+ }
120
+
121
+ response = self.session.post(self.api_endpoint, headers=self.headers, json=payload, timeout=self.timeout)
122
+
123
+ if not response.ok:
124
+ raise exceptions.FailedToGenerateResponseError(
125
+ f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}"
126
+ )
127
+
128
+ resp = response.json()
129
+ self.last_response.update(resp)
130
+ self.conversation.update_chat_history(
131
+ prompt, self.get_message(self.last_response)
132
+ )
133
+ return self.last_response
134
+
135
+ def chat(
136
+ self,
137
+ prompt: str,
138
+ stream: bool = False,
139
+ optimizer: str = None,
140
+ conversationally: bool = False,
141
+ ) -> str:
142
+ """
143
+ Generates a response from the AI21 API.
144
+ """
145
+
146
+ def for_stream():
147
+ for response in self.ask(
148
+ prompt, True, optimizer=optimizer, conversationally=conversationally
149
+ ):
150
+ yield self.get_message(response)
151
+
152
+ def for_non_stream():
153
+ return self.get_message(
154
+ self.ask(
155
+ prompt,
156
+ False,
157
+ optimizer=optimizer,
158
+ conversationally=conversationally,
159
+ )
160
+ )
161
+
162
+ return for_stream() if stream else for_non_stream()
163
+
164
+ def get_message(self, response: dict) -> str:
165
+ """
166
+ Extracts the message from the API response.
167
+ """
168
+ assert isinstance(response, dict), "Response should be of dict data-type only"
169
+ return response['choices'][0]['message']['content']
170
+
171
+ # Example usage
172
+ if __name__ == "__main__":
173
+ from rich import print
174
+ ai = AI21(api_key="api_key")
175
+ response = ai.chat(input(">>> "))
176
+ for line in response:
177
+ print(line, end="", flush=True)
@@ -30,10 +30,6 @@ import httpx
30
30
  import cloudscraper
31
31
 
32
32
  class Cloudflare(Provider):
33
- """
34
- This class provides methods for interacting with the Playground AI API
35
- (Cloudflare) in a consistent provider structure for webscout.
36
- """
37
33
 
38
34
  AVAILABLE_MODELS = [
39
35
  "@cf/llava-hf/llava-1.5-7b-hf",
@@ -0,0 +1,215 @@
1
+ import time
2
+ import uuid
3
+ from selenium import webdriver
4
+ from selenium.webdriver.chrome.options import Options
5
+ from selenium.webdriver.common.by import By
6
+ from selenium.webdriver.support import expected_conditions as EC
7
+ from selenium.webdriver.support.ui import WebDriverWait
8
+ import click
9
+ import requests
10
+ from requests import get
11
+ from uuid import uuid4
12
+ from re import findall
13
+ from requests.exceptions import RequestException
14
+ from curl_cffi.requests import get, RequestsError
15
+ import g4f
16
+ from random import randint
17
+ from PIL import Image
18
+ import io
19
+ import re
20
+ import json
21
+ import yaml
22
+ from webscout.AIutel import Optimizers
23
+ from webscout.AIutel import Conversation, Proxy
24
+ from webscout.AIutel import AwesomePrompts, sanitize_stream
25
+ from webscout.AIbase import Provider, AsyncProvider
26
+ from webscout import exceptions
27
+ from typing import Any, AsyncGenerator, Dict
28
+ import logging
29
+ import httpx
30
+ import random
31
+ proxy = Proxy()
32
+
33
+ class Editee(Provider):
34
+ """
35
+ A class to interact with the Editee.com API.
36
+ """
37
+ AVAILABLE_MODELS = [
38
+ "gemini", # it is gemini 1.5pro
39
+ "claude", # it is claude 3.5
40
+ "gpt4", # it is gpt4o
41
+ "mistrallarge", # it is mistral large2
42
+ ]
43
+
44
+ def __init__(
45
+ self,
46
+ is_conversation: bool = True,
47
+ max_tokens: int = 600,
48
+ timeout: int = 30,
49
+ intro: str = None,
50
+ filepath: str = None,
51
+ update_file: bool = True,
52
+ proxies: dict = {},
53
+ history_offset: int = 10250,
54
+ act: str = None,
55
+ model: str = "mistrallarge",
56
+ ) -> None:
57
+ """
58
+ Initializes the Editee API with given parameters.
59
+
60
+ Args:
61
+ is_conversation (bool, optional): Flag for chatting conversationally. Defaults to True.
62
+ max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600.
63
+ timeout (int, optional): Http request timeout. Defaults to 30.
64
+ intro (str, optional): Conversation introductory prompt. Defaults to None.
65
+ filepath (str, optional): Path to file containing conversation history. Defaults to None.
66
+ update_file (bool, optional): Add new prompts and responses to the file. Defaults to True.
67
+ proxies (dict, optional): Http request proxies. Defaults to {}.
68
+ history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250.
69
+ act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None.
70
+ model (str, optional): AI model to use for text generation. Defaults to "gemini".
71
+ """
72
+ if model not in self.AVAILABLE_MODELS:
73
+ raise ValueError(f"Invalid model: {model}. Choose from: {self.AVAILABLE_MODELS}")
74
+
75
+ self.session = requests.Session()
76
+ self.is_conversation = is_conversation
77
+ self.max_tokens_to_sample = max_tokens
78
+ self.api_endpoint = "https://editee.com/submit/chatgptfree"
79
+ self.stream_chunk_size = 64
80
+ self.timeout = timeout
81
+ self.last_response = {}
82
+ self.model = model
83
+ self._sessionValue = self._get_session()
84
+ self.headers = {
85
+ "authority": "editee.com",
86
+ "path": "/submit/chatgptfree",
87
+ "scheme": "https",
88
+ "accept": "application/json, text/plain, */*",
89
+ "accept-encoding": "gzip, deflate, br",
90
+ "accept-language": "ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7",
91
+ "content-type": "application/json",
92
+ "cookie": f"editeecom_session={self._sessionValue}",
93
+ "origin": "https://editee.com",
94
+ "referer": "https://editee.com/chat-gpt",
95
+ "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
96
+ "x-requested-with": "XMLHttpRequest"
97
+ }
98
+ self.__available_optimizers = (
99
+ method
100
+ for method in dir(Optimizers)
101
+ if callable(getattr(Optimizers, method)) and not method.startswith("__")
102
+ )
103
+ self.session.headers.update(self.headers)
104
+ Conversation.intro = (
105
+ AwesomePrompts().get_act(
106
+ act, raise_not_found=True, default=None, case_insensitive=True
107
+ )
108
+ if act
109
+ else intro or Conversation.intro
110
+ )
111
+ self.conversation = Conversation(
112
+ is_conversation, self.max_tokens_to_sample, filepath, update_file
113
+ )
114
+ self.conversation.history_offset = history_offset
115
+ self.session.proxies = proxies
116
+
117
+ def _get_session(self):
118
+ """Gets the editeecom_session value."""
119
+ res = proxy.get("https://editee.com/chat-gpt")
120
+ if res.cookies.get_dict():
121
+ first_cookie_name, session_value = next(iter(res.cookies.get_dict().items()))
122
+ return session_value
123
+
124
+ def ask(
125
+ self,
126
+ prompt: str,
127
+ stream: bool = False,
128
+ raw: bool = False,
129
+ optimizer: str = None,
130
+ conversationally: bool = False,
131
+ ) -> dict:
132
+ """Chat with AI
133
+
134
+ Args:
135
+ prompt (str): Prompt to be send.
136
+ stream (bool, optional): Whether to stream the response. Defaults to False.
137
+ raw (bool, optional): Whether to return the raw response. Defaults to False.
138
+ optimizer (str, optional): The name of the optimizer to use. Defaults to None.
139
+ conversationally (bool, optional): Whether to chat conversationally. Defaults to False.
140
+
141
+ Returns:
142
+ The response from the API.
143
+ """
144
+ conversation_prompt = self.conversation.gen_complete_prompt(prompt)
145
+ if optimizer:
146
+ if optimizer in self.__available_optimizers:
147
+ conversation_prompt = getattr(Optimizers, optimizer)(
148
+ conversation_prompt if conversationally else prompt
149
+ )
150
+ else:
151
+ raise Exception(
152
+ f"Optimizer is not one of {self.__available_optimizers}"
153
+ )
154
+
155
+ payload = {
156
+ "context": " ",
157
+ "selected_model": self.model,
158
+ "template_id": "",
159
+ "user_input": conversation_prompt
160
+ }
161
+
162
+ response = proxy.post(self.api_endpoint, headers=self.headers, json=payload, timeout=self.timeout)
163
+ if not response.ok:
164
+ raise exceptions.FailedToGenerateResponseError(
165
+ f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}"
166
+ )
167
+
168
+ resp = response.json()
169
+ self.last_response.update(dict(text=resp['text']))
170
+ self.conversation.update_chat_history(
171
+ prompt, self.get_message(self.last_response)
172
+ )
173
+ return self.last_response
174
+
175
+ def chat(
176
+ self,
177
+ prompt: str,
178
+ stream: bool = False,
179
+ optimizer: str = None,
180
+ conversationally: bool = False,
181
+ ) -> str:
182
+ """Generate response `str`
183
+ Args:
184
+ prompt (str): Prompt to be send.
185
+ stream (bool, optional): Flag for streaming response. Defaults to False.
186
+ optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None.
187
+ conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False.
188
+ Returns:
189
+ str: Response generated
190
+ """
191
+ return self.get_message(
192
+ self.ask(
193
+ prompt,
194
+ optimizer=optimizer,
195
+ conversationally=conversationally,
196
+ )
197
+ )
198
+
199
+ def get_message(self, response: dict) -> str:
200
+ """Retrieves message only from response
201
+
202
+ Args:
203
+ response (dict): Response generated by `self.ask`
204
+
205
+ Returns:
206
+ str: Message extracted
207
+ """
208
+ assert isinstance(response, dict), "Response should be of dict data-type only"
209
+ return response["text"]
210
+ if __name__ == '__main__':
211
+ from rich import print
212
+ ai = Editee()
213
+ response = ai.chat("tell me about india")
214
+ for chunk in response:
215
+ print(chunk, end="", flush=True)