webscout 4.4__py3-none-any.whl → 4.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of webscout might be problematic. Click here for more details.

webscout/AIutel.py CHANGED
@@ -52,7 +52,9 @@ webai = [
52
52
  "vtlchat",
53
53
  "geminiflash",
54
54
  "geminipro",
55
- "ollama"
55
+ "ollama",
56
+ "andi",
57
+ "llama3"
56
58
  ]
57
59
 
58
60
  gpt4free_providers = [
@@ -532,6 +534,15 @@ LLM:
532
534
  ```python
533
535
  print("The essay is about...")
534
536
  ```
537
+
538
+ 3. User: Weather in qazigund
539
+
540
+ LLM:
541
+ ```python
542
+ from webscout import weather as w
543
+ weather = w.get("Qazigund")
544
+ w.print_weather(weather)
545
+ ```
535
546
  """
536
547
 
537
548
 
@@ -0,0 +1,175 @@
1
+ import json
2
+ from webscout import WEBS
3
+ import httpx
4
+ from bs4 import BeautifulSoup
5
+ from typing import List, Dict
6
+
7
class DeepInfra:
    """Small synchronous client for DeepInfra's OpenAI-style chat endpoint.

    The instance owns an ``httpx.Client``; call :meth:`close` (or use the
    instance as a context manager) to release its connections.
    """

    def __init__(
        self,
        model: str = "meta-llama/Meta-Llama-3.1-70B-Instruct",
        max_tokens: int = 8000,
        timeout: int = 120,
        system_prompt: str = "You are a helpful AI assistant.",
        proxies: dict = None,
    ):
        """Store the request settings and build the HTTP client.

        Args:
            model: Model identifier sent in every request payload.
            max_tokens: Value sent as ``max_tokens`` in the payload.
            timeout: Per-request timeout passed to ``client.post``.
            system_prompt: Default system message used by :meth:`ask`.
            proxies: Proxy mapping handed to ``httpx.Client``. ``None``
                (the default) behaves like the former ``{}`` default but
                avoids sharing one mutable dict between instances.
        """
        self.model = model
        self.max_tokens = max_tokens
        self.timeout = timeout
        self.system_prompt = system_prompt
        self.chat_endpoint = "https://api.deepinfra.com/v1/openai/chat/completions"

        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Accept-Language': 'en,fr-FR;q=0.9,fr;q=0.8,es-ES;q=0.7,es;q=0.6,en-US;q=0.5,am;q=0.4,de;q=0.3',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Content-Type': 'application/json',
            'Origin': 'https://deepinfra.com',
            'Pragma': 'no-cache',
            'Referer': 'https://deepinfra.com/',
            'Sec-Fetch-Dest': 'empty',
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Site': 'same-site',
            'X-Deepinfra-Source': 'web-embed',
            'accept': 'text/event-stream',
            'sec-ch-ua': '"Google Chrome";v="119", "Chromium";v="119", "Not?A_Brand";v="24"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"macOS"'
        }

        # A fresh dict per instance replaces the old shared ``{}`` default.
        if proxies is None:
            proxies = {}
        self.client = httpx.Client(proxies=proxies, headers=self.headers)

    def close(self) -> None:
        """Close the underlying HTTP client."""
        self.client.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        self.close()

    def ask(self, prompt: str, system_prompt: str = None) -> str:
        """Send one user prompt and return the assistant's reply text.

        Args:
            prompt: Content of the user message.
            system_prompt: Optional override for the instance-level system
                prompt; a falsy value falls back to ``self.system_prompt``.

        Returns:
            The ``content`` of the first choice in the JSON response.

        Raises:
            Exception: If the endpoint answers with a status other than 200.
        """
        payload = {
            'model': self.model,
            'messages': [
                {"role": "system", "content": system_prompt or self.system_prompt},
                {"role": "user", "content": prompt},
            ],
            'temperature': 0.7,
            'max_tokens': self.max_tokens,
            'stop': []
        }

        response = self.client.post(self.chat_endpoint, json=payload, timeout=self.timeout)
        if response.status_code != 200:
            raise Exception(f"Failed to generate response - ({response.status_code}, {response.reason_phrase}) - {response.text}")

        resp = response.json()
        return resp["choices"][0]["message"]["content"]
61
+
62
class WebSearchAgent:
    """Turns free-form information into a web search and fetches the hits.

    An LLM (``DeepInfra``) writes the search query, ``WEBS`` runs it, and the
    resulting pages are downloaded and reduced to their paragraph text.
    """

    def __init__(self, model="Qwen/Qwen2-72B-Instruct"):
        """Create the search backend and the LLM used for query generation."""
        self.webs = WEBS()
        self.deepinfra = DeepInfra(model=model)

    @staticmethod
    def _parse_json_object(text):
        """Parse the JSON object embedded in an LLM reply.

        Models frequently wrap the requested JSON in code fences or prose, so
        the span from the first ``{`` to the last ``}`` is tried first. When
        no such span exists the raw text is parsed, which raises the same
        ``json.JSONDecodeError`` a plain ``json.loads`` would.
        """
        start = text.find("{")
        end = text.rfind("}")
        if start != -1 and end > start:
            return json.loads(text[start:end + 1])
        return json.loads(text)

    def generate_search_query(self, information):
        """Ask the LLM for a single search query describing *information*.

        Returns:
            The ``search_query`` value from the model's JSON reply.

        Raises:
            json.JSONDecodeError: If the reply contains no parseable JSON.
            KeyError: If the JSON has no ``search_query`` key.
        """
        prompt = f"""
        Instructions:
        You are a smart online searcher for a large language model.
        Given information, you must create a search query to search the internet for relevant information.
        Your search query must be in the form of a json response.
        Exact json response format must be as follows:

        {{
        "search_query": "your search query"
        }}
        - You must only provide ONE search query
        - You must provide the BEST search query for the given information
        - The search query must be normal text.

        Information: {information}
        """

        response = self.deepinfra.ask(prompt)
        return self._parse_json_object(response)["search_query"]

    def search(self, information, region='wt-wt', safesearch='off', timelimit='y', max_results=5):
        """Generate a query for *information* and return the text results.

        The keyword arguments are forwarded unchanged to ``WEBS.text``.
        """
        search_query = self.generate_search_query(information)

        # NOTE(review): self.webs is entered as a context manager on every
        # call; confirm WEBS stays usable after __exit__ for repeated searches.
        with self.webs as webs:
            results = list(
                webs.text(
                    search_query,
                    region=region,
                    safesearch=safesearch,
                    timelimit=timelimit,
                    max_results=max_results,
                )
            )

        return results

    def extract_urls(self, results):
        """Return the distinct ``href`` values of *results* in first-seen order.

        Entries without an ``href`` (or with an empty one) are skipped.
        """
        hrefs = (result.get('href') for result in results)
        # dict.fromkeys removes duplicates while keeping a stable order,
        # unlike set() whose iteration order is not deterministic.
        return list(dict.fromkeys(href for href in hrefs if href))

    def fetch_webpage(self, url: str, timeout: int = 120) -> str:
        """Download *url* and return the text of its ``<p>`` elements.

        The text is capped at 4000 words (with a trailing ``...``). Failures
        are reported as a string rather than raised, so that one bad page
        does not abort a batch.

        Args:
            url: Address of the page to fetch.
            timeout: Request timeout in seconds.
        """
        try:
            # Search hits commonly redirect (http -> https, www, ...);
            # without follow_redirects those would be reported as failures.
            response = httpx.get(url, timeout=timeout, follow_redirects=True)
            if response.status_code != 200:
                # httpx responses expose ``status_code``; there is no ``status``.
                return f"Failed to fetch {url}: HTTP {response.status_code}"

            soup = BeautifulSoup(response.text, 'html.parser')

            # Extract text from <p> tags
            text = ' '.join(p.get_text() for p in soup.find_all('p'))

            # Limit the text to around 4000 words
            words = text.split()
            if len(words) > 4000:
                text = ' '.join(words[:4000]) + '...'

            return text
        except Exception as e:
            return f"Error fetching {url}: {str(e)}"

    def fetch_all_webpages(self, urls: List[str]) -> List[Dict[str, str]]:
        """Fetch every URL in order and pair it with its extracted content."""
        return [{"url": url, "content": self.fetch_webpage(url)} for url in urls]
135
+
136
class OnlineSearcher:
    """Answers questions by combining a web search with an LLM summary."""

    def __init__(self, model="meta-llama/Meta-Llama-3.1-405B-Instruct"):
        """Create the search agent and the answering LLM for *model*."""
        self.agent = WebSearchAgent(model)
        # Pass the requested model through; the literal string "model" that
        # was sent previously is not a model identifier.
        self.deepinfra = DeepInfra(model=model)

    def answer_question(self, question: str) -> str:
        """Search the web for *question* and return the LLM's answer.

        The prompt sent to the LLM lists every search result (title, URL,
        snippet) followed by the fetched page contents, each truncated to
        4000 characters, and then the question itself.
        """
        # Perform web search
        search_results = self.agent.search(question)

        # Extract URLs
        urls = self.agent.extract_urls(search_results)

        # Fetch webpage contents
        webpage_contents = self.agent.fetch_all_webpages(urls)

        # Prepare context for AI; pieces are collected and joined once.
        pieces = ["Based on the following search results and webpage contents:\n\n"]
        for i, result in enumerate(search_results, 1):
            pieces.append(
                f"{i}. Title: {result['title']}\n URL: {result['href']}\n Snippet: {result['body']}\n\n"
            )

        pieces.append("Extracted webpage contents:\n")
        # Number pages from 1 so both lists use the same convention.
        for i, webpage in enumerate(webpage_contents, 1):
            pieces.append(
                f"{i}. URL: {webpage['url']}\n Content: {webpage['content'][:4000]}...\n\n"
            )
        context = "".join(pieces)

        # Generate answer using AI
        prompt = f"{context}\n\nQuestion: {question}\n\nPlease provide a comprehensive answer to the question based on the search results and webpage contents above. Include relevant webpage URLs in your answer when appropriate. If the search results and webpage contents don't contain relevant information, please state that and provide the best answer you can based on your general knowledge. [YOUR RESPONSE WITH SOURCE LINKS ([➊](URL))"

        answer = self.deepinfra.ask(prompt)
        return answer
165
+
166
+ # Usage example
167
if __name__ == "__main__":
    # Interactive loop: answer each typed question until the user enters
    # "quit" (compared case-insensitively).
    searcher = OnlineSearcher()
    divider = "\n" + "-"*50 + "\n"
    typed = input(">>> ")
    while typed.lower() != 'quit':
        print(searcher.answer_question(typed))
        print(divider)
        typed = input(">>> ")
@@ -0,0 +1,2 @@
1
+ from .Onlinesearcher import *
2
+ from .functioncall import *
@@ -0,0 +1,126 @@
1
+ import json
2
+ import logging
3
+ from webscout import DeepInfra, WEBS
4
+
5
class FunctionCallingAgent:
    """Asks an LLM which tool to call and decodes its ``<functioncall>`` reply."""

    def __init__(self, model: str = "Qwen/Qwen2-72B-Instruct", system_prompt: str = 'You are a helpful assistant that will always answer what user wants', tools: list = None):
        """Create the LLM client and remember the tool specifications.

        Args:
            model: Model identifier handed to ``DeepInfra``.
            system_prompt: System prompt handed to ``DeepInfra``.
            tools: Tool/function descriptions embedded into every request;
                ``None`` means no tools.
        """
        self.deepinfra = DeepInfra(model=model, system_prompt=system_prompt)
        self.tools = tools if tools is not None else []

    def function_call_handler(self, message_text: str):
        """Handles function calls based on the provided message text

        Args:
            message_text (str): The input message text from the user.

        Returns:
            dict: The extracted function call and arguments, or
            ``{"error": <message>}`` when no JSON object could be decoded
            from the model's reply.
        """
        system_message = f'[SYSTEM]You are a helpful assistant. You have access to the following functions: \n {str(self.tools)}\n\nTo use these functions respond with:\n<functioncall> {{ "name": "function_name", "arguments": {{ "arg_1": "value_1", "arg_2": "value_2", ... }} }} </functioncall> [USER] {message_text}'

        response = self.deepinfra.chat(system_message)

        try:
            # Extract the JSON-like part of the response. The indices are
            # validated before any offset is applied, so a missing brace is
            # actually detected.
            start_idx = response.find("{")
            end_idx = response.rfind("}")
            if start_idx == -1 or end_idx < start_idx:
                raise ValueError("JSON-like structure not found in the response")

            response_json_str = response[start_idx:end_idx + 1].strip()
            try:
                # Valid JSON is taken as-is so apostrophes inside values
                # (e.g. "what's new") are not rewritten into quotes.
                response_data = json.loads(response_json_str)
            except json.JSONDecodeError:
                # Fallback for replies that use single-quoted keys/values.
                response_data = json.loads(response_json_str.replace("'", '"'))
        except (ValueError, json.JSONDecodeError) as e:
            return {"error": str(e)}

        return response_data

    def execute_function(self, function_call_data: dict) -> str:
        """Executes the specified function with the provided arguments.

        Args:
            function_call_data (dict): A dictionary containing the function name and arguments.
                ``arguments`` may be a dict (the format the prompt asks the
                model for) or a JSON string encoding one.

        Returns:
            str: ``"Invalid arguments format."`` when the arguments cannot be
            read as a JSON object; otherwise a message stating that the
            function is not implemented, because no tool is wired up in
            this class yet.
        """
        function_name = function_call_data.get("name")
        arguments = function_call_data.get("arguments", "{}")  # Default to empty dict if not present

        # Normalise the arguments into a dictionary.
        if isinstance(arguments, dict):
            arguments_dict = arguments
        else:
            try:
                arguments_dict = json.loads(arguments)
            except (TypeError, json.JSONDecodeError):
                return "Invalid arguments format."
            if not isinstance(arguments_dict, dict):
                return "Invalid arguments format."

        # TODO: dispatch on function_name (e.g. "web_search" using
        # arguments_dict["query"]) once tool implementations exist.
        return f"Function '{function_name}' is not yet implemented."
75
+
76
+ # Example usage
77
if __name__ == "__main__":
    # Demo: describe two tools, ask the agent which one fits a message and
    # run the (currently placeholder) executor when decoding succeeded.
    user_detail_tool = {
        "type": "function",
        "function": {
            "name": "UserDetail",
            "parameters": {
                "type": "object",
                "title": "UserDetail",
                "properties": {
                    "name": {"title": "Name", "type": "string"},
                    "age": {"title": "Age", "type": "integer"},
                },
                "required": ["name", "age"],
            },
        },
    }
    web_search_tool = {
        "type": "function",
        "function": {
            "name": "web_search",
            "description": "Search query on google",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {"type": "string", "description": "web search query"},
                },
                "required": ["query"],
            },
        },
    }
    tools = [user_detail_tool, web_search_tool]

    agent = FunctionCallingAgent(tools=tools)
    message = "tell me about HelpingAI flash"
    function_call_data = agent.function_call_handler(message)
    print(f"Function Call Data: {function_call_data}")

    if not ("error" in function_call_data):
        result = agent.execute_function(function_call_data)
webscout/Extra/gguf.py CHANGED
@@ -153,7 +153,7 @@ huggingface-cli download "$MODEL_ID" --local-dir "./${MODEL_NAME}" --local-dir-u
153
153
  # Convert to fp16
154
154
  FP16="${MODEL_NAME}/${MODEL_NAME,,}.fp16.bin"
155
155
  echo "Converting the model to fp16..."
156
- python3 llama.cpp/convert-hf-to-gguf.py "$MODEL_NAME" --outtype f16 --outfile "$FP16"
156
+ python3 llama.cpp/convert_hf_to_gguf.py "$MODEL_NAME" --outtype f16 --outfile "$FP16"
157
157
 
158
158
  # Quantize the model
159
159
  echo "Quantizing the model..."
@@ -0,0 +1,275 @@
1
+ import time
2
+ import uuid
3
+ from selenium import webdriver
4
+ from selenium.webdriver.chrome.options import Options
5
+ from selenium.webdriver.common.by import By
6
+ from selenium.webdriver.support import expected_conditions as EC
7
+ from selenium.webdriver.support.ui import WebDriverWait
8
+ import click
9
+ import requests
10
+ from requests import get
11
+ from uuid import uuid4
12
+ from re import findall
13
+ from requests.exceptions import RequestException
14
+ from curl_cffi.requests import get, RequestsError
15
+ import g4f
16
+ from random import randint
17
+ from PIL import Image
18
+ import io
19
+ import re
20
+ import json
21
+ import yaml
22
+ from webscout.AIutel import Optimizers
23
+ from webscout.AIutel import Conversation
24
+ from webscout.AIutel import AwesomePrompts, sanitize_stream
25
+ from webscout.AIbase import Provider, AsyncProvider
26
+ from webscout import exceptions
27
+ from typing import Any, AsyncGenerator, Dict
28
+ import logging
29
+ import httpx
30
+ from webscout import WEBS
31
+ from rich import print
32
+
33
class AndiSearch(Provider):
    """Provider that feeds web search results to Andi's streaming write API."""

    def __init__(
        self,
        is_conversation: bool = True,
        max_tokens: int = 600,
        timeout: int = 30,
        intro: str = None,
        filepath: str = None,
        update_file: bool = True,
        proxies: dict = None,
        history_offset: int = 10250,
        act: str = None,
    ):
        """Instantiates AndiSearch

        Args:
            is_conversation (bool, optional): Flag for chatting conversationally. Defaults to True.
            max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600.
            timeout (int, optional): Http request timeout. Defaults to 30.
            intro (str, optional): Conversation introductory prompt. Defaults to None.
            filepath (str, optional): Path to file containing conversation history. Defaults to None.
            update_file (bool, optional): Add new prompts and responses to the file. Defaults to True.
            proxies (dict, optional): Http request proxies. Defaults to None, which is treated as ``{}``.
            history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250.
            act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None.
        """
        self.session = requests.Session()
        self.is_conversation = is_conversation
        self.max_tokens_to_sample = max_tokens
        self.chat_endpoint = "https://write.andisearch.com/v1/write_streaming"
        self.stream_chunk_size = 64
        self.timeout = timeout
        self.last_response = {}
        self.headers = {
            "accept": "text/event-stream",
            "accept-encoding": "gzip, deflate, br, zstd",
            "accept-language": "en-US,en;q=0.9,en-IN;q=0.8",
            "andi-auth-key": "andi-summarizer",
            "andi-origin": "x-andi-origin",
            "authorization": str(uuid4()),
            "content-type": "application/json",
            "dnt": "1",
            "origin": "https://andisearch.com",
            "priority": "u=1, i",
            "sec-ch-ua": '"Not)A;Brand";v="99", "Microsoft Edge";v="127", "Chromium";v="127"',
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": '"Windows"',
            "sec-fetch-dest": "empty",
            "sec-fetch-mode": "cors",
            "sec-fetch-site": "same-site",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36 Edg/127.0.0.0",
            "x-amz-date": "20240730T031106Z",
            "x-amz-security-token": str(uuid4()),
        }

        # Materialised as a tuple: a generator here would be exhausted by
        # the first ``in`` test, rejecting every later optimizer.
        self.__available_optimizers = tuple(
            method
            for method in dir(Optimizers)
            if callable(getattr(Optimizers, method)) and not method.startswith("__")
        )
        self.session.headers.update(self.headers)
        Conversation.intro = (
            AwesomePrompts().get_act(
                act, raise_not_found=True, default=None, case_insensitive=True
            )
            if act
            else intro or Conversation.intro
        )
        self.conversation = Conversation(
            is_conversation, self.max_tokens_to_sample, filepath, update_file
        )
        self.conversation.history_offset = history_offset
        # A fresh dict per instance replaces the old shared ``{}`` default.
        self.session.proxies = proxies if proxies is not None else {}

    @staticmethod
    def _domain_of(url: str) -> str:
        """Return the host part of *url*, or ``""`` when it has none."""
        from urllib.parse import urlsplit

        return urlsplit(url).netloc

    def ask(
        self,
        prompt: str,
        stream: bool = False,
        raw: bool = False,
        optimizer: str = None,
        conversationally: bool = False,
    ) -> dict:
        """Chat with AI

        The prompt is first run as a web search (up to 7 results); those
        results are posted to the Andi endpoint, whose streamed lines are
        accumulated into the reply text.

        Args:
            prompt (str): Prompt to be send.
            stream (bool, optional): Flag for streaming response. Defaults to False.
            raw (bool, optional): Stream back raw response as received. Defaults to False.
            optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None.
            conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False.
        Returns:
            dict : When ``stream`` is False, the accumulated reply:
            ```json
            {
                "text": "How may I assist you today?"
            }
            ```
            When ``stream`` is True, a generator yielding such a dict after
            every received line (or the raw line itself when ``raw`` is True).

        Raises:
            Exception: If ``optimizer`` is not an available optimizer name.
            exceptions.FailedToGenerateResponseError: If the endpoint
                responds with a non-OK status.
        """
        # NOTE(review): the generated conversation prompt is not part of the
        # payload below, which carries only the search query and results.
        conversation_prompt = self.conversation.gen_complete_prompt(prompt)
        if optimizer:
            if optimizer in self.__available_optimizers:
                conversation_prompt = getattr(Optimizers, optimizer)(
                    conversation_prompt if conversationally else prompt
                )
            else:
                raise Exception(
                    f"Optimizer is not one of {self.__available_optimizers}"
                )

        # Initialize the webscout instance
        webs = WEBS()

        # Fetch search results
        search_query = prompt
        search_results = webs.text(search_query, max_results=7)

        # Format the search results into the required serp payload structure
        payload = {
            "query": search_query,
            "serp": {
                "results_type": "Search",
                "answer": "",
                "type": "navigation",
                "title": "",
                "description": "",
                "image": "",
                "link": "",
                "source": "liftndrift.com",
                "engine": "andi-b",
                "results": [
                    {
                        "title": result["title"],
                        "link": result["href"],
                        "desc": result["body"],
                        "image": "",
                        "type": "website",
                        # Domain name; tolerant of links without a scheme.
                        "source": self._domain_of(result["href"]),
                    }
                    for result in search_results
                ]
            }
        }
        self.session.headers.update(self.headers)

        def for_stream():
            response = self.session.post(
                self.chat_endpoint, json=payload, stream=True, timeout=self.timeout
            )
            if not response.ok:
                raise exceptions.FailedToGenerateResponseError(
                    f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}"
                )

            # Start from a fresh dict so an empty stream cannot report the
            # previous call's text and earlier return values are not mutated.
            self.last_response = dict(text="")
            streaming_text = ""
            for value in response.iter_lines(
                decode_unicode=True,
                chunk_size=self.stream_chunk_size,
                delimiter="\n",
            ):
                if not value:
                    continue
                streaming_text += value + ("\n" if stream else "")
                resp = dict(text=streaming_text)
                self.last_response.update(resp)
                yield value if raw else resp
            self.conversation.update_chat_history(
                prompt, self.get_message(self.last_response)
            )

        def for_non_stream():
            # Drain the stream; the result is kept in self.last_response.
            for _ in for_stream():
                pass
            return self.last_response

        return for_stream() if stream else for_non_stream()

    def chat(
        self,
        prompt: str,
        stream: bool = False,
        optimizer: str = None,
        conversationally: bool = False,
    ) -> str:
        """Generate response `str`
        Args:
            prompt (str): Prompt to be send.
            stream (bool, optional): Flag for streaming response. Defaults to False.
            optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None.
            conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False.
        Returns:
            str: Response generated (a generator of strings when ``stream`` is True)
        """

        def for_stream():
            for response in self.ask(
                prompt, True, optimizer=optimizer, conversationally=conversationally
            ):
                yield self.get_message(response)

        def for_non_stream():
            return self.get_message(
                self.ask(
                    prompt,
                    False,
                    optimizer=optimizer,
                    conversationally=conversationally,
                )
            )

        return for_stream() if stream else for_non_stream()

    def get_message(self, response: dict) -> str:
        """Retrieves message only from response

        Args:
            response (dict): Response generated by `self.ask`

        Returns:
            str: Message extracted
        """
        assert isinstance(response, dict), "Response should be of dict data-type only"
        return response["text"]