webscout-6.4-py3-none-any.whl → webscout-6.5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of webscout might be problematic.
- webscout/AIutel.py +7 -54
- webscout/DWEBS.py +48 -26
- webscout/{YTdownloader.py → Extra/YTToolkit/YTdownloader.py} +990 -1103
- webscout/Extra/YTToolkit/__init__.py +3 -0
- webscout/{transcriber.py → Extra/YTToolkit/transcriber.py} +1 -1
- webscout/Extra/YTToolkit/ytapi/__init__.py +6 -0
- webscout/Extra/YTToolkit/ytapi/channel.py +307 -0
- webscout/Extra/YTToolkit/ytapi/errors.py +13 -0
- webscout/Extra/YTToolkit/ytapi/extras.py +45 -0
- webscout/Extra/YTToolkit/ytapi/https.py +88 -0
- webscout/Extra/YTToolkit/ytapi/patterns.py +61 -0
- webscout/Extra/YTToolkit/ytapi/playlist.py +59 -0
- webscout/Extra/YTToolkit/ytapi/pool.py +8 -0
- webscout/Extra/YTToolkit/ytapi/query.py +37 -0
- webscout/Extra/YTToolkit/ytapi/stream.py +60 -0
- webscout/Extra/YTToolkit/ytapi/utils.py +62 -0
- webscout/Extra/YTToolkit/ytapi/video.py +102 -0
- webscout/Extra/__init__.py +2 -1
- webscout/Extra/autocoder/rawdog.py +679 -680
- webscout/Extra/gguf.py +441 -441
- webscout/Extra/markdownlite/__init__.py +862 -0
- webscout/Extra/weather_ascii.py +2 -2
- webscout/Provider/PI.py +292 -221
- webscout/Provider/Perplexity.py +6 -14
- webscout/Provider/Reka.py +0 -1
- webscout/Provider/TTS/__init__.py +5 -1
- webscout/Provider/TTS/deepgram.py +183 -0
- webscout/Provider/TTS/elevenlabs.py +137 -0
- webscout/Provider/TTS/gesserit.py +151 -0
- webscout/Provider/TTS/murfai.py +139 -0
- webscout/Provider/TTS/parler.py +134 -107
- webscout/Provider/TTS/streamElements.py +360 -275
- webscout/Provider/TTS/utils.py +280 -0
- webscout/Provider/TTS/voicepod.py +116 -116
- webscout/Provider/__init__.py +146 -146
- webscout/Provider/meta.py +794 -779
- webscout/Provider/typegpt.py +1 -2
- webscout/__init__.py +24 -28
- webscout/litprinter/__init__.py +831 -830
- webscout/optimizers.py +269 -269
- webscout/prompt_manager.py +279 -279
- webscout/scout/__init__.py +11 -0
- webscout/scout/core.py +884 -0
- webscout/scout/element.py +459 -0
- webscout/scout/parsers/__init__.py +69 -0
- webscout/scout/parsers/html5lib_parser.py +172 -0
- webscout/scout/parsers/html_parser.py +236 -0
- webscout/scout/parsers/lxml_parser.py +178 -0
- webscout/scout/utils.py +38 -0
- webscout/update_checker.py +125 -125
- webscout/version.py +1 -1
- webscout/zeroart/__init__.py +55 -0
- webscout/zeroart/base.py +61 -0
- webscout/zeroart/effects.py +99 -0
- webscout/zeroart/fonts.py +816 -0
- webscout/zerodir/__init__.py +225 -0
- {webscout-6.4.dist-info → webscout-6.5.dist-info}/METADATA +12 -68
- {webscout-6.4.dist-info → webscout-6.5.dist-info}/RECORD +62 -37
- webscout/Agents/Onlinesearcher.py +0 -182
- webscout/Agents/__init__.py +0 -2
- webscout/Agents/functioncall.py +0 -248
- webscout/Bing_search.py +0 -251
- webscout/gpt4free.py +0 -666
- webscout/requestsHTMLfix.py +0 -775
- webscout/webai.py +0 -2590
- {webscout-6.4.dist-info → webscout-6.5.dist-info}/LICENSE.md +0 -0
- {webscout-6.4.dist-info → webscout-6.5.dist-info}/WHEEL +0 -0
- {webscout-6.4.dist-info → webscout-6.5.dist-info}/entry_points.txt +0 -0
- {webscout-6.4.dist-info → webscout-6.5.dist-info}/top_level.txt +0 -0
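Note the restructuring visible above: the YouTube helpers (YTdownloader.py, transcriber.py, and the new ytapi package) now live under webscout/Extra/YTToolkit/. A minimal sketch of the import update this implies for downstream code, assuming the rewritten webscout/__init__.py keeps no compatibility re-export (the diff does not show its new contents):

# webscout 6.4 layout (hypothetical downstream usage)
# from webscout import transcriber

# webscout 6.5 layout, per the file moves listed above
from webscout.Extra.YTToolkit import transcriber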
webscout/Agents/Onlinesearcher.py
DELETED

@@ -1,182 +0,0 @@
import json
import httpx
from bs4 import BeautifulSoup
from typing import List, Dict
from webscout import GoogleS, GEMINIAPI
import re
from concurrent.futures import ThreadPoolExecutor, as_completed


class WebSearchAgent:
    def __init__(self):
        self.webs = GoogleS()
        self.ai = GEMINIAPI(is_conversation=False, api_key='AIzaSyAYlT5-V0MXZwaLYpXCF1Z-Yvy_tx1jylA')

    def generate_search_queries(self, information: str, num_queries: int = 10) -> List[str]:
        prompt = f""" Task: Generate exactly {num_queries} optimal search queries based on the given information.
        Instructions:
        1. Analyze the provided information thoroughly.
        2. Identify key concepts, entities, and relationships.
        3. Formulate {num_queries} concise and specific search queries that will yield relevant and diverse results.
        4. Each query should focus on a different aspect or angle of the information.
        5. The queries should be in natural language, not in the form of keywords.
        6. Avoid unnecessary words or phrases that might limit the search results.
        7. **Important**: Return the response **ONLY** in JSON format without any additional text or code blocks.
        Your response must be in the following JSON format: {{
            "search_queries": [
                "Your first search query here",
                "Your second search query here",
                "...",
                "Your last search query here"
            ]
        }}
        Ensure that:
        - You provide exactly {num_queries} search queries.
        - Each query is unique and focuses on a different aspect of the information.
        - The queries are in plain text, suitable for a web search engine.

        Information to base the search queries on:
        {information}

        Now, generate the optimal search queries: """

        response = ""
        for chunk in self.ai.chat(prompt):
            response += chunk

        json_match = re.search(r'\{.*\}', response, re.DOTALL)
        if json_match:
            json_str = json_match.group(0)
            try:
                json_response = json.loads(json_str)
                print(json_response['search_queries'])
                return json_response["search_queries"]
            except json.JSONDecodeError:
                pass

        queries = re.findall(r'"([^"]+)"', response)
        if len(queries) >= num_queries:
            return queries[:num_queries]
        elif queries:
            return queries
        else:
            return [information]

    def search(self, information: str, region: str = 'wt-wt', safe: str = 'off',
               max_results: int = 10) -> List[Dict]:
        search_queries = self.generate_search_queries(information, num_queries=10)
        all_results = []

        for query in search_queries:
            results = []
            with self.webs as webs:
                for result in webs.search(query, region=region, safe=safe,
                                          max_results=max_results):
                    results.append(result)
            all_results.extend(results)

        return all_results

    def extract_urls(self, results: List[Dict]) -> List[str]:
        urls = [result.get('href') for result in results if result.get('href')]
        unique_urls = list(set(urls))
        return unique_urls

    def fetch_webpage(self, url: str) -> Dict[str, str]:
        try:
            with httpx.Client(timeout=120) as client:
                response = client.get(url)
                if response.status_code == 200:
                    html = response.text
                    soup = BeautifulSoup(html, 'html.parser')
                    paragraphs = soup.find_all('p')
                    text = ' '.join([p.get_text() for p in paragraphs])
                    words = text.split()
                    if len(words) > 600:
                        text = ' '.join(words[:600]) + '...'
                    return {"url": url, "content": text}
                else:
                    return {"url": url, "content": f"Failed to fetch {url}: HTTP {response.status_code}"}
        except Exception as e:
            return {"url": url, "content": f"Error fetching {url}: {str(e)}"}

    def fetch_all_webpages(self, urls: List[str], max_workers: int = 10) -> List[Dict[str, str]]:
        contents = []
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_to_url = {executor.submit(self.fetch_webpage, url): url for url in urls}
            for future in as_completed(future_to_url):
                result = future.result()
                contents.append(result)
        return contents


class OnlineSearcher:
    def __init__(self):
        self.agent = WebSearchAgent()
        self.ai = GEMINIAPI(is_conversation=False, api_key='GOOGLE GEMINI API')

    def answer_question(self, question: str) -> None:
        search_results = self.agent.search(question, max_results=10)
        urls = self.agent.extract_urls(search_results)
        webpage_contents = self.agent.fetch_all_webpages(urls)

        context = "Web search results and extracted content:\n\n"
        for i, result in enumerate(search_results, 1):
            title = result.get('title', 'No Title')
            href = result.get('href', 'No URL')
            snippet = result.get('body', 'No Snippet')
            context += f"{i}. **Title:** {title}\n **URL:** {href}\n **Snippet:** {snippet}\n\n"

        context += "Extracted webpage contents:\n"
        for i, webpage in enumerate(webpage_contents, 1):
            content = webpage['content']
            content_preview = content[:600] + '...' if len(content) > 600 else content
            context += f"{i}. **URL:** {webpage['url']}\n **Content:** {content_preview}\n\n"

        prompt = f""" Task: Provide a comprehensive, insightful, and well-structured answer to the given question based on the provided web search results and your general knowledge.
        Question: {question}
        Context: {context}
        Instructions:
        1. Carefully analyze the provided web search results and extracted content.
        2. Synthesize the information to form a coherent and comprehensive answer.
        3. If the search results contain relevant information, incorporate it into your answer seamlessly.
        4. Avoid providing irrelevant information, and do not reference "according to web page".
        5. If the search results don't contain sufficient information, clearly state this and provide the best answer based on your general knowledge.
        6. Ensure your answer is well-structured, factual, and directly addresses the question.
        7. Use clear headings, bullet points, or other formatting tools to enhance readability where appropriate.
        8. Strive for a tone and style similar to that of professional, authoritative sources like Perplexity, ensuring clarity and depth in your response.
        Your response should be informative, accurate, and properly sourced when possible. Begin your answer now: """

        for chunk in self.ai.chat(prompt, stream=True):
            print(chunk, end='', flush=True)



# Usage example
if __name__ == "__main__":
    assistant = OnlineSearcher()
    while True:
        try:
            question = input(">>> ")
            if question.lower() == 'quit':
                break
            print("=" * 50)
            assistant.answer_question(question)
            print("=" * 50)
        except KeyboardInterrupt:
            print("\nExiting.")
            break
        except Exception as e:
            print(f"An error occurred: {e}")

"""
def format_prompt(messages: Messages, add_special_tokens=False) -> str:

    if not add_special_tokens and len(messages) <= 1:
        return messages[0]["content"]
    formatted = "\n".join([
        f'{message["role"].capitalize()}: {message["content"]}'
        for message in messages
    ])
    return f"{formatted}\nAssistant:
"""
webscout/Agents/__init__.py
DELETED
webscout/Agents/functioncall.py
DELETED
@@ -1,248 +0,0 @@
from datetime import date
import json
import logging
import time
from typing import Any, Dict, List, Optional, Union, Callable
from dataclasses import dataclass
import asyncio
import requests
from jinja2 import Template
from webscout import WEBS, ChatGPTES

@dataclass
class ToolParameter:
    name: str
    type: str
    description: str
    required: bool = False

@dataclass
class Tool:
    name: str
    description: str
    parameters: Dict[str, ToolParameter]
    function: Callable
    is_async: bool = False

class ToolRegistry:
    def __init__(self):
        self._tools: Dict[str, Tool] = {}

    def register(self, tool: Tool):
        self._tools[tool.name] = tool

    def get_tool(self, name: str) -> Optional[Tool]:
        return self._tools.get(name)

    def list_tools(self) -> List[Tool]:
        return list(self._tools.values())

    def to_schema(self) -> List[Dict]:
        return [
            {
                "type": "function",
                "function": {
                    "name": tool.name,
                    "description": tool.description,
                    "parameters": {
                        "type": "object",
                        "properties": {
                            param.name: {
                                "type": param.type,
                                "description": param.description
                            } for param in tool.parameters.values()
                        },
                        "required": [
                            param.name for param in tool.parameters.values()
                            if param.required
                        ]
                    }
                }
            }
            for tool in self._tools.values()
        ]

class FunctionCallingAgent:
    SYSTEM_TEMPLATE = Template("""You are an advanced AI assistant tasked with analyzing user requests and determining the most appropriate action. You have access to the following tools:

{{ tools_description }}

Instructions:
1. Carefully analyze the user's request.
2. Determine which tools (if any) are necessary to fulfill the request.
3. You can make multiple tool calls if needed to complete the task.
4. If you decide to use tool(s), respond ONLY with a JSON array in this format:
[
    {
        "name": "tool_name",
        "arguments": {
            "param1": "value1",
            "param2": "value2"
        }
    },
    ... (more tool calls as needed)
]

5. If no tool is needed, respond with an empty array: []

The current date is {{ current_date }}. Your knowledge cutoff is {{ knowledge_cutoff }}.

User Request: {{ user_message }}

Your Response (JSON array only):""")

    def __init__(self, registry: ToolRegistry = None):
        self.ai = ChatGPTES(timeout=300, intro=None)
        self.registry = registry or ToolRegistry()
        self.knowledge_cutoff = "September 2022"
        self.logger = logging.getLogger(__name__)

    def _generate_system_message(self, user_message: str) -> str:
        tools_description = ""
        for tool in self.registry.list_tools():
            tools_description += f"- {tool.name}: {tool.description}\n"
            tools_description += " Parameters:\n"
            for param in tool.parameters.values():
                tools_description += f" - {param.name}: {param.description} ({param.type})\n"

        current_date = date.today().strftime("%B %d, %Y")
        return self.SYSTEM_TEMPLATE.render(
            tools_description=tools_description,
            current_date=current_date,
            knowledge_cutoff=self.knowledge_cutoff,
            user_message=user_message
        )

    async def _execute_tool(self, tool_call: Dict[str, Any]) -> Any:
        tool_name = tool_call.get("name")
        arguments = tool_call.get("arguments", {})

        tool = self.registry.get_tool(tool_name)
        if not tool:
            raise ValueError(f"Unknown tool: {tool_name}")

        try:
            if tool.is_async:
                return await tool.function(**arguments)
            else:
                return tool.function(**arguments)
        except Exception as e:
            self.logger.error(f"Error executing tool {tool_name}: {str(e)}")
            raise

    async def process_request(self, message: str) -> List[Any]:
        """Process a user request and execute any necessary tool calls."""
        try:
            system_message = self._generate_system_message(message)
            response = self.ai.chat(system_message)
            self.logger.debug(f"Raw AI response: {response}")

            tool_calls = self._parse_tool_calls(response)
            if not tool_calls:
                return []

            results = []
            for tool_call in tool_calls:
                result = await self._execute_tool(tool_call)
                results.append(result)
            return results

        except Exception as e:
            self.logger.error(f"Error processing request: {str(e)}")
            raise

    def _parse_tool_calls(self, response: str) -> List[Dict[str, Any]]:
        """Parse the AI response into a list of tool calls."""
        try:
            # Find the JSON array in the response
            start_idx = response.find("[")
            end_idx = response.rfind("]") + 1

            if start_idx == -1 or end_idx == -1:
                return []

            # Extract and parse the JSON array
            response_json = json.loads(response[start_idx:end_idx])

            if not isinstance(response_json, list):
                response_json = [response_json]

            return response_json

        except (json.JSONDecodeError, ValueError) as e:
            self.logger.error(f"Error parsing tool calls: {str(e)}")
            return []

# Example usage
if __name__ == "__main__":
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )

    # Create and configure the tool registry
    registry = ToolRegistry()

    def web_search(query: str) -> str:
        # Implement actual web search logic
        return f"Search results for: {query}"

    def get_user_detail(name: str, age: int) -> Dict[str, Any]:
        return {"name": name, "age": age}

    # Register tools
    registry.register(Tool(
        name="web_search",
        description="Search the web for current information",
        parameters={
            "query": ToolParameter(
                name="query",
                type="string",
                description="The search query to execute",
                required=True
            )
        },
        function=web_search
    ))

    registry.register(Tool(
        name="get_user_detail",
        description="Get user details",
        parameters={
            "name": ToolParameter(
                name="name",
                type="string",
                description="User's name",
                required=True
            ),
            "age": ToolParameter(
                name="age",
                type="integer",
                description="User's age",
                required=True
            )
        },
        function=get_user_detail
    ))

    # Create agent
    agent = FunctionCallingAgent(registry=registry)

    # Test cases
    test_messages = [
        "What's the weather like in New York today?",
        "Get user details name as John and age as 30",
        "Search for latest news about AI",
    ]

    async def run_tests():
        for message in test_messages:
            print(f"\nProcessing: {message}")
            try:
                results = await agent.process_request(message)
                print(f"Results: {results}")
            except Exception as e:
                print(f"Error: {str(e)}")

    # Run test cases
    asyncio.run(run_tests())
webscout/Bing_search.py
DELETED
@@ -1,251 +0,0 @@
from bs4 import BeautifulSoup
import requests
from typing import Dict, List, Optional, Union
from concurrent.futures import ThreadPoolExecutor, as_completed
from urllib.parse import quote, urlparse, parse_qs
import base64
import urllib3

# Disable SSL warnings
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

class BingS:
    """A Python interface for Bing search engine.

    The BingS class provides a simple interface to perform searches on Bing.com
    and extract search results programmatically.

    Basic Usage:
        >>> from webscout.Bing_search import BingS
        >>> searcher = BingS()
        >>> results = searcher.search("Python programming")
        >>> for result in results:
        ...     print(result['title'], result['href'])

    Advanced Usage:
        >>> # With custom headers and proxy
        >>> headers = {'User-Agent': 'Custom User Agent'}
        >>> proxy = 'http://proxy.example.com:8080'
        >>> searcher = BingS(headers=headers, proxy=proxy)
        >>> results = searcher.search(
        ...     "AI developments",
        ...     max_results=5,
        ...     extract_webpage_text=True,
        ...     max_extract_characters=1000
        ... )
        >>> # Access result fields
        >>> for result in results:
        ...     print(f"Title: {result['title']}")
        ...     print(f"URL: {result['href']}")
        ...     print(f"Description: {result['abstract']}")
        ...     if result['visible_text']:
        ...         print(f"Page Content: {result['visible_text'][:100]}...")

    The class supports context management protocol:
        >>> with BingS() as searcher:
        ...     results = searcher.search("Python tutorials")

    Return Dictionary Format:
        {
            'title': str,        # The title of the search result
            'href': str,         # The URL of the search result
            'abstract': str,     # Brief description or snippet
            'index': int,        # Position in search results
            'type': str,         # Type of result (always 'web')
            'visible_text': str  # Extracted webpage text (if requested)
        }
    """

    _executor: ThreadPoolExecutor = ThreadPoolExecutor(max_workers=10)

    def __init__(
        self,
        headers: Optional[Dict[str, str]] = None,
        proxy: Optional[str] = None,
        timeout: Optional[int] = 10,
    ) -> None:
        """Initialize a new BingS instance.

        Args:
            headers (Optional[Dict[str, str]]): Custom HTTP headers for requests.
                Defaults to a standard User-Agent if not provided.
            proxy (Optional[str]): Proxy URL to use for requests.
                Example: 'http://proxy.example.com:8080'
            timeout (Optional[int]): Request timeout in seconds. Defaults to 10.

        Example:
            >>> searcher = BingS(
            ...     headers={'User-Agent': 'Custom UA'},
            ...     proxy='http://proxy.example.com:8080',
            ...     timeout=15
            ... )
        """
        self.proxy: Optional[str] = proxy
        self.headers = headers if headers else {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        self.headers["Referer"] = "https://www.bing.com/"
        self.client = requests.Session()
        self.client.headers.update(self.headers)
        self.client.proxies.update({"http": self.proxy, "https": self.proxy})
        self.timeout = timeout

    def __enter__(self) -> "BingS":
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.client.close()

    def _get_url(
        self,
        method: str,
        url: str,
        params: Optional[Dict[str, str]] = None,
        data: Optional[Union[Dict[str, str], bytes]] = None,
    ) -> bytes:
        try:
            resp = self.client.request(method, url, params=params, data=data, timeout=self.timeout, verify=False)
        except Exception as ex:
            raise Exception(f"{url} {type(ex).__name__}: {ex}") from ex
        if resp.status_code == 200:
            return resp.content
        raise Exception(f"{resp.url} returned status code {resp.status_code}. {params=} {data=}")

    def extract_text_from_webpage(self, html_content, max_characters=None):
        """Extracts visible text from HTML content using BeautifulSoup."""
        soup = BeautifulSoup(html_content, "html.parser")
        # Remove unwanted tags
        for tag in soup(["script", "style", "header", "footer", "nav"]):
            tag.extract()
        # Get the remaining visible text
        visible_text = soup.get_text(separator=' ', strip=True)
        if max_characters:
            visible_text = visible_text[:max_characters]
        return visible_text

    def search(
        self,
        keywords: str,
        max_results: Optional[int] = 10,
        extract_webpage_text: bool = False,
        max_extract_characters: Optional[int] = 100,
    ) -> List[Dict[str, str]]:
        """Perform a Bing search and return results.

        Args:
            keywords (str): Search query string.
            max_results (Optional[int]): Maximum number of results to return.
                Defaults to 10.
            extract_webpage_text (bool): If True, fetches and extracts text from
                each result webpage. Defaults to False.
            max_extract_characters (Optional[int]): Maximum number of characters
                to extract from each webpage. Only used if extract_webpage_text
                is True. Defaults to 100.

        Returns:
            List[Dict[str, str]]: List of search results. Each result contains:
                - title: The title of the search result
                - href: The URL of the search result
                - abstract: Brief description or snippet
                - index: Position in search results
                - type: Type of result (always 'web')
                - visible_text: Extracted webpage text (if extract_webpage_text=True)

        Raises:
            AssertionError: If keywords is empty.
            Exception: If request fails or returns non-200 status code.

        Example:
            >>> searcher = BingS()
            >>> results = searcher.search(
            ...     "Python tutorials",
            ...     max_results=5,
            ...     extract_webpage_text=True
            ... )
            >>> for result in results:
            ...     print(f"Title: {result['title']}")
            ...     print(f"URL: {result['href']}")
            ...     print(f"Description: {result['abstract']}")
            ...     if result['visible_text']:
            ...         print(f"Content: {result['visible_text'][:100]}...")
        """
        assert keywords, "keywords is mandatory"

        results = []
        futures = []
        start = 1
        while len(results) < max_results:
            params = {
                "q": keywords,
                "first": start
            }
            futures.append(self._executor.submit(self._get_url, "GET", "https://www.bing.com/search", params=params))
            start += 10

        for future in as_completed(futures):
            try:
                resp_content = future.result()
                soup = BeautifulSoup(resp_content, "html.parser")
                result_block = soup.select('li.b_algo')

                if not result_block:
                    break

                for result in result_block:
                    try:
                        link = result.select_one('h2 a')
                        title = link.text if link else ""
                        url = link['href'] if link else ""
                        abstract = result.select_one('.b_caption p')
                        description = abstract.text if abstract else ""

                        # Remove "WEB" from the beginning of the description if it exists
                        if description.startswith("WEB"):
                            description = description[3:].strip()

                        visible_text = ""
                        if extract_webpage_text:
                            try:
                                actual_url = self._decode_bing_url(url)
                                page_content = self._get_url("GET", actual_url)
                                visible_text = self.extract_text_from_webpage(
                                    page_content, max_characters=max_extract_characters
                                )
                            except Exception as e:
                                print(f"Error extracting text from {url}: {e}")

                        results.append({
                            "title": title,
                            "href": url,
                            "abstract": description,
                            "index": len(results),
                            "type": "web",
                            "visible_text": visible_text,
                        })

                        if len(results) >= max_results:
                            return results

                    except Exception as e:
                        print(f"Error extracting result: {e}")

            except Exception as e:
                print(f"Error fetching URL: {e}")

        return results

    def _decode_bing_url(self, url):
        if 'bing.com/ck/a' in url:
            parsed_url = urlparse(url)
            query_params = parse_qs(parsed_url.query)
            if 'u' in query_params:
                encoded_url = query_params['u'][0]
                return base64.b64decode(encoded_url).decode('utf-8')
        return url

if __name__ == "__main__":
    from rich import print
    searcher = BingS()
    results = searcher.search("Python development tools", max_results=5, extract_webpage_text=True, max_extract_characters=2000)
    for result in results:
        print(result)