webscout-1.2.3.tar.gz → webscout-1.2.5.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of webscout has been flagged as potentially problematic.
- {webscout-1.2.3 → webscout-1.2.5}/PKG-INFO +21 -1
- {webscout-1.2.3 → webscout-1.2.5}/README.md +19 -0
- {webscout-1.2.3 → webscout-1.2.5}/setup.py +2 -1
- {webscout-1.2.3 → webscout-1.2.5}/webscout/AI.py +204 -0
- {webscout-1.2.3 → webscout-1.2.5}/webscout/AIutel.py +1 -1
- {webscout-1.2.3 → webscout-1.2.5}/webscout/DWEBS.py +197 -197
- {webscout-1.2.3 → webscout-1.2.5}/webscout/__init__.py +3 -1
- webscout-1.2.5/webscout/version.py +2 -0
- {webscout-1.2.3 → webscout-1.2.5}/webscout.egg-info/PKG-INFO +21 -1
- {webscout-1.2.3 → webscout-1.2.5}/webscout.egg-info/requires.txt +1 -0
- webscout-1.2.3/webscout/version.py +0 -2
- {webscout-1.2.3 → webscout-1.2.5}/DeepWEBS/__init__.py +0 -0
- {webscout-1.2.3 → webscout-1.2.5}/DeepWEBS/documents/__init__.py +0 -0
- {webscout-1.2.3 → webscout-1.2.5}/DeepWEBS/documents/query_results_extractor.py +0 -0
- {webscout-1.2.3 → webscout-1.2.5}/DeepWEBS/documents/webpage_content_extractor.py +0 -0
- {webscout-1.2.3 → webscout-1.2.5}/DeepWEBS/networks/__init__.py +0 -0
- {webscout-1.2.3 → webscout-1.2.5}/DeepWEBS/networks/filepath_converter.py +0 -0
- {webscout-1.2.3 → webscout-1.2.5}/DeepWEBS/networks/google_searcher.py +0 -0
- {webscout-1.2.3 → webscout-1.2.5}/DeepWEBS/networks/network_configs.py +0 -0
- {webscout-1.2.3 → webscout-1.2.5}/DeepWEBS/networks/webpage_fetcher.py +0 -0
- {webscout-1.2.3 → webscout-1.2.5}/DeepWEBS/utilsdw/__init__.py +0 -0
- {webscout-1.2.3 → webscout-1.2.5}/DeepWEBS/utilsdw/enver.py +0 -0
- {webscout-1.2.3 → webscout-1.2.5}/DeepWEBS/utilsdw/logger.py +0 -0
- {webscout-1.2.3 → webscout-1.2.5}/LICENSE.md +0 -0
- {webscout-1.2.3 → webscout-1.2.5}/setup.cfg +0 -0
- {webscout-1.2.3 → webscout-1.2.5}/webscout/AIbase.py +0 -0
- {webscout-1.2.3 → webscout-1.2.5}/webscout/HelpingAI.py +0 -0
- {webscout-1.2.3 → webscout-1.2.5}/webscout/LLM.py +0 -0
- {webscout-1.2.3 → webscout-1.2.5}/webscout/__main__.py +0 -0
- {webscout-1.2.3 → webscout-1.2.5}/webscout/cli.py +0 -0
- {webscout-1.2.3 → webscout-1.2.5}/webscout/exceptions.py +0 -0
- {webscout-1.2.3 → webscout-1.2.5}/webscout/models.py +0 -0
- {webscout-1.2.3 → webscout-1.2.5}/webscout/utils.py +0 -0
- {webscout-1.2.3 → webscout-1.2.5}/webscout/webscout_search.py +0 -0
- {webscout-1.2.3 → webscout-1.2.5}/webscout/webscout_search_async.py +0 -0
- {webscout-1.2.3 → webscout-1.2.5}/webscout.egg-info/SOURCES.txt +0 -0
- {webscout-1.2.3 → webscout-1.2.5}/webscout.egg-info/dependency_links.txt +0 -0
- {webscout-1.2.3 → webscout-1.2.5}/webscout.egg-info/entry_points.txt +0 -0
- {webscout-1.2.3 → webscout-1.2.5}/webscout.egg-info/top_level.txt +0 -0
{webscout-1.2.3 → webscout-1.2.5}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: webscout
-Version: 1.2.3
+Version: 1.2.5
 Summary: Search for words, documents, images, videos, news, maps and text translation using the Google, DuckDuckGo.com, yep.com, phind.com, you.com, etc Also containes AI models
 Author: OEvortex
 Author-email: helpingai5@gmail.com
@@ -45,6 +45,7 @@ Requires-Dist: sse_starlette
 Requires-Dist: termcolor
 Requires-Dist: tiktoken
 Requires-Dist: tldextract
+Requires-Dist: orjson
 Provides-Extra: dev
 Requires-Dist: ruff>=0.1.6; extra == "dev"
 Requires-Dist: pytest>=7.4.2; extra == "dev"
@@ -92,6 +93,7 @@ Also containes AI models that you can use
 - [6. `BlackBox` - Search/chat With BlackBox](#6-blackbox---searchchat-with-blackbox)
 - [7. `PERPLEXITY` - Search With PERPLEXITY](#7-perplexity---search-with-perplexity)
 - [8. `OpenGPT` - chat With OPENGPT](#8-opengpt---chat-with-opengpt)
+- [9. `KOBOLDIA` -](#9-koboldia--)
 - [usage of special .LLM file from webscout (webscout.LLM)](#usage-of-special-llm-file-from-webscout-webscoutllm)
 - [`LLM`](#llm)
 
@@ -604,6 +606,24 @@ opengpt = OPENGPT(is_conversation=True, max_tokens=8000, timeout=30)
 prompt = "tell me about india"
 response_str = opengpt.chat(prompt)
 print(response_str)
+```
+### 9. `KOBOLDIA` -
+```python
+from webscout.AI import KOBOLDAI
+
+# Instantiate the KOBOLDAI class with default parameters
+koboldai = KOBOLDAI()
+
+# Define a prompt to send to the AI
+prompt = "What is the capital of France?"
+
+# Use the 'ask' method to get a response from the AI
+response = koboldai.ask(prompt)
+
+# Extract and print the message from the response
+message = koboldai.get_message(response)
+print(message)
+
 ```
 
 ## usage of special .LLM file from webscout (webscout.LLM)
{webscout-1.2.3 → webscout-1.2.5}/README.md
@@ -41,6 +41,7 @@ Also containes AI models that you can use
 - [6. `BlackBox` - Search/chat With BlackBox](#6-blackbox---searchchat-with-blackbox)
 - [7. `PERPLEXITY` - Search With PERPLEXITY](#7-perplexity---search-with-perplexity)
 - [8. `OpenGPT` - chat With OPENGPT](#8-opengpt---chat-with-opengpt)
+- [9. `KOBOLDIA` -](#9-koboldia--)
 - [usage of special .LLM file from webscout (webscout.LLM)](#usage-of-special-llm-file-from-webscout-webscoutllm)
 - [`LLM`](#llm)
 
@@ -553,6 +554,24 @@ opengpt = OPENGPT(is_conversation=True, max_tokens=8000, timeout=30)
 prompt = "tell me about india"
 response_str = opengpt.chat(prompt)
 print(response_str)
+```
+### 9. `KOBOLDIA` -
+```python
+from webscout.AI import KOBOLDAI
+
+# Instantiate the KOBOLDAI class with default parameters
+koboldai = KOBOLDAI()
+
+# Define a prompt to send to the AI
+prompt = "What is the capital of France?"
+
+# Use the 'ask' method to get a response from the AI
+response = koboldai.ask(prompt)
+
+# Extract and print the message from the response
+message = koboldai.get_message(response)
+print(message)
+
 ```
 
 ## usage of special .LLM file from webscout (webscout.LLM)
{webscout-1.2.3 → webscout-1.2.5}/setup.py
@@ -9,7 +9,7 @@ with open("README.md", encoding="utf-8") as f:
 
 setup(
     name="webscout",
-    version="1.2.3",
+    version="1.2.5",
     description="Search for words, documents, images, videos, news, maps and text translation using the Google, DuckDuckGo.com, yep.com, phind.com, you.com, etc Also containes AI models",
     long_description=README,
     long_description_content_type="text/markdown",
@@ -53,6 +53,7 @@ setup(
         "termcolor",
         "tiktoken",
         "tldextract",
+        "orjson",
     ],
     entry_points={
         "console_scripts": [
{webscout-1.2.3 → webscout-1.2.5}/webscout/AI.py
@@ -21,10 +21,200 @@ import yaml
 from webscout.AIutel import Optimizers
 from webscout.AIutel import Conversation
 from webscout.AIutel import AwesomePrompts
+from webscout.AIbase import Provider
 from Helpingai_T2 import Perplexity
 from typing import Any
 import logging
 #------------------------------------------------------OpenGPT-----------------------------------------------------------
+class KOBOLDAI(Provider):
+    def __init__(
+        self,
+        is_conversation: bool = True,
+        max_tokens: int = 600,
+        temperature: float = 1,
+        top_p: float = 1,
+        timeout: int = 30,
+        intro: str = None,
+        filepath: str = None,
+        update_file: bool = True,
+        proxies: dict = {},
+        history_offset: int = 10250,
+        act: str = None,
+    ):
+        """Instantiate TGPT
+
+        Args:
+            is_conversation (str, optional): Flag for chatting conversationally. Defaults to True.
+            max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600.
+            temperature (float, optional): Charge of the generated text's randomness. Defaults to 0.2.
+            top_p (float, optional): Sampling threshold during inference time. Defaults to 0.999.
+            timeout (int, optional): Http requesting timeout. Defaults to 30
+            intro (str, optional): Conversation introductory prompt. Defaults to `Conversation.intro`.
+            filepath (str, optional): Path to file containing conversation history. Defaults to None.
+            update_file (bool, optional): Add new prompts and responses to the file. Defaults to True.
+            proxies (dict, optional) : Http reqiuest proxies (socks). Defaults to {}.
+            history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250.
+            act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None.
+        """
+        self.session = requests.Session()
+        self.is_conversation = is_conversation
+        self.max_tokens_to_sample = max_tokens
+        self.temperature = temperature
+        self.top_p = top_p
+        self.chat_endpoint = (
+            "https://koboldai-koboldcpp-tiefighter.hf.space/api/extra/generate/stream"
+        )
+        self.stream_chunk_size = 64
+        self.timeout = timeout
+        self.last_response = {}
+        self.headers = {
+            "Content-Type": "application/json",
+            "Accept": "application/json",
+        }
+
+        self.__available_optimizers = (
+            method
+            for method in dir(Optimizers)
+            if callable(getattr(Optimizers, method)) and not method.startswith("__")
+        )
+        self.session.headers.update(self.headers)
+        Conversation.intro = (
+            AwesomePrompts().get_act(
+                act, raise_not_found=True, default=None, case_insensitive=True
+            )
+            if act
+            else intro or Conversation.intro
+        )
+        self.conversation = Conversation(
+            is_conversation, self.max_tokens_to_sample, filepath, update_file
+        )
+        self.conversation.history_offset = history_offset
+        self.session.proxies = proxies
+
+    def ask(
+        self,
+        prompt: str,
+        stream: bool = False,
+        raw: bool = False,
+        optimizer: str = None,
+        conversationally: bool = False,
+    ) -> dict:
+        """Chat with AI
+
+        Args:
+            prompt (str): Prompt to be send.
+            stream (bool, optional): Flag for streaming response. Defaults to False.
+            raw (bool, optional): Stream back raw response as received. Defaults to False.
+            optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None.
+            conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False.
+        Returns:
+            dict : {}
+        ```json
+        {
+            "token" : "How may I assist you today?"
+        }
+        ```
+        """
+        conversation_prompt = self.conversation.gen_complete_prompt(prompt)
+        if optimizer:
+            if optimizer in self.__available_optimizers:
+                conversation_prompt = getattr(Optimizers, optimizer)(
+                    conversation_prompt if conversationally else prompt
+                )
+            else:
+                raise Exception(
+                    f"Optimizer is not one of {self.__available_optimizers}"
+                )
+
+        self.session.headers.update(self.headers)
+        payload = {
+            "prompt": conversation_prompt,
+            "temperature": self.temperature,
+            "top_p": self.top_p,
+        }
+
+        def for_stream():
+            response = self.session.post(
+                self.chat_endpoint, json=payload, stream=True, timeout=self.timeout
+            )
+            if not response.ok:
+                raise Exception(
+                    f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}"
+                )
+
+            message_load = ""
+            for value in response.iter_lines(
+                decode_unicode=True,
+                delimiter="" if raw else "event: message\ndata:",
+                chunk_size=self.stream_chunk_size,
+            ):
+                try:
+                    resp = json.loads(value)
+                    message_load += self.get_message(resp)
+                    resp["token"] = message_load
+                    self.last_response.update(resp)
+                    yield value if raw else resp
+                except json.decoder.JSONDecodeError:
+                    pass
+            self.conversation.update_chat_history(
+                prompt, self.get_message(self.last_response)
+            )
+
+        def for_non_stream():
+            # let's make use of stream
+            for _ in for_stream():
+                pass
+            return self.last_response
+
+        return for_stream() if stream else for_non_stream()
+
+    def chat(
+        self,
+        prompt: str,
+        stream: bool = False,
+        optimizer: str = None,
+        conversationally: bool = False,
+    ) -> str:
+        """Generate response `str`
+        Args:
+            prompt (str): Prompt to be send.
+            stream (bool, optional): Flag for streaming response. Defaults to False.
+            optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None.
+            conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False.
+        Returns:
+            str: Response generated
+        """
+
+        def for_stream():
+            for response in self.ask(
+                prompt, True, optimizer=optimizer, conversationally=conversationally
+            ):
+                yield self.get_message(response)
+
+        def for_non_stream():
+            return self.get_message(
+                self.ask(
+                    prompt,
+                    False,
+                    optimizer=optimizer,
+                    conversationally=conversationally,
+                )
+            )
+
+        return for_stream() if stream else for_non_stream()
+
+    def get_message(self, response: dict) -> str:
+        """Retrieves message only from response
+
+        Args:
+            response (dict): Response generated by `self.ask`
+
+        Returns:
+            str: Message extracted
+        """
+        assert isinstance(response, dict), "Response should be of dict data-type only"
+        return response.get("token")
+#------------------------------------------------------OpenGPT-----------------------------------------------------------
 class OPENGPT:
     def __init__(
         self,
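A note on the streaming contract in the class above, since it is easy to misread: each dict yielded by `ask(stream=True)` carries the message accumulated so far in its `"token"` field (`message_load` keeps growing), so `chat(stream=True)` yields progressively longer strings rather than deltas. A minimal usage sketch under that reading, assuming the hard-coded KoboldCpp endpoint is reachable:

```python
from webscout.AI import KOBOLDAI

koboldai = KOBOLDAI(max_tokens=600, timeout=30)

# Each value yielded by chat(stream=True) is the full message so far,
# not a delta, so keep only the last value (or rewrite the line in place).
final = ""
for accumulated in koboldai.chat("Name three rivers in India", stream=True):
    final = accumulated
print(final)
```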
@@ -1237,5 +1427,19 @@ def opengpt(prompt, stream):
     else:
         response_str = opengpt.chat(prompt)
         print(response_str)
+
+@cli.command()
+@click.option('--prompt', prompt='Enter your prompt', help='The prompt to send.')
+@click.option('--stream', is_flag=True, help='Flag for streaming response.')
+@click.option('--raw', is_flag=True, help='Stream back raw response as received.')
+@click.option('--optimizer', type=str, help='Prompt optimizer name.')
+@click.option('--conversationally', is_flag=True, help='Chat conversationally when using optimizer.')
+def koboldai_cli(prompt, stream, raw, optimizer, conversationally):
+    """Chat with KOBOLDAI using the provided prompt."""
+    koboldai_instance = KOBOLDAI()  # Initialize a KOBOLDAI instance
+    response = koboldai_instance.ask(prompt, stream, raw, optimizer, conversationally)
+    processed_response = koboldai_instance.get_message(response)  # Process the response
+    print(processed_response)
+
 if __name__ == '__main__':
     cli()
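On invoking the new subcommand: assuming click ≥ 7, which derives command names from function names by replacing underscores with dashes, `koboldai_cli` would register as `koboldai-cli`. A hedged in-process sketch using click's test runner (the `cli` group itself is defined elsewhere in AI.py):

```python
from click.testing import CliRunner
from webscout.AI import cli  # the click group run by AI.py's __main__ block

runner = CliRunner()
# "koboldai-cli" is an assumption based on click >= 7's default naming.
result = runner.invoke(cli, ["koboldai-cli", "--prompt", "What is the capital of France?"])
print(result.output)
```

As written, the wrapper passes `stream` positionally into `ask()`, so `--stream` would hand `get_message()` a generator rather than a dict and trip its `isinstance` assert.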
{webscout-1.2.3 → webscout-1.2.5}/webscout/DWEBS.py
@@ -1,197 +1,197 @@
All 197 lines of DWEBS.py are shown as removed and re-added with identical text, which points to a whitespace- or line-ending-only change. The file content, shown once:

from pydantic import BaseModel, Field
from typing import Union

from DeepWEBS.utilsdw.logger import logger
from DeepWEBS.networks.google_searcher import GoogleSearcher
from DeepWEBS.networks.webpage_fetcher import BatchWebpageFetcher
from DeepWEBS.documents.query_results_extractor import QueryResultsExtractor
from DeepWEBS.documents.webpage_content_extractor import BatchWebpageContentExtractor
from DeepWEBS.utilsdw.logger import logger
import argparse

class DeepWEBS:
    def __init__(self):
        pass

    class DeepSearch(BaseModel):
        queries: list = Field(
            default=[""],
            description="(list[str]) Queries to search",
        )
        result_num: int = Field(
            default=10,
            description="(int) Number of search results",
        )
        safe: bool = Field(
            default=False,
            description="(bool) Enable SafeSearch",
        )
        types: list = Field(
            default=["web"],
            description="(list[str]) Types of search results: `web`, `image`, `videos`, `news`",
        )
        extract_webpage: bool = Field(
            default=False,
            description="(bool) Enable extracting main text contents from webpage, will add `text` filed in each `query_result` dict",
        )
        overwrite_query_html: bool = Field(
            default=False,
            description="(bool) Overwrite HTML file of query results",
        )
        overwrite_webpage_html: bool = Field(
            default=False,
            description="(bool) Overwrite HTML files of webpages from query results",
        )

    def queries_to_search_results(self, item: DeepSearch):
        google_searcher = GoogleSearcher()
        queries_search_results = []
        for query in item.queries:
            query_results_extractor = QueryResultsExtractor()
            if not query.strip():
                continue
            try:
                query_html_path = google_searcher.search(
                    query=query,
                    result_num=item.result_num,
                    safe=item.safe,
                    overwrite=item.overwrite_query_html,
                )
            except Exception as e:
                logger.error(f"Failed to search for query '{query}': {e}")
                continue

            try:
                query_search_results = query_results_extractor.extract(query_html_path)
            except Exception as e:
                logger.error(f"Failed to extract search results for query '{query}': {e}")
                continue

            queries_search_results.append(query_search_results)
        logger.note(queries_search_results)

        if item.extract_webpage:
            queries_search_results = self.extract_webpages(
                queries_search_results,
                overwrite_webpage_html=item.overwrite_webpage_html,
            )
        return queries_search_results

    def extract_webpages(self, queries_search_results, overwrite_webpage_html=False):
        for query_idx, query_search_results in enumerate(queries_search_results):
            try:
                # Fetch webpages with urls
                batch_webpage_fetcher = BatchWebpageFetcher()
                urls = [
                    query_result["url"]
                    for query_result in query_search_results["query_results"]
                ]
                url_and_html_path_list = batch_webpage_fetcher.fetch(
                    urls,
                    overwrite=overwrite_webpage_html,
                    output_parent=query_search_results["query"],
                )
            except Exception as e:
                logger.error(f"Failed to fetch webpages for query '{query_search_results['query']}': {e}")
                continue

            # Extract webpage contents from htmls
            html_paths = [
                str(url_and_html_path["html_path"])
                for url_and_html_path in url_and_html_path_list
            ]
            batch_webpage_content_extractor = BatchWebpageContentExtractor()
            try:
                html_path_and_extracted_content_list = (
                    batch_webpage_content_extractor.extract(html_paths)
                )
            except Exception as e:
                logger.error(f"Failed to extract webpage contents for query '{query_search_results['query']}': {e}")
                continue

            # Build the map of url to extracted_content
            html_path_to_url_dict = {
                str(url_and_html_path["html_path"]): url_and_html_path["url"]
                for url_and_html_path in url_and_html_path_list
            }
            url_to_extracted_content_dict = {
                html_path_to_url_dict[
                    html_path_and_extracted_content["html_path"]
                ]: html_path_and_extracted_content["extracted_content"]
                for html_path_and_extracted_content in html_path_and_extracted_content_list
            }

            # Write extracted contents (as 'text' field) to query_search_results
            for query_result_idx, query_result in enumerate(
                query_search_results["query_results"]
            ):
                url = query_result["url"]
                extracted_content = url_to_extracted_content_dict.get(url, "")
                queries_search_results[query_idx]["query_results"][query_result_idx][
                    "text"
                ] = extracted_content

        return queries_search_results


class ArgParser(argparse.ArgumentParser):
    def __init__(self, *args, **kwargs):
        super(ArgParser, self).__init__(*args, **kwargs)

        self.add_argument(
            "-q",
            "--queries",
            type=str,
            nargs="+",
            required=True,
            help="Queries to search",
        )
        self.add_argument(
            "-n",
            "--result_num",
            type=int,
            default=10,
            help="Number of search results",
        )
        self.add_argument(
            "-s",
            "--safe",
            default=False,
            action="store_true",
            help="Enable SafeSearch",
        )
        self.add_argument(
            "-t",
            "--types",
            type=str,
            nargs="+",
            default=["web"],
            choices=["web", "image", "videos", "news"],
            help="Types of search results",
        )
        self.add_argument(
            "-e",
            "--extract_webpage",
            default=False,
            action="store_true",
            help="Enable extracting main text contents from webpage",
        )
        self.add_argument(
            "-o",
            "--overwrite_query_html",
            default=False,
            action="store_true",
            help="Overwrite HTML file of query results",
        )
        self.add_argument(
            "-w",
            "--overwrite_webpage_html",
            default=False,
            action="store_true",
            help="Overwrite HTML files of webpages from query results",
        )

        self.args = self.parse_args()
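For orientation, a minimal sketch of driving the pipeline above — all names come from the code shown, and the example assumes the Google-scraping backend under `DeepWEBS/networks` still works:

```python
from webscout import DeepWEBS

deepwebs = DeepWEBS()

# DeepSearch is the pydantic model nested in DeepWEBS; extract_webpage=True
# makes extract_webpages() add a "text" field to each query result.
search = DeepWEBS.DeepSearch(
    queries=["free LLM inference"],
    result_num=5,
    safe=True,
    types=["web"],
    extract_webpage=True,
)

results = deepwebs.queries_to_search_results(search)
for query_results in results:
    for result in query_results["query_results"]:
        print(result["url"], result.get("text", "")[:80])
```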
{webscout-1.2.3 → webscout-1.2.5}/webscout/__init__.py
@@ -1,7 +1,7 @@
 """Webscout.
 
 Search for words, documents, images, videos, news, maps and text translation
-using the DuckDuckGo.com
+using the Google, DuckDuckGo.com, yep.com, phind.com, you.com, etc Also containes AI models
 """
 
 import logging
@@ -9,6 +9,8 @@ from .webscout_search import WEBS
 from .webscout_search_async import AsyncWEBS
 from .version import __version__
 from .DWEBS import DeepWEBS
+from .AIutel import appdir
+
 __all__ = ["WEBS", "AsyncWEBS", "__version__", "cli"]
 
 logging.getLogger("webscout").addHandler(logging.NullHandler())
{webscout-1.2.3 → webscout-1.2.5}/webscout.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: webscout
-Version: 1.2.3
+Version: 1.2.5
 Summary: Search for words, documents, images, videos, news, maps and text translation using the Google, DuckDuckGo.com, yep.com, phind.com, you.com, etc Also containes AI models
 Author: OEvortex
 Author-email: helpingai5@gmail.com
@@ -45,6 +45,7 @@ Requires-Dist: sse_starlette
 Requires-Dist: termcolor
 Requires-Dist: tiktoken
 Requires-Dist: tldextract
+Requires-Dist: orjson
 Provides-Extra: dev
 Requires-Dist: ruff>=0.1.6; extra == "dev"
 Requires-Dist: pytest>=7.4.2; extra == "dev"
@@ -92,6 +93,7 @@ Also containes AI models that you can use
 - [6. `BlackBox` - Search/chat With BlackBox](#6-blackbox---searchchat-with-blackbox)
 - [7. `PERPLEXITY` - Search With PERPLEXITY](#7-perplexity---search-with-perplexity)
 - [8. `OpenGPT` - chat With OPENGPT](#8-opengpt---chat-with-opengpt)
+- [9. `KOBOLDIA` -](#9-koboldia--)
 - [usage of special .LLM file from webscout (webscout.LLM)](#usage-of-special-llm-file-from-webscout-webscoutllm)
 - [`LLM`](#llm)
 
@@ -604,6 +606,24 @@ opengpt = OPENGPT(is_conversation=True, max_tokens=8000, timeout=30)
 prompt = "tell me about india"
 response_str = opengpt.chat(prompt)
 print(response_str)
+```
+### 9. `KOBOLDIA` -
+```python
+from webscout.AI import KOBOLDAI
+
+# Instantiate the KOBOLDAI class with default parameters
+koboldai = KOBOLDAI()
+
+# Define a prompt to send to the AI
+prompt = "What is the capital of France?"
+
+# Use the 'ask' method to get a response from the AI
+response = koboldai.ask(prompt)
+
+# Extract and print the message from the response
+message = koboldai.get_message(response)
+print(message)
+
 ```
 
 ## usage of special .LLM file from webscout (webscout.LLM)
All remaining files (shown as +0 -0 in the list above) are unchanged between 1.2.3 and 1.2.5.