webscout 1.2.2__tar.gz → 1.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of webscout might be problematic.
- webscout-1.2.4/DeepWEBS/documents/query_results_extractor.py +99 -0
- webscout-1.2.4/DeepWEBS/networks/google_searcher.py +52 -0
- webscout-1.2.4/DeepWEBS/networks/webpage_fetcher.py +97 -0
- {webscout-1.2.2 → webscout-1.2.4}/PKG-INFO +57 -10
- {webscout-1.2.2 → webscout-1.2.4}/README.md +54 -9
- {webscout-1.2.2 → webscout-1.2.4}/setup.py +3 -1
- {webscout-1.2.2 → webscout-1.2.4}/webscout/DWEBS.py +197 -179
- {webscout-1.2.2 → webscout-1.2.4}/webscout/__init__.py +1 -0
- webscout-1.2.4/webscout/offlineAI.py +206 -0
- webscout-1.2.4/webscout/version.py +2 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout.egg-info/PKG-INFO +57 -10
- {webscout-1.2.2 → webscout-1.2.4}/webscout.egg-info/SOURCES.txt +1 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout.egg-info/requires.txt +2 -0
- webscout-1.2.2/DeepWEBS/documents/query_results_extractor.py +0 -78
- webscout-1.2.2/DeepWEBS/networks/google_searcher.py +0 -48
- webscout-1.2.2/DeepWEBS/networks/webpage_fetcher.py +0 -107
- webscout-1.2.2/webscout/version.py +0 -2
- {webscout-1.2.2 → webscout-1.2.4}/DeepWEBS/__init__.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/DeepWEBS/documents/__init__.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/DeepWEBS/documents/webpage_content_extractor.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/DeepWEBS/networks/__init__.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/DeepWEBS/networks/filepath_converter.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/DeepWEBS/networks/network_configs.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/DeepWEBS/utilsdw/__init__.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/DeepWEBS/utilsdw/enver.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/DeepWEBS/utilsdw/logger.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/LICENSE.md +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/setup.cfg +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout/AI.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout/AIbase.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout/AIutel.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout/HelpingAI.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout/LLM.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout/__main__.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout/cli.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout/exceptions.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout/models.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout/utils.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout/webscout_search.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout/webscout_search_async.py +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout.egg-info/dependency_links.txt +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout.egg-info/entry_points.txt +0 -0
- {webscout-1.2.2 → webscout-1.2.4}/webscout.egg-info/top_level.txt +0 -0
webscout-1.2.4/DeepWEBS/documents/query_results_extractor.py
@@ -0,0 +1,99 @@
+from bs4 import BeautifulSoup
+from pathlib import Path
+from DeepWEBS.utilsdw.logger import logger
+
+class QueryResultsExtractor:
+    def __init__(self) -> None:
+        self.query_results = []
+        self.related_questions = []
+
+    def load_html(self, html_path):
+        try:
+            with open(html_path, "r", encoding="utf-8") as f:
+                html = f.read()
+            self.soup = BeautifulSoup(html, "html.parser")
+        except FileNotFoundError:
+            logger.error(f"File not found: {html_path}")
+        except Exception as e:
+            logger.error(f"Error loading HTML: {e}")
+
+    def extract_query_results(self):
+        try:
+            self.query = self.soup.find("textarea").text.strip()
+            query_result_elements = self.soup.find_all("div", class_="g")
+            for idx, result in enumerate(query_result_elements):
+                try:
+                    site = result.find("cite").find_previous("span").text.strip()
+                    url = result.find("a")["href"]
+                    title = result.find("h3").text.strip()
+                    abstract_element_conditions = [
+                        {"data-sncf": "1"},
+                        {"class_": "ITZIwc"},
+                    ]
+                    for condition in abstract_element_conditions:
+                        abstract_element = result.find("div", condition)
+                        if abstract_element is not None:
+                            abstract = abstract_element.text.strip()
+                            break
+                    else:
+                        abstract = ""
+                    logger.mesg(
+                        f"{title}\n"
+                        f" - {site}\n"
+                        f" - {url}\n"
+                        f" - {abstract}\n"
+                        f"\n"
+                    )
+                    self.query_results.append(
+                        {
+                            "title": title,
+                            "site": site,
+                            "url": url,
+                            "abstract": abstract,
+                            "index": idx,
+                            "type": "web",
+                        }
+                    )
+                except Exception as e:
+                    logger.error(f"Error extracting query result: {e}")
+            logger.success(f"- {len(query_result_elements)} query results")
+        except Exception as e:
+            logger.error(f"Error extracting query results: {e}")
+
+    def extract_related_questions(self):
+        try:
+            related_question_elements = self.soup.find_all(
+                "div", class_="related-question-pair"
+            )
+            for question_element in related_question_elements:
+                try:
+                    question = question_element.find("span").text.strip()
+                    print(question)
+                    self.related_questions.append(question)
+                except Exception as e:
+                    logger.error(f"Error extracting related question: {e}")
+            logger.success(f"- {len(self.related_questions)} related questions")
+        except Exception as e:
+            logger.error(f"Error extracting related questions: {e}")
+
+    def extract(self, html_path):
+        self.load_html(html_path)
+        self.extract_query_results()
+        self.extract_related_questions()
+        self.search_results = {
+            "query": self.query,
+            "query_results": self.query_results,
+            "related_questions": self.related_questions,
+        }
+        return self.search_results
+
+
+if __name__ == "__main__":
+    html_path_root = Path(__file__).parents[1] / "files"
+    html_filename = "python_tutorials"
+    html_path = html_path_root / f"{html_filename}.html"
+    extractor = QueryResultsExtractor()
+    try:
+        extractor.extract(html_path)
+    except Exception as e:
+        logger.error(f"Error in main function: {e}")
webscout-1.2.4/DeepWEBS/networks/google_searcher.py
@@ -0,0 +1,52 @@
+import requests
+from pathlib import Path
+from typing import Optional
+import random
+from DeepWEBS.utilsdw.enver import enver
+from DeepWEBS.utilsdw.logger import logger
+from DeepWEBS.networks.filepath_converter import QueryToFilepathConverter
+from DeepWEBS.networks.network_configs import REQUESTS_HEADERS
+
+class GoogleSearcher:
+    def __init__(self):
+        self.url = "https://www.google.com/search"
+        self.enver = enver
+        self.enver.set_envs(proxies=True)
+        self.filepath_converter = QueryToFilepathConverter()
+
+    def send_request(self, query: str, result_num: int = 10, safe: bool = False) -> requests.Response:
+        params = {
+            "q": query,
+            "num": result_num,
+        }
+        response = requests.get(
+            self.url,
+            headers=REQUESTS_HEADERS,
+            params=params,
+            proxies=self.enver.requests_proxies,
+        )
+        response.raise_for_status()  # Raise an exception for non-2xx status codes
+        return response
+
+    def save_response(self, response: requests.Response, html_path: Path) -> None:
+        html_path.parent.mkdir(parents=True, exist_ok=True)
+        logger.note(f"Saving to: [{html_path}]")
+        with html_path.open("wb") as wf:
+            wf.write(response.content)
+
+    def search(self, query: str, result_num: int = 10, safe: bool = False, overwrite: bool = False) -> Path:
+        html_path = self.filepath_converter.convert(query)
+        logger.note(f"Searching: [{query}]")
+
+        if html_path.exists() and not overwrite:
+            logger.success(f"HTML existed: {html_path}")
+        else:
+            response = self.send_request(query, result_num, safe)
+            self.save_response(response, html_path)
+
+        return html_path
+
+if __name__ == "__main__":
+    searcher = GoogleSearcher()
+    html_path = searcher.search("python tutorials")
+    print(f"HTML file saved at: {html_path}")
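Taken together, the two new modules above form a small pipeline: `GoogleSearcher.search()` downloads a results page and returns the path of the saved HTML file, and `QueryResultsExtractor.extract()` parses that file into a dict of query results and related questions. A minimal sketch of how they could be chained (not part of the diff; the import paths are assumed from the package layout shown in the file list):

```python
from DeepWEBS.networks.google_searcher import GoogleSearcher
from DeepWEBS.documents.query_results_extractor import QueryResultsExtractor

# Download the Google results page for a query; search() returns the Path of the saved HTML.
searcher = GoogleSearcher()
html_path = searcher.search("python tutorials", result_num=10)

# Parse the saved HTML into {"query": ..., "query_results": [...], "related_questions": [...]}.
extractor = QueryResultsExtractor()
results = extractor.extract(html_path)

for item in results["query_results"]:
    print(item["index"], item["title"], item["url"])
```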
webscout-1.2.4/DeepWEBS/networks/webpage_fetcher.py
@@ -0,0 +1,97 @@
+import concurrent.futures
+import random
+import requests
+import tldextract
+from pathlib import Path
+from typing import List, Tuple, Dict
+
+from DeepWEBS.utilsdw.enver import enver
+from DeepWEBS.utilsdw.logger import logger
+from DeepWEBS.networks.filepath_converter import UrlToFilepathConverter
+from DeepWEBS.networks.network_configs import IGNORE_HOSTS, REQUESTS_HEADERS
+
+class WebpageFetcher:
+    def __init__(self):
+        self.enver = enver
+        self.enver.set_envs(proxies=True)
+        self.filepath_converter = UrlToFilepathConverter()
+
+    def is_ignored_host(self, url: str) -> bool:
+        host = tldextract.extract(url).registered_domain
+        return host in IGNORE_HOSTS
+
+    def send_request(self, url: str) -> requests.Response:
+        try:
+            user_agent = random.choice(REQUESTS_HEADERS["User-Agent"])
+            response = requests.get(
+                url=url,
+                headers={"User-Agent": user_agent},
+                proxies=self.enver.requests_proxies,
+                timeout=15,
+            )
+            response.raise_for_status()
+            return response
+        except requests.exceptions.RequestException as e:
+            logger.warn(f"Failed to fetch: [{url}] | {e}")
+            return None
+
+    def save_response(self, response: requests.Response, html_path: Path) -> None:
+        if response is None:
+            return
+
+        html_path.parent.mkdir(parents=True, exist_ok=True)
+        logger.success(f"Saving to: [{html_path}]")
+        with html_path.open("wb") as wf:
+            wf.write(response.content)
+
+    def fetch(self, url: str, overwrite: bool = False, output_parent: str = None) -> Path:
+        logger.note(f"Fetching: [{url}]")
+        html_path = self.filepath_converter.convert(url, parent=output_parent)
+
+        if self.is_ignored_host(url):
+            logger.warn(f"Ignored host: [{tldextract.extract(url).registered_domain}]")
+            return html_path
+
+        if html_path.exists() and not overwrite:
+            logger.success(f"HTML existed: [{html_path}]")
+        else:
+            response = self.send_request(url)
+            self.save_response(response, html_path)
+
+        return html_path
+
+class BatchWebpageFetcher:
+    def __init__(self):
+        self.done_count = 0
+        self.total_count = 0
+        self.url_and_html_path_list: List[Dict[str, str]] = []
+
+    def fetch_single_webpage(self, url: str, overwrite: bool = False, output_parent: str = None) -> Tuple[str, Path]:
+        webpage_fetcher = WebpageFetcher()
+        html_path = webpage_fetcher.fetch(url, overwrite, output_parent)
+        self.url_and_html_path_list.append({"url": url, "html_path": str(html_path)})
+        self.done_count += 1
+        logger.success(f"> [{self.done_count}/{self.total_count}] Fetched: {url}")
+        return url, html_path
+
+    def fetch(self, urls: List[str], overwrite: bool = False, output_parent: str = None) -> List[Dict[str, str]]:
+        self.urls = urls
+        self.total_count = len(self.urls)
+
+        with concurrent.futures.ThreadPoolExecutor() as executor:
+            futures = [
+                executor.submit(self.fetch_single_webpage, url, overwrite, output_parent)
+                for url in urls
+            ]
+            concurrent.futures.wait(futures)
+
+        return self.url_and_html_path_list
+
+if __name__ == "__main__":
+    urls = [
+        "https://stackoverflow.com/questions/295135/turn-a-string-into-a-valid-filename",
+        "https://www.liaoxuefeng.com/wiki/1016959663602400/1017495723838528",
+        "https://docs.python.org/zh-cn/3/tutorial/interpreter.html",
+    ]
+    batch_webpage_fetcher = BatchWebpageFetcher()
+    batch_webpage_fetcher.fetch(urls=urls, overwrite=True, output_parent="python tutorials")
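`BatchWebpageFetcher` wraps `WebpageFetcher` in a `ThreadPoolExecutor`, so a list of URLs is downloaded concurrently and `fetch()` returns one `{"url", "html_path"}` mapping per page. A rough usage sketch (not part of the diff; the URLs here are taken from the module's own `__main__` block and could equally be the `url` fields produced by `QueryResultsExtractor`):

```python
from DeepWEBS.networks.webpage_fetcher import BatchWebpageFetcher

# Any list of URLs works here, e.g. the "url" fields returned by QueryResultsExtractor.
urls = [
    "https://stackoverflow.com/questions/295135/turn-a-string-into-a-valid-filename",
    "https://docs.python.org/zh-cn/3/tutorial/interpreter.html",
]

fetcher = BatchWebpageFetcher()
url_and_paths = fetcher.fetch(urls=urls, overwrite=False, output_parent="python tutorials")

for entry in url_and_paths:
    print(entry["url"], "->", entry["html_path"])
```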
{webscout-1.2.2 → webscout-1.2.4}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: webscout
-Version: 1.2.2
+Version: 1.2.4
 Summary: Search for words, documents, images, videos, news, maps and text translation using the Google, DuckDuckGo.com, yep.com, phind.com, you.com, etc Also containes AI models
 Author: OEvortex
 Author-email: helpingai5@gmail.com
@@ -45,6 +45,8 @@ Requires-Dist: sse_starlette
 Requires-Dist: termcolor
 Requires-Dist: tiktoken
 Requires-Dist: tldextract
+Requires-Dist: gpt4all
+Requires-Dist: orjson
 Provides-Extra: dev
 Requires-Dist: ruff>=0.1.6; extra == "dev"
 Requires-Dist: pytest>=7.4.2; extra == "dev"
@@ -69,6 +71,7 @@ Also containes AI models that you can use
 - [Regions](#regions)
 - [DeepWEBS: Advanced Web Searches](#deepwebs-advanced-web-searches)
 - [Activating DeepWEBS](#activating-deepwebs)
+- [Point to remember before using `DeepWEBS`](#point-to-remember-before-using-deepwebs)
 - [Usage Example](#usage-example)
 - [WEBS and AsyncWEBS classes](#webs-and-asyncwebs-classes)
 - [Exceptions](#exceptions)
@@ -91,6 +94,7 @@ Also containes AI models that you can use
 - [6. `BlackBox` - Search/chat With BlackBox](#6-blackbox---searchchat-with-blackbox)
 - [7. `PERPLEXITY` - Search With PERPLEXITY](#7-perplexity---search-with-perplexity)
 - [8. `OpenGPT` - chat With OPENGPT](#8-opengpt---chat-with-opengpt)
+- [9. `GPT4ALL` - chat offline with Language models using gpt4all from webscout](#9-gpt4all---chat-offline-with-language-models-using-gpt4all-from-webscout)
 - [usage of special .LLM file from webscout (webscout.LLM)](#usage-of-special-llm-file-from-webscout-webscoutllm)
 - [`LLM`](#llm)

@@ -224,26 +228,33 @@ ___

 To utilize the `DeepWEBS` feature, you must first create an instance of the `DeepWEBS` . This is designed to be used independently of the `WEBS` , offering a focused approach to web searches.

+### Point to remember before using `DeepWEBS`
+As `DeepWEBS` is designed to extract relevant information directly from webpages and Search engine, It extracts html from webpages and saves them to folder named files in `DeepWEBS` that can be found at `C:\Users\Username\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\DeepWEBS`
+
 ### Usage Example

 Here's a basic example of how to use the `DeepWEBS` :
 ```python
 from webscout import DeepWEBS

-
 def perform_web_search(query):
-
-
-
-
-
-
+    # Initialize the DeepWEBS class
+    D = DeepWEBS()
+
+    # Set up the search parameters
+    search_params = D.DeepSearch(
+        queries=[query], # Query to search
+        result_num=5, # Number of search results
+        safe=True, # Enable SafeSearch
+        types=["web"], # Search type: web
         extract_webpage=True, # True for extracting webpages
         overwrite_query_html=False,
         overwrite_webpage_html=False,
     )
-
-
+
+    # Execute the search and retrieve results
+    results = D.queries_to_search_results(search_params)
+
     return results

 def print_search_results(results):
@@ -260,8 +271,13 @@ def print_search_results(results):
         print("No search results found.")

 def main():
+    # Prompt the user for a search query
     query = input("Enter your search query: ")
+
+    # Perform the web search
     results = perform_web_search(query)
+
+    # Print the search results
     print_search_results(results)

 if __name__ == "__main__":
@@ -592,6 +608,37 @@ prompt = "tell me about india"
 response_str = opengpt.chat(prompt)
 print(response_str)
 ```
+### 9. `GPT4ALL` - chat offline with Language models using gpt4all from webscout
+```python
+from webscout import GPT4ALL
+
+# Initialize the GPT4ALL class with your model path and other optional parameters
+gpt4all_instance = GPT4ALL(
+    model="path/to/your/model/file", # Replace with the actual path to your model file
+    is_conversation=True,
+    max_tokens=800,
+    temperature=0.7,
+    presence_penalty=0,
+    frequency_penalty=1.18,
+    top_p=0.4,
+    intro="Hello, how can I assist you today?",
+    filepath="path/to/conversation/history/file", # Optional, for conversation history
+    update_file=True,
+    history_offset=10250,
+    act=None # Optional, for using an awesome prompt as intro
+)
+
+# Generate a response from the AI model
+response = gpt4all_instance.chat(
+    prompt="What is the weather like today?",
+    stream=False, # Set to True if you want to stream the response
+    optimizer=None, # Optional, specify an optimizer if needed
+    conversationally=False # Set to True for conversationally generated responses
+)
+
+# Print the generated response
+print(response)
+```

 ## usage of special .LLM file from webscout (webscout.LLM)

{webscout-1.2.2 → webscout-1.2.4}/README.md
@@ -18,6 +18,7 @@ Also containes AI models that you can use
 - [Regions](#regions)
 - [DeepWEBS: Advanced Web Searches](#deepwebs-advanced-web-searches)
 - [Activating DeepWEBS](#activating-deepwebs)
+- [Point to remember before using `DeepWEBS`](#point-to-remember-before-using-deepwebs)
 - [Usage Example](#usage-example)
 - [WEBS and AsyncWEBS classes](#webs-and-asyncwebs-classes)
 - [Exceptions](#exceptions)
@@ -40,6 +41,7 @@ Also containes AI models that you can use
 - [6. `BlackBox` - Search/chat With BlackBox](#6-blackbox---searchchat-with-blackbox)
 - [7. `PERPLEXITY` - Search With PERPLEXITY](#7-perplexity---search-with-perplexity)
 - [8. `OpenGPT` - chat With OPENGPT](#8-opengpt---chat-with-opengpt)
+- [9. `GPT4ALL` - chat offline with Language models using gpt4all from webscout](#9-gpt4all---chat-offline-with-language-models-using-gpt4all-from-webscout)
 - [usage of special .LLM file from webscout (webscout.LLM)](#usage-of-special-llm-file-from-webscout-webscoutllm)
 - [`LLM`](#llm)

@@ -173,26 +175,33 @@ ___

 To utilize the `DeepWEBS` feature, you must first create an instance of the `DeepWEBS` . This is designed to be used independently of the `WEBS` , offering a focused approach to web searches.

+### Point to remember before using `DeepWEBS`
+As `DeepWEBS` is designed to extract relevant information directly from webpages and Search engine, It extracts html from webpages and saves them to folder named files in `DeepWEBS` that can be found at `C:\Users\Username\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\DeepWEBS`
+
 ### Usage Example

 Here's a basic example of how to use the `DeepWEBS` :
 ```python
 from webscout import DeepWEBS

-
 def perform_web_search(query):
-
-
-
-
-
-
+    # Initialize the DeepWEBS class
+    D = DeepWEBS()
+
+    # Set up the search parameters
+    search_params = D.DeepSearch(
+        queries=[query], # Query to search
+        result_num=5, # Number of search results
+        safe=True, # Enable SafeSearch
+        types=["web"], # Search type: web
         extract_webpage=True, # True for extracting webpages
         overwrite_query_html=False,
         overwrite_webpage_html=False,
     )
-
-
+
+    # Execute the search and retrieve results
+    results = D.queries_to_search_results(search_params)
+
     return results

 def print_search_results(results):
@@ -209,8 +218,13 @@ def print_search_results(results):
         print("No search results found.")

 def main():
+    # Prompt the user for a search query
     query = input("Enter your search query: ")
+
+    # Perform the web search
     results = perform_web_search(query)
+
+    # Print the search results
     print_search_results(results)

 if __name__ == "__main__":
@@ -541,6 +555,37 @@ prompt = "tell me about india"
 response_str = opengpt.chat(prompt)
 print(response_str)
 ```
+### 9. `GPT4ALL` - chat offline with Language models using gpt4all from webscout
+```python
+from webscout import GPT4ALL
+
+# Initialize the GPT4ALL class with your model path and other optional parameters
+gpt4all_instance = GPT4ALL(
+    model="path/to/your/model/file", # Replace with the actual path to your model file
+    is_conversation=True,
+    max_tokens=800,
+    temperature=0.7,
+    presence_penalty=0,
+    frequency_penalty=1.18,
+    top_p=0.4,
+    intro="Hello, how can I assist you today?",
+    filepath="path/to/conversation/history/file", # Optional, for conversation history
+    update_file=True,
+    history_offset=10250,
+    act=None # Optional, for using an awesome prompt as intro
+)
+
+# Generate a response from the AI model
+response = gpt4all_instance.chat(
+    prompt="What is the weather like today?",
+    stream=False, # Set to True if you want to stream the response
+    optimizer=None, # Optional, specify an optimizer if needed
+    conversationally=False # Set to True for conversationally generated responses
+)
+
+# Print the generated response
+print(response)
+```

 ## usage of special .LLM file from webscout (webscout.LLM)

{webscout-1.2.2 → webscout-1.2.4}/setup.py
@@ -9,7 +9,7 @@ with open("README.md", encoding="utf-8") as f:

 setup(
     name="webscout",
-    version="1.2.2",
+    version="1.2.4", # Use the version variable from the version.py file
     description="Search for words, documents, images, videos, news, maps and text translation using the Google, DuckDuckGo.com, yep.com, phind.com, you.com, etc Also containes AI models",
     long_description=README,
     long_description_content_type="text/markdown",
@@ -53,6 +53,8 @@ setup(
         "termcolor",
         "tiktoken",
         "tldextract",
+        "gpt4all",
+        "orjson",
     ],
     entry_points={
         "console_scripts": [